PARALUTION  1.0.0
PARALUTION
ocl_kernels_vector.hpp
Go to the documentation of this file.
1 #ifndef PARALUTION_OCL_KERNELS_VECTOR_HPP_
2 #define PARALUTION_OCL_KERNELS_VECTOR_HPP_
3 
4 namespace paralution {
5 
6 const char *ocl_kernels_vector =
7  "__kernel void kernel_scale(const int size, const ValueType alpha, __global ValueType *x) {\n"
8  "\n"
9  " int gid = get_global_id(0);\n"
10  "\n"
11  " if (gid < size)\n"
12  " x[gid] = alpha * x[gid];\n"
13  "\n"
14  "}\n"
15  "\n"
16  "__kernel void kernel_scaleadd(const int size, const ValueType alpha,\n"
17  " __global const ValueType *x, __global ValueType *out) {\n"
18  "\n"
19  " int gid = get_global_id(0);\n"
20  "\n"
21  " if (gid < size)\n"
22  " out[gid] = alpha * out[gid] + x[gid];\n"
23  "\n"
24  "}\n"
25  "\n"
26  "__kernel void kernel_scaleaddscale(const int size, const ValueType alpha, const ValueType beta, \n"
27  " __global const ValueType *x, __global ValueType *out) {\n"
28  "\n"
29  " int gid = get_global_id(0);\n"
30  "\n"
31  " if (gid < size)\n"
32  " out[gid] = alpha * out[gid] + beta * x[gid];\n"
33  "\n"
34  "}\n"
35  "\n"
36  "__kernel void kernel_scaleaddscale_offset(const int size, const int src_offset, const int dst_offset, \n"
37  " const ValueType alpha, const ValueType beta, \n"
38  " __global const ValueType *x, __global ValueType *out) {\n"
39  "\n"
40  " int gid = get_global_id(0);\n"
41  "\n"
42  " if (gid < size)\n"
43  " out[gid+dst_offset] = alpha * out[gid+dst_offset] + beta * x[gid+src_offset];\n"
44  "\n"
45  "}\n"
46  "\n"
47  "__kernel void kernel_scaleadd2(const int size, const ValueType alpha, const ValueType beta, const ValueType gamma,\n"
48  " __global const ValueType *x, __global const ValueType *y, __global ValueType *out) {\n"
49  "\n"
50  " int gid = get_global_id(0);\n"
51  "\n"
52  " if (gid < size)\n"
53  " out[gid] = alpha * out[gid] + beta * x[gid] + gamma * y[gid];\n"
54  "\n"
55  "}\n"
56  "\n"
57  "__kernel void kernel_pointwisemult(const int size, __global const ValueType *x, __global ValueType *out) {\n"
58  "\n"
59  " int gid = get_global_id(0);\n"
60  "\n"
61  " if (gid < size)\n"
62  " out[gid] = out[gid] * x[gid];\n"
63  "\n"
64  "}\n"
65  "\n"
66  "__kernel void kernel_pointwisemult2(const int size, __global const ValueType *x, __global const ValueType *y,\n"
67  " __global ValueType *out) {\n"
68  "\n"
69  " int gid = get_global_id(0);\n"
70  "\n"
71  " if (gid < size)\n"
72  " out[gid] = y[gid] * x[gid];\n"
73  "\n"
74  "}\n"
75  "\n"
76  "__kernel void kernel_copy_offset_from(const int size, const int src_offset, const int dst_offset,\n"
77  " __global const ValueType *in, __global ValueType *out) {\n"
78  "\n"
79  " int gid = get_global_id(0);\n"
80  "\n"
81  " if (gid < size)\n"
82  " out[gid+dst_offset] = in[gid+src_offset];\n"
83  "\n"
84  "}\n"
85  "\n"
86  "__kernel void kernel_permute(const int size, __global const int *permute,\n"
87  " __global const ValueType *in, __global ValueType *out) {\n"
88  "\n"
89  " int gid = get_global_id(0);\n"
90  "\n"
91  " if (gid < size)\n"
92  " out[permute[gid]] = in[gid];\n"
93  "\n"
94  "}\n"
95  "\n"
96  "__kernel void kernel_permute_backward(const int size, __global const int *permute,\n"
97  " __global const ValueType *in, __global ValueType *out) {\n"
98  "\n"
99  " int gid = get_global_id(0);\n"
100  "\n"
101  " if (gid < size)\n"
102  " out[gid] = in[permute[gid]];\n"
103  "\n"
104  "}\n"
105  "\n"
106  "__kernel void kernel_dot(const int size,\n"
107  " __global const ValueType *x, __global const ValueType *y,\n"
108  " __global ValueType *out, __local ValueType *sdata,\n"
109  " const int GROUP_SIZE, const int LOCAL_SIZE) {\n"
110  "\n"
111  " int tid = get_local_id(0);\n"
112  "\n"
113  " sdata[tid] = (ValueType)(0.0);\n"
114  "\n"
115  " int group_id = GROUP_SIZE * get_group_id(0);\n"
116  " int gid = group_id + tid;\n"
117  "\n"
118  " for (int i = 0; i < LOCAL_SIZE; ++i, gid += BLOCK_SIZE) {\n"
119  "\n"
120  " if (gid < size)\n"
121  " sdata[tid] += x[gid] * y[gid];\n"
122  " else\n"
123  " i = LOCAL_SIZE;\n"
124  "\n"
125  " }\n"
126  "\n"
127  " barrier(CLK_LOCAL_MEM_FENCE);\n"
128  "\n"
129  " for (int i = BLOCK_SIZE/2; i > 0; i /= 2) {\n"
130  "\n"
131  " if (tid < i)\n"
132  " sdata[tid] += sdata[tid + i];\n"
133  "\n"
134  " barrier(CLK_LOCAL_MEM_FENCE);\n"
135  "\n"
136  " }\n"
137  "\n"
138  " if (tid == 0)\n"
139  " out[get_group_id(0)] = sdata[tid];\n"
140  "\n"
141  "}\n"
142  "\n"
143  "__kernel void kernel_dotc(const int size,\n"
144  " __global const ValueType *x, __global const ValueType *y,\n"
145  " __global ValueType *out, __local ValueType *sdata,\n"
146  " const int GROUP_SIZE, const int LOCAL_SIZE) {\n"
147  "\n"
148  " int tid = get_local_id(0);\n"
149  "\n"
150  " sdata[tid] = (ValueType)(0.0);\n"
151  "\n"
152  " int group_id = GROUP_SIZE * get_group_id(0);\n"
153  " int gid = group_id + tid;\n"
154  "\n"
155  " for (int i = 0; i < LOCAL_SIZE; ++i, gid += BLOCK_SIZE) {\n"
156  "\n"
157  " if (gid < size)\n"
158  " sdata[tid] += x[gid] * y[gid];\n"
159  " else\n"
160  " i = LOCAL_SIZE;\n"
161  "\n"
162  " }\n"
163  "\n"
164  " barrier(CLK_LOCAL_MEM_FENCE);\n"
165  "\n"
166  " for (int i = BLOCK_SIZE/2; i > 0; i /= 2) {\n"
167  "\n"
168  " if (tid < i)\n"
169  " sdata[tid] += sdata[tid + i];\n"
170  "\n"
171  " barrier(CLK_LOCAL_MEM_FENCE);\n"
172  "\n"
173  " }\n"
174  "\n"
175  " if (tid == 0)\n"
176  " out[get_group_id(0)] = sdata[tid];\n"
177  "\n"
178  "}\n"
179  "\n"
180  "__kernel void kernel_norm(const int size, __global const ValueType *x,\n"
181  " __global ValueType *out, __local ValueType *sdata,\n"
182  " const int GROUP_SIZE, const int LOCAL_SIZE) {\n"
183  "\n"
184  " int tid = get_local_id(0);\n"
185  "\n"
186  " sdata[tid] = (ValueType)(0.0);\n"
187  "\n"
188  " int group_id = GROUP_SIZE * get_group_id(0);\n"
189  " int gid = group_id + tid;\n"
190  "\n"
191  " for (int i = 0; i < LOCAL_SIZE; ++i, gid += BLOCK_SIZE) {\n"
192  "\n"
193  " if (gid < size)\n"
194  " sdata[tid] += x[gid] * x[gid];\n"
195  " else\n"
196  " i = LOCAL_SIZE;\n"
197  "\n"
198  " }\n"
199  "\n"
200  " barrier(CLK_LOCAL_MEM_FENCE);\n"
201  "\n"
202  " for (int i = BLOCK_SIZE/2; i > 0; i /= 2) {\n"
203  "\n"
204  " if (tid < i)\n"
205  " sdata[tid] += sdata[tid + i];\n"
206  "\n"
207  " barrier(CLK_LOCAL_MEM_FENCE);\n"
208  "\n"
209  " }\n"
210  "\n"
211  " if (tid == 0)\n"
212  " out[get_group_id(0)] = sdata[tid];\n"
213  "\n"
214  "}\n"
215  "\n"
216  "__kernel void kernel_axpy(const int size, const ValueType alpha,\n"
217  " __global const ValueType *x, __global ValueType *out) {\n"
218  "\n"
219  " int gid = get_global_id(0);\n"
220  "\n"
221  " if (gid < size)\n"
222  " out[gid] += alpha * x[gid];\n"
223  "\n"
224  "}\n"
225  "\n"
226  "__kernel void kernel_reduce( const int size,\n"
227  " __global const ValueType *data,\n"
228  " __global ValueType *out,\n"
229  " __local ValueType *sdata,\n"
230  " const int GROUP_SIZE,\n"
231  " const int LOCAL_SIZE) {\n"
232  "\n"
233  " int tid = get_local_id(0);\n"
234  "\n"
235  " sdata[tid] = (ValueType)(0.0);\n"
236  "\n"
237  " int gid = GROUP_SIZE * get_group_id(0) + tid;\n"
238  "\n"
239  " for (int i = 0; i < LOCAL_SIZE; ++i, gid += BLOCK_SIZE) {\n"
240  "\n"
241  " if (gid < size)\n"
242  " sdata[tid] += data[gid];\n"
243  " else\n"
244  " i = LOCAL_SIZE;\n"
245  "\n"
246  " }\n"
247  "\n"
248  " barrier(CLK_LOCAL_MEM_FENCE);\n"
249  "\n"
250  " for (int i = BLOCK_SIZE/2; i > 0; i /= 2) {\n"
251  "\n"
252  " if ( tid < i )\n"
253  " sdata[tid] += sdata[tid + i];\n"
254  "\n"
255  " barrier(CLK_LOCAL_MEM_FENCE);\n"
256  "\n"
257  " }\n"
258  "\n"
259  " if (tid == 0)\n"
260  " out[get_group_id(0)] = sdata[tid];\n"
261  "\n"
262  "}\n"
263  "\n"
264  "__kernel void kernel_asum( const int size,\n"
265  " __global const ValueType *data,\n"
266  " __global ValueType *out,\n"
267  " __local ValueType *sdata,\n"
268  " const int GROUP_SIZE,\n"
269  " const int LOCAL_SIZE) {\n"
270  "\n"
271  " int tid = get_local_id(0);\n"
272  "\n"
273  " sdata[tid] = (ValueType)(0.0);\n"
274  "\n"
275  " int gid = GROUP_SIZE * get_group_id(0) + tid;\n"
276  "\n"
277  " for (int i = 0; i < LOCAL_SIZE; ++i, gid += BLOCK_SIZE) {\n"
278  "\n"
279  " if (gid < size)\n"
280  " sdata[tid] += fabs(data[gid]);\n"
281  " else\n"
282  " i = LOCAL_SIZE;\n"
283  "\n"
284  " }\n"
285  "\n"
286  " barrier(CLK_LOCAL_MEM_FENCE);\n"
287  "\n"
288  " for (int i = BLOCK_SIZE/2; i > 0; i /= 2) {\n"
289  "\n"
290  " if (tid < i)\n"
291  " sdata[tid] += sdata[tid + i];\n"
292  "\n"
293  " barrier(CLK_LOCAL_MEM_FENCE);\n"
294  "\n"
295  " }\n"
296  "\n"
297  " if (tid == 0)\n"
298  " out[get_group_id(0)] = sdata[tid];\n"
299  "\n"
300  "}\n"
301  "\n"
302  "__kernel void kernel_amax( const int size,\n"
303  " __global const ValueType *data,\n"
304  " __global ValueType *out,\n"
305  " __global int *iout,\n"
306  " __local ValueType *sdata,\n"
307  " __local int *idata,\n"
308  " const int GROUP_SIZE,\n"
309  " const int LOCAL_SIZE) {\n"
310  "\n"
311  " int tid = get_local_id(0);\n"
312  "\n"
313  " sdata[tid] = (ValueType)(0.0);\n"
314  " idata[tid] = 0;\n"
315  "\n"
316  " int gid = GROUP_SIZE * get_group_id(0) + tid;\n"
317  "\n"
318  " for (int i = 0; i < LOCAL_SIZE; ++i, gid += BLOCK_SIZE) {\n"
319  "\n"
320  " if (gid < size) {\n"
321  " ValueType tmp = data[gid];\n"
322  " if (fabs(tmp) > fabs(sdata[tid])) {\n"
323  " sdata[tid] = fabs(tmp);\n"
324  " idata[tid] = gid;\n"
325  " }\n"
326  " }\n"
327  "\n"
328  " }\n"
329  "\n"
330  " barrier(CLK_LOCAL_MEM_FENCE);\n"
331  "\n"
332  " for (int i = BLOCK_SIZE/2; i > 0; i /= 2) {\n"
333  "\n"
334  " if (tid < i) {\n"
335  " ValueType tmp = sdata[tid+i];\n"
336  " if (fabs(tmp) > fabs(sdata[tid])) {\n"
337  " sdata[tid] = fabs(tmp);\n"
338  " idata[tid] = idata[tid+i];\n"
339  " }\n"
340  " }\n"
341  "\n"
342  " barrier(CLK_LOCAL_MEM_FENCE);\n"
343  "\n"
344  " }\n"
345  "\n"
346  " if (tid == 0) {\n"
347  " out[get_group_id(0)] = sdata[tid];\n"
348  " iout[get_group_id(0)] = idata[tid];\n"
349  " }\n"
350  "\n"
351  "}\n"
352  "\n"
353  "__kernel void kernel_power(const int n, const double power, __global ValueType *out) {\n"
354  "\n"
355  " int gid = get_global_id(0);\n"
356  "\n"
357  " if (gid < n)\n"
358  " out[gid] = pow(out[gid], (ValueType)(power));\n"
359  "\n"
360  "}\n"
361  "\n"
362  "__kernel void kernel_copy_from_float(const int n, __global const float *in, __global ValueType *out) {\n"
363  "\n"
364  " int ind = get_global_id(0);\n"
365  "\n"
366  " if (ind < n)\n"
367  " out[ind] = (ValueType)(in[ind]);\n"
368  "\n"
369  "}\n"
370  "\n"
371  "__kernel void kernel_copy_from_double(const int n, __global const double *in, __global ValueType *out) {\n"
372  "\n"
373  " int ind = get_global_id(0);\n"
374  "\n"
375  " if (ind < n)\n"
376  " out[ind] = (ValueType)(in[ind]);\n"
377  "\n"
378  "}\n"
379  "\n"
380 ;
381 }
382 
383 #endif // PARALUTION_OCL_KERNELS_VECTOR_HPP_
const char * ocl_kernels_vector
Definition: ocl_kernels_vector.hpp:6
Definition: backend_manager.cpp:43