PARALUTION  1.0.0
PARALUTION
ocl_kernels_general.hpp
Go to the documentation of this file.
#ifndef PARALUTION_OCL_KERNELS_GENERAL_HPP_
#define PARALUTION_OCL_KERNELS_GENERAL_HPP_

namespace paralution {

// OpenCL C source of the general-purpose kernels, stored as one string.
//
// The source uses ValueType and BLOCK_SIZE without defining them, so both have
// to be supplied when the program is built (for example through build options
// or a source fragment placed in front of this one).
//
// kernel_red_partial_sum indexes a __local array of BLOCK_SIZE entries by the
// local id and reduces over all BLOCK_SIZE entries, so it expects to be
// launched with a work-group size equal to BLOCK_SIZE.
// NOTE(review): the halving loop only sums every entry when BLOCK_SIZE is a
// power of two -- confirm against the value used by the host code.
const char *ocl_kernels_general =
  // kernel_set_to: writes val into data[0 .. size-1], one element per work-item.
  "__kernel void kernel_set_to(const int size, const ValueType val, __global ValueType *data) {\n"
  "\n"
  "  int gid = get_global_id(0);\n"
  "\n"
  "  if (gid < size)\n"
  "    data[gid] = val;\n"
  "\n"
  "}\n"
  "\n"
  // kernel_red_recurse: work-item g handles index = BLOCK_SIZE * g. For
  // BLOCK_SIZE <= index < numElems it stores in dst[index] the sum of
  // src[index], src[index - BLOCK_SIZE], ..., src[BLOCK_SIZE].
  // Index 0 is skipped and dst[0] is left untouched.
  "__kernel void kernel_red_recurse(__global int *dst, __global int *src, int numElems) {\n"
  "\n"
  "  int index = BLOCK_SIZE * get_global_id(0);\n"
  "\n"
  "  if (index >= numElems)\n"
  "    return;\n"
  "\n"
  "  int i = index;\n"
  "\n"
  "  if (i < BLOCK_SIZE)\n"
  "    return;\n"
  "\n"
  "  int a = 0;\n"
  "\n"
  "  while (i >= BLOCK_SIZE) {\n"
  "    a += src[i];\n"
  "    i -= BLOCK_SIZE;\n"
  "  }\n"
  "\n"
  "  dst[index] = a;\n"
  "\n"
  "}\n"
  "\n"
  // kernel_red_partial_sum: each work-group sums its BLOCK_SIZE consecutive
  // entries of src in local memory. Work-item 0 stores the total at
  // dst[BLOCK_SIZE*(group+1) - 1 + shift], but only when the group's last
  // element lies inside [0, numElems); a partially filled trailing group
  // writes nothing.
  //
  // The __local array is declared at kernel function scope and out-of-range
  // work-items contribute 0 instead of leaving early, so that every work-item
  // of a group executes every barrier().
  "__kernel void kernel_red_partial_sum(__global int *dst, __global const int *src,\n"
  "                                     const int numElems, const int shift) {\n"
  "\n"
  "  __local int data[BLOCK_SIZE];\n"
  "\n"
  "  int index = get_global_id(0);\n"
  "  int tid = get_local_id(0);\n"
  "  int gid = get_group_id(0);\n"
  "\n"
  "  data[tid] = (index < numElems) ? src[index] : 0;\n"
  "\n"
  "  barrier(CLK_LOCAL_MEM_FENCE);\n"
  "\n"
  "  for (int i = BLOCK_SIZE/2; i > 0; i /= 2) {\n"
  "\n"
  "    if (tid < i)\n"
  "      data[tid] = data[tid] + data[tid+i];\n"
  "\n"
  "    barrier(CLK_LOCAL_MEM_FENCE);\n"
  "\n"
  "  }\n"
  "\n"
  "  if (tid == 0 && index < numElems && BLOCK_SIZE*(1+gid)-1 < numElems)\n"
  "    dst[BLOCK_SIZE*(1+gid)-1+shift] = data[0];\n"
  "\n"
  "}\n"
  "\n"
  // kernel_red_extrapolate: each work-item owns one chunk of get_local_size(0)
  // consecutive elements starting at index. Beginning with srcBorder[index],
  // it accumulates srcData[index + i] and writes each running sum to
  // dst[index + i + shift].
  //
  // The chunk start is derived from the global id so that every work-group
  // processes its own chunks; with the local id, all groups would repeat the
  // chunks of the first group. For a single-group launch both are identical.
  "__kernel void kernel_red_extrapolate(__global int *dst,\n"
  "                                     __global const int *srcBorder,\n"
  "                                     __global const int *srcData,\n"
  "                                     int numElems,\n"
  "                                     const int shift) {\n"
  "\n"
  "  const int chunk = (int) get_local_size(0);\n"
  "  int index = chunk * (int) get_global_id(0);\n"
  "\n"
  "  if (index < numElems-1) {\n"
  "\n"
  "    int sum = srcBorder[index];\n"
  "\n"
  "    for (int i = 0; i < chunk && index+i < numElems; ++i) {\n"
  "      sum += srcData[index+i];\n"
  "      dst[index+i+shift] = sum;\n"
  "    }\n"
  "\n"
  "  }\n"
  "\n"
  "}\n"
  "\n"
  "\n"
;
}

#endif // PARALUTION_OCL_KERNELS_GENERAL_HPP_
const char * ocl_kernels_general
Definition: ocl_kernels_general.hpp:6
Definition: backend_manager.cpp:43