1 #ifndef PARALUTION_OCL_KERNELS_GENERAL_HPP_
2 #define PARALUTION_OCL_KERNELS_GENERAL_HPP_
7 "__kernel void kernel_set_to(const int size, const ValueType val, __global ValueType *data) {\n"
9 " int gid = get_global_id(0);\n"
16 "__kernel void kernel_red_recurse(__global int *dst, __global int *src, int numElems) {\n"
18 " int index = BLOCK_SIZE * get_global_id(0);\n"
20 " if (index >= numElems)\n"
25 " if (i < BLOCK_SIZE)\n"
30 " while (i >= BLOCK_SIZE) {\n"
39 "__kernel void kernel_red_partial_sum(__global int *dst, __global const int *src,\n"
40 " const int numElems, const int shift) {\n"
42 " int index = get_global_id(0);\n"
43 " int tid = get_local_id(0);\n"
44 " int gid = get_group_id(0);\n"
46 " if (index < numElems) {\n"
48 " __local int data[BLOCK_SIZE];\n"
50 " data[tid] = src[index];\n"
52 " barrier(CLK_LOCAL_MEM_FENCE);\n"
54 " for (int i = BLOCK_SIZE/2; i > 0; i/=2) {\n"
57 " data[tid] = data[tid] + data[tid+i];\n"
59 " barrier(CLK_LOCAL_MEM_FENCE);\n"
63 " if (tid == 0 && BLOCK_SIZE*(1+gid)-1<numElems)\n"
64 " dst[BLOCK_SIZE*(1+gid)-1+shift] = data[0];\n"
70 "__kernel void kernel_red_extrapolate(__global int *dst,\n"
71 " __global const int *srcBorder,\n"
72 " __global const int *srcData,\n"
74 " const int shift) {\n"
76 " int index = get_local_size(0) * get_local_id(0);\n"
78 " if (index < numElems-1) {\n"
80 " int sum = srcBorder[index];\n"
82 " for(int i = 0; i < get_local_size(0) && index+i<numElems; ++i) {\n"
83 " sum += srcData[index+i];\n"
84 " dst[index+i+shift] = sum;\n"
95 #endif // PARALUTION_OCL_KERNELS_GENERAL_HPP_
const char * ocl_kernels_general
Definition: ocl_kernels_general.hpp:6
Definition: backend_manager.cpp:43