1 #ifndef PARALUTION_OCL_KERNELS_ELL_HPP_
2 #define PARALUTION_OCL_KERNELS_ELL_HPP_
// kernel_ell_spmv: per-row sparse matrix-vector product accumulation.
//
// Each work-item handles the row given by get_global_id(0); work-items whose
// row index is >= num_rows do no accumulation.
//
// Parameters (as used by the visible code):
//   num_rows         - row count; also the stride between consecutive entries
//                      of one row in Acol/Aval
//   num_cols         - exclusive upper bound for a usable column index
//   num_cols_per_row - number of stored entries visited per row
//   Acol, Aval       - column indices and values; entry n of a row is read at
//                      index n * num_rows + row
//   x                - input vector, indexed by column
//   y                - output vector
//
// NOTE(review): ValueType is not declared in this excerpt; presumably it is
// defined before the kernel source is compiled - confirm.
// NOTE(review): the statement that stores `sum` into y and the closing braces
// are not visible in this listing.
10 "__kernel void kernel_ell_spmv(const int num_rows, \n"
11 " const int num_cols, \n"
12 " const int num_cols_per_row,\n"
13 " __global const int *Acol,\n"
14 " __global const ValueType *Aval, \n"
15 " __global const ValueType *x, \n"
16 " __global ValueType *y) {\n"
18 " int row = get_global_id(0);\n"
20 " if (row < num_rows) {\n"
22 " ValueType sum = (ValueType)(0.0);\n"
24 " for (int n=0; n<num_cols_per_row; ++n) {\n"
// Entry n of this row sits at n * num_rows + row.
26 " const int ind = n * num_rows + row;\n"
27 " const int col = Acol[ind];\n"
// Entries whose column index is negative or >= num_cols are skipped.
29 " if ((col >= 0) && (col < num_cols))\n"
30 " sum += Aval[ind] * x[col];\n"
// kernel_ell_add_spmv: scaled sparse matrix-vector product added onto y.
//
// Each work-item handles the row given by get_global_id(0). For a row below
// num_rows it accumulates Aval[ind] * x[col] over the row's stored entries and
// then adds scalar * sum to y[row].
//
// Parameters (as used by the visible code):
//   num_rows         - row count; also the stride between consecutive entries
//                      of one row in Acol/Aval
//   num_cols         - exclusive upper bound for a usable column index
//   num_cols_per_row - number of stored entries visited per row
//   Acol, Aval       - column indices and values; entry n of a row is read at
//                      index n * num_rows + row
//   scalar           - factor applied to the row sum before it is added to y
//   x                - input vector, indexed by column
//   y                - vector that is updated in place
//
// NOTE(review): ValueType is not declared in this excerpt; presumably it is
// defined before the kernel source is compiled - confirm.
// NOTE(review): the closing braces are not visible in this listing, so the
// exact scope of the y[row] update cannot be confirmed from here.
43 "__kernel void kernel_ell_add_spmv(const int num_rows, \n"
44 " const int num_cols, \n"
45 " const int num_cols_per_row,\n"
46 " __global const int *Acol,\n"
47 " __global const ValueType *Aval, \n"
48 " const ValueType scalar,\n"
49 " __global const ValueType *x, \n"
50 " __global ValueType *y) {\n"
52 " int row = get_global_id(0);\n"
54 " if (row < num_rows) {\n"
56 " ValueType sum = (ValueType)(0.0);\n"
58 " for (int n=0; n<num_cols_per_row; ++n) {\n"
// Entry n of this row sits at n * num_rows + row.
60 " const int ind = n * num_rows + row;\n"
61 " const int col = Acol[ind];\n"
// Entries whose column index is negative or >= num_cols are skipped.
63 " if ((col >= 0) && (col < num_cols))\n"
64 " sum += Aval[ind] * x[col];\n"
// Accumulate into y rather than overwrite it.
68 " y[row] += scalar * sum;\n"
// kernel_ell_max_row: per-work-group maximum of data[i+1] - data[i].
//
// Each work-item starts at gid = GROUP_SIZE * get_group_id(0) + get_local_id(0)
// and visits LOCAL_SIZE positions, advancing gid by BLOCK_SIZE each time. For
// every gid < nrow it computes data[gid+1] - data[gid] and keeps the larger
// value in its slot of the __local array sdata. After a barrier, the slots are
// combined with a halving-stride loop, and a value from sdata is written to
// out[get_group_id(0)].
//
// Parameters (as used by the visible code):
//   nrow       - exclusive upper bound for gid; data[gid+1] is read, so data
//                must hold at least nrow + 1 entries
//   data       - input integer array (presumably CSR row offsets - confirm)
//   out        - one result slot per work-group
//   GROUP_SIZE - offset between the starting positions of consecutive groups
//   LOCAL_SIZE - number of positions visited by each work-item
//
// NOTE(review): BLOCK_SIZE is not defined in this excerpt; presumably it is a
// macro supplied when the kernel source is compiled - confirm.
// NOTE(review): the declaration of `max`, the initialisation of sdata[tid],
// and any guard on tid around the reduction step and the final store are not
// visible in this listing.
74 "__kernel void kernel_ell_max_row( const int nrow,\n"
75 " __global const int *data,\n"
76 " __global int *out,\n"
77 " const int GROUP_SIZE,\n"
78 " const int LOCAL_SIZE) {\n"
80 " int tid = get_local_id(0);\n"
82 " __local int sdata[BLOCK_SIZE];\n"
88 " int gid = GROUP_SIZE * get_group_id(0) + tid;\n"
90 " for (int i = 0; i < LOCAL_SIZE; ++i, gid += BLOCK_SIZE) {\n"
92 " if (gid < nrow) {\n"
// Difference of adjacent entries; keep the largest seen by this work-item.
93 " max = data[gid+1] - data[gid];\n"
94 " if (max > sdata[tid])\n"
95 " sdata[tid] = max;\n"
// Make every work-item's sdata slot visible before combining them.
100 " barrier(CLK_LOCAL_MEM_FENCE);\n"
// Combine slots pairwise, halving the stride i on each pass.
102 " for (int i = BLOCK_SIZE/2; i > 0; i /= 2) {\n"
105 " if (sdata[tid+i] > sdata[tid]) sdata[tid] = sdata[tid+i];\n"
107 " barrier(CLK_LOCAL_MEM_FENCE);\n"
112 " out[get_group_id(0)] = sdata[tid];\n"
// kernel_ell_csr_to_ell: copies one CSR row per work-item into ELL arrays.
//
// The work-item's row is ai = get_global_id(0); rows with ai >= nrow are
// skipped. The first loop copies the column index and value of every entry
// aj in [src_row_offset[ai], src_row_offset[ai+1]) to position n * nrow + ai
// of ell_col / ell_val. The second loop runs from the row's entry count up to
// max_row and writes a column index of -1 and a value of 0 as padding.
//
// Parameters (as used by the visible code):
//   nrow           - row count; also the stride between consecutive entries
//                    of one row in ell_col/ell_val
//   max_row        - number of ELL slots per row (upper bound of the padding loop)
//   src_row_offset - CSR row offsets; entries ai and ai+1 are read
//   src_col, src_val - CSR column indices and values
//   ell_col, ell_val - destination ELL column indices and values
//
// NOTE(review): the declarations and updates of `n`, `aj` and `ell_ind` are
// not visible in this listing; presumably n counts the slot within the row -
// confirm against the full file.
// NOTE(review): ValueType is not declared in this excerpt.
116 "__kernel void kernel_ell_csr_to_ell(const int nrow, const int max_row,\n"
117 " __global const int *src_row_offset, __global const int *src_col,\n"
118 " __global const ValueType *src_val, __global int *ell_col,\n"
119 " __global ValueType *ell_val) {\n"
121 " int ai = get_global_id(0);\n"
126 " if (ai < nrow) {\n"
// Copy the existing CSR entries of row ai.
128 " for (aj=src_row_offset[ai]; aj<src_row_offset[ai+1]; ++aj) {\n"
130 " ell_ind = n * nrow + ai;\n"
132 " ell_col[ell_ind] = src_col[aj];\n"
133 " ell_val[ell_ind] = src_val[aj];\n"
// Pad the remaining slots, starting at the row's entry count.
139 " for (aj=src_row_offset[ai+1]-src_row_offset[ai]; aj<max_row; ++aj) {\n"
141 " ell_ind = n * nrow + ai;\n"
// A column index of -1 marks padding; the value is set to zero.
143 " ell_col[ell_ind] = (int)-1;\n"
144 " ell_val[ell_ind] = (ValueType)(0.0);\n"
158 #endif // PARALUTION_OCL_KERNELS_ELL_HPP_
const char * ocl_kernels_ell
Definition: ocl_kernels_ell.hpp:6
Definition: backend_manager.cpp:43