PARALUTION  1.0.0
PARALUTION
ocl_kernels_ell.hpp
Go to the documentation of this file.
1 #ifndef PARALUTION_OCL_KERNELS_ELL_HPP_
2 #define PARALUTION_OCL_KERNELS_ELL_HPP_
3 
4 namespace paralution {
5 
// OpenCL C source text for the ELL sparse-matrix kernels, stored as a single
// string assembled from the adjacent literals below.
//
// ValueType and BLOCK_SIZE are used by the kernels but are not defined
// anywhere in this text; presumably they are supplied when the program is
// built -- TODO confirm against the code that compiles this string.
//
// Storage layout shared by all kernels here: slot n of row r is found at
// index n * num_rows + r in both the column array and the value array.
6 const char *ocl_kernels_ell =
// kernel_ell_spmv: computes y = A * x, one work-item per row.
//   num_rows          number of rows; work-items with id >= num_rows do nothing
//   num_cols          upper bound used to validate column indices
//   num_cols_per_row  number of slots stored for every row
//   Acol, Aval        column indices and values in the layout described above
//   x, y              input vector and output vector; y[row] is overwritten
// Slots whose column index lies outside [0, num_cols) are skipped, which is
// how the -1 padding written by kernel_ell_csr_to_ell is ignored.
7 // Nathan Bell and Michael Garland
8 // Efficient Sparse Matrix-Vector Multiplication on {CUDA}
9 // NVR-2008-004 / NVIDIA Technical Report
10  "__kernel void kernel_ell_spmv(const int num_rows, \n"
11  " const int num_cols, \n"
12  " const int num_cols_per_row,\n"
13  " __global const int *Acol,\n"
14  " __global const ValueType *Aval, \n"
15  " __global const ValueType *x, \n"
16  " __global ValueType *y) {\n"
17  "\n"
18  " int row = get_global_id(0);\n"
19  "\n"
20  " if (row < num_rows) {\n"
21  "\n"
22  " ValueType sum = (ValueType)(0.0);\n"
23  "\n"
24  " for (int n=0; n<num_cols_per_row; ++n) {\n"
25  "\n"
26  " const int ind = n * num_rows + row;\n"
27  " const int col = Acol[ind];\n"
28  "\n"
29  " if ((col >= 0) && (col < num_cols))\n"
30  " sum += Aval[ind] * x[col];\n"
31  "\n"
32  " }\n"
33  "\n"
34  " y[row] = sum;\n"
35  "\n"
36  " }\n"
37  "\n"
38  "}\n"
39  "\n"
// kernel_ell_add_spmv: same traversal as kernel_ell_spmv, but accumulates
// into the output instead of overwriting it: y[row] += scalar * (A * x)[row].
40 // Nathan Bell and Michael Garland
41 // Efficient Sparse Matrix-Vector Multiplication on {CUDA}
42 // NVR-2008-004 / NVIDIA Technical Report
43  "__kernel void kernel_ell_add_spmv(const int num_rows, \n"
44  " const int num_cols, \n"
45  " const int num_cols_per_row,\n"
46  " __global const int *Acol,\n"
47  " __global const ValueType *Aval, \n"
48  " const ValueType scalar,\n"
49  " __global const ValueType *x, \n"
50  " __global ValueType *y) {\n"
51  "\n"
52  " int row = get_global_id(0);\n"
53  "\n"
54  " if (row < num_rows) {\n"
55  "\n"
56  " ValueType sum = (ValueType)(0.0);\n"
57  "\n"
58  " for (int n=0; n<num_cols_per_row; ++n) {\n"
59  "\n"
60  " const int ind = n * num_rows + row;\n"
61  " const int col = Acol[ind];\n"
62  " \n"
63  " if ((col >= 0) && (col < num_cols))\n"
64  " sum += Aval[ind] * x[col];\n"
65  "\n"
66  " }\n"
67  " \n"
68  " y[row] += scalar * sum;\n"
69  "\n"
70  " }\n"
71  "\n"
72  "}\n"
73  "\n"
// kernel_ell_max_row: per-work-group maximum of data[i+1] - data[i].
//   nrow        entries with index >= nrow are not read
//   data        presumably a CSR row-offset array, so each difference is a
//               row length -- TODO confirm against the caller; data[gid+1]
//               is read for gid < nrow, so nrow + 1 entries must be readable
//   out         receives one value per work-group, at out[group id]
//   GROUP_SIZE  distance in entries between the start of consecutive groups
//   LOCAL_SIZE  number of entries each work-item scans, BLOCK_SIZE apart
// Phase 1: every work-item keeps its running maximum in local memory.
// Phase 2: a halving reduction over sdata, with a barrier after each step;
// starting from BLOCK_SIZE/2 it only visits every element when BLOCK_SIZE is
// a power of two. Work-item 0 then stores the result for its group.
74  "__kernel void kernel_ell_max_row( const int nrow,\n"
75  " __global const int *data,\n"
76  " __global int *out,\n"
77  " const int GROUP_SIZE,\n"
78  " const int LOCAL_SIZE) {\n"
79  "\n"
80  " int tid = get_local_id(0);\n"
81  "\n"
82  " __local int sdata[BLOCK_SIZE];\n"
83  "\n"
84  " sdata[tid] = 0;\n"
85  "\n"
86  " int max;\n"
87  "\n"
88  " int gid = GROUP_SIZE * get_group_id(0) + tid;\n"
89  "\n"
90  " for (int i = 0; i < LOCAL_SIZE; ++i, gid += BLOCK_SIZE) {\n"
91  "\n"
92  " if (gid < nrow) {\n"
93  " max = data[gid+1] - data[gid];\n"
94  " if (max > sdata[tid])\n"
95  " sdata[tid] = max;\n"
96  " }\n"
97  "\n"
98  " }\n"
99  "\n"
100  " barrier(CLK_LOCAL_MEM_FENCE);\n"
101  "\n"
102  " for (int i = BLOCK_SIZE/2; i > 0; i /= 2) {\n"
103  "\n"
104  " if (tid < i)\n"
105  " if (sdata[tid+i] > sdata[tid]) sdata[tid] = sdata[tid+i];\n"
106  "\n"
107  " barrier(CLK_LOCAL_MEM_FENCE);\n"
108  "\n"
109  " }\n"
110  "\n"
111  " if (tid == 0)\n"
112  " out[get_group_id(0)] = sdata[tid];\n"
113  "\n"
114  "}\n"
115  "\n"
// kernel_ell_csr_to_ell: converts CSR data to the ELL layout, one work-item
// per row.
//   nrow            number of rows; work-items with id >= nrow do nothing
//   max_row         number of ELL slots written for every row
//   src_row_offset  CSR row offsets; entries ai and ai+1 delimit row ai
//   src_col/src_val CSR column indices and values
//   ell_col/ell_val outputs, written at index n * nrow + ai for slot n
// The first loop copies the entries of the row into slots 0, 1, 2, ...;
// the second loop fills the remaining slots up to max_row with column -1
// and value 0 so that the SpMV kernels above skip them.
116  "__kernel void kernel_ell_csr_to_ell(const int nrow, const int max_row,\n"
117  " __global const int *src_row_offset, __global const int *src_col,\n"
118  " __global const ValueType *src_val, __global int *ell_col,\n"
119  " __global ValueType *ell_val) {\n"
120  "\n"
121  " int ai = get_global_id(0);\n"
122  " int aj;\n"
123  " int n = 0;\n"
124  " int ell_ind;\n"
125  "\n"
126  " if (ai < nrow) {\n"
127  "\n"
128  " for (aj=src_row_offset[ai]; aj<src_row_offset[ai+1]; ++aj) {\n"
129  "\n"
130  " ell_ind = n * nrow + ai;\n"
131  "\n"
132  " ell_col[ell_ind] = src_col[aj];\n"
133  " ell_val[ell_ind] = src_val[aj];\n"
134  "\n"
135  " ++n;\n"
136  "\n"
137  " }\n"
138  "\n"
139  " for (aj=src_row_offset[ai+1]-src_row_offset[ai]; aj<max_row; ++aj) {\n"
140  "\n"
141  " ell_ind = n * nrow + ai;\n"
142  "\n"
143  " ell_col[ell_ind] = (int)-1;\n"
144  " ell_val[ell_ind] = (ValueType)(0.0);\n"
145  "\n"
146  " ++n;\n"
147  "\n"
148  " }\n"
149  "\n"
150  " }\n"
151  "\n"
152  "}\n"
153  "\n"
154  "\n"
155 ;
156 }
157 
158 #endif // PARALUTION_OCL_KERNELS_ELL_HPP_
const char * ocl_kernels_ell
Definition: ocl_kernels_ell.hpp:6
Definition: backend_manager.cpp:43