PARALUTION  1.0.0
PARALUTION
ocl_kernels_csr.hpp
Go to the documentation of this file.
1 #ifndef PARALUTION_OCL_KERNELS_CSR_HPP_
2 #define PARALUTION_OCL_KERNELS_CSR_HPP_
3 
4 namespace paralution {
5 
6 const char *ocl_kernels_csr =
7  "__kernel void kernel_csr_spmv_scalar(const int nrow, __global const int *row_offset, __global const int *col,\n"
8  " __global const ValueType *val, __global const ValueType *in,\n"
9  " __global ValueType *out) {\n"
10  "\n"
11  " int ai = get_global_id(0);\n"
12  "\n"
13  " if (ai < nrow) {\n"
14  "\n"
15  " ValueType sum = (ValueType)(0.0);\n"
16  "\n"
17  " for (int aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
18  " sum += val[aj] * in[col[aj]];\n"
19  "\n"
20  " out[ai] = sum;\n"
21  "\n"
22  " }\n"
23  "\n"
24  "}\n"
25  "\n"
26  "__kernel void kernel_csr_add_spmv_scalar(const int nrow, __global const int *row_offset,\n"
27  " __global const int *col, __global const ValueType *val,\n"
28  " const ValueType scalar, __global const ValueType *in,\n"
29  " __global ValueType *out) {\n"
30  "\n"
31  " int ai = get_global_id(0);\n"
32  " int aj;\n"
33  "\n"
34  " if (ai < nrow) {\n"
35  "\n"
36  " ValueType sum = out[ai];\n"
37  "\n"
38  " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
39  " sum += scalar * val[aj] * in[col[aj]];\n"
40  "\n"
41  " out[ai] = sum;\n"
42  "\n"
43  " }\n"
44  "\n"
45  "}\n"
46  "\n"
47  "__kernel void kernel_csr_scale_diagonal(const int nrow, __global const int *row_offset, __global const int *col,\n"
48  " const ValueType alpha, __global ValueType *val) {\n"
49  "\n"
50  " int ai = get_global_id(0);\n"
51  " int aj;\n"
52  "\n"
53  " if (ai < nrow)\n"
54  " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
55  " if (ai == col[aj])\n"
56  " val[aj] = alpha * val[aj];\n"
57  "\n"
58  "}\n"
59  "\n"
60  "__kernel void kernel_csr_scale_offdiagonal(const int nrow, __global const int *row_offset, __global const int *col,\n"
61  " const ValueType alpha, __global ValueType *val) {\n"
62  "\n"
63  " int ai = get_global_id(0);\n"
64  " int aj;\n"
65  "\n"
66  " if (ai < nrow)\n"
67  " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
68  " if (ai != col[aj])\n"
69  " val[aj] = alpha * val[aj];\n"
70  "\n"
71  "}\n"
72  "\n"
73  "__kernel void kernel_csr_add_diagonal(const int nrow, __global const int *row_offset,\n"
74  " __global const int *col, const ValueType alpha, __global ValueType *val) {\n"
75  "\n"
76  " int ai = get_global_id(0);\n"
77  " int aj;\n"
78  "\n"
79  " if (ai < nrow)\n"
80  " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
81  " if (ai == col[aj])\n"
82  " val[aj] += alpha;\n"
83  "\n"
84  "}\n"
85  "\n"
86  "__kernel void kernel_csr_add_offdiagonal(const int nrow, __global const int *row_offset,\n"
87  " __global const int *col, const ValueType alpha, __global ValueType *val) {\n"
88  "\n"
89  " int ai = get_global_id(0);\n"
90  " int aj;\n"
91  "\n"
92  " if (ai < nrow)\n"
93  " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
94  " if (ai != col[aj])\n"
95  " val[aj] += alpha;\n"
96  "\n"
97  "}\n"
98  "\n"
99  "__kernel void kernel_csr_extract_diag(const int nrow, __global const int *row_offset, __global const int *col,\n"
100  " __global const ValueType *val, __global ValueType *vec) {\n"
101  "\n"
102  " int ai = get_global_id(0);\n"
103  " int aj;\n"
104  "\n"
105  " if (ai < nrow)\n"
106  " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
107  " if (ai == col[aj])\n"
108  " vec[ai] = val[aj];\n"
109  "\n"
110  "}\n"
111  "\n"
112  "__kernel void kernel_csr_extract_inv_diag(const int nrow, __global const int *row_offset,\n"
113  " __global const int *col, __global const ValueType *val,\n"
114  " __global ValueType *vec) {\n"
115  "\n"
116  " int ai = get_global_id(0);\n"
117  " int aj;\n"
118  "\n"
119  " if (ai < nrow)\n"
120  " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
121  " if (ai == col[aj])\n"
122  " vec[ai] = (ValueType)(1.0) / val[aj];\n"
123  "\n"
124  "}\n"
125  "\n"
126  "__kernel void kernel_csr_extract_submatrix_row_nnz(__global const int *row_offset, __global const int *col,\n"
127  " __global const ValueType *val, const int smrow_offset,\n"
128  " const int smcol_offset, const int smrow_size,\n"
129  " const int smcol_size, __global int *row_nnz) {\n"
130  "\n"
131  " int ai = get_global_id(0);\n"
132  " int aj;\n"
133  "\n"
134  " if (ai < smrow_size) {\n"
135  "\n"
136  " int nnz = 0;\n"
137  " int ind = ai + smrow_offset;\n"
138  "\n"
139  " for (aj=row_offset[ind]; aj<row_offset[ind+1]; ++aj)\n"
140  "\n"
141  " if ( (col[aj] >= smcol_offset) &&\n"
142  " (col[aj] < smcol_offset + smcol_size) )\n"
143  " ++nnz;\n"
144  "\n"
145  " row_nnz[ai] = nnz;\n"
146  "\n"
147  " }\n"
148  "\n"
149  "}\n"
150  "\n"
151  "__kernel void kernel_csr_extract_submatrix_copy(__global const int *row_offset, __global const int *col,\n"
152  " __global const ValueType *val, const int smrow_offset,\n"
153  " const int smcol_offset, const int smrow_size,\n"
154  " const int smcol_size, __global const int *sm_row_offset,\n"
155  " __global int *sm_col, __global ValueType *sm_val) {\n"
156  "\n"
157  " int ai = get_global_id(0);\n"
158  " int aj;\n"
159  "\n"
160  " if (ai < smrow_size) {\n"
161  "\n"
162  " int row_nnz = sm_row_offset[ai];\n"
163  " int ind = ai + smrow_offset;\n"
164  "\n"
165  " for (aj=row_offset[ind]; aj<row_offset[ind+1]; ++aj) {\n"
166  "\n"
167  " if ( (col[aj] >= smcol_offset) &&\n"
168  " (col[aj] < smcol_offset + smcol_size) ) {\n"
169  "\n"
170  " sm_col[row_nnz] = col[aj] - smcol_offset;\n"
171  " sm_val[row_nnz] = val[aj];\n"
172  " ++row_nnz;\n"
173  "\n"
174  " }\n"
175  "\n"
176  " }\n"
177  "\n"
178  " }\n"
179  "\n"
180  "}\n"
181  "\n"
182  "__kernel void kernel_csr_diagmatmult_r(const int nrow, __global const int *row_offset, __global const int *col,\n"
183  " __global const ValueType *diag, __global ValueType *val) {\n"
184  "\n"
185  " int ai = get_global_id(0);\n"
186  " int aj;\n"
187  "\n"
188  " if (ai < nrow)\n"
189  " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
190  " val[aj] = val[aj] * diag[col[aj]];\n"
191  "\n"
192  "}\n"
193  "\n"
194  "__kernel void kernel_csr_diagmatmult_l(const int nrow, __global const int *row_offset,\n"
195  " __global const ValueType *diag, __global ValueType *val) {\n"
196  "\n"
197  " int ai = get_global_id(0);\n"
198  " int aj;\n"
199  "\n"
200  " if (ai < nrow)\n"
201  " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
202  " val[aj] = val[aj] * diag[ai];\n"
203  "\n"
204  "}\n"
205  "\n"
206  "__kernel void kernel_csr_add_csr_same_struct(const int nrow, __global const int *out_row_offset,\n"
207  " __global const int *out_col, __global const int *in_row_offset,\n"
208  " __global const int *in_col, __global const ValueType *in_val,\n"
209  " const ValueType alpha, const ValueType beta, __global ValueType *out_val) {\n"
210  "\n"
211  " int ai = get_global_id(0);\n"
212  " int aj, ajj;\n"
213  "\n"
214  " if (ai < nrow) {\n"
215  "\n"
216  " int first_col = in_row_offset[ai];\n"
217  " \n"
218  " for (ajj=out_row_offset[ai]; ajj<out_row_offset[ai+1]; ++ajj)\n"
219  " for (aj=first_col; aj<in_row_offset[ai+1]; ++aj)\n"
220  " if (in_col[aj] == out_col[ajj]) {\n"
221  " \n"
222  " out_val[ajj] = alpha * out_val[ajj] + beta * in_val[aj];\n"
223  " ++first_col;\n"
224  " break;\n"
225  "\n"
226  " }\n"
227  "\n"
228  " }\n"
229  "\n"
230  "}\n"
231  "\n"
232  "__kernel void kernel_buffer_addscalar(const int size, const ValueType scalar, __global ValueType *buff) {\n"
233  "\n"
234  " int gid = get_global_id(0);\n"
235  "\n"
236  " if (gid < size)\n"
237  " buff[gid] += scalar;\n"
238  "\n"
239  "}\n"
240  "\n"
241  "__kernel void kernel_reverse_index(const int size, __global const int *perm, __global int *out) {\n"
242  "\n"
243  " int gid = get_global_id(0);\n"
244  "\n"
245  " if (gid < size)\n"
246  " out[perm[gid]] = gid;\n"
247  "\n"
248  "}\n"
249  "\n"
250  "__kernel void kernel_csr_calc_row_nnz(const int nrow, __global const int *row_offset, __global int *row_nnz) {\n"
251  "\n"
252  " int ai = get_global_id(0);\n"
253  "\n"
254  " if (ai < nrow)\n"
255  " row_nnz[ai] = row_offset[ai+1]-row_offset[ai];\n"
256  "\n"
257  "}\n"
258  "\n"
259  "__kernel void kernel_csr_permute_row_nnz( const int nrow,\n"
260  " __global const int *row_nnz_src,\n"
261  " __global const int *perm_vec,\n"
262  " __global int *row_nnz_dst) {\n"
263  "\n"
264  " int ai = get_global_id(0);\n"
265  "\n"
266  " if (ai < nrow)\n"
267  " row_nnz_dst[perm_vec[ai]] = row_nnz_src[ai];\n"
268  "\n"
269  "}\n"
270  "\n"
271  "__kernel void kernel_csr_permute_rows( const int nrow,\n"
272  " __global const int *row_offset,\n"
273  " __global const int *perm_row_offset,\n"
274  " __global const int *col,\n"
275  " __global const ValueType *data,\n"
276  " __global const int *perm_vec,\n"
277  " __global const int *row_nnz,\n"
278  " __global int *perm_col,\n"
279  " __global ValueType *perm_data) {\n"
280  "\n"
281  " int ai = get_global_id(0);\n"
282  "\n"
283  " if (ai < nrow) {\n"
284  "\n"
285  " int num_elems = row_nnz[ai];\n"
286  " int perm_index = perm_row_offset[perm_vec[ai]];\n"
287  " int prev_index = row_offset[ai];\n"
288  "\n"
289  " for (int i = 0; i < num_elems; ++i) {\n"
290  " perm_data[perm_index + i] = data[prev_index + i];\n"
291  " perm_col[perm_index + i] = col[prev_index + i];\n"
292  " }\n"
293  "\n"
294  " }\n"
295  "\n"
296  "}\n"
297  "\n"
298  "__kernel void kernel_csr_permute_cols( const int nrow,\n"
299  " __global const int *row_offset,\n"
300  " __global const int *perm_vec,\n"
301  " __global const int *row_nnz,\n"
302  " __global const int *perm_col,\n"
303  " __global const ValueType *perm_data,\n"
304  " __global int *col,\n"
305  " __global ValueType *data) {\n"
306  "\n"
307  " int ai = get_global_id(0);\n"
308  " int j;\n"
309  "\n"
310  " if (ai < nrow) {\n"
311  "\n"
312  " int num_elems = row_nnz[ai];\n"
313  " int elem_index = row_offset[ai];\n"
314  "\n"
315  " for (int i = 0; i < num_elems; ++i) {\n"
316  "\n"
317  " int comp = perm_vec[perm_col[elem_index+i]];\n"
318  "\n"
319  " for (j = i-1; j >= 0 ; --j) {\n"
320  "\n"
321  " if (col[elem_index+j]>comp) {\n"
322  " data[elem_index+j+1] = data[elem_index+j];\n"
323  " col[elem_index+j+1] = col[elem_index+j];\n"
324  " } else\n"
325  " break;\n"
326  " }\n"
327  "\n"
328  " data[elem_index+j+1] = perm_data[elem_index+i];\n"
329  " col[elem_index+j+1] = comp;\n"
330  "\n"
331  " }\n"
332  "\n"
333  " }\n"
334  "\n"
335  "}\n"
336  "\n"
337  "__kernel void kernel_csr_extract_l_triangular(const int nrow,\n"
338  " __global const int *src_row_offset, __global const int *src_col,\n"
339  " __global const ValueType *src_val, __global int *nnz_per_row,\n"
340  " __global int *dst_col, __global ValueType *dst_val) {\n"
341  "\n"
342  " int ai = get_global_id(0);\n"
343  " int aj;\n"
344  "\n"
345  " if (ai < nrow) {\n"
346  "\n"
347  " int dst_index = nnz_per_row[ai];\n"
348  " int src_index = src_row_offset[ai];\n"
349  "\n"
350  " for (aj=0; aj<nnz_per_row[ai+1]-nnz_per_row[ai]; ++aj) {\n"
351  " \n"
352  " dst_col[dst_index] = src_col[src_index];\n"
353  " dst_val[dst_index] = src_val[src_index];\n"
354  " \n"
355  " ++dst_index;\n"
356  " ++src_index;\n"
357  " \n"
358  " }\n"
359  " }\n"
360  "\n"
361  "}\n"
362  "\n"
363  "__kernel void kernel_csr_extract_u_triangular(const int nrow,\n"
364  " __global const int *src_row_offset, __global const int *src_col,\n"
365  " __global const ValueType *src_val, __global int *nnz_per_row,\n"
366  " __global int *dst_col, __global ValueType *dst_val) {\n"
367  "\n"
368  " int ai = get_global_id(0);\n"
369  " int aj;\n"
370  "\n"
371  " if (ai < nrow) {\n"
372  "\n"
373  " int num_elements = nnz_per_row[ai+1]-nnz_per_row[ai];\n"
374  " int dst_index = nnz_per_row[ai];\n"
375  " int src_index = src_row_offset[ai+1]-num_elements;\n"
376  "\n"
377  " for (aj=0; aj<num_elements; ++aj) {\n"
378  " \n"
379  " dst_col[dst_index] = src_col[src_index];\n"
380  " dst_val[dst_index] = src_val[src_index];\n"
381  " \n"
382  " ++dst_index;\n"
383  " ++src_index;\n"
384  " \n"
385  " }\n"
386  " }\n"
387  "\n"
388  "}\n"
389  "\n"
390  "__kernel void kernel_csr_slower_nnz_per_row(const int nrow, __global const int *src_row_offset,\n"
391  " __global const int *src_col, __global int *nnz_per_row) {\n"
392  "\n"
393  " int ai = get_global_id(0);\n"
394  " int aj;\n"
395  " \n"
396  " if (ai < nrow) {\n"
397  " nnz_per_row[ai+1] = 0;\n"
398  " for (aj=src_row_offset[ai]; aj<src_row_offset[ai+1]; ++aj)\n"
399  " if (src_col[aj] < ai)\n"
400  " ++nnz_per_row[ai+1];\n"
401  " }\n"
402  "\n"
403  "}\n"
404  "\n"
405  "__kernel void kernel_csr_supper_nnz_per_row(const int nrow, __global const int *src_row_offset,\n"
406  " __global const int *src_col, __global int *nnz_per_row) {\n"
407  "\n"
408  " int ai = get_global_id(0);\n"
409  " int aj;\n"
410  "\n"
411  " if (ai < nrow) {\n"
412  " nnz_per_row[ai+1] = 0;\n"
413  " for (aj=src_row_offset[ai]; aj<src_row_offset[ai+1]; ++aj)\n"
414  " if (src_col[aj] > ai)\n"
415  " ++nnz_per_row[ai+1];\n"
416  " }\n"
417  "\n"
418  "}\n"
419  "\n"
420  "__kernel void kernel_csr_lower_nnz_per_row(const int nrow, __global const int *src_row_offset,\n"
421  " __global const int *src_col, __global int *nnz_per_row) {\n"
422  "\n"
423  " int ai = get_global_id(0);\n"
424  " int aj;\n"
425  "\n"
426  " if (ai < nrow) {\n"
427  " nnz_per_row[ai+1] = 0;\n"
428  " for (aj=src_row_offset[ai]; aj<src_row_offset[ai+1]; ++aj)\n"
429  " if (src_col[aj] <= ai)\n"
430  " ++nnz_per_row[ai+1];\n"
431  " }\n"
432  "\n"
433  "}\n"
434  "\n"
435  "__kernel void kernel_csr_upper_nnz_per_row(const int nrow, __global const int *src_row_offset,\n"
436  " __global const int *src_col, __global int *nnz_per_row) {\n"
437  "\n"
438  " int ai = get_global_id(0);\n"
439  " int aj;\n"
440  "\n"
441  " if (ai < nrow) {\n"
442  " nnz_per_row[ai+1] = 0;\n"
443  " for (aj=src_row_offset[ai]; aj<src_row_offset[ai+1]; ++aj)\n"
444  " if (src_col[aj] >= ai)\n"
445  " ++nnz_per_row[ai+1];\n"
446  " }\n"
447  "\n"
448  "}\n"
449  "\n"
450  "__kernel void kernel_csr_compress_count_nrow(__global const int *row_offset, __global const int *col,\n"
451  " __global const ValueType *val, const int nrow, const double drop_off,\n"
452  " __global int *row_offset_new) {\n"
453  "\n"
454  " int ai = get_global_id(0);\n"
455  " int aj;\n"
456  "\n"
457  " if (ai < nrow) {\n"
458  " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj) {\n"
459  "\n"
460  " if ( (fabs(val[aj]) > drop_off) ||\n"
461  " ( col[aj] == ai))\n"
462  " row_offset_new[ai]++;\n"
463  " }\n"
464  " }\n"
465  "\n"
466  "}\n"
467  "\n"
468  "__kernel void kernel_csr_compress_copy(__global const int *row_offset, __global const int *col,\n"
469  " __global const ValueType *val, const int nrow, const double drop_off,\n"
470  " __global const int *row_offset_new, __global int *col_new,\n"
471  " __global ValueType *val_new) {\n"
472  "\n"
473  " int ai = get_global_id(0);\n"
474  " int aj;\n"
475  " int ajj = row_offset_new[ai];\n"
476  "\n"
477  " if (ai < nrow) {\n"
478  "\n"
479  " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj) {\n"
480  "\n"
481  " if ( (fabs(val[aj]) > drop_off) ||\n"
482  " ( col[aj] == ai)) {\n"
483  " col_new[ajj] = col[aj];\n"
484  " val_new[ajj] = val[aj];\n"
485  " ajj++;\n"
486  " }\n"
487  " }\n"
488  "\n"
489  " }\n"
490  "\n"
491  "}\n"
492  "\n"
493 // Extract column vector
494  "__kernel void kernel_csr_extract_column_vector(__global const int *row_offset, __global const int *col,\n"
495  " __global const ValueType *val, const int nrow, const int idx,\n"
496  " __global ValueType *vec) {\n"
497  "\n"
498  " int ai = get_global_id(0);\n"
499  " int aj;\n"
500  "\n"
501  " if (ai < nrow) {\n"
502  "\n"
503  " vec[ai] = (ValueType)(0.0);\n"
504  "\n"
505  " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
506  " if (idx == col[aj])\n"
507  " vec[ai] = val[aj];\n"
508  "\n"
509  " }\n"
510  "\n"
511  "}\n"
512  "\n"
513 // Replace column vector - compute new offset
514  "__kernel void kernel_csr_replace_column_vector_offset(__global const int *row_offset, __global const int *col,\n"
515  " const int nrow, const int idx,\n"
516  " __global const ValueType *vec, __global int *offset) {\n"
517  "\n"
518  " int ai = get_global_id(0);\n"
519  " int aj;\n"
520  " int add = 1;\n"
521  "\n"
522  " if (ai < nrow) {\n"
523  "\n"
524  " offset[ai+1] = row_offset[ai+1] - row_offset[ai];\n"
525  "\n"
526  " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj) {\n"
527  " if (col[aj] == idx) {\n"
528  " add = 0;\n"
529  " break;\n"
530  " }\n"
531  " }\n"
532  "\n"
533  " if (add == 1 && vec[ai] != (ValueType)(0.0))\n"
534  " ++offset[ai+1];\n"
535  "\n"
536  " if (add == 0 && vec[ai] == (ValueType)(0.0))\n"
537  " --offset[ai+1];\n"
538  "\n"
539  " }\n"
540  "\n"
541  "}\n"
542  "\n"
543 // Replace column vector - compute new offset
544  "__kernel void kernel_csr_replace_column_vector(__global const int *row_offset, __global const int *col,\n"
545  " __global const ValueType *val, const int nrow, const int idx,\n"
546  " __global const ValueType *vec, __global const int *offset,\n"
547  " __global int *new_col, __global ValueType *new_val) {\n"
548  "\n"
549  " int ai = get_global_id(0);\n"
550  " int aj = row_offset[ai];\n"
551  " int k = offset[ai];\n"
552  "\n"
553  " if (ai < nrow) {\n"
554  "\n"
555  " for (; aj<row_offset[ai+1]; ++aj) {\n"
556  " if (col[aj] < idx) {\n"
557  " new_col[k] = col[aj];\n"
558  " new_val[k] = val[aj];\n"
559  " ++k;\n"
560  " } else\n"
561  " break;\n"
562  " }\n"
563  "\n"
564  " if (vec[ai] != (ValueType)(0.0)) {\n"
565  " new_col[k] = idx;\n"
566  " new_val[k] = vec[ai];\n"
567  " ++k;\n"
568  " ++aj;\n"
569  " }\n"
570  "\n"
571  " for (; aj<row_offset[ai+1]; ++aj) {\n"
572  " if (col[aj] > idx) {\n"
573  " new_col[k] = col[aj];\n"
574  " new_val[k] = val[aj];\n"
575  " ++k;\n"
576  " }\n"
577  " }\n"
578  "\n"
579  " }\n"
580  "\n"
581  "}\n"
582  "\n"
583 ;
584 }
585 
586 #endif // PARALUTION_OCL_KERNELS_CSR_HPP_
const char * ocl_kernels_csr
Definition: ocl_kernels_csr.hpp:6
Definition: backend_manager.cpp:43