1 #ifndef PARALUTION_OCL_KERNELS_CSR_HPP_
2 #define PARALUTION_OCL_KERNELS_CSR_HPP_
7 "__kernel void kernel_csr_spmv_scalar(const int nrow, __global const int *row_offset, __global const int *col,\n"
8 " __global const ValueType *val, __global const ValueType *in,\n"
9 " __global ValueType *out) {\n"
11 " int ai = get_global_id(0);\n"
15 " ValueType sum = (ValueType)(0.0);\n"
17 " for (int aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
18 " sum += val[aj] * in[col[aj]];\n"
26 "__kernel void kernel_csr_add_spmv_scalar(const int nrow, __global const int *row_offset,\n"
27 " __global const int *col, __global const ValueType *val,\n"
28 " const ValueType scalar, __global const ValueType *in,\n"
29 " __global ValueType *out) {\n"
31 " int ai = get_global_id(0);\n"
36 " ValueType sum = out[ai];\n"
38 " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
39 " sum += scalar * val[aj] * in[col[aj]];\n"
47 "__kernel void kernel_csr_scale_diagonal(const int nrow, __global const int *row_offset, __global const int *col,\n"
48 " const ValueType alpha, __global ValueType *val) {\n"
50 " int ai = get_global_id(0);\n"
54 " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
55 " if (ai == col[aj])\n"
56 " val[aj] = alpha * val[aj];\n"
60 "__kernel void kernel_csr_scale_offdiagonal(const int nrow, __global const int *row_offset, __global const int *col,\n"
61 " const ValueType alpha, __global ValueType *val) {\n"
63 " int ai = get_global_id(0);\n"
67 " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
68 " if (ai != col[aj])\n"
69 " val[aj] = alpha * val[aj];\n"
73 "__kernel void kernel_csr_add_diagonal(const int nrow, __global const int *row_offset,\n"
74 " __global const int *col, const ValueType alpha, __global ValueType *val) {\n"
76 " int ai = get_global_id(0);\n"
80 " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
81 " if (ai == col[aj])\n"
82 " val[aj] += alpha;\n"
86 "__kernel void kernel_csr_add_offdiagonal(const int nrow, __global const int *row_offset,\n"
87 " __global const int *col, const ValueType alpha, __global ValueType *val) {\n"
89 " int ai = get_global_id(0);\n"
93 " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
94 " if (ai != col[aj])\n"
95 " val[aj] += alpha;\n"
99 "__kernel void kernel_csr_extract_diag(const int nrow, __global const int *row_offset, __global const int *col,\n"
100 " __global const ValueType *val, __global ValueType *vec) {\n"
102 " int ai = get_global_id(0);\n"
106 " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
107 " if (ai == col[aj])\n"
108 " vec[ai] = val[aj];\n"
112 "__kernel void kernel_csr_extract_inv_diag(const int nrow, __global const int *row_offset,\n"
113 " __global const int *col, __global const ValueType *val,\n"
114 " __global ValueType *vec) {\n"
116 " int ai = get_global_id(0);\n"
120 " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
121 " if (ai == col[aj])\n"
122 " vec[ai] = (ValueType)(1.0) / val[aj];\n"
126 "__kernel void kernel_csr_extract_submatrix_row_nnz(__global const int *row_offset, __global const int *col,\n"
127 " __global const ValueType *val, const int smrow_offset,\n"
128 " const int smcol_offset, const int smrow_size,\n"
129 " const int smcol_size, __global int *row_nnz) {\n"
131 " int ai = get_global_id(0);\n"
134 " if (ai < smrow_size) {\n"
137 " int ind = ai + smrow_offset;\n"
139 " for (aj=row_offset[ind]; aj<row_offset[ind+1]; ++aj)\n"
141 " if ( (col[aj] >= smcol_offset) &&\n"
142 " (col[aj] < smcol_offset + smcol_size) )\n"
145 " row_nnz[ai] = nnz;\n"
151 "__kernel void kernel_csr_extract_submatrix_copy(__global const int *row_offset, __global const int *col,\n"
152 " __global const ValueType *val, const int smrow_offset,\n"
153 " const int smcol_offset, const int smrow_size,\n"
154 " const int smcol_size, __global const int *sm_row_offset,\n"
155 " __global int *sm_col, __global ValueType *sm_val) {\n"
157 " int ai = get_global_id(0);\n"
160 " if (ai < smrow_size) {\n"
162 " int row_nnz = sm_row_offset[ai];\n"
163 " int ind = ai + smrow_offset;\n"
165 " for (aj=row_offset[ind]; aj<row_offset[ind+1]; ++aj) {\n"
167 " if ( (col[aj] >= smcol_offset) &&\n"
168 " (col[aj] < smcol_offset + smcol_size) ) {\n"
170 " sm_col[row_nnz] = col[aj] - smcol_offset;\n"
171 " sm_val[row_nnz] = val[aj];\n"
182 "__kernel void kernel_csr_diagmatmult_r(const int nrow, __global const int *row_offset, __global const int *col,\n"
183 " __global const ValueType *diag, __global ValueType *val) {\n"
185 " int ai = get_global_id(0);\n"
189 " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
190 " val[aj] = val[aj] * diag[col[aj]];\n"
194 "__kernel void kernel_csr_diagmatmult_l(const int nrow, __global const int *row_offset,\n"
195 " __global const ValueType *diag, __global ValueType *val) {\n"
197 " int ai = get_global_id(0);\n"
201 " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
202 " val[aj] = val[aj] * diag[ai];\n"
206 "__kernel void kernel_csr_add_csr_same_struct(const int nrow, __global const int *out_row_offset,\n"
207 " __global const int *out_col, __global const int *in_row_offset,\n"
208 " __global const int *in_col, __global const ValueType *in_val,\n"
209 " const ValueType alpha, const ValueType beta, __global ValueType *out_val) {\n"
211 " int ai = get_global_id(0);\n"
214 " if (ai < nrow) {\n"
216 " int first_col = in_row_offset[ai];\n"
218 " for (ajj=out_row_offset[ai]; ajj<out_row_offset[ai+1]; ++ajj)\n"
219 " for (aj=first_col; aj<in_row_offset[ai+1]; ++aj)\n"
220 " if (in_col[aj] == out_col[ajj]) {\n"
222 " out_val[ajj] = alpha * out_val[ajj] + beta * in_val[aj];\n"
232 "__kernel void kernel_buffer_addscalar(const int size, const ValueType scalar, __global ValueType *buff) {\n"
234 " int gid = get_global_id(0);\n"
237 " buff[gid] += scalar;\n"
241 "__kernel void kernel_reverse_index(const int size, __global const int *perm, __global int *out) {\n"
243 " int gid = get_global_id(0);\n"
246 " out[perm[gid]] = gid;\n"
250 "__kernel void kernel_csr_calc_row_nnz(const int nrow, __global const int *row_offset, __global int *row_nnz) {\n"
252 " int ai = get_global_id(0);\n"
255 " row_nnz[ai] = row_offset[ai+1]-row_offset[ai];\n"
259 "__kernel void kernel_csr_permute_row_nnz( const int nrow,\n"
260 " __global const int *row_nnz_src,\n"
261 " __global const int *perm_vec,\n"
262 " __global int *row_nnz_dst) {\n"
264 " int ai = get_global_id(0);\n"
267 " row_nnz_dst[perm_vec[ai]] = row_nnz_src[ai];\n"
271 "__kernel void kernel_csr_permute_rows( const int nrow,\n"
272 " __global const int *row_offset,\n"
273 " __global const int *perm_row_offset,\n"
274 " __global const int *col,\n"
275 " __global const ValueType *data,\n"
276 " __global const int *perm_vec,\n"
277 " __global const int *row_nnz,\n"
278 " __global int *perm_col,\n"
279 " __global ValueType *perm_data) {\n"
281 " int ai = get_global_id(0);\n"
283 " if (ai < nrow) {\n"
285 " int num_elems = row_nnz[ai];\n"
286 " int perm_index = perm_row_offset[perm_vec[ai]];\n"
287 " int prev_index = row_offset[ai];\n"
289 " for (int i = 0; i < num_elems; ++i) {\n"
290 " perm_data[perm_index + i] = data[prev_index + i];\n"
291 " perm_col[perm_index + i] = col[prev_index + i];\n"
298 "__kernel void kernel_csr_permute_cols( const int nrow,\n"
299 " __global const int *row_offset,\n"
300 " __global const int *perm_vec,\n"
301 " __global const int *row_nnz,\n"
302 " __global const int *perm_col,\n"
303 " __global const ValueType *perm_data,\n"
304 " __global int *col,\n"
305 " __global ValueType *data) {\n"
307 " int ai = get_global_id(0);\n"
310 " if (ai < nrow) {\n"
312 " int num_elems = row_nnz[ai];\n"
313 " int elem_index = row_offset[ai];\n"
315 " for (int i = 0; i < num_elems; ++i) {\n"
317 " int comp = perm_vec[perm_col[elem_index+i]];\n"
319 " for (j = i-1; j >= 0 ; --j) {\n"
321 " if (col[elem_index+j]>comp) {\n"
322 " data[elem_index+j+1] = data[elem_index+j];\n"
323 " col[elem_index+j+1] = col[elem_index+j];\n"
328 " data[elem_index+j+1] = perm_data[elem_index+i];\n"
329 " col[elem_index+j+1] = comp;\n"
337 "__kernel void kernel_csr_extract_l_triangular(const int nrow,\n"
338 " __global const int *src_row_offset, __global const int *src_col,\n"
339 " __global const ValueType *src_val, __global int *nnz_per_row,\n"
340 " __global int *dst_col, __global ValueType *dst_val) {\n"
342 " int ai = get_global_id(0);\n"
345 " if (ai < nrow) {\n"
347 " int dst_index = nnz_per_row[ai];\n"
348 " int src_index = src_row_offset[ai];\n"
350 " for (aj=0; aj<nnz_per_row[ai+1]-nnz_per_row[ai]; ++aj) {\n"
352 " dst_col[dst_index] = src_col[src_index];\n"
353 " dst_val[dst_index] = src_val[src_index];\n"
363 "__kernel void kernel_csr_extract_u_triangular(const int nrow,\n"
364 " __global const int *src_row_offset, __global const int *src_col,\n"
365 " __global const ValueType *src_val, __global int *nnz_per_row,\n"
366 " __global int *dst_col, __global ValueType *dst_val) {\n"
368 " int ai = get_global_id(0);\n"
371 " if (ai < nrow) {\n"
373 " int num_elements = nnz_per_row[ai+1]-nnz_per_row[ai];\n"
374 " int dst_index = nnz_per_row[ai];\n"
375 " int src_index = src_row_offset[ai+1]-num_elements;\n"
377 " for (aj=0; aj<num_elements; ++aj) {\n"
379 " dst_col[dst_index] = src_col[src_index];\n"
380 " dst_val[dst_index] = src_val[src_index];\n"
390 "__kernel void kernel_csr_slower_nnz_per_row(const int nrow, __global const int *src_row_offset,\n"
391 " __global const int *src_col, __global int *nnz_per_row) {\n"
393 " int ai = get_global_id(0);\n"
396 " if (ai < nrow) {\n"
397 " nnz_per_row[ai+1] = 0;\n"
398 " for (aj=src_row_offset[ai]; aj<src_row_offset[ai+1]; ++aj)\n"
399 " if (src_col[aj] < ai)\n"
400 " ++nnz_per_row[ai+1];\n"
405 "__kernel void kernel_csr_supper_nnz_per_row(const int nrow, __global const int *src_row_offset,\n"
406 " __global const int *src_col, __global int *nnz_per_row) {\n"
408 " int ai = get_global_id(0);\n"
411 " if (ai < nrow) {\n"
412 " nnz_per_row[ai+1] = 0;\n"
413 " for (aj=src_row_offset[ai]; aj<src_row_offset[ai+1]; ++aj)\n"
414 " if (src_col[aj] > ai)\n"
415 " ++nnz_per_row[ai+1];\n"
420 "__kernel void kernel_csr_lower_nnz_per_row(const int nrow, __global const int *src_row_offset,\n"
421 " __global const int *src_col, __global int *nnz_per_row) {\n"
423 " int ai = get_global_id(0);\n"
426 " if (ai < nrow) {\n"
427 " nnz_per_row[ai+1] = 0;\n"
428 " for (aj=src_row_offset[ai]; aj<src_row_offset[ai+1]; ++aj)\n"
429 " if (src_col[aj] <= ai)\n"
430 " ++nnz_per_row[ai+1];\n"
435 "__kernel void kernel_csr_upper_nnz_per_row(const int nrow, __global const int *src_row_offset,\n"
436 " __global const int *src_col, __global int *nnz_per_row) {\n"
438 " int ai = get_global_id(0);\n"
441 " if (ai < nrow) {\n"
442 " nnz_per_row[ai+1] = 0;\n"
443 " for (aj=src_row_offset[ai]; aj<src_row_offset[ai+1]; ++aj)\n"
444 " if (src_col[aj] >= ai)\n"
445 " ++nnz_per_row[ai+1];\n"
450 "__kernel void kernel_csr_compress_count_nrow(__global const int *row_offset, __global const int *col,\n"
451 " __global const ValueType *val, const int nrow, const double drop_off,\n"
452 " __global int *row_offset_new) {\n"
454 " int ai = get_global_id(0);\n"
457 " if (ai < nrow) {\n"
458 " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj) {\n"
460 " if ( (fabs(val[aj]) > drop_off) ||\n"
461 " ( col[aj] == ai))\n"
462 " row_offset_new[ai]++;\n"
468 "__kernel void kernel_csr_compress_copy(__global const int *row_offset, __global const int *col,\n"
469 " __global const ValueType *val, const int nrow, const double drop_off,\n"
470 " __global const int *row_offset_new, __global int *col_new,\n"
471 " __global ValueType *val_new) {\n"
473 " int ai = get_global_id(0);\n"
475 " int ajj = row_offset_new[ai];\n"
477 " if (ai < nrow) {\n"
479 " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj) {\n"
481 " if ( (fabs(val[aj]) > drop_off) ||\n"
482 " ( col[aj] == ai)) {\n"
483 " col_new[ajj] = col[aj];\n"
484 " val_new[ajj] = val[aj];\n"
494 "__kernel void kernel_csr_extract_column_vector(__global const int *row_offset, __global const int *col,\n"
495 " __global const ValueType *val, const int nrow, const int idx,\n"
496 " __global ValueType *vec) {\n"
498 " int ai = get_global_id(0);\n"
501 " if (ai < nrow) {\n"
503 " vec[ai] = (ValueType)(0.0);\n"
505 " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)\n"
506 " if (idx == col[aj])\n"
507 " vec[ai] = val[aj];\n"
514 "__kernel void kernel_csr_replace_column_vector_offset(__global const int *row_offset, __global const int *col,\n"
515 " const int nrow, const int idx,\n"
516 " __global const ValueType *vec, __global int *offset) {\n"
518 " int ai = get_global_id(0);\n"
522 " if (ai < nrow) {\n"
524 " offset[ai+1] = row_offset[ai+1] - row_offset[ai];\n"
526 " for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj) {\n"
527 " if (col[aj] == idx) {\n"
533 " if (add == 1 && vec[ai] != (ValueType)(0.0))\n"
536 " if (add == 0 && vec[ai] == (ValueType)(0.0))\n"
544 "__kernel void kernel_csr_replace_column_vector(__global const int *row_offset, __global const int *col,\n"
545 " __global const ValueType *val, const int nrow, const int idx,\n"
546 " __global const ValueType *vec, __global const int *offset,\n"
547 " __global int *new_col, __global ValueType *new_val) {\n"
549 " int ai = get_global_id(0);\n"
550 " int aj = row_offset[ai];\n"
551 " int k = offset[ai];\n"
553 " if (ai < nrow) {\n"
555 " for (; aj<row_offset[ai+1]; ++aj) {\n"
556 " if (col[aj] < idx) {\n"
557 " new_col[k] = col[aj];\n"
558 " new_val[k] = val[aj];\n"
564 " if (vec[ai] != (ValueType)(0.0)) {\n"
565 " new_col[k] = idx;\n"
566 " new_val[k] = vec[ai];\n"
571 " for (; aj<row_offset[ai+1]; ++aj) {\n"
572 " if (col[aj] > idx) {\n"
573 " new_col[k] = col[aj];\n"
574 " new_val[k] = val[aj];\n"
586 #endif // PARALUTION_OCL_KERNELS_CSR_HPP_
const char * ocl_kernels_csr
Definition: ocl_kernels_csr.hpp:6
Definition: backend_manager.cpp:43