PARALUTION  1.0.0
PARALUTION
ocl_kernels_dia.hpp
Go to the documentation of this file.
1 #ifndef PARALUTION_OCL_KERNELS_DIA_HPP_
2 #define PARALUTION_OCL_KERNELS_DIA_HPP_
3 
4 namespace paralution {
5 
// OpenCL C program text for the diagonal-storage ("dia") sparse
// matrix-vector kernels, held as a single C string. The adjacent string
// literals below are concatenated by the compiler into one literal.
//
// The text defines two kernels that share the same accumulation loop:
//   kernel_dia_spmv     : y[row]  = sum
//   kernel_dia_add_spmv : y[row] += scalar * sum
// where, for every diagonal n in [0, num_diags),
//   sum += Aval[n * num_rows + row] * x[row + Aoffsets[n]]
// is applied only when the column (row + Aoffsets[n]) lies in [0, num_cols).
//
// `ValueType` is used but never defined inside this string; presumably the
// host defines it when the program is built -- TODO confirm against the
// code that compiles this source.
//
// NOTE(review): this defines a non-const pointer at namespace scope inside a
// header. If the header is included from more than one translation unit this
// looks like it would yield multiple definitions of the symbol -- confirm how
// the header is consumed.
6 const char *ocl_kernels_dia =
7 // Nathan Bell and Michael Garland
8 // Efficient Sparse Matrix-Vector Multiplication on {CUDA}
9 // NVR-2008-004 / NVIDIA Technical Report
// kernel_dia_spmv: overwrites y[row] with the accumulated product for the
// row selected by the work-item's global id in dimension 0.
10  "__kernel void kernel_dia_spmv(const int num_rows, \n"
11  " const int num_cols, \n"
12  " const int num_diags,\n"
13  " __global const int *Aoffsets,\n"
14  " __global const ValueType *Aval, \n"
15  " __global const ValueType *x, \n"
16  " __global ValueType *y) {\n"
17  "\n"
18  " int row = get_global_id(0);\n"
19  "\n"
// Work-items whose global id is >= num_rows fall through without touching y.
20  " if (row < num_rows) {\n"
21  "\n"
22  " ValueType sum = (ValueType)(0.0);\n"
23  "\n"
24  " for (int n=0; n<num_diags; ++n) {\n"
25  "\n"
// Aval is addressed diagonal by diagonal: entry (diagonal n, row) is at
// n * num_rows + row. Aoffsets[n] is added to the row index to obtain the
// column read from x.
// NOTE(review): the index is computed in int arithmetic; confirm that
// num_diags * num_rows stays within int range for the intended matrix sizes.
26  " const int ind = n * num_rows + row;\n"
27  " const int col = row + Aoffsets[n];\n"
28  " \n"
// Entries whose column falls outside [0, num_cols) are skipped, so Aval[ind]
// is not read for them.
29  " if ((col >= 0) && (col < num_cols))\n"
30  " sum += Aval[ind] * x[col];\n"
31  "\n"
32  " }\n"
33  " \n"
34  " y[row] = sum;\n"
35  "\n"
36  " }\n"
37  "\n"
38  "}\n"
39  "\n"
40 // Nathan Bell and Michael Garland
41 // Efficient Sparse Matrix-Vector Multiplication on {CUDA}
42 // NVR-2008-004 / NVIDIA Technical Report
// kernel_dia_add_spmv: same accumulation as kernel_dia_spmv, but the result
// is scaled by `scalar` and added to the existing value of y[row] instead of
// replacing it.
43  "__kernel void kernel_dia_add_spmv(const int num_rows,\n"
44  " const int num_cols,\n"
45  " const int num_diags,\n"
46  " __global const int *Aoffsets,\n"
47  " __global const ValueType *Aval, \n"
48  " const ValueType scalar,\n"
49  " __global const ValueType *x, \n"
50  " __global ValueType *y) {\n"
51  "\n"
52  " int row = get_global_id(0);\n"
53  "\n"
// Work-items whose global id is >= num_rows leave y untouched.
54  " if (row < num_rows) {\n"
55  "\n"
56  " ValueType sum = (ValueType)(0.0);\n"
57  "\n"
58  " for (int n=0; n<num_diags; ++n) {\n"
59  "\n"
// Same addressing as in kernel_dia_spmv: value at n * num_rows + row,
// column at row + Aoffsets[n].
60  " const int ind = n * num_rows + row;\n"
61  " const int col = row + Aoffsets[n];\n"
62  " \n"
63  " if ((col >= 0) && (col < num_cols))\n"
64  " sum += Aval[ind] * x[col];\n"
65  "\n"
66  " }\n"
67  "\n"
// Read-modify-write of y[row]: the previous contents of y are part of the
// result, so y must hold meaningful data before this kernel runs.
68  " y[row] += scalar * sum;\n"
69  "\n"
70  " }\n"
71  "\n"
72  "}\n"
73  "\n"
74  "\n"
75 ;
76 }
77 
78 #endif // PARALUTION_OCL_KERNELS_DIA_HPP_
Definition: backend_manager.cpp:43
const char * ocl_kernels_dia
Definition: ocl_kernels_dia.hpp:6