PARALUTION  1.0.0
PARALUTION
cuda_kernels_dia.hpp
Go to the documentation of this file.
1 #ifndef PARALUTION_GPU_CUDA_KERNELS_DIA_HPP_
2 #define PARALUTION_GPU_CUDA_KERNELS_DIA_HPP_
3 
4 #include "../matrix_formats_ind.hpp"
5 
6 namespace paralution {
7 
8 // Nathan Bell and Michael Garland
9 // Efficient Sparse Matrix-Vector Multiplication on {CUDA}
10 // NVR-2008-004 / NVIDIA Technical Report
11 template <typename ValueType, typename IndexType>
12 __global__ void kernel_dia_spmv(const IndexType num_rows,
13  const IndexType num_cols,
14  const IndexType num_diags,
15  const IndexType *Aoffsets,
16  const ValueType *Aval,
17  const ValueType *x,
18  ValueType *y) {
19 
20  int row = blockDim.x * blockIdx.x + threadIdx.x;
21 
22  if (row < num_rows) {
23 
24  ValueType sum = ValueType(0.0);
25 
26  for (IndexType n=0; n<num_diags; ++n) {
27 
28  const IndexType ind = DIA_IND(row, n, num_rows, num_diags);
29  const IndexType col = row + Aoffsets[n];
30 
31  if ((col >= 0) && (col < num_cols))
32  sum = sum + Aval[ind] * x[col];
33 
34  }
35 
36  y[row] = sum;
37 
38  }
39 
40 }
41 
42 // Nathan Bell and Michael Garland
43 // Efficient Sparse Matrix-Vector Multiplication on {CUDA}
44 // NVR-2008-004 / NVIDIA Technical Report
45 template <typename ValueType, typename IndexType>
46 __global__ void kernel_dia_add_spmv(const IndexType num_rows,
47  const IndexType num_cols,
48  const IndexType num_diags,
49  const IndexType *Aoffsets,
50  const ValueType *Aval,
51  const ValueType scalar,
52  const ValueType *x,
53  ValueType *y) {
54 
55  int row = blockDim.x * blockIdx.x + threadIdx.x;
56 
57  if (row < num_rows) {
58 
59  ValueType sum = ValueType(0.0);
60 
61  for (IndexType n=0; n<num_diags; ++n) {
62 
63  const IndexType ind = DIA_IND(row, n, num_rows, num_diags);
64  const IndexType col = row + Aoffsets[n];
65 
66  if ((col >= 0) && (col < num_cols)) {
67 
68  sum = sum + Aval[ind] * x[col];
69 
70  }
71 
72  }
73 
74  y[row] = y[row] + scalar*sum;
75 
76  }
77 
78 }
79 
80 template <typename IndexType>
81 __global__ void kernel_dia_diag_map(const IndexType nrow, const IndexType *row_offset,
82  const IndexType *col, IndexType *diag_map) {
83 
84  int row = blockDim.x * blockIdx.x + threadIdx.x;
85  IndexType map_index;
86 
87  if (row < nrow) {
88 
89  for (IndexType j=row_offset[row]; j<row_offset[row+1]; ++j) {
90 
91  map_index = col[j] - row + nrow;
92  diag_map[map_index] = 1;
93 
94  }
95 
96  }
97 
98 }
99 
100 template <typename IndexType>
101 __global__ void kernel_dia_fill_offset(const IndexType nrow, const IndexType ncol,
102  IndexType *diag_map, const IndexType *offset_map,
103  IndexType *offset) {
104 
105  // 1D and 2D indexing
106  int i = threadIdx.x + ( ((gridDim.x * blockIdx.y) + blockIdx.x) * blockDim.x );
107 
108  if (i < nrow+ncol)
109  if (diag_map[i] == 1) {
110  offset[offset_map[i]] = i - nrow;
111  diag_map[i] = offset_map[i];
112  }
113 
114 }
115 
116 template <typename ValueType, typename IndexType>
117 __global__ void kernel_dia_convert(const IndexType nrow, const IndexType ndiag,
118  const IndexType *row_offset, const IndexType *col,
119  const ValueType *val, const IndexType *diag_map,
120  ValueType *dia_val) {
121 
122  int row = blockDim.x * blockIdx.x + threadIdx.x;
123  IndexType map_index;
124 
125  if (row < nrow) {
126 
127  for (int j=row_offset[row]; j<row_offset[row+1]; ++j) {
128 
129  map_index = col[j] - row + nrow;
130  dia_val[DIA_IND(row, diag_map[map_index], nrow, ndiag)] = val[j];
131 
132  }
133 
134  }
135 
136 }
137 
138 
139 }
140 
141 #endif
IndexType i
Definition: cuda_kernels_coo.hpp:195
__global__ void kernel_dia_diag_map(const IndexType nrow, const IndexType *row_offset, const IndexType *col, IndexType *diag_map)
Definition: cuda_kernels_dia.hpp:81
__global__ void kernel_dia_spmv(const IndexType num_rows, const IndexType num_cols, const IndexType num_diags, const IndexType *Aoffsets, const ValueType *Aval, const ValueType *x, ValueType *y)
Definition: cuda_kernels_dia.hpp:12
end if j
Definition: pcg_example.m:22
const IndexType const IndexType const IndexType const ValueType const ValueType scalar
Definition: cuda_kernels_coo.hpp:91
const IndexType const IndexType const IndexType const ValueType const ValueType const ValueType ValueType * y
Definition: cuda_kernels_coo.hpp:91
#define DIA_IND(row, el, nrow, ndiag)
Definition: matrix_formats_ind.hpp:31
__global__ void kernel_dia_add_spmv(const IndexType num_rows, const IndexType num_cols, const IndexType num_diags, const IndexType *Aoffsets, const ValueType *Aval, const ValueType scalar, const ValueType *x, ValueType *y)
Definition: cuda_kernels_dia.hpp:46
Definition: backend_manager.cpp:43
const IndexType const IndexType const IndexType const ValueType const ValueType const ValueType * x
Definition: cuda_kernels_coo.hpp:91
__global__ void kernel_dia_fill_offset(const IndexType nrow, const IndexType ncol, IndexType *diag_map, const IndexType *offset_map, IndexType *offset)
Definition: cuda_kernels_dia.hpp:101
__global__ void kernel_dia_convert(const IndexType nrow, const IndexType ndiag, const IndexType *row_offset, const IndexType *col, const ValueType *val, const IndexType *diag_map, ValueType *dia_val)
Definition: cuda_kernels_dia.hpp:117