1 #ifndef PARALUTION_GPU_CUDA_KERNELS_DIA_HPP_
2 #define PARALUTION_GPU_CUDA_KERNELS_DIA_HPP_
4 #include "../matrix_formats_ind.hpp"
11 template <
typename ValueType,
typename IndexType>
13 const IndexType num_cols,
14 const IndexType num_diags,
15 const IndexType *Aoffsets,
16 const ValueType *Aval,
20 int row = blockDim.x * blockIdx.x + threadIdx.x;
24 ValueType sum = ValueType(0.0);
26 for (IndexType n=0; n<num_diags; ++n) {
28 const IndexType ind =
DIA_IND(row, n, num_rows, num_diags);
29 const IndexType col = row + Aoffsets[n];
31 if ((col >= 0) && (col < num_cols))
32 sum = sum + Aval[ind] * x[col];
45 template <
typename ValueType,
typename IndexType>
47 const IndexType num_cols,
48 const IndexType num_diags,
49 const IndexType *Aoffsets,
50 const ValueType *Aval,
55 int row = blockDim.x * blockIdx.x + threadIdx.x;
59 ValueType sum = ValueType(0.0);
61 for (IndexType n=0; n<num_diags; ++n) {
63 const IndexType ind =
DIA_IND(row, n, num_rows, num_diags);
64 const IndexType col = row + Aoffsets[n];
66 if ((col >= 0) && (col < num_cols)) {
68 sum = sum + Aval[ind] * x[col];
74 y[row] = y[row] + scalar*sum;
80 template <
typename IndexType>
82 const IndexType *col, IndexType *diag_map) {
84 int row = blockDim.x * blockIdx.x + threadIdx.x;
89 for (IndexType
j=row_offset[row];
j<row_offset[row+1]; ++
j) {
91 map_index = col[
j] - row + nrow;
92 diag_map[map_index] = 1;
100 template <
typename IndexType>
102 IndexType *diag_map,
const IndexType *offset_map,
106 int i = threadIdx.x + ( ((gridDim.x * blockIdx.y) + blockIdx.x) * blockDim.x );
109 if (diag_map[i] == 1) {
110 offset[offset_map[
i]] = i - nrow;
111 diag_map[
i] = offset_map[
i];
116 template <
typename ValueType,
typename IndexType>
118 const IndexType *row_offset,
const IndexType *col,
119 const ValueType *val,
const IndexType *diag_map,
120 ValueType *dia_val) {
122 int row = blockDim.x * blockIdx.x + threadIdx.x;
127 for (
int j=row_offset[row];
j<row_offset[row+1]; ++
j) {
129 map_index = col[
j] - row + nrow;
130 dia_val[
DIA_IND(row, diag_map[map_index], nrow, ndiag)] = val[
j];
IndexType i
Definition: cuda_kernels_coo.hpp:195
__global__ void kernel_dia_diag_map(const IndexType nrow, const IndexType *row_offset, const IndexType *col, IndexType *diag_map)
Definition: cuda_kernels_dia.hpp:81
__global__ void kernel_dia_spmv(const IndexType num_rows, const IndexType num_cols, const IndexType num_diags, const IndexType *Aoffsets, const ValueType *Aval, const ValueType *x, ValueType *y)
Definition: cuda_kernels_dia.hpp:12
end if j
Definition: pcg_example.m:22
const IndexType const IndexType const IndexType const ValueType const ValueType scalar
Definition: cuda_kernels_coo.hpp:91
const IndexType const IndexType const IndexType const ValueType const ValueType const ValueType ValueType * y
Definition: cuda_kernels_coo.hpp:91
__global__ void kernel_dia_add_spmv(const IndexType num_rows, const IndexType num_cols, const IndexType num_diags, const IndexType *Aoffsets, const ValueType *Aval, const ValueType scalar, const ValueType *x, ValueType *y)
Definition: cuda_kernels_dia.hpp:46
Definition: backend_manager.cpp:43
const IndexType const IndexType const IndexType const ValueType const ValueType const ValueType * x
Definition: cuda_kernels_coo.hpp:91
__global__ void kernel_dia_fill_offset(const IndexType nrow, const IndexType ncol, IndexType *diag_map, const IndexType *offset_map, IndexType *offset)
Definition: cuda_kernels_dia.hpp:101
__global__ void kernel_dia_convert(const IndexType nrow, const IndexType ndiag, const IndexType *row_offset, const IndexType *col, const ValueType *val, const IndexType *diag_map, ValueType *dia_val)
Definition: cuda_kernels_dia.hpp:117