1 #ifndef PARALUTION_GPU_CUDA_KERNELS_MCSR_HPP_
2 #define PARALUTION_GPU_CUDA_KERNELS_MCSR_HPP_
4 #include "../matrix_formats_ind.hpp"
// Sparse matrix-vector product out = A * in for a matrix stored in MCSR
// layout, computed with one thread per row.
//
// Launch layout: 1D grid of 1D blocks (only the .x components are used).
// There is no stride loop, so gridDim.x * blockDim.x must be >= nrow for
// every row to be processed. No shared memory is used.
//
// Parameters:
//   nrow        number of rows; threads with a global index >= nrow do nothing
//   row_offset  nrow + 1 offsets; entries row_offset[i] .. row_offset[i+1]-1
//               of val/col belong to row i
//   col         column index for each entry addressed through row_offset
//   val         matrix values; val[i] for i < nrow is multiplied with in[i]
//               (NOTE(review): presumably the diagonal entries of the MCSR
//               layout -- confirm against matrix_formats_ind.hpp)
//   in          input vector
//   out         output vector; out[i] is overwritten for every i < nrow
template <typename ValueType, typename IndexType>
__global__ void kernel_mcsr_spmv_scalar(const IndexType nrow, const IndexType *row_offset,
                                        const IndexType *col, const ValueType *val,
                                        const ValueType *in, ValueType *out) {

  // Global thread index doubles as the row index.
  IndexType ai = blockIdx.x*blockDim.x+threadIdx.x;
  IndexType aj;

  // The grid rarely divides nrow evenly; surplus threads must not touch memory.
  if (ai < nrow) {

    // Start from the entry stored at position ai, which pairs with in[ai].
    ValueType sum = val[ai] * in[ai];

    // Accumulate the remaining entries of this row.
    for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj)
      sum = sum + val[aj]*in[col[aj]];

    // Single store of the finished row result.
    out[ai] = sum;

  }

}
// Scaled sparse matrix-vector accumulate out = out + scalar * A * in for a
// matrix stored in MCSR layout, computed with one thread per row.
//
// Launch layout: 1D grid of 1D blocks (only the .x components are used).
// There is no stride loop, so gridDim.x * blockDim.x must be >= nrow for
// every row to be processed. No shared memory is used.
//
// Parameters:
//   nrow        number of rows; threads with a global index >= nrow do nothing
//   row_offset  nrow + 1 offsets; entries row_offset[i] .. row_offset[i+1]-1
//               of val/col belong to row i
//   col         column index for each entry addressed through row_offset
//   val         matrix values; val[i] for i < nrow is multiplied with in[i]
//               (NOTE(review): presumably the diagonal entries of the MCSR
//               layout -- confirm against matrix_formats_ind.hpp)
//   scalar      factor applied to every matrix-vector term before it is added
//   in          input vector
//   out         vector updated in place; must hold valid data on entry
template <typename ValueType, typename IndexType>
__global__ void kernel_mcsr_add_spmv_scalar(const IndexType nrow, const IndexType *row_offset,
                                            const IndexType *col, const ValueType *val,
                                            const ValueType scalar,
                                            const ValueType *in, ValueType *out) {

  // Global thread index doubles as the row index.
  IndexType ai = blockIdx.x*blockDim.x+threadIdx.x;
  IndexType aj;

  // The grid rarely divides nrow evenly; surplus threads must not touch memory.
  if (ai < nrow) {

    // Contribution of the entry stored at position ai, which pairs with in[ai].
    out[ai] = out[ai] + scalar*val[ai] * in[ai];

    // Contributions of the remaining entries of this row, added one by one.
    for (aj=row_offset[ai]; aj<row_offset[ai+1]; ++aj) {
      out[ai] = out[ai] + scalar*val[aj]*in[col[aj]];
    }

  }

}
const IndexType const IndexType const IndexType const ValueType const ValueType scalar
Definition: cuda_kernels_coo.hpp:91
Definition: backend_manager.cpp:43
__global__ void kernel_mcsr_add_spmv_scalar(const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *val, const ValueType scalar, const ValueType *in, ValueType *out)
Definition: cuda_kernels_mcsr.hpp:31
__global__ void kernel_mcsr_spmv_scalar(const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *val, const ValueType *in, ValueType *out)
Definition: cuda_kernels_mcsr.hpp:9