PARALUTION  1.0.0
PARALUTION
cuda_kernels_mcsr.hpp
Go to the documentation of this file.
1 #ifndef PARALUTION_GPU_CUDA_KERNELS_MCSR_HPP_
2 #define PARALUTION_GPU_CUDA_KERNELS_MCSR_HPP_
3 
4 #include "../matrix_formats_ind.hpp"
5 
6 namespace paralution {
7 
// Sparse matrix-vector product for a matrix stored in MCSR layout: out = A * in.
//
// Each thread handles the single row selected by its global x index. Only the
// x components of the block/thread indices are used, so a 1D launch providing
// at least nrow threads is expected; threads whose index is >= nrow do nothing.
//
//   nrow        number of rows to process
//   row_offset  row_offset[r] .. row_offset[r+1] delimits the entries of
//               col/val that are summed for row r (nrow+1 values are read)
//   col         index into `in` for each of those entries
//   val         val[r] is the coefficient applied to in[r] (the diagonal
//               term of row r); the remaining coefficients are addressed
//               through row_offset
//   in          input vector
//   out         output vector; out[r] is overwritten with the row result
template <typename ValueType, typename IndexType>
__global__ void kernel_mcsr_spmv_scalar(const IndexType nrow, const IndexType *row_offset,
                                        const IndexType *col, const ValueType *val,
                                        const ValueType *in, ValueType *out) {

  const IndexType row = blockIdx.x*blockDim.x+threadIdx.x;

  // Surplus threads of the last block have no row to work on.
  if (!(row < nrow))
    return;

  // Start from the term stored at val[row], then add the indexed entries.
  ValueType acc = val[row] * in[row];

  const IndexType first = row_offset[row];
  const IndexType last  = row_offset[row+1];

  for (IndexType k = first; k < last; ++k)
    acc = acc + val[k]*in[col[k]];

  out[row] = acc;

}
28 
29 
// Scaled sparse matrix-vector accumulate for a matrix stored in MCSR layout:
// out = out + scalar * (A * in).
//
// Each thread handles the single row selected by its global x index. Only the
// x components of the block/thread indices are used, so a 1D launch providing
// at least nrow threads is expected; threads whose index is >= nrow do nothing.
//
//   nrow        number of rows to process
//   row_offset  row_offset[r] .. row_offset[r+1] delimits the entries of
//               col/val that are summed for row r (nrow+1 values are read)
//   col         index into `in` for each of those entries
//   val         val[r] is the coefficient applied to in[r] (the diagonal
//               term of row r); the remaining coefficients are addressed
//               through row_offset
//   scalar      factor applied to every matrix term before it is added
//   in          input vector
//   out         vector updated in place; out[r] is read once and written once
//
// NOTE(review): assumes `in` and `out` do not overlap. With overlapping
// buffers other threads would read entries of `in` while they are being
// updated, so the result would be launch-order dependent in any case.
template <typename ValueType, typename IndexType>
__global__ void kernel_mcsr_add_spmv_scalar(const IndexType nrow, const IndexType *row_offset,
                                            const IndexType *col, const ValueType *val,
                                            const ValueType scalar,
                                            const ValueType *in, ValueType *out) {

  IndexType ai = blockIdx.x*blockDim.x+threadIdx.x;
  IndexType aj;

  if (ai <nrow) {

    // Keep the running value in a local instead of re-reading and re-writing
    // out[ai] on every iteration: the pointers are not __restrict__, so the
    // compiler would otherwise have to issue a global load and store per term.
    // The terms are added in the same order as before.
    ValueType sum = out[ai] + scalar*val[ai] * in[ai];

    // The row end does not change while the row is processed; read it once.
    const IndexType row_end = row_offset[ai+1];

    for (aj=row_offset[ai]; aj<row_end; ++aj)
      sum = sum + scalar*val[aj]*in[col[aj]];

    // Single store of the finished row value.
    out[ai] = sum;

  }
}
49 
50 
51 }
52 
53 #endif
const IndexType const IndexType const IndexType const ValueType const ValueType scalar
Definition: cuda_kernels_coo.hpp:91
Definition: backend_manager.cpp:43
__global__ void kernel_mcsr_add_spmv_scalar(const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *val, const ValueType scalar, const ValueType *in, ValueType *out)
Definition: cuda_kernels_mcsr.hpp:31
__global__ void kernel_mcsr_spmv_scalar(const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *val, const ValueType *in, ValueType *out)
Definition: cuda_kernels_mcsr.hpp:9