1 #ifndef PARALUTION_GPU_CUDA_KERNELS_HYB_HPP_
2 #define PARALUTION_GPU_CUDA_KERNELS_HYB_HPP_
6 #include "../matrix_formats_ind.hpp"
10 template <
typename IndexType>
12 const IndexType *row_offset, IndexType *nnz_coo) {
14 IndexType gid = threadIdx.x + blockIdx.x * blockDim.x;
19 IndexType nnz_per_row = row_offset[gid+1] - row_offset[gid];
21 if (nnz_per_row > max_row)
23 nnz_coo[gid] = nnz_per_row - max_row;
29 template <
typename ValueType,
typename IndexType>
31 const IndexType *row_offset,
const IndexType *col,
32 const ValueType *val, IndexType *ELL_col,
33 ValueType *ELL_val, IndexType *nnz_ell) {
35 IndexType gid = threadIdx.x + blockIdx.x * blockDim.x;
41 for (IndexType
i=row_offset[gid];
i<row_offset[gid+1]; ++
i) {
43 if (n >= max_row)
break;
45 IndexType
idx =
ELL_IND(gid, n, nrow, max_row);
47 ELL_col[
idx] = col[
i];
48 ELL_val[
idx] = val[
i];
60 template <
typename ValueType,
typename IndexType>
62 const IndexType *col,
const ValueType *val,
63 const IndexType *nnz_coo,
const IndexType *nnz_ell,
64 IndexType *COO_row, IndexType *COO_col, ValueType *COO_val) {
66 IndexType gid = threadIdx.x + blockIdx.x * blockDim.x;
70 IndexType row_ptr = row_offset[gid+1];
72 for (IndexType
i=row_ptr - nnz_coo[gid];
i<row_ptr; ++
i) {
74 IndexType
idx =
i - nnz_ell[gid];
77 COO_col[
idx] = col[
i];
78 COO_val[
idx] = val[
i];
IndexType i
Definition: cuda_kernels_coo.hpp:195
const IndexType idx
Definition: cuda_kernels_coo.hpp:115
__global__ void kernel_ell_fill_ell(const IndexType nrow, const IndexType max_row, const IndexType *row_offset, const IndexType *col, const ValueType *val, IndexType *ELL_col, ValueType *ELL_val, IndexType *nnz_ell)
Definition: cuda_kernels_hyb.hpp:30
__global__ void kernel_ell_nnz_coo(const IndexType nrow, const IndexType max_row, const IndexType *row_offset, IndexType *nnz_coo)
Definition: cuda_kernels_hyb.hpp:11
__global__ void kernel_ell_fill_coo(const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *val, const IndexType *nnz_coo, const IndexType *nnz_ell, IndexType *COO_row, IndexType *COO_col, ValueType *COO_val)
Definition: cuda_kernels_hyb.hpp:61
Definition: backend_manager.cpp:43