PARALUTION  1.0.0
PARALUTION
cuda_kernels_hyb.hpp
Go to the documentation of this file.
1 #ifndef PARALUTION_GPU_CUDA_KERNELS_HYB_HPP_
2 #define PARALUTION_GPU_CUDA_KERNELS_HYB_HPP_
3 
4 #include "cuda_kernels_ell.hpp"
5 #include "cuda_kernels_coo.hpp"
6 #include "../matrix_formats_ind.hpp"
7 
8 namespace paralution {
9 
// Computes, for every row, the number of entries that exceed max_row.
//
// Each thread derives one row index from threadIdx.x/blockIdx.x/blockDim.x;
// threads whose index is not below nrow do nothing.
//
//   nrow        number of rows to process
//   max_row     per-row entry limit (judging by the kernel name, the ELL width)
//   row_offset  row offset array; entries [row] and [row+1] are read, so it
//               must hold at least nrow+1 elements
//   nnz_coo     output, one element per row: row length minus max_row when the
//               row is longer than max_row, 0 otherwise
template <typename IndexType>
__global__ void kernel_ell_nnz_coo(const IndexType nrow, const IndexType max_row,
                                   const IndexType *row_offset, IndexType *nnz_coo) {

  const IndexType row = threadIdx.x + blockIdx.x * blockDim.x;

  // Threads past the last row have nothing to do
  if (row >= nrow)
    return;

  const IndexType row_len = row_offset[row+1] - row_offset[row];

  // Only the part of the row that does not fit into max_row entries is counted
  nnz_coo[row] = (row_len > max_row) ? row_len - max_row : 0;

}
28 
// Copies the leading entries of every row (at most max_row per row) into the
// ELL_col / ELL_val arrays and records how many entries were copied.
//
// Each thread derives one row index from threadIdx.x/blockIdx.x/blockDim.x;
// threads whose index is not below nrow do nothing.
//
//   nrow        number of rows to process
//   max_row     maximum number of entries copied per row
//   row_offset  row offset array; entries [row] and [row+1] are read
//   col, val    source column indices and values, addressed through row_offset
//   ELL_col     output column indices, addressed with ELL_IND(row, n, nrow, max_row)
//   ELL_val     output values, same addressing as ELL_col
//   nnz_ell     output, one element per row: number of entries copied for that row
//
// Slots of a row beyond the copied entries are not written by this kernel.
// NOTE(review): presumably the caller initializes ELL_col / ELL_val beforehand
// - verify against the host code.
template <typename ValueType, typename IndexType>
__global__ void kernel_ell_fill_ell(const IndexType nrow, const IndexType max_row,
                                    const IndexType *row_offset, const IndexType *col,
                                    const ValueType *val, IndexType *ELL_col,
                                    ValueType *ELL_val, IndexType *nnz_ell) {

  const IndexType row = threadIdx.x + blockIdx.x * blockDim.x;

  // Threads past the last row have nothing to do
  if (row >= nrow)
    return;

  const IndexType row_end = row_offset[row+1];

  IndexType src    = row_offset[row];
  IndexType filled = 0;

  // Stop at the end of the row or once max_row entries have been copied,
  // whichever comes first
  while (src < row_end && filled < max_row) {

    const IndexType dst = ELL_IND(row, filled, nrow, max_row);

    ELL_col[dst] = col[src];
    ELL_val[dst] = val[src];

    ++src;
    ++filled;

  }

  nnz_ell[row] = filled;

}
59 
// Copies the trailing nnz_coo[row] entries of every row into the
// COO_row / COO_col / COO_val arrays.
//
// Each thread derives one row index from threadIdx.x/blockIdx.x/blockDim.x;
// threads whose index is not below nrow do nothing.
//
//   nrow        number of rows to process
//   row_offset  row offset array; only entry [row+1] (end of the row) is read
//   col, val    source column indices and values, addressed through row_offset
//   nnz_coo     per row: number of trailing entries of the row to copy
//   nnz_ell     per row: value subtracted from the source position to obtain
//               the destination position in the COO arrays
//   COO_row     output row indices
//   COO_col     output column indices
//   COO_val     output values
//
// NOTE(review): the destination index (source index - nnz_ell[row]) only yields
// a dense COO layout if nnz_ell[row] holds the running total of entries stored
// elsewhere up to and including this row, not the per-row count - presumably
// the caller runs a prefix sum first; verify against the host code.
template <typename ValueType, typename IndexType>
__global__ void kernel_ell_fill_coo(const IndexType nrow, const IndexType *row_offset,
                                    const IndexType *col, const ValueType *val,
                                    const IndexType *nnz_coo, const IndexType *nnz_ell,
                                    IndexType *COO_row, IndexType *COO_col, ValueType *COO_val) {

  const IndexType row = threadIdx.x + blockIdx.x * blockDim.x;

  // Threads past the last row have nothing to do
  if (row >= nrow)
    return;

  const IndexType row_end = row_offset[row+1];

  // The entries to copy are the last nnz_coo[row] entries of the row
  IndexType src = row_end - nnz_coo[row];

  while (src < row_end) {

    const IndexType dst = src - nnz_ell[row];

    COO_row[dst] = row;
    COO_col[dst] = col[src];
    COO_val[dst] = val[src];

    ++src;

  }

}
85 
86 
87 }
88 
89 #endif
IndexType i
Definition: cuda_kernels_coo.hpp:195
#define ELL_IND(row, el, nrow, max_row)
Definition: matrix_formats_ind.hpp:21
const IndexType idx
Definition: cuda_kernels_coo.hpp:115
__global__ void kernel_ell_fill_ell(const IndexType nrow, const IndexType max_row, const IndexType *row_offset, const IndexType *col, const ValueType *val, IndexType *ELL_col, ValueType *ELL_val, IndexType *nnz_ell)
Definition: cuda_kernels_hyb.hpp:30
__global__ void kernel_ell_nnz_coo(const IndexType nrow, const IndexType max_row, const IndexType *row_offset, IndexType *nnz_coo)
Definition: cuda_kernels_hyb.hpp:11
__global__ void kernel_ell_fill_coo(const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *val, const IndexType *nnz_coo, const IndexType *nnz_ell, IndexType *COO_row, IndexType *COO_col, ValueType *COO_val)
Definition: cuda_kernels_hyb.hpp:61
Definition: backend_manager.cpp:43