PARALUTION  1.0.0
PARALUTION
gpu_utils.hpp
Go to the documentation of this file.
1 #ifndef PARALUTION_GPU_GPU_UTILS_HPP_
2 #define PARALUTION_GPU_GPU_UTILS_HPP_
3 
4 #include "../backend_manager.hpp"
5 #include "../../utils/log.hpp"
6 #include "backend_gpu.hpp"
8 #include "gpu_allocate_free.hpp"
9 
10 #include <stdlib.h>
11 
12 #include <cuda.h>
13 #include <cublas_v2.h>
14 #include <cusparse_v2.h>
15 
// Casts the pointer `handle` to cublasHandle_t* and dereferences it, yielding
// the cublasHandle_t lvalue it points to.
// The whole expansion is parenthesized so that operators written next to the
// macro at the call site (e.g. a postfix operator) apply to the dereferenced
// handle rather than to the result of the static_cast.
#define CUBLAS_HANDLE(handle) (*static_cast<cublasHandle_t*>(handle))
// Casts the pointer `handle` to cusparseHandle_t* and dereferences it, yielding
// the cusparseHandle_t lvalue it points to.
// The whole expansion is parenthesized so that operators written next to the
// macro at the call site (e.g. a postfix operator) apply to the dereferenced
// handle rather than to the result of the static_cast.
#define CUSPARSE_HANDLE(handle) (*static_cast<cusparseHandle_t*>(handle))
18 
// Reads (and thereby clears) the last CUDA runtime error with
// cudaGetLastError(). If it is anything other than cudaSuccess, the error
// string and the supplied file/line are written through LOG_INFO and the
// process is terminated with exit(1).
//
// Expands to a braced block. Typical call form:
//   CHECK_CUDA_ERROR(__FILE__, __LINE__);
#define CHECK_CUDA_ERROR(file, line) {                                 \
    const cudaError_t last_cuda_error = cudaGetLastError();            \
    if (last_cuda_error != cudaSuccess) {                              \
      LOG_INFO("Cuda error: " << cudaGetErrorString(last_cuda_error)); \
      LOG_INFO("File: " << file << "; line: " << line);                \
      exit(1);                                                         \
    }                                                                  \
  }
27 
// Checks a cuBLAS status value. If it is not CUBLAS_STATUS_SUCCESS, logs
// "Cublas error!", the symbolic name of the status (or its numeric value when
// it is not one of the names listed below), the supplied file/line, and then
// terminates the process with exit(1).
//
// stat_t : expression of type cublasStatus_t. It is evaluated exactly once, so
//          passing a cuBLAS call directly does not execute it repeatedly.
// file   : streamable source-file identifier (typically __FILE__).
// line   : streamable line identifier (typically __LINE__).
//
// Expands to a braced block. Typical call form:
//   CHECK_CUBLAS_ERROR(stat_t, __FILE__, __LINE__);
#define CHECK_CUBLAS_ERROR(stat_t, file, line) {                            \
    const cublasStatus_t paralution_cublas_status_ = (stat_t);              \
    if (paralution_cublas_status_ != CUBLAS_STATUS_SUCCESS) {               \
      LOG_INFO("Cublas error!");                                            \
      switch (paralution_cublas_status_) {                                  \
        case CUBLAS_STATUS_NOT_INITIALIZED:                                 \
          LOG_INFO("CUBLAS_STATUS_NOT_INITIALIZED");                        \
          break;                                                            \
        case CUBLAS_STATUS_ALLOC_FAILED:                                    \
          LOG_INFO("CUBLAS_STATUS_ALLOC_FAILED");                           \
          break;                                                            \
        case CUBLAS_STATUS_INVALID_VALUE:                                   \
          LOG_INFO("CUBLAS_STATUS_INVALID_VALUE");                          \
          break;                                                            \
        case CUBLAS_STATUS_ARCH_MISMATCH:                                   \
          LOG_INFO("CUBLAS_STATUS_ARCH_MISMATCH");                          \
          break;                                                            \
        case CUBLAS_STATUS_MAPPING_ERROR:                                   \
          LOG_INFO("CUBLAS_STATUS_MAPPING_ERROR");                          \
          break;                                                            \
        case CUBLAS_STATUS_EXECUTION_FAILED:                                \
          LOG_INFO("CUBLAS_STATUS_EXECUTION_FAILED");                       \
          break;                                                            \
        case CUBLAS_STATUS_INTERNAL_ERROR:                                  \
          LOG_INFO("CUBLAS_STATUS_INTERNAL_ERROR");                         \
          break;                                                            \
        default:                                                            \
          /* Status not named above: report its numeric value. */           \
          LOG_INFO("Unrecognized cuBLAS status code: "                      \
                   << static_cast<int>(paralution_cublas_status_));         \
          break;                                                            \
      }                                                                     \
      LOG_INFO("File: " << file << "; line: " << line);                     \
      exit(1);                                                              \
    }                                                                       \
  }
49 
// Checks a cuSPARSE status value. If it is not CUSPARSE_STATUS_SUCCESS, logs
// "Cusparse error!", the symbolic name of the status (or its numeric value
// when it is not one of the names listed below), the supplied file/line, and
// then terminates the process with exit(1).
//
// stat_t : expression of type cusparseStatus_t. It is evaluated exactly once,
//          so passing a cuSPARSE call directly does not execute it repeatedly.
// file   : streamable source-file identifier (typically __FILE__).
// line   : streamable line identifier (typically __LINE__).
//
// Expands to a braced block. Typical call form:
//   CHECK_CUSPARSE_ERROR(stat_t, __FILE__, __LINE__);
#define CHECK_CUSPARSE_ERROR(stat_t, file, line) {                          \
    const cusparseStatus_t paralution_cusparse_status_ = (stat_t);          \
    if (paralution_cusparse_status_ != CUSPARSE_STATUS_SUCCESS) {           \
      LOG_INFO("Cusparse error!");                                          \
      switch (paralution_cusparse_status_) {                                \
        case CUSPARSE_STATUS_NOT_INITIALIZED:                               \
          LOG_INFO("CUSPARSE_STATUS_NOT_INITIALIZED");                      \
          break;                                                            \
        case CUSPARSE_STATUS_ALLOC_FAILED:                                  \
          LOG_INFO("CUSPARSE_STATUS_ALLOC_FAILED");                         \
          break;                                                            \
        case CUSPARSE_STATUS_INVALID_VALUE:                                 \
          LOG_INFO("CUSPARSE_STATUS_INVALID_VALUE");                        \
          break;                                                            \
        case CUSPARSE_STATUS_ARCH_MISMATCH:                                 \
          LOG_INFO("CUSPARSE_STATUS_ARCH_MISMATCH");                        \
          break;                                                            \
        case CUSPARSE_STATUS_MAPPING_ERROR:                                 \
          LOG_INFO("CUSPARSE_STATUS_MAPPING_ERROR");                        \
          break;                                                            \
        case CUSPARSE_STATUS_EXECUTION_FAILED:                              \
          LOG_INFO("CUSPARSE_STATUS_EXECUTION_FAILED");                     \
          break;                                                            \
        case CUSPARSE_STATUS_INTERNAL_ERROR:                                \
          LOG_INFO("CUSPARSE_STATUS_INTERNAL_ERROR");                       \
          break;                                                            \
        case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:                     \
          LOG_INFO("CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED");            \
          break;                                                            \
        default:                                                            \
          /* Status not named above: report its numeric value. */           \
          LOG_INFO("Unrecognized cuSPARSE status code: "                    \
                   << static_cast<int>(paralution_cusparse_status_));       \
          break;                                                            \
      }                                                                     \
      LOG_INFO("File: " << file << "; line: " << line);                     \
      exit(1);                                                              \
    }                                                                       \
  }
73 
74 namespace paralution {
75 
76 template <typename IndexType, unsigned int BLOCK_SIZE>
77 bool cum_sum( IndexType* dst,
78  const IndexType* src,
79  const IndexType numElems) {
80 
81  cudaMemset(dst, 0, (numElems+1)*sizeof(IndexType));
82  CHECK_CUDA_ERROR(__FILE__, __LINE__);
83 
84  IndexType* d_temp = NULL;
85  allocate_gpu<IndexType>(numElems+1, &d_temp);
86 
87  cudaMemset(d_temp, 0, (numElems+1)*sizeof(IndexType));
88  CHECK_CUDA_ERROR(__FILE__, __LINE__);
89 
90  kernel_red_partial_sum <IndexType, BLOCK_SIZE> <<< numElems/BLOCK_SIZE+1, BLOCK_SIZE>>>(dst+1, src, numElems);
91  CHECK_CUDA_ERROR(__FILE__,__LINE__);
92 
93  kernel_red_recurse <IndexType> <<< numElems/(BLOCK_SIZE*BLOCK_SIZE)+1, BLOCK_SIZE>>>(d_temp, dst+BLOCK_SIZE, BLOCK_SIZE, (numElems+1));
94  CHECK_CUDA_ERROR(__FILE__,__LINE__);
95 
96  kernel_red_extrapolate<IndexType> <<< numElems/(BLOCK_SIZE*BLOCK_SIZE)+1, BLOCK_SIZE>>>(dst+1, d_temp, src, numElems);
97  CHECK_CUDA_ERROR(__FILE__,__LINE__);
98  free_gpu<int>(&d_temp);
99 
100  return true;
101 
102 }
103 
104 
105 }
106 
107 #endif // PARALUTION_GPU_GPU_UTILS_HPP_
#define CHECK_CUDA_ERROR(file, line)
Definition: gpu_utils.hpp:19
Definition: backend_manager.cpp:43
bool cum_sum(IndexType *dst, const IndexType *src, const IndexType numElems)
Definition: gpu_utils.hpp:77