1 #ifndef PARALUTION_GPU_GPU_UTILS_HPP_
2 #define PARALUTION_GPU_GPU_UTILS_HPP_
4 #include "../backend_manager.hpp"
5 #include "../../utils/log.hpp"
13 #include <cublas_v2.h>
14 #include <cusparse_v2.h>
// Yields the cublasHandle_t stored behind `handle`: the argument is cast to
// cublasHandle_t* and dereferenced.
// The whole expansion is parenthesized so that operators written after the
// macro call (->, [], .) apply to the dereferenced handle and not to the cast.
#define CUBLAS_HANDLE(handle) (*static_cast<cublasHandle_t*>(handle))
// Yields the cusparseHandle_t stored behind `handle`: the argument is cast to
// cusparseHandle_t* and dereferenced.
// The whole expansion is parenthesized so that operators written after the
// macro call (->, [], .) apply to the dereferenced handle and not to the cast.
#define CUSPARSE_HANDLE(handle) (*static_cast<cusparseHandle_t*>(handle))
// CHECK_CUDA_ERROR(file, line): polls the CUDA runtime for a pending error.
// The result of cudaGetLastError() (which also resets the stored error) is
// assigned to err_t; when it is not cudaSuccess, the text returned by
// cudaGetErrorString() and the supplied file name and line are logged through
// LOG_INFO. Intended to be placed after CUDA API calls and kernel launches,
// typically as CHECK_CUDA_ERROR(__FILE__, __LINE__).
// NOTE(review): the declaration of err_t and the closing lines of this macro
// are not visible in this listing -- confirm what happens after the two log
// messages against the full header.
19 #define CHECK_CUDA_ERROR(file, line) { \
21 if ((err_t = cudaGetLastError() ) != cudaSuccess) { \
22 LOG_INFO("Cuda error: " << cudaGetErrorString(err_t)); \
23 LOG_INFO("File: " << file << "; line: " << line); \
// CHECK_CUBLAS_ERROR(stat_t, file, line): reports a failed cuBLAS call.
// When stat_t differs from CUBLAS_STATUS_SUCCESS it logs "Cublas error!", then
// the name of the status if it equals one of the seven codes compared below,
// and finally the supplied file name and line, all through LOG_INFO.
// A status that matches none of the listed codes yields only the generic
// message and the location.
// stat_t is expanded in every comparison, so pass a stored status value rather
// than the call expression itself.
// NOTE(review): the closing lines of this macro are not visible in this
// listing, so whatever follows the file/line message is not documented here.
28 #define CHECK_CUBLAS_ERROR(stat_t, file, line) { \
29 if (stat_t != CUBLAS_STATUS_SUCCESS) { \
30 LOG_INFO("Cublas error!"); \
31 if (stat_t == CUBLAS_STATUS_NOT_INITIALIZED) \
32 LOG_INFO("CUBLAS_STATUS_NOT_INITIALIZED"); \
33 if (stat_t == CUBLAS_STATUS_ALLOC_FAILED) \
34 LOG_INFO("CUBLAS_STATUS_ALLOC_FAILED"); \
35 if (stat_t == CUBLAS_STATUS_INVALID_VALUE) \
36 LOG_INFO("CUBLAS_STATUS_INVALID_VALUE"); \
37 if (stat_t == CUBLAS_STATUS_ARCH_MISMATCH) \
38 LOG_INFO("CUBLAS_STATUS_ARCH_MISMATCH"); \
39 if (stat_t == CUBLAS_STATUS_MAPPING_ERROR) \
40 LOG_INFO("CUBLAS_STATUS_MAPPING_ERROR"); \
41 if (stat_t == CUBLAS_STATUS_EXECUTION_FAILED) \
42 LOG_INFO("CUBLAS_STATUS_EXECUTION_FAILED"); \
43 if (stat_t == CUBLAS_STATUS_INTERNAL_ERROR) \
44 LOG_INFO("CUBLAS_STATUS_INTERNAL_ERROR"); \
45 LOG_INFO("File: " << file << "; line: " << line); \
// CHECK_CUSPARSE_ERROR(stat_t, file, line): reports a failed cuSPARSE call.
// When stat_t differs from CUSPARSE_STATUS_SUCCESS it logs "Cusparse error!",
// then the name of the status if it equals one of the eight codes compared
// below, and finally the supplied file name and line, all through LOG_INFO.
// A status that matches none of the listed codes yields only the generic
// message and the location.
// stat_t is expanded in every comparison, so pass a stored status value rather
// than the call expression itself.
// NOTE(review): the closing lines of this macro are not visible in this
// listing, so whatever follows the file/line message is not documented here.
50 #define CHECK_CUSPARSE_ERROR(stat_t, file, line) { \
51 if (stat_t != CUSPARSE_STATUS_SUCCESS) { \
52 LOG_INFO("Cusparse error!"); \
53 if (stat_t == CUSPARSE_STATUS_NOT_INITIALIZED) \
54 LOG_INFO("CUSPARSE_STATUS_NOT_INITIALIZED"); \
55 if (stat_t == CUSPARSE_STATUS_ALLOC_FAILED) \
56 LOG_INFO("CUSPARSE_STATUS_ALLOC_FAILED"); \
57 if (stat_t == CUSPARSE_STATUS_INVALID_VALUE) \
58 LOG_INFO("CUSPARSE_STATUS_INVALID_VALUE"); \
59 if (stat_t == CUSPARSE_STATUS_ARCH_MISMATCH) \
60 LOG_INFO("CUSPARSE_STATUS_ARCH_MISMATCH"); \
61 if (stat_t == CUSPARSE_STATUS_MAPPING_ERROR) \
62 LOG_INFO("CUSPARSE_STATUS_MAPPING_ERROR"); \
63 if (stat_t == CUSPARSE_STATUS_EXECUTION_FAILED) \
64 LOG_INFO("CUSPARSE_STATUS_EXECUTION_FAILED"); \
65 if (stat_t == CUSPARSE_STATUS_INTERNAL_ERROR) \
66 LOG_INFO("CUSPARSE_STATUS_INTERNAL_ERROR"); \
67 if (stat_t == CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED) \
68 LOG_INFO("CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED"); \
69 LOG_INFO("File: " << file << "; line: " << line); \
// cum_sum<IndexType, BLOCK_SIZE>(dst, src, numElems)
//
// Clears numElems+1 entries of dst, then launches kernel_red_partial_sum,
// kernel_red_recurse and kernel_red_extrapolate in that order, using a
// temporary device buffer of numElems+1 entries that is released afterwards.
// dst must therefore provide room for at least numElems+1 elements.
// Presumably the result is the cumulative sum of src stored from dst+1 on
// (judging by the function and kernel names) -- TODO confirm against the
// kernel definitions, which are not part of this listing.
// NOTE(review): the line carrying the return type, the function name and the
// dst/src parameters, as well as the end of the body, are missing from this
// listing; the visible lines are kept as they appear.
76 template <
typename IndexType,
unsigned int BLOCK_SIZE>
79 const IndexType numElems) {
// Zero all numElems+1 output entries before the kernels write into dst.
81 cudaMemset(dst, 0, (numElems+1)*
sizeof(IndexType));
// Scratch buffer with the same length as dst, also cleared to zero.
84 IndexType* d_temp = NULL;
85 allocate_gpu<IndexType>(numElems+1, &d_temp);
87 cudaMemset(d_temp, 0, (numElems+1)*
sizeof(IndexType));
// First pass: numElems/BLOCK_SIZE+1 blocks of BLOCK_SIZE threads, output
// starting at dst+1.
90 kernel_red_partial_sum <IndexType, BLOCK_SIZE> <<< numElems/BLOCK_SIZE+1, BLOCK_SIZE>>>(dst+1, src, numElems);
// Second and third pass: numElems/(BLOCK_SIZE*BLOCK_SIZE)+1 blocks of
// BLOCK_SIZE threads each, both working with the scratch buffer.
93 kernel_red_recurse <IndexType> <<< numElems/(BLOCK_SIZE*BLOCK_SIZE)+1, BLOCK_SIZE>>>(d_temp, dst+BLOCK_SIZE, BLOCK_SIZE, (numElems+1));
96 kernel_red_extrapolate<IndexType> <<< numElems/(BLOCK_SIZE*BLOCK_SIZE)+1, BLOCK_SIZE>>>(dst+1, d_temp, src, numElems);
// Release the scratch buffer with the element type it was allocated with;
// free_gpu<int> only matched d_temp when IndexType was int.
98 free_gpu<IndexType>(&d_temp);
107 #endif // PARALUTION_GPU_GPU_UTILS_HPP_
#define CHECK_CUDA_ERROR(file, line)
Definition: gpu_utils.hpp:19
Definition: backend_manager.cpp:43
bool cum_sum(IndexType *dst, const IndexType *src, const IndexType numElems)
Definition: gpu_utils.hpp:77