PARALUTION
1.0.0
PARALUTION
|
#include "../matrix_formats_ind.hpp"
Go to the source code of this file.
Namespaces | |
paralution | |
Functions | |
template<typename ValueType , typename IndexType > | |
__global__ void | paralution::kernel_coo_permute (const IndexType nnz, const IndexType *in_row, const IndexType *in_col, const IndexType *perm, IndexType *out_row, IndexType *out_col) |
template<typename IndexType , typename ValueType > | |
__device__ ValueType | paralution::segreduce_warp (const IndexType thread_lane, IndexType row, ValueType val, IndexType *rows, ValueType *vals) |
template<typename IndexType , typename ValueType > | |
__device__ void | paralution::segreduce_block (const IndexType *idx, ValueType *val) |
template<typename IndexType , typename ValueType , unsigned int BLOCK_SIZE, unsigned int WARP_SIZE> | |
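__launch_bounds__ (BLOCK_SIZE, 1) __global__ void | paralution::kernel_spmv_coo_flat (const IndexType num_nonzeros, const IndexType interval_size, const IndexType *I, const IndexType *J, const ValueType *V, const ValueType scalar, const ValueType *x, ValueType *y, IndexType *temp_rows, ValueType *temp_vals) |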
paralution::if (interval_end2 > num_nonzeros) interval_end2 | |
paralution::if (interval_begin >=interval_end) return | |
paralution::if (thread_lane==31) | |
paralution::for (IndexType n=interval_begin+thread_lane;n< interval_end;n+=WARP_SIZE) | |
template<typename IndexType , typename ValueType , unsigned int BLOCK_SIZE> | |
paralution::__launch_bounds__ (BLOCK_SIZE, 1) __global__ void kernel_spmv_coo_reduce_update(const IndexType num_warps | |
paralution::if (threadIdx.x==0) | |
paralution::__syncthreads () | |
paralution::while (i< end) | |
paralution::if (end< num_warps) | |
template<typename IndexType , typename ValueType > | |
__global__ void | paralution::kernel_spmv_coo_serial (const IndexType num_entries, const IndexType *I, const IndexType *J, const ValueType *V, const ValueType scalar, const ValueType *x, ValueType *y) |
Variables | |
const IndexType | paralution::interval_size |
const IndexType * | paralution::I
const IndexType * | paralution::J
const ValueType * | paralution::V
const ValueType | paralution::scalar
const ValueType * | paralution::x
ValueType * | paralution::y
IndexType * | paralution::temp_rows
ValueType * | paralution::temp_vals
__shared__ volatile ValueType | paralution::vals [BLOCK_SIZE] |
const IndexType | paralution::thread_id = BLOCK_SIZE * blockIdx.x + threadIdx.x |
const IndexType | paralution::thread_lane = threadIdx.x & (WARP_SIZE-1) |
const IndexType | paralution::warp_id = thread_id / WARP_SIZE |
const IndexType | paralution::interval_begin = warp_id * interval_size |
IndexType | paralution::interval_end2 = interval_begin + interval_size |
const IndexType | paralution::interval_end = interval_end2 |
const IndexType | paralution::idx = 16 * (threadIdx.x/32 + 1) + threadIdx.x |
paralution::rows [idx-16] = -1 | |
const IndexType | paralution::end = num_warps - (num_warps & (BLOCK_SIZE - 1)) |
IndexType | paralution::i = threadIdx.x |