PARALUTION  1.0.0
PARALUTION
cuda_kernels_coo.hpp File Reference

Go to the source code of this file.

Namespaces

 paralution
 

Functions

template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_coo_permute (const IndexType nnz, const IndexType *in_row, const IndexType *in_col, const IndexType *perm, IndexType *out_row, IndexType *out_col)
 
template<typename IndexType , typename ValueType >
__device__ ValueType paralution::segreduce_warp (const IndexType thread_lane, IndexType row, ValueType val, IndexType *rows, ValueType *vals)
 
template<typename IndexType , typename ValueType >
__device__ void paralution::segreduce_block (const IndexType *idx, ValueType *val)
 
template<typename IndexType , typename ValueType , unsigned int BLOCK_SIZE, unsigned int WARP_SIZE>
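__launch_bounds__ (BLOCK_SIZE, 1) __global__ void paralution::kernel_spmv_coo_flat (const IndexType num_nonzeros, const IndexType interval_size, const IndexType *I, const IndexType *J, const ValueType *V, const ValueType scalar, const ValueType *x, ValueType *y, IndexType *temp_rows, ValueType *temp_vals)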
 
 paralution::if (interval_end2 > num_nonzeros) interval_end2
 
 paralution::if (interval_begin >=interval_end) return
 
 paralution::if (thread_lane==31)
 
 paralution::for (IndexType n=interval_begin+thread_lane;n< interval_end;n+=WARP_SIZE)
 
template<typename IndexType , typename ValueType , unsigned int BLOCK_SIZE>
__launch_bounds__ (BLOCK_SIZE, 1) __global__ void paralution::kernel_spmv_coo_reduce_update (const IndexType num_warps, … [remaining parameters truncated in the generated listing])
 
 paralution::if (threadIdx.x==0)
 
 paralution::__syncthreads ()
 
 paralution::while (i< end)
 
 paralution::if (end< num_warps)
 
template<typename IndexType , typename ValueType >
__global__ void paralution::kernel_spmv_coo_serial (const IndexType num_entries, const IndexType *I, const IndexType *J, const ValueType *V, const ValueType scalar, const ValueType *x, ValueType *y)
 

Variables

const IndexType paralution::interval_size
 
const IndexType * paralution::I
 
const IndexType * paralution::J
 
const ValueType * paralution::V
 
const ValueType paralution::scalar
 
const ValueType * paralution::x
 
ValueType * paralution::y
 
IndexType * paralution::temp_rows
 
ValueType * paralution::temp_vals
 
__shared__ volatile ValueType paralution::vals [BLOCK_SIZE]
 
const IndexType paralution::thread_id = BLOCK_SIZE * blockIdx.x + threadIdx.x
 
const IndexType paralution::thread_lane = threadIdx.x & (WARP_SIZE-1)
 
const IndexType paralution::warp_id = thread_id / WARP_SIZE
 
const IndexType paralution::interval_begin = warp_id * interval_size
 
IndexType paralution::interval_end2 = interval_begin + interval_size
 
const IndexType paralution::interval_end = interval_end2
 
const IndexType paralution::idx = 16 * (threadIdx.x/32 + 1) + threadIdx.x
 
 paralution::rows [idx-16] = -1
 
const IndexType paralution::end = num_warps - (num_warps & (BLOCK_SIZE - 1))
 
IndexType paralution::i = threadIdx.x