PARALUTION  1.0.0
PARALUTION
paralution Namespace Reference

Data Structures

class  AcceleratorMatrix
 
class  AcceleratorStencil
 
class  AcceleratorVector
 
class  AIChebyshev
 Approximate Inverse - Chebyshev preconditioner; see IEEE TRANSACTIONS ON POWER SYSTEMS, VOL. 18, NO. 4, NOVEMBER 2003: A New Preconditioned Conjugate Gradient Power Flow - Hasan Dag, Adam Semlyen. More...
 
class  AMG
 
class  AS
 AS preconditioner. More...
 
class  BaseAMG
 
class  BaseMatrix
 Base class for all host/accelerator matrices. More...
 
class  BaseMultiGrid
 
class  BaseParalution
 Base class for operator and vector (i.e. global/local matrix/stencil/vector) classes, all the backend-related interface and data are defined here. More...
 
class  BaseStencil
 Base class for all host/accelerator stencils. More...
 
class  BaseVector
 Base class for all host/accelerator vectors. More...
 
class  BiCGStab
 
class  BlockPreconditioner
 
class  CG
 
class  CG_HN
 
class  Chebyshev
 
class  CR
 
class  DiagJacobiSaddlePointPrecond
 
class  DirectLinearSolver
 Base class for all linear (direct) solvers. More...
 
class  DPCG
 
class  FGMRES
 
class  FixedPoint
 Fixed-point iteration $x_{k+1}=x_k-\omega M^{-1} (A x_k - b)$, where the solution of $M^{-1}$ is provided by a solver via SetPreconditioner(). More...
 
class  FSAI
 Factorized Approximate Inverse preconditioner. More...
 
class  GlobalVector
 
class  GMRES
 
class  GPUAcceleratorMatrix
 
class  GPUAcceleratorMatrixBCSR
 
class  GPUAcceleratorMatrixCOO
 
class  GPUAcceleratorMatrixCSR
 
class  GPUAcceleratorMatrixDENSE
 
class  GPUAcceleratorMatrixDIA
 
class  GPUAcceleratorMatrixELL
 
class  GPUAcceleratorMatrixHYB
 
class  GPUAcceleratorMatrixMCSR
 
class  GPUAcceleratorStencil
 
class  GPUAcceleratorStencilLaplace2D
 
class  GPUAcceleratorVector
 
class  GS
 Gauss-Seidel (GS) preconditioner. More...
 
class  HostMatrix
 
class  HostMatrixBCSR
 
class  HostMatrixCOO
 
class  HostMatrixCSR
 
class  HostMatrixDENSE
 
class  HostMatrixDIA
 
class  HostMatrixELL
 
class  HostMatrixHYB
 
class  HostMatrixMCSR
 
class  HostStencil
 
class  HostStencilLaplace2D
 
class  HostVector
 
class  IC
 Incomplete Cholesky with no fill-ins IC0. More...
 
class  IDR
 IDR(s) - Induced Dimension Reduction method, taken from "An Elegant IDR(s) Variant that Efficiently Exploits Biorthogonality Properties" by Martin B. van Gijzen and Peter Sonneveld, Delft University of Technology. More...
 
class  ILU
 ILU preconditioner based on levels. More...
 
class  ILUT
 ILUT(t,m) preconditioner based on threshold and maximum number of elements per row. More...
 
class  Inversion
 
class  IterationControl
 Iteration control for iterative solvers; monitors the residual (L2 norm) behavior. More...
 
class  IterativeLinearSolver
 Base class for all linear (iterative) solvers. More...
 
class  Jacobi
 
class  LocalMatrix
 
class  LocalStencil
 
class  LocalVector
 
class  LU
 
struct  matrix_market_banner
 
struct  MatrixBCSR
 
struct  MatrixCOO
 Sparse Matrix - Coordinate Format. More...
 
struct  MatrixCSR
 Sparse Matrix - Sparse Compressed Row Format. More...
 
struct  MatrixDENSE
 Dense Matrix (see DENSE_IND for indexing) More...
 
struct  MatrixDIA
 Sparse Matrix - Diagonal Format (see DIA_IND for indexing) More...
 
struct  MatrixELL
 Sparse Matrix - ELL Format (see ELL_IND for indexing) More...
 
struct  MatrixHYB
 Sparse Matrix - Contains ELL and COO Matrices. More...
 
struct  MatrixMCSR
 Sparse Matrix - Modified Sparse Compressed Row Format. More...
 
class  MICAcceleratorMatrix
 
class  MICAcceleratorMatrixBCSR
 
class  MICAcceleratorMatrixCOO
 
class  MICAcceleratorMatrixCSR
 
class  MICAcceleratorMatrixDENSE
 
class  MICAcceleratorMatrixDIA
 
class  MICAcceleratorMatrixELL
 
class  MICAcceleratorMatrixHYB
 
class  MICAcceleratorMatrixMCSR
 
class  MICAcceleratorStencil
 
class  MICAcceleratorStencilLaplace2D
 
class  MICAcceleratorVector
 
class  MixedPrecisionDC
 
class  MultiColored
 
class  MultiColoredGS
 
class  MultiColoredILU
 ILU(p,q) preconditioner (see power(q)-pattern method, D. Lukarski "Parallel Sparse Linear Algebra for Multi-core and Many-core Platforms - Parallel Solvers and Preconditioners", PhD Thesis, 2012, KIT) More...
 
class  MultiColoredSGS
 
class  MultiElimination
 MultiElimination (I)LU factorization (see 12.5.1 Multi-Elimination ILU from "Iterative Methods for Sparse Linear Systems", 2nd Edition, Yousef Saad); The ME-ILU preconditioner is built recursively. More...
 
class  MultiGrid
 
class  OCLAcceleratorMatrix
 
class  OCLAcceleratorMatrixBCSR
 
class  OCLAcceleratorMatrixCOO
 
class  OCLAcceleratorMatrixCSR
 
class  OCLAcceleratorMatrixDENSE
 
class  OCLAcceleratorMatrixDIA
 
class  OCLAcceleratorMatrixELL
 
class  OCLAcceleratorMatrixHYB
 
class  OCLAcceleratorMatrixMCSR
 
class  OCLAcceleratorStencil
 
class  OCLAcceleratorStencilLaplace2D
 
class  OCLAcceleratorVector
 
struct  oclHandle_t
 
class  Operator
 Operator class defines the generic interface for applying an operator (e.g. matrix, stencil) from/to global and local vectors. More...
 
struct  Paralution_Backend_Descriptor
 Backend descriptor - keeps information about the hardware - OpenMP (threads); CUDA (blocksizes, handles, etc); OpenCL (workgroupsizes, handles, etc). More...
 
struct  Paralution_Object_Data
 Global data for all PARALUTION objects. More...
 
class  ParalutionObj
 
class  Preconditioner
 Base preconditioner class. More...
 
class  QR
 
class  RAS
 RAS (Restricted Additive Schwarz) preconditioner. More...
 
class  SGS
 Symmetric Gauss-Seidel (SGS) preconditioner. More...
 
class  SIRA
 
class  Solver
 The base class for all solvers and preconditioners. More...
 
class  SPAI
 SParse Approximate Inverse preconditioner. More...
 
class  TNS
 Truncated Neumann Series (TNS) Preconditioner. More...
 
class  Vector
 

Enumerations

enum  _paralution_backend_id { None =0, GPU =1, OCL =2, MIC =3 }
 Backend IDs. More...
 
enum  _matrix_format {
  DENSE = 0, CSR = 1, MCSR = 2, BCSR = 3,
  COO = 4, DIA = 5, ELL = 6, HYB = 7
}
 Matrix Enumeration. More...
 
enum  _stencil_type { Laplace2D = 0 }
 Stencil Enumeration. More...
 
enum  _interp { Aggregation, SmoothedAggregation }
 
enum  _cycle { Vcycle = 0, Wcycle = 1, Kcycle = 2, Fcycle = 3 }
 

Functions

int init_paralution (void)
 Initialization of the paralution platform. More...
 
int stop_paralution (void)
 Shutdown the paralution platform. More...
 
int set_device_paralution (int dev)
 Select a device. More...
 
void set_omp_threads_paralution (int nthreads)
 Set the number of threads in the platform. More...
 
void set_gpu_cuda_paralution (int ngpu)
 Set a specific GPU device. More...
 
void set_ocl_paralution (int nplatform, int ndevice)
 Set a specific OpenCL platform and device. More...
 
void set_ocl_platform_paralution (int platform)
 Set a specific OpenCL platform. More...
 
void set_ocl_work_group_size_paralution (size_t size)
 Set OpenCL work group size. More...
 
void set_ocl_compute_units_paralution (size_t cu)
 Set OpenCL compute units. More...
 
void set_ocl_warp_size_paralution (int size)
 Set OpenCL warp size. More...
 
void info_paralution (void)
 Print information about the platform. More...
 
void info_paralution (const struct Paralution_Backend_Descriptor backend_descriptor)
 Print information about the platform via specific backend descriptor. More...
 
void set_omp_affinity (bool affinity)
 Set host affinity (true-on/false-off) More...
 
void set_omp_threshold (const int threshold)
 Set OpenMP threshold size. More...
 
bool _paralution_available_accelerator (void)
 Return true if any accelerator is available. More...
 
void disable_accelerator_paralution (const bool onoff=true)
 Disable/Enable the accelerator. More...
 
struct Paralution_Backend_Descriptor * _get_backend_descriptor (void)
 Return backend descriptor. More...
 
void _set_backend_descriptor (const struct Paralution_Backend_Descriptor backend_descriptor)
 Set backend descriptor. More...
 
template<typename ValueType >
AcceleratorVector< ValueType > * _paralution_init_base_backend_vector (const struct Paralution_Backend_Descriptor backend_descriptor)
 Build (and return) a vector on the accelerator selected in the descriptor. More...
 
template<typename ValueType >
AcceleratorMatrix< ValueType > * _paralution_init_base_backend_matrix (const struct Paralution_Backend_Descriptor backend_descriptor, const unsigned int matrix_format)
 Build (and return) a matrix on the accelerator selected in the descriptor. More...
 
template<typename ValueType >
HostMatrix< ValueType > * _paralution_init_base_host_matrix (const struct Paralution_Backend_Descriptor backend_descriptor, const unsigned int matrix_format)
 Build (and return) a matrix on the host. More...
 
void _paralution_sync (void)
 Sync the active async transfers. More...
 
void _set_omp_backend_threads (const struct Paralution_Backend_Descriptor backend_descriptor, const int size)
 Set the OMP threads based on the size threshold. More...
 
size_t _paralution_add_obj (class ParalutionObj *ptr)
 
bool _paralution_del_obj (class ParalutionObj *ptr, size_t id)
 
void _paralution_delete_all_obj (void)
 
bool _paralution_check_if_any_obj (void)
 
bool paralution_init_gpu ()
 Initialize a GPU (CUDA, CUBLAS, CUSPARSE) More...
 
void paralution_stop_gpu ()
 Release the GPU resources (CUDA, CUBLAS, CUSPARSE) More...
 
void paralution_info_gpu (const struct Paralution_Backend_Descriptor)
 Print information about the GPUs in the system. More...
 
void paralution_gpu_sync (void)
 Sync the device (for async transfers) More...
 
template<typename ValueType >
AcceleratorVector< ValueType > * _paralution_init_base_gpu_vector (const struct Paralution_Backend_Descriptor backend_descriptor)
 Build (and return) a vector on GPU. More...
 
template<typename ValueType >
AcceleratorMatrix< ValueType > * _paralution_init_base_gpu_matrix (const struct Paralution_Backend_Descriptor backend_descriptor, const unsigned int matrix_format)
 Build (and return) a matrix on GPU. More...
 
template<typename ValueType , typename IndexType >
__global__ void kernel_coo_permute (const IndexType nnz, const IndexType *in_row, const IndexType *in_col, const IndexType *perm, IndexType *out_row, IndexType *out_col)
 
template<typename IndexType , typename ValueType >
__device__ ValueType segreduce_warp (const IndexType thread_lane, IndexType row, ValueType val, IndexType *rows, ValueType *vals)
 
template<typename IndexType , typename ValueType >
__device__ void segreduce_block (const IndexType *idx, ValueType *val)
 
template<typename IndexType , typename ValueType , unsigned int BLOCK_SIZE, unsigned int WARP_SIZE>
 __launch_bounds__ (BLOCK_SIZE, 1) __global__ void kernel_spmv_coo_flat(const IndexType num_nonzeros
 
 if (interval_end2 > num_nonzeros) interval_end2
 
 if (interval_begin >=interval_end) return
 
 if (thread_lane==31)
 
 for (IndexType n=interval_begin+thread_lane;n< interval_end;n+=WARP_SIZE)
 
template<typename IndexType , typename ValueType , unsigned int BLOCK_SIZE>
 __launch_bounds__ (BLOCK_SIZE, 1) __global__ void kernel_spmv_coo_reduce_update(const IndexType num_warps
 
 if (threadIdx.x==0)
 
 __syncthreads ()
 
 while (i< end)
 
 if (end< num_warps)
 
template<typename IndexType , typename ValueType >
__global__ void kernel_spmv_coo_serial (const IndexType num_entries, const IndexType *I, const IndexType *J, const ValueType *V, const ValueType scalar, const ValueType *x, ValueType *y)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_spmv_scalar (const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *val, const ValueType *in, ValueType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_add_spmv_scalar (const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *val, const ValueType scalar, const ValueType *in, ValueType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_scale_diagonal (const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType alpha, ValueType *val)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_scale_offdiagonal (const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType alpha, ValueType *val)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_add_diagonal (const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType alpha, ValueType *val)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_add_offdiagonal (const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType alpha, ValueType *val)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_extract_diag (const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *val, ValueType *vec)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_extract_inv_diag (const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *val, ValueType *vec)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_extract_submatrix_row_nnz (const IndexType *row_offset, const IndexType *col, const ValueType *val, const IndexType smrow_offset, const IndexType smcol_offset, const IndexType smrow_size, const IndexType smcol_size, IndexType *row_nnz)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_extract_submatrix_copy (const IndexType *row_offset, const IndexType *col, const ValueType *val, const IndexType smrow_offset, const IndexType smcol_offset, const IndexType smrow_size, const IndexType smcol_size, const IndexType *sm_row_offset, IndexType *sm_col, ValueType *sm_val)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_diagmatmult_r (const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *diag, ValueType *val)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_diagmatmult_l (const IndexType nrow, const IndexType *row_offset, const ValueType *diag, ValueType *val)
 
template<typename IndexType >
__global__ void kernel_calc_row_nnz (const IndexType nrow, const IndexType *row_offset, IndexType *row_nnz)
 
template<typename IndexType >
__global__ void kernel_permute_row_nnz (const IndexType nrow, const IndexType *row_nnz_src, const IndexType *perm_vec, IndexType *row_nnz_dst)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_permute_rows (const IndexType nrow, const IndexType *row_offset, const IndexType *perm_row_offset, const IndexType *col, const ValueType *data, const IndexType *perm_vec, const IndexType *row_nnz, IndexType *perm_col, ValueType *perm_data)
 
template<typename ValueType , typename IndexType , const IndexType size>
__global__ void kernel_permute_cols (const IndexType nrow, const IndexType *row_offset, const IndexType *perm_vec, const IndexType *row_nnz, const IndexType *perm_col, const ValueType *perm_data, IndexType *col, ValueType *data)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_add_csr_same_struct (const IndexType nrow, const IndexType *out_row_offset, const IndexType *out_col, const IndexType *in_row_offset, const IndexType *in_col, const ValueType *in_val, const ValueType alpha, const ValueType beta, ValueType *out_val)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_lower_nnz_per_row (const IndexType nrow, const IndexType *src_row_offset, const IndexType *src_col, IndexType *nnz_per_row)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_upper_nnz_per_row (const IndexType nrow, const IndexType *src_row_offset, const IndexType *src_col, IndexType *nnz_per_row)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_slower_nnz_per_row (const IndexType nrow, const IndexType *src_row_offset, const IndexType *src_col, IndexType *nnz_per_row)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_supper_nnz_per_row (const IndexType nrow, const IndexType *src_row_offset, const IndexType *src_col, IndexType *nnz_per_row)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_extract_l_triangular (const IndexType nrow, const IndexType *src_row_offset, const IndexType *src_col, const ValueType *src_val, IndexType *nnz_per_row, IndexType *dst_col, ValueType *dst_val)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_extract_u_triangular (const IndexType nrow, const IndexType *src_row_offset, const IndexType *src_col, const ValueType *src_val, IndexType *nnz_per_row, IndexType *dst_col, ValueType *dst_val)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_compress_count_nrow (const IndexType *row_offset, const IndexType *col, const ValueType *val, const IndexType nrow, const double drop_off, IndexType *row_offset_new)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_compress_copy (const IndexType *row_offset, const IndexType *col, const ValueType *val, const IndexType nrow, const double drop_off, const IndexType *row_offset_new, IndexType *col_new, ValueType *val_new)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_extract_column_vector (const IndexType *row_offset, const IndexType *col, const ValueType *val, const IndexType nrow, const IndexType idx, ValueType *vec)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_replace_column_vector_offset (const IndexType *row_offset, const IndexType *col, const IndexType nrow, const IndexType idx, const ValueType *vec, IndexType *offset)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_replace_column_vector (const IndexType *row_offset, const IndexType *col, const ValueType *val, const IndexType nrow, const IndexType idx, const ValueType *vec, const IndexType *offset, IndexType *new_col, ValueType *new_val)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_csr_extract_row_vector (const IndexType *row_offset, const IndexType *col, const ValueType *val, const IndexType row_nnz, const IndexType idx, ValueType *vec)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_dense_replace_column_vector (const ValueType *vec, const IndexType idx, const IndexType nrow, const IndexType ncol, ValueType *mat)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_dense_replace_row_vector (const ValueType *vec, const IndexType idx, const IndexType nrow, const IndexType ncol, ValueType *mat)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_dense_extract_column_vector (ValueType *vec, const IndexType idx, const IndexType nrow, const IndexType ncol, const ValueType *mat)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_dense_extract_row_vector (ValueType *vec, const IndexType idx, const IndexType nrow, const IndexType ncol, const ValueType *mat)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_dia_spmv (const IndexType num_rows, const IndexType num_cols, const IndexType num_diags, const IndexType *Aoffsets, const ValueType *Aval, const ValueType *x, ValueType *y)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_dia_add_spmv (const IndexType num_rows, const IndexType num_cols, const IndexType num_diags, const IndexType *Aoffsets, const ValueType *Aval, const ValueType scalar, const ValueType *x, ValueType *y)
 
template<typename IndexType >
__global__ void kernel_dia_diag_map (const IndexType nrow, const IndexType *row_offset, const IndexType *col, IndexType *diag_map)
 
template<typename IndexType >
__global__ void kernel_dia_fill_offset (const IndexType nrow, const IndexType ncol, IndexType *diag_map, const IndexType *offset_map, IndexType *offset)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_dia_convert (const IndexType nrow, const IndexType ndiag, const IndexType *row_offset, const IndexType *col, const ValueType *val, const IndexType *diag_map, ValueType *dia_val)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_ell_spmv (const IndexType num_rows, const IndexType num_cols, const IndexType num_cols_per_row, const IndexType *Acol, const ValueType *Aval, const ValueType *x, ValueType *y)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_ell_add_spmv (const IndexType num_rows, const IndexType num_cols, const IndexType num_cols_per_row, const IndexType *Acol, const ValueType *Aval, const ValueType scalar, const ValueType *x, ValueType *y)
 
template<typename ValueType , typename IndexType , unsigned int BLOCK_SIZE>
__global__ void kernel_ell_max_row (const IndexType nrow, const ValueType *data, ValueType *out, const IndexType GROUP_SIZE, const IndexType LOCAL_SIZE)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_ell_csr_to_ell (const IndexType nrow, const IndexType max_row, const IndexType *src_row_offset, const IndexType *src_col, const ValueType *src_val, IndexType *ell_col, ValueType *ell_val)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_set_to_zeros (const IndexType n, ValueType *data)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_set_to_ones (const IndexType n, ValueType *data)
 
template<typename IndexType >
__device__ IndexType red_recurse (IndexType *src, IndexType *srcStart, IndexType stride)
 
template<typename IndexType >
__global__ void kernel_red_recurse (IndexType *dst, IndexType *src, IndexType stride, IndexType numElems)
 
template<typename IndexType , unsigned int BLOCK_SIZE>
__global__ void kernel_red_partial_sum (IndexType *dst, const IndexType *src, const IndexType numElems)
 
template<typename IndexType >
__global__ void kernel_red_extrapolate (IndexType *dst, const IndexType *srcBorder, const IndexType *srcData, IndexType numElems)
 
template<typename IndexType >
__global__ void kernel_reverse_index (const IndexType n, const IndexType *perm, IndexType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_buffer_addscalar (const IndexType n, const ValueType scalar, ValueType *buff)
 
template<typename IndexType >
__global__ void kernel_ell_nnz_coo (const IndexType nrow, const IndexType max_row, const IndexType *row_offset, IndexType *nnz_coo)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_ell_fill_ell (const IndexType nrow, const IndexType max_row, const IndexType *row_offset, const IndexType *col, const ValueType *val, IndexType *ELL_col, ValueType *ELL_val, IndexType *nnz_ell)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_ell_fill_coo (const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *val, const IndexType *nnz_coo, const IndexType *nnz_ell, IndexType *COO_row, IndexType *COO_col, ValueType *COO_val)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_mcsr_spmv_scalar (const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *val, const ValueType *in, ValueType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_mcsr_add_spmv_scalar (const IndexType nrow, const IndexType *row_offset, const IndexType *col, const ValueType *val, const ValueType scalar, const ValueType *in, ValueType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_scaleadd (const IndexType n, const ValueType alpha, const ValueType *x, ValueType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_scaleaddscale (const IndexType n, const ValueType alpha, const ValueType beta, const ValueType *x, ValueType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_scaleaddscale_offset (const IndexType n, const IndexType src_offset, const IndexType dst_offset, const ValueType alpha, const ValueType beta, const ValueType *x, ValueType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_scaleadd2 (const IndexType n, const ValueType alpha, const ValueType beta, const ValueType gamma, const ValueType *x, const ValueType *y, ValueType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_pointwisemult (const IndexType n, const ValueType *x, ValueType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_pointwisemult2 (const IndexType n, const ValueType *x, const ValueType *y, ValueType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_copy_offset_from (const IndexType n, const IndexType src_offset, const IndexType dst_offset, const ValueType *in, ValueType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_permute (const IndexType n, const IndexType *permute, const ValueType *in, ValueType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_permute_backward (const IndexType n, const IndexType *permute, const ValueType *in, ValueType *out)
 
template<typename ValueType , typename IndexType , unsigned int BLOCK_SIZE>
__global__ void kernel_reduce (const IndexType n, const ValueType *data, ValueType *out, const IndexType GROUP_SIZE, const IndexType LOCAL_SIZE)
 
template<typename ValueType , typename IndexType , unsigned int BLOCK_SIZE>
__global__ void kernel_max (const IndexType n, const ValueType *data, ValueType *out, const IndexType GROUP_SIZE, const IndexType LOCAL_SIZE)
 
template<typename ValueType , typename IndexType , unsigned int BLOCK_SIZE>
__global__ void kernel_amax (const IndexType n, const ValueType *data, ValueType *out, const IndexType GROUP_SIZE, const IndexType LOCAL_SIZE)
 
template<typename IndexType >
__global__ void kernel_powerd (const IndexType n, const double power, double *out)
 
template<typename IndexType >
__global__ void kernel_powerf (const IndexType n, const double power, float *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_copy_from_float (const IndexType n, const float *in, ValueType *out)
 
template<typename ValueType , typename IndexType >
__global__ void kernel_copy_from_double (const IndexType n, const double *in, ValueType *out)
 
cusparseStatus_t __cusparseXcsrgeam__ (cusparseHandle_t handle, int m, int n, const double *alpha, const cusparseMatDescr_t descrA, int nnzA, const double *csrValA, const int *csrRowPtrA, const int *csrColIndA, const double *beta, const cusparseMatDescr_t descrB, int nnzB, const double *csrValB, const int *csrRowPtrB, const int *csrColIndB, const cusparseMatDescr_t descrC, double *csrValC, int *csrRowPtrC, int *csrColIndC)
 
cusparseStatus_t __cusparseXcsrgeam__ (cusparseHandle_t handle, int m, int n, const float *alpha, const cusparseMatDescr_t descrA, int nnzA, const float *csrValA, const int *csrRowPtrA, const int *csrColIndA, const float *beta, const cusparseMatDescr_t descrB, int nnzB, const float *csrValB, const int *csrRowPtrB, const int *csrColIndB, const cusparseMatDescr_t descrC, float *csrValC, int *csrRowPtrC, int *csrColIndC)
 
cusparseStatus_t __cusparseXcsrgemm__ (cusparseHandle_t handle, cusparseOperation_t transA, cusparseOperation_t transB, int m, int n, int k, const cusparseMatDescr_t descrA, const int nnzA, const double *csrValA, const int *csrRowPtrA, const int *csrColIndA, const cusparseMatDescr_t descrB, const int nnzB, const double *csrValB, const int *csrRowPtrB, const int *csrColIndB, const cusparseMatDescr_t descrC, double *csrValC, const int *csrRowPtrC, int *csrColIndC)
 
cusparseStatus_t __cusparseXcsrgemm__ (cusparseHandle_t handle, cusparseOperation_t transA, cusparseOperation_t transB, int m, int n, int k, const cusparseMatDescr_t descrA, const int nnzA, const float *csrValA, const int *csrRowPtrA, const int *csrColIndA, const cusparseMatDescr_t descrB, const int nnzB, const float *csrValB, const int *csrRowPtrB, const int *csrColIndB, const cusparseMatDescr_t descrC, float *csrValC, const int *csrRowPtrC, int *csrColIndC)
 
template<typename DataType >
void allocate_gpu (const int size, DataType **ptr)
 
template<typename DataType >
void free_gpu (DataType **ptr)
 
template<typename DataType >
void set_to_zero_gpu (const int blocksize, const int max_threads, const int size, DataType *ptr)
 
template<typename DataType >
void set_to_one_gpu (const int blocksize, const int max_threads, const int size, DataType *ptr)
 
template<typename IndexType , unsigned int BLOCK_SIZE>
bool cum_sum (IndexType *dst, const IndexType *src, const IndexType numElems)
 
void paralution_set_omp_affinity (bool aff)
 
template<typename ValueType , typename IndexType >
void csr_to_dense (const int omp_threads, const IndexType nnz, const IndexType nrow, const IndexType ncol, const MatrixCSR< ValueType, IndexType > &src, MatrixDENSE< ValueType > *dst)
 
template<typename ValueType , typename IndexType >
void dense_to_csr (const int omp_threads, const IndexType nrow, const IndexType ncol, const MatrixDENSE< ValueType > &src, MatrixCSR< ValueType, IndexType > *dst, IndexType *nnz)
 
template<typename ValueType , typename IndexType >
void csr_to_mcsr (const int omp_threads, const IndexType nnz, const IndexType nrow, const IndexType ncol, const MatrixCSR< ValueType, IndexType > &src, MatrixMCSR< ValueType, IndexType > *dst)
 
template<typename ValueType , typename IndexType >
void mcsr_to_csr (const int omp_threads, const IndexType nnz, const IndexType nrow, const IndexType ncol, const MatrixMCSR< ValueType, IndexType > &src, MatrixCSR< ValueType, IndexType > *dst)
 
template<typename ValueType , typename IndexType >
void csr_to_coo (const int omp_threads, const IndexType nnz, const IndexType nrow, const IndexType ncol, const MatrixCSR< ValueType, IndexType > &src, MatrixCOO< ValueType, IndexType > *dst)
 
template<typename ValueType , typename IndexType >
void csr_to_ell (const int omp_threads, const IndexType nnz, const IndexType nrow, const IndexType ncol, const MatrixCSR< ValueType, IndexType > &src, MatrixELL< ValueType, IndexType > *dst, IndexType *nnz_ell)
 
template<typename ValueType , typename IndexType >
void ell_to_csr (const int omp_threads, const IndexType nnz, const IndexType nrow, const IndexType ncol, const MatrixELL< ValueType, IndexType > &src, MatrixCSR< ValueType, IndexType > *dst, IndexType *nnz_csr)
 
template<typename ValueType , typename IndexType >
void hyb_to_csr (const int omp_threads, const IndexType nnz, const IndexType nrow, const IndexType ncol, const IndexType nnz_ell, const IndexType nnz_coo, const MatrixHYB< ValueType, IndexType > &src, MatrixCSR< ValueType, IndexType > *dst, IndexType *nnz_csr)
 
template<typename ValueType , typename IndexType >
void coo_to_csr (const int omp_threads, const IndexType nnz, const IndexType nrow, const IndexType ncol, const MatrixCOO< ValueType, IndexType > &src, MatrixCSR< ValueType, IndexType > *dst)
 
template<typename ValueType , typename IndexType >
void csr_to_dia (const int omp_threads, const IndexType nnz, const IndexType nrow, const IndexType ncol, const MatrixCSR< ValueType, IndexType > &src, MatrixDIA< ValueType, IndexType > *dst, IndexType *nnz_dia)
 
template<typename ValueType , typename IndexType >
void dia_to_csr (const int omp_threads, const IndexType nnz, const IndexType nrow, const IndexType ncol, const MatrixDIA< ValueType, IndexType > &src, MatrixCSR< ValueType, IndexType > *dst, IndexType *nnz_csr)
 
template<typename ValueType , typename IndexType >
void csr_to_hyb (const int omp_threads, const IndexType nnz, const IndexType nrow, const IndexType ncol, const MatrixCSR< ValueType, IndexType > &src, MatrixHYB< ValueType, IndexType > *dst, IndexType *nnz_hyb, IndexType *nnz_ell, IndexType *nnz_coo)
 
template void csr_to_coo (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< double, int > &src, MatrixCOO< double, int > *dst)
 
template void csr_to_coo (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< float, int > &src, MatrixCOO< float, int > *dst)
 
template void csr_to_coo (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< std::complex< double >, int > &src, MatrixCOO< std::complex< double >, int > *dst)
 
template void csr_to_coo (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< std::complex< float >, int > &src, MatrixCOO< std::complex< float >, int > *dst)
 
template void csr_to_coo (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< int, int > &src, MatrixCOO< int, int > *dst)
 
template void csr_to_mcsr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< double, int > &src, MatrixMCSR< double, int > *dst)
 
template void csr_to_mcsr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< float, int > &src, MatrixMCSR< float, int > *dst)
 
template void csr_to_mcsr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< std::complex< double >, int > &src, MatrixMCSR< std::complex< double >, int > *dst)
 
template void csr_to_mcsr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< std::complex< float >, int > &src, MatrixMCSR< std::complex< float >, int > *dst)
 
template void csr_to_mcsr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< int, int > &src, MatrixMCSR< int, int > *dst)
 
template void mcsr_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixMCSR< double, int > &src, MatrixCSR< double, int > *dst)
 
template void mcsr_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixMCSR< float, int > &src, MatrixCSR< float, int > *dst)
 
template void mcsr_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixMCSR< std::complex< double >, int > &src, MatrixCSR< std::complex< double >, int > *dst)
 
template void mcsr_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixMCSR< std::complex< float >, int > &src, MatrixCSR< std::complex< float >, int > *dst)
 
template void mcsr_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixMCSR< int, int > &src, MatrixCSR< int, int > *dst)
 
template void csr_to_dia (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< double, int > &src, MatrixDIA< double, int > *dst, int *nnz_dia)
 
template void csr_to_dia (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< float, int > &src, MatrixDIA< float, int > *dst, int *nnz_dia)
 
template void csr_to_dia (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< std::complex< double >, int > &src, MatrixDIA< std::complex< double >, int > *dst, int *nnz_dia)
 
template void csr_to_dia (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< std::complex< float >, int > &src, MatrixDIA< std::complex< float >, int > *dst, int *nnz_dia)
 
template void csr_to_dia (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< int, int > &src, MatrixDIA< int, int > *dst, int *nnz_dia)
 
template void csr_to_hyb (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< double, int > &src, MatrixHYB< double, int > *dst, int *nnz_hyb, int *nnz_ell, int *nnz_coo)
 
template void csr_to_hyb (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< float, int > &src, MatrixHYB< float, int > *dst, int *nnz_hyb, int *nnz_ell, int *nnz_coo)
 
template void csr_to_hyb (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< std::complex< double >, int > &src, MatrixHYB< std::complex< double >, int > *dst, int *nnz_hyb, int *nnz_ell, int *nnz_coo)
 
template void csr_to_hyb (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< std::complex< float >, int > &src, MatrixHYB< std::complex< float >, int > *dst, int *nnz_hyb, int *nnz_ell, int *nnz_coo)
 
template void csr_to_hyb (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< int, int > &src, MatrixHYB< int, int > *dst, int *nnz_hyb, int *nnz_ell, int *nnz_coo)
 
template void csr_to_ell (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< double, int > &src, MatrixELL< double, int > *dst, int *nnz_ell)
 
template void csr_to_ell (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< float, int > &src, MatrixELL< float, int > *dst, int *nnz_ell)
 
template void csr_to_ell (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< std::complex< double >, int > &src, MatrixELL< std::complex< double >, int > *dst, int *nnz_ell)
 
template void csr_to_ell (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< std::complex< float >, int > &src, MatrixELL< std::complex< float >, int > *dst, int *nnz_ell)
 
template void csr_to_ell (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< int, int > &src, MatrixELL< int, int > *dst, int *nnz_ell)
 
template void csr_to_dense (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< double, int > &src, MatrixDENSE< double > *dst)
 
template void csr_to_dense (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< float, int > &src, MatrixDENSE< float > *dst)
 
template void csr_to_dense (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< std::complex< double >, int > &src, MatrixDENSE< std::complex< double > > *dst)
 
template void csr_to_dense (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< std::complex< float >, int > &src, MatrixDENSE< std::complex< float > > *dst)
 
template void csr_to_dense (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCSR< int, int > &src, MatrixDENSE< int > *dst)
 
template void dense_to_csr (const int omp_threads, const int nrow, const int ncol, const MatrixDENSE< double > &src, MatrixCSR< double, int > *dst, int *nnz)
 
template void dense_to_csr (const int omp_threads, const int nrow, const int ncol, const MatrixDENSE< float > &src, MatrixCSR< float, int > *dst, int *nnz)
 
template void dense_to_csr (const int omp_threads, const int nrow, const int ncol, const MatrixDENSE< std::complex< double > > &src, MatrixCSR< std::complex< double >, int > *dst, int *nnz)
 
template void dense_to_csr (const int omp_threads, const int nrow, const int ncol, const MatrixDENSE< std::complex< float > > &src, MatrixCSR< std::complex< float >, int > *dst, int *nnz)
 
template void dense_to_csr (const int omp_threads, const int nrow, const int ncol, const MatrixDENSE< int > &src, MatrixCSR< int, int > *dst, int *nnz)
 
template void dia_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixDIA< double, int > &src, MatrixCSR< double, int > *dst, int *nnz_csr)
 
template void dia_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixDIA< float, int > &src, MatrixCSR< float, int > *dst, int *nnz_csr)
 
template void dia_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixDIA< std::complex< double >, int > &src, MatrixCSR< std::complex< double >, int > *dst, int *nnz_csr)
 
template void dia_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixDIA< std::complex< float >, int > &src, MatrixCSR< std::complex< float >, int > *dst, int *nnz_csr)
 
template void dia_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixDIA< int, int > &src, MatrixCSR< int, int > *dst, int *nnz_csr)
 
template void ell_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixELL< double, int > &src, MatrixCSR< double, int > *dst, int *nnz_csr)
 
template void ell_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixELL< float, int > &src, MatrixCSR< float, int > *dst, int *nnz_csr)
 
template void ell_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixELL< std::complex< double >, int > &src, MatrixCSR< std::complex< double >, int > *dst, int *nnz_csr)
 
template void ell_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixELL< std::complex< float >, int > &src, MatrixCSR< std::complex< float >, int > *dst, int *nnz_csr)
 
template void ell_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixELL< int, int > &src, MatrixCSR< int, int > *dst, int *nnz_csr)
 
template void coo_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCOO< double, int > &src, MatrixCSR< double, int > *dst)
 
template void coo_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCOO< float, int > &src, MatrixCSR< float, int > *dst)
 
template void coo_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCOO< std::complex< double >, int > &src, MatrixCSR< std::complex< double >, int > *dst)
 
template void coo_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCOO< std::complex< float >, int > &src, MatrixCSR< std::complex< float >, int > *dst)
 
template void coo_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const MatrixCOO< int, int > &src, MatrixCSR< int, int > *dst)
 
template void hyb_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const int nnz_ell, const int nnz_coo, const MatrixHYB< double, int > &src, MatrixCSR< double, int > *dst, int *nnz_csr)
 
template void hyb_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const int nnz_ell, const int nnz_coo, const MatrixHYB< float, int > &src, MatrixCSR< float, int > *dst, int *nnz_csr)
 
template void hyb_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const int nnz_ell, const int nnz_coo, const MatrixHYB< std::complex< double >, int > &src, MatrixCSR< std::complex< double >, int > *dst, int *nnz_csr)
 
template void hyb_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const int nnz_ell, const int nnz_coo, const MatrixHYB< std::complex< float >, int > &src, MatrixCSR< std::complex< float >, int > *dst, int *nnz_csr)
 
template void hyb_to_csr (const int omp_threads, const int nnz, const int nrow, const int ncol, const int nnz_ell, const int nnz_coo, const MatrixHYB< int, int > &src, MatrixCSR< int, int > *dst, int *nnz_csr)
 
void tokenize (std::vector< std::string > &tokens, const std::string &str, const std::string &delimiters="\n\r\t ")
 
template<typename ValueType >
void assign_complex (ValueType &val, double real, double imag)
 
template<typename ValueType >
void assign_complex (std::complex< ValueType > &val, double real, double imag)
 
template<typename ValueType >
void write_value (std::ofstream &output, const ValueType &val)
 
template<typename ValueType >
void write_value (std::ofstream &output, const std::complex< ValueType > &val)
 
bool read_matrix_market_banner (matrix_market_banner &banner, std::ifstream &input)
 
template<typename ValueType >
bool read_coordinate_stream (int &nrow, int &ncol, int &nnz, int **row, int **col, ValueType **val, std::ifstream &input, matrix_market_banner &banner)
 
template<typename ValueType >
bool read_matrix_mtx (int &nrow, int &ncol, int &nnz, int **row, int **col, ValueType **val, const std::string filename)
 
template<typename ValueType >
bool write_matrix_mtx (const int nrow, const int ncol, const int nnz, const int *row, const int *col, const ValueType *val, const std::string filename)
 
template bool read_matrix_mtx (int &nrow, int &ncol, int &nnz, int **row, int **col, float **val, const std::string filename)
 
template bool read_matrix_mtx (int &nrow, int &ncol, int &nnz, int **row, int **col, double **val, const std::string filename)
 
template bool read_matrix_mtx (int &nrow, int &ncol, int &nnz, int **row, int **col, std::complex< float > **val, const std::string filename)
 
template bool read_matrix_mtx (int &nrow, int &ncol, int &nnz, int **row, int **col, std::complex< double > **val, const std::string filename)
 
template bool write_matrix_mtx (const int nrow, const int ncol, const int nnz, const int *row, const int *col, const float *val, const std::string filename)
 
template bool write_matrix_mtx (const int nrow, const int ncol, const int nnz, const int *row, const int *col, const double *val, const std::string filename)
 
template bool write_matrix_mtx (const int nrow, const int ncol, const int nnz, const int *row, const int *col, const std::complex< float > *val, const std::string filename)
 
template bool write_matrix_mtx (const int nrow, const int ncol, const int nnz, const int *row, const int *col, const std::complex< double > *val, const std::string filename)
 
bool paralution_init_mic ()
 Initialize a MIC. More...
 
void paralution_stop_mic ()
 Release the MIC accelerator. More...
 
void paralution_info_mic (const struct Paralution_Backend_Descriptor)
 Print information about the MICs in the system. More...
 
template<typename ValueType >
AcceleratorMatrix< ValueType > * _paralution_init_base_mic_matrix (const struct Paralution_Backend_Descriptor backend_descriptor, const unsigned int matrix_format)
 Build (and return) a matrix on MIC. More...
 
template<typename ValueType >
AcceleratorVector< ValueType > * _paralution_init_base_mic_vector (const struct Paralution_Backend_Descriptor backend_descriptor)
 Build (and return) a vector on MIC. More...
 
template<typename DataType >
void allocate_mic (const int mic_dev, const int size, DataType **ptr)
 
template<typename DataType >
void free_mic (const int mic_dev, DataType **ptr)
 
template<typename DataType >
void set_to_zero_mic (const int mic_dev, const int size, DataType *ptr)
 
template<typename DataType >
void set_to_one_mic (const int mic_dev, const int size, DataType *ptr)
 
template void allocate_mic< float > (const int mic_dev, const int size, float **ptr)
 
template void allocate_mic< double > (const int mic_dev, const int size, double **ptr)
 
template void allocate_mic< int > (const int mic_dev, const int size, int **ptr)
 
template void allocate_mic< unsigned int > (const int mic_dev, const int size, unsigned int **ptr)
 
template void allocate_mic< char > (const int mic_dev, const int size, char **ptr)
 
template void free_mic< float > (const int mic_dev, float **ptr)
 
template void free_mic< double > (const int mic_dev, double **ptr)
 
template void free_mic< int > (const int mic_dev, int **ptr)
 
template void free_mic< unsigned int > (const int mic_dev, unsigned int **ptr)
 
template void free_mic< char > (const int mic_dev, char **ptr)
 
template void set_to_zero_mic< float > (const int mic_dev, const int size, float *ptr)
 
template void set_to_zero_mic< double > (const int mic_dev, const int size, double *ptr)
 
template void set_to_zero_mic< int > (const int mic_dev, const int size, int *ptr)
 
template void set_to_zero_mic< unsigned int > (const int mic_dev, const int size, unsigned int *ptr)
 
template void set_to_zero_mic< char > (const int mic_dev, const int size, char *ptr)
 
template void set_to_one_mic< float > (const int mic_dev, const int size, float *ptr)
 
template void set_to_one_mic< double > (const int mic_dev, const int size, double *ptr)
 
template void set_to_one_mic< int > (const int mic_dev, const int size, int *ptr)
 
template void set_to_one_mic< unsigned int > (const int mic_dev, const int size, unsigned int *ptr)
 
template void set_to_one_mic< char > (const int mic_dev, const int size, char *ptr)
 
template<typename ValueType >
void spmv_coo (const int mic_dev, const int *row, const int *col, const ValueType *val, const int nrow, const int nnz, const ValueType *in, ValueType *out)
 
template<typename ValueType >
void spmv_add_coo (const int mic_dev, const int *row, const int *col, const ValueType *val, const int nrow, const int nnz, const ValueType scalar, const ValueType *in, ValueType *out)
 
template void spmv_coo< double > (const int mic_dev, const int *row, const int *col, const double *val, const int nrow, const int nnz, const double *in, double *out)
 
template void spmv_coo< float > (const int mic_dev, const int *row, const int *col, const float *val, const int nrow, const int nnz, const float *in, float *out)
 
template void spmv_coo< int > (const int mic_dev, const int *row, const int *col, const int *val, const int nrow, const int nnz, const int *in, int *out)
 
template void spmv_add_coo< double > (const int mic_dev, const int *row, const int *col, const double *val, const int nrow, const int nnz, const double scalar, const double *in, double *out)
 
template void spmv_add_coo< float > (const int mic_dev, const int *row, const int *col, const float *val, const int nrow, const int nnz, const float scalar, const float *in, float *out)
 
template void spmv_add_coo< int > (const int mic_dev, const int *row, const int *col, const int *val, const int nrow, const int nnz, const int scalar, const int *in, int *out)
 
template<typename ValueType >
void spmv_csr (const int mic_dev, const int *row, const int *col, const ValueType *val, const int nrow, const ValueType *in, ValueType *out)
 
template<typename ValueType >
void spmv_add_csr (const int mic_dev, const int *row, const int *col, const ValueType *val, const int nrow, const ValueType scalar, const ValueType *in, ValueType *out)
 
template void spmv_csr< double > (const int mic_dev, const int *row, const int *col, const double *val, const int nrow, const double *in, double *out)
 
template void spmv_csr< float > (const int mic_dev, const int *row, const int *col, const float *val, const int nrow, const float *in, float *out)
 
template void spmv_csr< int > (const int mic_dev, const int *row, const int *col, const int *val, const int nrow, const int *in, int *out)
 
template void spmv_add_csr< double > (const int mic_dev, const int *row, const int *col, const double *val, const int nrow, const double scalar, const double *in, double *out)
 
template void spmv_add_csr< float > (const int mic_dev, const int *row, const int *col, const float *val, const int nrow, const float scalar, const float *in, float *out)
 
template void spmv_add_csr< int > (const int mic_dev, const int *row, const int *col, const int *val, const int nrow, const int scalar, const int *in, int *out)
 
template<typename ValueType >
void spmv_dia (const int mic_dev, const int *offset, const ValueType *val, const int nrow, const int ndiag, const ValueType *in, ValueType *out)
 
template<typename ValueType >
void spmv_add_dia (const int mic_dev, const int *offset, const ValueType *val, const int nrow, const int ndiag, const ValueType scalar, const ValueType *in, ValueType *out)
 
template void spmv_dia< double > (const int mic_dev, const int *offset, const double *val, const int nrow, const int ndiag, const double *in, double *out)
 
template void spmv_dia< float > (const int mic_dev, const int *offset, const float *val, const int nrow, const int ndiag, const float *in, float *out)
 
template void spmv_dia< int > (const int mic_dev, const int *offset, const int *val, const int nrow, const int ndiag, const int *in, int *out)
 
template void spmv_add_dia< double > (const int mic_dev, const int *offset, const double *val, const int nrow, const int ndiag, const double scalar, const double *in, double *out)
 
template void spmv_add_dia< float > (const int mic_dev, const int *offset, const float *val, const int nrow, const int ndiag, const float scalar, const float *in, float *out)
 
template void spmv_add_dia< int > (const int mic_dev, const int *offset, const int *val, const int nrow, const int ndiag, const int scalar, const int *in, int *out)
 
template<typename ValueType >
void spmv_ell (const int mic_dev, const int *col, const ValueType *val, const int nrow, const int ncol, const int max_row, const ValueType *in, ValueType *out)
 
template<typename ValueType >
void spmv_add_ell (const int mic_dev, const int *col, const ValueType *val, const int nrow, const int ncol, const int max_row, const ValueType scalar, const ValueType *in, ValueType *out)
 
template void spmv_ell< double > (const int mic_dev, const int *col, const double *val, const int nrow, const int ncol, const int max_row, const double *in, double *out)
 
template void spmv_ell< float > (const int mic_dev, const int *col, const float *val, const int nrow, const int ncol, const int max_row, const float *in, float *out)
 
template void spmv_ell< int > (const int mic_dev, const int *col, const int *val, const int nrow, const int ncol, const int max_row, const int *in, int *out)
 
template void spmv_add_ell< double > (const int mic_dev, const int *col, const double *val, const int nrow, const int ncol, const int max_row, const double scalar, const double *in, double *out)
 
template void spmv_add_ell< float > (const int mic_dev, const int *col, const float *val, const int nrow, const int ncol, const int max_row, const float scalar, const float *in, float *out)
 
template void spmv_add_ell< int > (const int mic_dev, const int *col, const int *val, const int nrow, const int ncol, const int max_row, const int scalar, const int *in, int *out)
 
template<typename ValueType >
void spmv_mcsr (const int mic_dev, const int *row, const int *col, const ValueType *val, const int nrow, const ValueType *in, ValueType *out)
 
template<typename ValueType >
void spmv_add_mcsr (const int mic_dev, const int *row, const int *col, const ValueType *val, const int nrow, const ValueType scalar, const ValueType *in, ValueType *out)
 
template void spmv_mcsr< double > (const int mic_dev, const int *row, const int *col, const double *val, const int nrow, const double *in, double *out)
 
template void spmv_mcsr< float > (const int mic_dev, const int *row, const int *col, const float *val, const int nrow, const float *in, float *out)
 
template void spmv_mcsr< int > (const int mic_dev, const int *row, const int *col, const int *val, const int nrow, const int *in, int *out)
 
template void spmv_add_mcsr< double > (const int mic_dev, const int *row, const int *col, const double *val, const int nrow, const double scalar, const double *in, double *out)
 
template void spmv_add_mcsr< float > (const int mic_dev, const int *row, const int *col, const float *val, const int nrow, const float scalar, const float *in, float *out)
 
template void spmv_add_mcsr< int > (const int mic_dev, const int *row, const int *col, const int *val, const int nrow, const int scalar, const int *in, int *out)
 
template<typename ValueType >
void copy_to_mic (const int mic_dev, const ValueType *src, ValueType *dst, const int size)
 
template<typename ValueType >
void copy_to_host (const int mic_dev, const ValueType *src, ValueType *dst, const int size)
 
template<typename ValueType >
void copy_mic_mic (const int mic_dev, const ValueType *src, ValueType *dst, const int size)
 
template void copy_to_mic< float > (const int mic_dev, const float *src, float *dst, const int size)
 
template void copy_to_mic< double > (const int mic_dev, const double *src, double *dst, const int size)
 
template void copy_to_mic< int > (const int mic_dev, const int *src, int *dst, const int size)
 
template void copy_to_mic< unsigned int > (const int mic_dev, const unsigned int *src, unsigned int *dst, const int size)
 
template void copy_to_host< double > (const int mic_dev, const double *src, double *dst, const int size)
 
template void copy_to_host< float > (const int mic_dev, const float *src, float *dst, const int size)
 
template void copy_to_host< int > (const int mic_dev, const int *src, int *dst, const int size)
 
template void copy_to_host< unsigned int > (const int mic_dev, const unsigned int *src, unsigned int *dst, const int size)
 
template void copy_mic_mic< float > (const int mic_dev, const float *src, float *dst, const int size)
 
template void copy_mic_mic< double > (const int mic_dev, const double *src, double *dst, const int size)
 
template void copy_mic_mic< int > (const int mic_dev, const int *src, int *dst, const int size)
 
template void copy_mic_mic< unsigned int > (const int mic_dev, const unsigned int *src, unsigned int *dst, const int size)
 
template<typename ValueType >
void dot (const int mic_dev, const ValueType *vec1, const ValueType *vec2, const int size, ValueType &d)
 
template<typename ValueType >
void asum (const int mic_dev, const ValueType *vec, const int size, ValueType &d)
 
template<typename ValueType >
void amax (const int mic_dev, const ValueType *vec, const int size, ValueType &d, int &index)
 
template<typename ValueType >
void norm (const int mic_dev, const ValueType *vec, const int size, ValueType &d)
 
template<typename ValueType >
void reduce (const int mic_dev, const ValueType *vec, const int size, ValueType &d)
 
template<typename ValueType >
void scaleadd (const int mic_dev, const ValueType *vec1, const ValueType alpha, const int size, ValueType *vec2)
 
template<typename ValueType >
void addscale (const int mic_dev, const ValueType *vec1, const ValueType alpha, const int size, ValueType *vec2)
 
template<typename ValueType >
void scaleaddscale (const int mic_dev, const ValueType *vec1, const ValueType alpha, const ValueType beta, const int size, ValueType *vec2)
 
template<typename ValueType >
void scaleaddscale (const int mic_dev, const ValueType *vec1, const ValueType alpha, const ValueType beta, ValueType *vec2, const int src_offset, const int dst_offset, const int size)
 
template<typename ValueType >
void scaleadd2 (const int mic_dev, const ValueType *vec1, const ValueType *vec2, const ValueType alpha, const ValueType beta, const ValueType gamma, const int size, ValueType *vec3)
 
template<typename ValueType >
void scale (const int mic_dev, const ValueType alpha, const int size, ValueType *vec)
 
template<typename ValueType >
void pointwisemult (const int mic_dev, const ValueType *vec1, const int size, ValueType *vec2)
 
template<typename ValueType >
void pointwisemult2 (const int mic_dev, const ValueType *vec1, const ValueType *vec2, const int size, ValueType *vec3)
 
template<typename ValueType >
void permute (const int mic_dev, const int *perm, const ValueType *in, const int size, ValueType *out)
 
template<typename ValueType >
void permuteback (const int mic_dev, const int *perm, const ValueType *in, const int size, ValueType *out)
 
template<typename ValueType >
void power (const int mic_dev, const int size, const double val, ValueType *vec)
 
template void dot< double > (const int mic_dev, const double *vec1, const double *vec2, const int size, double &d)
 
template void asum< double > (const int mic_dev, const double *vec, const int size, double &d)
 
template void amax< double > (const int mic_dev, const double *vec, const int size, double &d, int &index)
 
template void norm< double > (const int mic_dev, const double *vec, const int size, double &d)
 
template void reduce< double > (const int mic_dev, const double *vec, const int size, double &d)
 
template void scaleadd< double > (const int mic_dev, const double *vec1, const double alpha, const int size, double *vec2)
 
template void addscale< double > (const int mic_dev, const double *vec1, const double alpha, const int size, double *vec2)
 
template void scaleaddscale< double > (const int mic_dev, const double *vec1, const double alpha, const double beta, const int size, double *vec2)
 
template void scaleaddscale< double > (const int mic_dev, const double *vec1, const double alpha, const double beta, double *vec2, const int src_offset, const int dst_offset, const int size)
 
template void scaleadd2< double > (const int mic_dev, const double *vec1, const double *vec2, const double alpha, const double beta, const double gamma, const int size, double *vec3)
 
template void scale< double > (const int mic_dev, const double alpha, const int size, double *vec)
 
template void pointwisemult< double > (const int mic_dev, const double *vec1, const int size, double *vec2)
 
template void pointwisemult2< double > (const int mic_dev, const double *vec1, const double *vec2, const int size, double *vec3)
 
template void permute< double > (const int mic_dev, const int *perm, const double *in, const int size, double *out)
 
template void permuteback< double > (const int mic_dev, const int *perm, const double *in, const int size, double *out)
 
template void power< double > (const int mic_dev, const int size, const double val, double *vec)
 
template void dot< float > (const int mic_dev, const float *vec1, const float *vec2, const int size, float &d)
 
template void asum< float > (const int mic_dev, const float *vec, const int size, float &d)
 
template void amax< float > (const int mic_dev, const float *vec, const int size, float &d, int &index)
 
template void norm< float > (const int mic_dev, const float *vec, const int size, float &d)
 
template void reduce< float > (const int mic_dev, const float *vec, const int size, float &d)
 
template void scaleadd< float > (const int mic_dev, const float *vec1, const float alpha, const int size, float *vec2)
 
template void addscale< float > (const int mic_dev, const float *vec1, const float alpha, const int size, float *vec2)
 
template void scaleaddscale< float > (const int mic_dev, const float *vec1, const float alpha, const float beta, const int size, float *vec2)
 
template void scaleaddscale< float > (const int mic_dev, const float *vec1, const float alpha, const float beta, float *vec2, const int src_offset, const int dst_offset, const int size)
 
template void scaleadd2< float > (const int mic_dev, const float *vec1, const float *vec2, const float alpha, const float beta, const float gamma, const int size, float *vec3)
 
template void scale< float > (const int mic_dev, const float alpha, const int size, float *vec)
 
template void pointwisemult< float > (const int mic_dev, const float *vec1, const int size, float *vec2)
 
template void pointwisemult2< float > (const int mic_dev, const float *vec1, const float *vec2, const int size, float *vec3)
 
template void permute< float > (const int mic_dev, const int *perm, const float *in, const int size, float *out)
 
template void permuteback< float > (const int mic_dev, const int *perm, const float *in, const int size, float *out)
 
template void power< float > (const int mic_dev, const int size, const double val, float *vec)
 
template void dot< int > (const int mic_dev, const int *vec1, const int *vec2, const int size, int &d)
 
template void asum< int > (const int mic_dev, const int *vec, const int size, int &d)
 
template void amax< int > (const int mic_dev, const int *vec, const int size, int &d, int &index)
 
template void norm< int > (const int mic_dev, const int *vec, const int size, int &d)
 
template void reduce< int > (const int mic_dev, const int *vec, const int size, int &d)
 
template void scaleadd< int > (const int mic_dev, const int *vec1, const int alpha, const int size, int *vec2)
 
template void addscale< int > (const int mic_dev, const int *vec1, const int alpha, const int size, int *vec2)
 
template void scaleaddscale< int > (const int mic_dev, const int *vec1, const int alpha, const int beta, const int size, int *vec2)
 
template void scaleaddscale< int > (const int mic_dev, const int *vec1, const int alpha, const int beta, int *vec2, const int src_offset, const int dst_offset, const int size)
 
template void scaleadd2< int > (const int mic_dev, const int *vec1, const int *vec2, const int alpha, const int beta, const int gamma, const int size, int *vec3)
 
template void scale< int > (const int mic_dev, const int alpha, const int size, int *vec)
 
template void pointwisemult< int > (const int mic_dev, const int *vec1, const int size, int *vec2)
 
template void pointwisemult2< int > (const int mic_dev, const int *vec1, const int *vec2, const int size, int *vec3)
 
template void permute< int > (const int mic_dev, const int *perm, const int *in, const int size, int *out)
 
template void permuteback< int > (const int mic_dev, const int *perm, const int *in, const int size, int *out)
 
template void power< int > (const int mic_dev, const int size, const double val, int *vec)
 
bool paralution_get_platform_ocl (cl_platform_id **ocl_platforms, cl_uint *ocl_numPlatforms)
 
bool paralution_get_device_ocl (const cl_platform_id &ocl_platform, cl_device_id **ocl_devices, cl_uint *ocl_numDevices)
 
bool paralution_set_kernels_ocl (cl_kernel *ocl_kernels)
 
template<>
cl_kernel paralution_get_kernel_ocl< double > (int kernel)
 
template<>
cl_kernel paralution_get_kernel_ocl< float > (int kernel)
 
template<>
cl_kernel paralution_get_kernel_ocl< int > (int kernel)
 
bool paralution_init_ocl ()
 Initialize OpenCL. More...
 
void paralution_stop_ocl ()
 Release the OpenCL resources. More...
 
void paralution_info_ocl (const struct Paralution_Backend_Descriptor)
 Print information about the GPUs in the system. More...
 
template<typename ValueType >
AcceleratorMatrix< ValueType > * _paralution_init_base_ocl_matrix (const struct Paralution_Backend_Descriptor backend_descriptor, const unsigned int matrix_format)
 Build (and return) an OpenCL matrix. More...
 
template<typename ValueType >
AcceleratorVector< ValueType > * _paralution_init_base_ocl_vector (const struct Paralution_Backend_Descriptor backend_descriptor)
 Build (and return) an OpenCL vector. More...
 
template<typename ValueType >
cl_kernel paralution_get_kernel_ocl (int)
 Get OpenCL kernel. More...
 
template<typename DataType >
void allocate_ocl (const int, cl_context, cl_mem **)
 Allocate device memory. More...
 
void free_ocl (cl_mem **)
 Free device memory. More...
 
template<typename DataType >
void ocl_set_to (cl_kernel, cl_command_queue, const size_t, const size_t, const int, const DataType, cl_mem *)
 Set device object to specific values. More...
 
template<typename DataType >
void ocl_host2dev (const int, const DataType *, cl_mem *, cl_command_queue)
 Copy object from host to device memory. More...
 
template<typename DataType >
void ocl_dev2host (const int, cl_mem *, DataType *, cl_command_queue)
 Copy object from device to host memory. More...
 
template<typename DataType >
void ocl_dev2dev (const int, cl_mem *, cl_mem *, cl_command_queue)
 Copy object from device to device (intra) memory. More...
 
template void allocate_ocl< double > (const int size, cl_context ocl_context, cl_mem **ptr)
 
template void allocate_ocl< float > (const int size, cl_context ocl_context, cl_mem **ptr)
 
template void allocate_ocl< int > (const int size, cl_context ocl_context, cl_mem **ptr)
 
template void allocate_ocl< unsigned int > (const int size, cl_context ocl_context, cl_mem **ptr)
 
template void allocate_ocl< char > (const int size, cl_context ocl_context, cl_mem **ptr)
 
template void ocl_set_to< double > (cl_kernel ocl_kernel, cl_command_queue ocl_cmdQueue, const size_t localWorkSize, const size_t globalWorkSize, const int size, const double val, cl_mem *ptr)
 
template void ocl_set_to< float > (cl_kernel ocl_kernel, cl_command_queue ocl_cmdQueue, const size_t localWorkSize, const size_t globalWorkSize, const int size, const float val, cl_mem *ptr)
 
template void ocl_set_to< int > (cl_kernel ocl_kernel, cl_command_queue ocl_cmdQueue, const size_t localWorkSize, const size_t globalWorkSize, const int size, const int val, cl_mem *ptr)
 
template void ocl_set_to< unsigned int > (cl_kernel ocl_kernel, cl_command_queue ocl_cmdQueue, const size_t localWorkSize, const size_t globalWorkSize, const int size, const unsigned int val, cl_mem *ptr)
 
template void ocl_set_to< char > (cl_kernel ocl_kernel, cl_command_queue ocl_cmdQueue, const size_t localWorkSize, const size_t globalWorkSize, const int size, const char val, cl_mem *ptr)
 
template void ocl_host2dev< double > (const int size, const double *src, cl_mem *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_host2dev< float > (const int size, const float *src, cl_mem *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_host2dev< int > (const int size, const int *src, cl_mem *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_host2dev< unsigned int > (const int size, const unsigned int *src, cl_mem *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_host2dev< char > (const int size, const char *src, cl_mem *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_dev2host< double > (const int size, cl_mem *src, double *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_dev2host< float > (const int size, cl_mem *src, float *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_dev2host< int > (const int size, cl_mem *src, int *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_dev2host< unsigned int > (const int size, cl_mem *src, unsigned int *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_dev2host< char > (const int size, cl_mem *src, char *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_dev2dev< double > (const int size, cl_mem *src, cl_mem *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_dev2dev< float > (const int size, cl_mem *src, cl_mem *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_dev2dev< int > (const int size, cl_mem *src, cl_mem *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_dev2dev< unsigned int > (const int size, cl_mem *src, cl_mem *dst, cl_command_queue ocl_cmdQueue)
 
template void ocl_dev2dev< char > (const int size, cl_mem *src, cl_mem *dst, cl_command_queue ocl_cmdQueue)
 
template<typename DataType >
void allocate_host (const int size, DataType **ptr)
 Allocate buffer on the host. More...
 
template<typename DataType >
void free_host (DataType **ptr)
 Free buffer on the host. More...
 
template<typename DataType >
void set_to_zero_host (const int size, DataType *ptr)
 Set a buffer to zero on the host. More...
 
template void allocate_host< float > (const int size, float **ptr)
 
template void allocate_host< double > (const int size, double **ptr)
 
template void allocate_host< std::complex< float > > (const int size, std::complex< float > **ptr)
 
template void allocate_host< std::complex< double > > (const int size, std::complex< double > **ptr)
 
template void allocate_host< int > (const int size, int **ptr)
 
template void allocate_host< unsigned int > (const int size, unsigned int **ptr)
 
template void allocate_host< char > (const int size, char **ptr)
 
template void free_host< float > (float **ptr)
 
template void free_host< double > (double **ptr)
 
template void free_host< std::complex< float > > (std::complex< float > **ptr)
 
template void free_host< std::complex< double > > (std::complex< double > **ptr)
 
template void free_host< int > (int **ptr)
 
template void free_host< unsigned int > (unsigned int **ptr)
 
template void free_host< char > (char **ptr)
 
template void set_to_zero_host< float > (const int size, float *ptr)
 
template void set_to_zero_host< double > (const int size, double *ptr)
 
template void set_to_zero_host< std::complex< float > > (const int size, std::complex< float > *ptr)
 
template void set_to_zero_host< std::complex< double > > (const int size, std::complex< double > *ptr)
 
template void set_to_zero_host< int > (const int size, int *ptr)
 
template void set_to_zero_host< unsigned int > (const int size, unsigned int *ptr)
 
template void set_to_zero_host< char > (const int size, char *ptr)
 
void _paralution_open_log_file (void)
 
void _paralution_close_log_file (void)
 
float paralution_abs (const float val)
 Return absolute float value. More...
 
double paralution_abs (const double val)
 Return absolute double value. More...
 
float paralution_abs (const std::complex< float > val)
 Return absolute float value. More...
 
double paralution_abs (const std::complex< double > val)
 Return absolute double value. More...
 
int paralution_abs (const int val)
 Return absolute int value. More...
 
template<typename ValueType >
bool operator< (const std::complex< ValueType > &lhs, const std::complex< ValueType > &rhs)
 Overloaded < operator for complex numbers. More...
 
template<typename ValueType >
bool operator> (const std::complex< ValueType > &lhs, const std::complex< ValueType > &rhs)
 Overloaded > operator for complex numbers. More...
 
template<typename ValueType >
bool operator<= (const std::complex< ValueType > &lhs, const std::complex< ValueType > &rhs)
 Overloaded <= operator for complex numbers. More...
 
template<typename ValueType >
bool operator>= (const std::complex< ValueType > &lhs, const std::complex< ValueType > &rhs)
 Overloaded >= operator for complex numbers. More...
 
template bool operator< (const std::complex< float > &lhs, const std::complex< float > &rhs)
 
template bool operator< (const std::complex< double > &lhs, const std::complex< double > &rhs)
 
template bool operator> (const std::complex< float > &lhs, const std::complex< float > &rhs)
 
template bool operator> (const std::complex< double > &lhs, const std::complex< double > &rhs)
 
template bool operator<= (const std::complex< float > &lhs, const std::complex< float > &rhs)
 
template bool operator<= (const std::complex< double > &lhs, const std::complex< double > &rhs)
 
template bool operator>= (const std::complex< float > &lhs, const std::complex< float > &rhs)
 
template bool operator>= (const std::complex< double > &lhs, const std::complex< double > &rhs)
 
double paralution_time (void)
 Return current time in microseconds. More...
 

Variables

Paralution_Backend_Descriptor _Backend_Descriptor
 Global backend descriptor. More...
 
const std::string _paralution_host_name [1]
 Host name. More...
 
const std::string _paralution_backend_name [4]
 Backend names. More...
 
Paralution_Object_Data Paralution_Object_Data_Tracking
 Global obj tracking structure. More...
 
const IndexType interval_size
 
const IndexType const IndexType * I
 
const IndexType const IndexType const IndexType * J
 
const IndexType const IndexType const IndexType const ValueType * V
 
const IndexType const IndexType const IndexType const ValueType const ValueType scalar
 
const IndexType const IndexType const IndexType const ValueType const ValueType const ValueType * x
 
const IndexType const IndexType const IndexType const ValueType const ValueType const ValueType ValueType * y
 
const IndexType const IndexType const IndexType const ValueType const ValueType const ValueType ValueType IndexType * temp_rows
 
const IndexType const IndexType const IndexType const ValueType const ValueType const ValueType ValueType IndexType ValueType * temp_vals
 
__shared__ volatile ValueType vals [BLOCK_SIZE]
 
const IndexType thread_id = BLOCK_SIZE * blockIdx.x + threadIdx.x
 
const IndexType thread_lane = threadIdx.x & (WARP_SIZE-1)
 
const IndexType warp_id = thread_id / WARP_SIZE
 
const IndexType interval_begin = warp_id * interval_size
 
IndexType interval_end2 = interval_begin + interval_size
 
const IndexType interval_end = interval_end2
 
const IndexType idx = 16 * (threadIdx.x/32 + 1) + threadIdx.x
 
 rows [idx-16] = -1
 
const IndexType end = num_warps - (num_warps & (BLOCK_SIZE - 1))
 
IndexType i = threadIdx.x
 
const std::string _matrix_format_names [8]
 Matrix Names. More...
 
const char * ocl_kernels_bcsr
 
const char * ocl_kernels_coo
 
const char * ocl_kernels_csr
 
const char * ocl_kernels_dense
 
const char * ocl_kernels_dia
 
const char * ocl_kernels_ell
 
const char * ocl_kernels_general
 
const char * ocl_kernels_hyb
 
const char * ocl_kernels_mcsr
 
const char * ocl_kernels_vector
 
const std::string _stencil_type_names [1] = {"Laplace2D"}
 Stencil Names. More...
 

Enumeration Type Documentation

Enumerator
Vcycle 
Wcycle 
Kcycle 
Fcycle 
Enumerator
Aggregation 
SmoothedAggregation 

Matrix Enumeration.

Enumerator
DENSE 
CSR 
MCSR 
BCSR 
COO 
DIA 
ELL 
HYB 

Backend IDs.

Enumerator
None 
GPU 
OCL 
MIC 

Stencil Enumeration.

Enumerator
Laplace2D 

Function Documentation

cusparseStatus_t paralution::__cusparseXcsrgeam__ ( cusparseHandle_t  handle,
int  m,
int  n,
const double *  alpha,
const cusparseMatDescr_t  descrA,
int  nnzA,
const double *  csrValA,
const int *  csrRowPtrA,
const int *  csrColIndA,
const double *  beta,
const cusparseMatDescr_t  descrB,
int  nnzB,
const double *  csrValB,
const int *  csrRowPtrB,
const int *  csrColIndB,
const cusparseMatDescr_t  descrC,
double *  csrValC,
int *  csrRowPtrC,
int *  csrColIndC 
)
cusparseStatus_t paralution::__cusparseXcsrgeam__ ( cusparseHandle_t  handle,
int  m,
int  n,
const float *  alpha,
const cusparseMatDescr_t  descrA,
int  nnzA,
const float *  csrValA,
const int *  csrRowPtrA,
const int *  csrColIndA,
const float *  beta,
const cusparseMatDescr_t  descrB,
int  nnzB,
const float *  csrValB,
const int *  csrRowPtrB,
const int *  csrColIndB,
const cusparseMatDescr_t  descrC,
float *  csrValC,
int *  csrRowPtrC,
int *  csrColIndC 
)
cusparseStatus_t paralution::__cusparseXcsrgemm__ ( cusparseHandle_t  handle,
cusparseOperation_t  transA,
cusparseOperation_t  transB,
int  m,
int  n,
int  k,
const cusparseMatDescr_t  descrA,
const int  nnzA,
const double *  csrValA,
const int *  csrRowPtrA,
const int *  csrColIndA,
const cusparseMatDescr_t  descrB,
const int  nnzB,
const double *  csrValB,
const int *  csrRowPtrB,
const int *  csrColIndB,
const cusparseMatDescr_t  descrC,
double *  csrValC,
const int *  csrRowPtrC,
int *  csrColIndC 
)
cusparseStatus_t paralution::__cusparseXcsrgemm__ ( cusparseHandle_t  handle,
cusparseOperation_t  transA,
cusparseOperation_t  transB,
int  m,
int  n,
int  k,
const cusparseMatDescr_t  descrA,
const int  nnzA,
const float *  csrValA,
const int *  csrRowPtrA,
const int *  csrColIndA,
const cusparseMatDescr_t  descrB,
const int  nnzB,
const float *  csrValB,
const int *  csrRowPtrB,
const int *  csrColIndB,
const cusparseMatDescr_t  descrC,
float *  csrValC,
const int *  csrRowPtrC,
int *  csrColIndC 
)
template<typename IndexType , typename ValueType , unsigned int BLOCK_SIZE, unsigned int WARP_SIZE>
paralution::__launch_bounds__ ( BLOCK_SIZE  ,
 
) const
template<typename IndexType , typename ValueType , unsigned int BLOCK_SIZE>
paralution::__launch_bounds__ ( BLOCK_SIZE  ,
 
) const
size_t paralution::_paralution_add_obj ( class ParalutionObj *  ptr)
bool paralution::_paralution_check_if_any_obj ( void  )

Referenced by init_paralution().

void paralution::_paralution_close_log_file ( void  )
bool paralution::_paralution_del_obj ( class ParalutionObj *  ptr,
size_t  id 
)
void paralution::_paralution_delete_all_obj ( void  )

Referenced by stop_paralution().

template<typename ValueType >
AcceleratorMatrix< ValueType > * paralution::_paralution_init_base_backend_matrix ( const struct Paralution_Backend_Descriptor  backend_descriptor,
const unsigned int  matrix_format 
)

Build (and return) a matrix on the accelerator selected in the descriptor.

template<typename ValueType >
AcceleratorVector< ValueType > * paralution::_paralution_init_base_backend_vector ( const struct Paralution_Backend_Descriptor  backend_descriptor)

Build (and return) a vector on the accelerator selected in the descriptor.

template<typename ValueType >
AcceleratorMatrix<ValueType>* paralution::_paralution_init_base_gpu_matrix ( const struct Paralution_Backend_Descriptor  backend_descriptor,
const unsigned int  matrix_format 
)

Build (and return) a matrix on GPU.

template<typename ValueType >
AcceleratorVector<ValueType>* paralution::_paralution_init_base_gpu_vector ( const struct Paralution_Backend_Descriptor  backend_descriptor)

Build (and return) a vector on GPU.

template<typename ValueType >
HostMatrix< ValueType > * paralution::_paralution_init_base_host_matrix ( const struct Paralution_Backend_Descriptor  backend_descriptor,
const unsigned int  matrix_format 
)

Build (and return) a matrix on the host.

template<typename ValueType >
AcceleratorMatrix< ValueType > * paralution::_paralution_init_base_mic_matrix ( const struct Paralution_Backend_Descriptor  backend_descriptor,
const unsigned int  matrix_format 
)

Build (and return) a matrix on MIC.

template<typename ValueType >
AcceleratorVector< ValueType > * paralution::_paralution_init_base_mic_vector ( const struct Paralution_Backend_Descriptor  backend_descriptor)

Build (and return) a vector on MIC.

template<typename ValueType >
AcceleratorMatrix< ValueType > * paralution::_paralution_init_base_ocl_matrix ( const struct Paralution_Backend_Descriptor  backend_descriptor,
const unsigned int  matrix_format 
)

Build (and return) an OpenCL matrix.

template<typename ValueType >
AcceleratorVector< ValueType > * paralution::_paralution_init_base_ocl_vector ( const struct Paralution_Backend_Descriptor  backend_descriptor)

Build (and return) an OpenCL vector.

void paralution::_paralution_open_log_file ( void  )

Referenced by init_paralution().

void paralution::_paralution_sync ( void  )
void paralution::_set_backend_descriptor ( const struct Paralution_Backend_Descriptor  backend_descriptor)

Set backend descriptor.

void paralution::_set_omp_backend_threads ( const struct Paralution_Backend_Descriptor  backend_descriptor,
const int  size 
)

Set the OMP threads based on the size threshold.

Referenced by paralution::HostMatrixCOO< ValueType >::AddScalar(), paralution::HostMatrixCSR< ValueType >::AddScalar(), paralution::HostMatrixCOO< ValueType >::AddScalarDiagonal(), paralution::HostMatrixCSR< ValueType >::AddScalarDiagonal(), paralution::HostMatrixCOO< ValueType >::AddScalarOffDiagonal(), paralution::HostMatrixCSR< ValueType >::AddScalarOffDiagonal(), paralution::HostVector< ValueType >::AddScale(), paralution::HostVector< ValueType >::Amax(), paralution::HostStencilLaplace2D< ValueType >::Apply(), paralution::HostMatrixBCSR< ValueType >::Apply(), paralution::HostMatrixDENSE< ValueType >::Apply(), paralution::HostMatrixDIA< ValueType >::Apply(), paralution::HostMatrixHYB< ValueType >::Apply(), paralution::HostMatrixELL< ValueType >::Apply(), paralution::HostMatrixMCSR< ValueType >::Apply(), paralution::HostMatrixCOO< ValueType >::Apply(), paralution::HostMatrixCSR< ValueType >::Apply(), paralution::HostStencilLaplace2D< ValueType >::ApplyAdd(), paralution::HostMatrixBCSR< ValueType >::ApplyAdd(), paralution::HostMatrixDENSE< ValueType >::ApplyAdd(), paralution::HostMatrixDIA< ValueType >::ApplyAdd(), paralution::HostMatrixHYB< ValueType >::ApplyAdd(), paralution::HostMatrixELL< ValueType >::ApplyAdd(), paralution::HostMatrixMCSR< ValueType >::ApplyAdd(), paralution::HostMatrixCSR< ValueType >::ApplyAdd(), paralution::HostMatrixCSR< ValueType >::Assemble(), paralution::HostVector< ValueType >::Assemble(), paralution::HostMatrixCSR< ValueType >::AssembleUpdate(), paralution::HostVector< ValueType >::Asum(), paralution::HostMatrixCSR< ValueType >::Compress(), paralution::HostMatrixBCSR< ValueType >::CopyFrom(), paralution::HostMatrixMCSR< ValueType >::CopyFrom(), paralution::HostMatrixDENSE< ValueType >::CopyFrom(), paralution::HostMatrixDIA< ValueType >::CopyFrom(), paralution::HostMatrixELL< ValueType >::CopyFrom(), paralution::HostVector< ValueType >::CopyFrom(), paralution::HostMatrixCOO< ValueType >::CopyFrom(), paralution::HostMatrixCSR< 
ValueType >::CopyFrom(), paralution::HostMatrixCOO< ValueType >::CopyFromCOO(), paralution::HostMatrixCSR< ValueType >::CopyFromCSR(), paralution::HostVector< ValueType >::CopyFromData(), paralution::HostVector< ValueType >::CopyFromDouble(), paralution::HostVector< ValueType >::CopyFromFloat(), paralution::HostVector< ValueType >::CopyFromPermute(), paralution::HostVector< ValueType >::CopyFromPermuteBackward(), paralution::HostMatrixCOO< ValueType >::CopyToCOO(), paralution::HostMatrixCSR< ValueType >::CopyToCSR(), paralution::HostVector< ValueType >::CopyToData(), paralution::HostMatrixCSR< ValueType >::DiagonalMatrixMultL(), paralution::HostMatrixCSR< ValueType >::DiagonalMatrixMultR(), paralution::HostVector< ValueType >::Dot(), paralution::HostVector< ValueType >::DotNonConj(), paralution::HostMatrixDENSE< ValueType >::ExtractColumnVector(), paralution::HostMatrixCSR< ValueType >::ExtractColumnVector(), paralution::HostMatrixCSR< ValueType >::ExtractDiagonal(), paralution::HostMatrixCSR< ValueType >::ExtractInverseDiagonal(), paralution::HostMatrixDENSE< ValueType >::ExtractRowVector(), paralution::HostMatrixCSR< ValueType >::ExtractRowVector(), paralution::HostMatrixCSR< ValueType >::Gershgorin(), paralution::HostMatrixCSR< ValueType >::ILUpFactorizeNumeric(), paralution::HostMatrixCSR< ValueType >::MatrixAdd(), paralution::HostVector< ValueType >::Norm(), paralution::HostMatrixCSR< ValueType >::NumericMatMatMult(), paralution::HostVector< ValueType >::Ones(), paralution::HostMatrixCOO< ValueType >::Permute(), paralution::HostVector< ValueType >::Permute(), paralution::HostMatrixCSR< ValueType >::Permute(), paralution::HostMatrixCOO< ValueType >::PermuteBackward(), paralution::HostVector< ValueType >::PermuteBackward(), paralution::HostVector< ValueType >::PointWiseMult(), paralution::HostVector< ValueType >::Power(), paralution::HostVector< ValueType >::Reduce(), paralution::HostMatrixDENSE< ValueType >::ReplaceColumnVector(), paralution::HostMatrixDENSE< 
ValueType >::ReplaceRowVector(), paralution::HostMatrixCOO< ValueType >::Scale(), paralution::HostMatrixCSR< ValueType >::Scale(), paralution::HostVector< ValueType >::Scale(), paralution::HostVector< ValueType >::ScaleAdd(), paralution::HostVector< ValueType >::ScaleAdd2(), paralution::HostVector< ValueType >::ScaleAddScale(), paralution::HostMatrixCOO< ValueType >::ScaleDiagonal(), paralution::HostMatrixCSR< ValueType >::ScaleDiagonal(), paralution::HostMatrixCOO< ValueType >::ScaleOffDiagonal(), paralution::HostMatrixCSR< ValueType >::ScaleOffDiagonal(), paralution::HostVector< ValueType >::SetRandom(), paralution::HostVector< ValueType >::SetValues(), paralution::HostMatrixCSR< ValueType >::SymbolicMatMatMult(), and paralution::HostVector< ValueType >::Zeros().

template<typename ValueType >
void paralution::addscale ( const int  mic_dev,
const ValueType *  vec1,
const ValueType  alpha,
const int  size,
ValueType *  vec2 
)
template void paralution::addscale< double > ( const int  mic_dev,
const double *  vec1,
const double  alpha,
const int  size,
double *  vec2 
)
template void paralution::addscale< float > ( const int  mic_dev,
const float *  vec1,
const float  alpha,
const int  size,
float *  vec2 
)
template void paralution::addscale< int > ( const int  mic_dev,
const int *  vec1,
const int  alpha,
const int  size,
int *  vec2 
)
template<typename DataType >
void paralution::allocate_gpu ( const int  size,
DataType **  ptr 
)
template<typename DataType >
void paralution::allocate_host ( const int  size,
DataType **  ptr 
)

Allocate buffer on the host.

Referenced by paralution::HostVector< ValueType >::Allocate(), paralution::HostMatrixCOO< ValueType >::AllocateCOO(), paralution::HostMatrixCSR< ValueType >::AllocateCSR(), paralution::HostMatrixDENSE< ValueType >::AllocateDENSE(), paralution::HostMatrixDIA< ValueType >::AllocateDIA(), paralution::HostMatrixELL< ValueType >::AllocateELL(), paralution::HostMatrixHYB< ValueType >::AllocateHYB(), paralution::HostMatrixMCSR< ValueType >::AllocateMCSR(), paralution::OCLAcceleratorVector< ValueType >::Amax(), paralution::HostMatrixCSR< ValueType >::AMGAggregation(), paralution::HostMatrixCSR< ValueType >::AMGSmoothedAggregation(), paralution::HostMatrixCSR< ValueType >::Assemble(), paralution::HostVector< ValueType >::Assemble(), paralution::OCLAcceleratorVector< ValueType >::Asum(), paralution::MixedPrecisionDC< OperatorTypeH, VectorTypeH, ValueTypeH, OperatorTypeL, VectorTypeL, ValueTypeL >::Build(), paralution::IDR< OperatorType, VectorType, ValueType >::Build(), paralution::BlockPreconditioner< OperatorType, VectorType, ValueType >::Build(), paralution::SIRA< OperatorTypeH, VectorTypeH, ValueTypeH, OperatorTypeL, VectorTypeL, ValueTypeL >::Build_(), paralution::OCLAcceleratorMatrixCSR< ValueType >::Compress(), paralution::OCLAcceleratorMatrixELL< ValueType >::ConvertFrom(), coo_to_csr(), csr_to_coo(), csr_to_dense(), csr_to_dia(), csr_to_ell(), csr_to_hyb(), csr_to_mcsr(), paralution::MultiColored< OperatorType, VectorType, ValueType >::Decompose_(), dense_to_csr(), dia_to_csr(), paralution::OCLAcceleratorVector< ValueType >::Dot(), paralution::OCLAcceleratorVector< ValueType >::DotNonConj(), ell_to_csr(), paralution::OCLAcceleratorMatrixCSR< ValueType >::ExtractL(), paralution::HostMatrixCSR< ValueType >::ExtractL(), paralution::OCLAcceleratorMatrixCSR< ValueType >::ExtractLDiagonal(), paralution::HostMatrixCSR< ValueType >::ExtractLDiagonal(), paralution::OCLAcceleratorMatrixCSR< ValueType >::ExtractSubMatrix(), paralution::HostMatrixCSR< ValueType >::ExtractU(), 
paralution::OCLAcceleratorMatrixCSR< ValueType >::ExtractU(), paralution::HostMatrixCSR< ValueType >::ExtractUDiagonal(), paralution::OCLAcceleratorMatrixCSR< ValueType >::ExtractUDiagonal(), hyb_to_csr(), paralution::HostMatrixMCSR< ValueType >::ILU0Factorize(), paralution::HostMatrixCSR< ValueType >::ILU0Factorize(), paralution::HostMatrixCSR< ValueType >::ILUpFactorizeNumeric(), paralution::HostMatrixCSR< ValueType >::ILUTFactorize(), import_dealii_matrix(), import_dealii_vector(), paralution::HostMatrixDENSE< ValueType >::Invert(), paralution::HostMatrixCSR< ValueType >::MatMatMult(), paralution::OCLAcceleratorMatrixCSR< ValueType >::MaximalIndependentSet(), paralution::HostMatrixCSR< ValueType >::MaximalIndependentSet(), mcsr_to_csr(), mexFunction(), paralution::OCLAcceleratorMatrixCSR< ValueType >::MultiColoring(), paralution::HostMatrixCSR< ValueType >::MultiColoring(), paralution::OCLAcceleratorVector< ValueType >::Norm(), paralution_fortran_solve_coo(), paralution_fortran_solve_csr(), paralution_solve(), paralution::HostMatrixCOO< ValueType >::PermuteBackward(), paralution::HostMatrixDENSE< ValueType >::QRSolve(), read_coordinate_stream(), paralution::OCLAcceleratorVector< ValueType >::Reduce(), paralution::OCLAcceleratorMatrixCSR< ValueType >::ReplaceColumnVector(), paralution::HostMatrixCSR< ValueType >::ReplaceColumnVector(), paralution::HostMatrixCSR< ValueType >::ReplaceRowVector(), paralution::IDR< OperatorType, VectorType, ValueType >::SolveNonPrecond_(), paralution::IDR< OperatorType, VectorType, ValueType >::SolvePrecond_(), and paralution::HostMatrixCSR< ValueType >::SPAI().

template void paralution::allocate_host< char > ( const int  size,
char **  ptr 
)
template void paralution::allocate_host< double > ( const int  size,
double **  ptr 
)
template void paralution::allocate_host< float > ( const int  size,
float **  ptr 
)
template void paralution::allocate_host< std::complex< double > > ( const int  size,
std::complex< double > **  ptr 
)
template void paralution::allocate_host< std::complex< float > > ( const int  size,
std::complex< float > **  ptr 
)
template void paralution::allocate_host< unsigned int > ( const int  size,
unsigned int **  ptr 
)
template void paralution::allocate_mic< char > ( const int  mic_dev,
const int  size,
char **  ptr 
)
template void paralution::allocate_mic< double > ( const int  mic_dev,
const int  size,
double **  ptr 
)
template void paralution::allocate_mic< float > ( const int  mic_dev,
const int  size,
float **  ptr 
)
template void paralution::allocate_mic< int > ( const int  mic_dev,
const int  size,
int **  ptr 
)
template void paralution::allocate_mic< unsigned int > ( const int  mic_dev,
const int  size,
unsigned int **  ptr 
)
template<typename DataType >
void paralution::allocate_ocl ( const int  size,
cl_context  ocl_context,
cl_mem **  ptr 
)

Allocate device memory.

template void paralution::allocate_ocl< char > ( const int  size,
cl_context  ocl_context,
cl_mem **  ptr 
)
template void paralution::allocate_ocl< double > ( const int  size,
cl_context  ocl_context,
cl_mem **  ptr 
)
template void paralution::allocate_ocl< float > ( const int  size,
cl_context  ocl_context,
cl_mem **  ptr 
)
template void paralution::allocate_ocl< unsigned int > ( const int  size,
cl_context  ocl_context,
cl_mem **  ptr 
)
template<typename ValueType >
void paralution::amax ( const int  mic_dev,
const ValueType *  vec,
const int  size,
ValueType &  d,
int &  index 
)
template void paralution::amax< double > ( const int  mic_dev,
const double *  vec,
const int  size,
double &  d,
int &  index 
)
template void paralution::amax< float > ( const int  mic_dev,
const float *  vec,
const int  size,
float &  d,
int &  index 
)
template void paralution::amax< int > ( const int  mic_dev,
const int *  vec,
const int  size,
int &  d,
int &  index 
)
template<typename ValueType >
void paralution::assign_complex ( ValueType &  val,
double  real,
double  imag 
)

Referenced by read_coordinate_stream().

template<typename ValueType >
void paralution::assign_complex ( std::complex< ValueType > &  val,
double  real,
double  imag 
)
template<typename ValueType >
void paralution::asum ( const int  mic_dev,
const ValueType *  vec,
const int  size,
ValueType &  d 
)
template void paralution::asum< double > ( const int  mic_dev,
const double *  vec,
const int  size,
double &  d 
)
template void paralution::asum< float > ( const int  mic_dev,
const float *  vec,
const int  size,
float &  d 
)
template void paralution::asum< int > ( const int  mic_dev,
const int *  vec,
const int  size,
int &  d 
)
template<typename ValueType , typename IndexType >
void paralution::coo_to_csr ( const int  omp_threads,
const IndexType  nnz,
const IndexType  nrow,
const IndexType  ncol,
const MatrixCOO< ValueType, IndexType > &  src,
MatrixCSR< ValueType, IndexType > *  dst 
)
template void paralution::coo_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCOO< double, int > &  src,
MatrixCSR< double, int > *  dst 
)
template void paralution::coo_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCOO< float, int > &  src,
MatrixCSR< float, int > *  dst 
)
template void paralution::coo_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCOO< std::complex< double >, int > &  src,
MatrixCSR< std::complex< double >, int > *  dst 
)
template void paralution::coo_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCOO< std::complex< float >, int > &  src,
MatrixCSR< std::complex< float >, int > *  dst 
)
template void paralution::coo_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCOO< int, int > &  src,
MatrixCSR< int, int > *  dst 
)
template void paralution::copy_mic_mic< double > ( const int  mic_dev,
const double *  src,
double *  dst,
const int  size 
)
template void paralution::copy_mic_mic< float > ( const int  mic_dev,
const float *  src,
float *  dst,
const int  size 
)
template void paralution::copy_mic_mic< int > ( const int  mic_dev,
const int *  src,
int *  dst,
const int  size 
)
template void paralution::copy_mic_mic< unsigned int > ( const int  mic_dev,
const unsigned int *  src,
unsigned int *  dst,
const int  size 
)
template void paralution::copy_to_host< double > ( const int  mic_dev,
const double *  src,
double *  dst,
const int  size 
)
template void paralution::copy_to_host< float > ( const int  mic_dev,
const float *  src,
float *  dst,
const int  size 
)
template void paralution::copy_to_host< int > ( const int  mic_dev,
const int *  src,
int *  dst,
const int  size 
)
template void paralution::copy_to_host< unsigned int > ( const int  mic_dev,
const unsigned int *  src,
unsigned int *  dst,
const int  size 
)
template void paralution::copy_to_mic< double > ( const int  mic_dev,
const double *  src,
double *  dst,
const int  size 
)
template void paralution::copy_to_mic< float > ( const int  mic_dev,
const float *  src,
float *  dst,
const int  size 
)
template void paralution::copy_to_mic< int > ( const int  mic_dev,
const int *  src,
int *  dst,
const int  size 
)
template void paralution::copy_to_mic< unsigned int > ( const int  mic_dev,
const unsigned int *  src,
unsigned int *  dst,
const int  size 
)
template<typename ValueType , typename IndexType >
void paralution::csr_to_coo ( const int  omp_threads,
const IndexType  nnz,
const IndexType  nrow,
const IndexType  ncol,
const MatrixCSR< ValueType, IndexType > &  src,
MatrixCOO< ValueType, IndexType > *  dst 
)
template void paralution::csr_to_coo ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< double, int > &  src,
MatrixCOO< double, int > *  dst 
)
template void paralution::csr_to_coo ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< float, int > &  src,
MatrixCOO< float, int > *  dst 
)
template void paralution::csr_to_coo ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< std::complex< double >, int > &  src,
MatrixCOO< std::complex< double >, int > *  dst 
)
template void paralution::csr_to_coo ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< std::complex< float >, int > &  src,
MatrixCOO< std::complex< float >, int > *  dst 
)
template void paralution::csr_to_coo ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< int, int > &  src,
MatrixCOO< int, int > *  dst 
)
template<typename ValueType , typename IndexType >
void paralution::csr_to_dense ( const int  omp_threads,
const IndexType  nnz,
const IndexType  nrow,
const IndexType  ncol,
const MatrixCSR< ValueType, IndexType > &  src,
MatrixDENSE< ValueType > *  dst 
)
template void paralution::csr_to_dense ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< double, int > &  src,
MatrixDENSE< double > *  dst 
)
template void paralution::csr_to_dense ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< float, int > &  src,
MatrixDENSE< float > *  dst 
)
template void paralution::csr_to_dense ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< std::complex< double >, int > &  src,
MatrixDENSE< std::complex< double > > *  dst 
)
template void paralution::csr_to_dense ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< std::complex< float >, int > &  src,
MatrixDENSE< std::complex< float > > *  dst 
)
template void paralution::csr_to_dense ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< int, int > &  src,
MatrixDENSE< int > *  dst 
)
template<typename ValueType , typename IndexType >
void paralution::csr_to_dia ( const int  omp_threads,
const IndexType  nnz,
const IndexType  nrow,
const IndexType  ncol,
const MatrixCSR< ValueType, IndexType > &  src,
MatrixDIA< ValueType, IndexType > *  dst,
IndexType *  nnz_dia 
)
template void paralution::csr_to_dia ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< double, int > &  src,
MatrixDIA< double, int > *  dst,
int *  nnz_dia 
)
template void paralution::csr_to_dia ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< float, int > &  src,
MatrixDIA< float, int > *  dst,
int *  nnz_dia 
)
template void paralution::csr_to_dia ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< std::complex< double >, int > &  src,
MatrixDIA< std::complex< double >, int > *  dst,
int *  nnz_dia 
)
template void paralution::csr_to_dia ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< std::complex< float >, int > &  src,
MatrixDIA< std::complex< float >, int > *  dst,
int *  nnz_dia 
)
template void paralution::csr_to_dia ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< int, int > &  src,
MatrixDIA< int, int > *  dst,
int *  nnz_dia 
)
template<typename ValueType , typename IndexType >
void paralution::csr_to_ell ( const int  omp_threads,
const IndexType  nnz,
const IndexType  nrow,
const IndexType  ncol,
const MatrixCSR< ValueType, IndexType > &  src,
MatrixELL< ValueType, IndexType > *  dst,
IndexType *  nnz_ell 
)
template void paralution::csr_to_ell ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< double, int > &  src,
MatrixELL< double, int > *  dst,
int *  nnz_ell 
)
template void paralution::csr_to_ell ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< float, int > &  src,
MatrixELL< float, int > *  dst,
int *  nnz_ell 
)
template void paralution::csr_to_ell ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< std::complex< double >, int > &  src,
MatrixELL< std::complex< double >, int > *  dst,
int *  nnz_ell 
)
template void paralution::csr_to_ell ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< std::complex< float >, int > &  src,
MatrixELL< std::complex< float >, int > *  dst,
int *  nnz_ell 
)
template void paralution::csr_to_ell ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< int, int > &  src,
MatrixELL< int, int > *  dst,
int *  nnz_ell 
)
template<typename ValueType , typename IndexType >
void paralution::csr_to_hyb ( const int  omp_threads,
const IndexType  nnz,
const IndexType  nrow,
const IndexType  ncol,
const MatrixCSR< ValueType, IndexType > &  src,
MatrixHYB< ValueType, IndexType > *  dst,
IndexType *  nnz_hyb,
IndexType *  nnz_ell,
IndexType *  nnz_coo 
)
template void paralution::csr_to_hyb ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< double, int > &  src,
MatrixHYB< double, int > *  dst,
int *  nnz_hyb,
int *  nnz_ell,
int *  nnz_coo 
)
template void paralution::csr_to_hyb ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< float, int > &  src,
MatrixHYB< float, int > *  dst,
int *  nnz_hyb,
int *  nnz_ell,
int *  nnz_coo 
)
template void paralution::csr_to_hyb ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< std::complex< double >, int > &  src,
MatrixHYB< std::complex< double >, int > *  dst,
int *  nnz_hyb,
int *  nnz_ell,
int *  nnz_coo 
)
template void paralution::csr_to_hyb ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< std::complex< float >, int > &  src,
MatrixHYB< std::complex< float >, int > *  dst,
int *  nnz_hyb,
int *  nnz_ell,
int *  nnz_coo 
)
template void paralution::csr_to_hyb ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< int, int > &  src,
MatrixHYB< int, int > *  dst,
int *  nnz_hyb,
int *  nnz_ell,
int *  nnz_coo 
)
template<typename ValueType , typename IndexType >
void paralution::csr_to_mcsr ( const int  omp_threads,
const IndexType  nnz,
const IndexType  nrow,
const IndexType  ncol,
const MatrixCSR< ValueType, IndexType > &  src,
MatrixMCSR< ValueType, IndexType > *  dst 
)
template void paralution::csr_to_mcsr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< double, int > &  src,
MatrixMCSR< double, int > *  dst 
)
template void paralution::csr_to_mcsr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< float, int > &  src,
MatrixMCSR< float, int > *  dst 
)
template void paralution::csr_to_mcsr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< std::complex< double >, int > &  src,
MatrixMCSR< std::complex< double >, int > *  dst 
)
template void paralution::csr_to_mcsr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< std::complex< float >, int > &  src,
MatrixMCSR< std::complex< float >, int > *  dst 
)
template void paralution::csr_to_mcsr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixCSR< int, int > &  src,
MatrixMCSR< int, int > *  dst 
)
template<typename IndexType , unsigned int BLOCK_SIZE>
bool paralution::cum_sum ( IndexType *  dst,
const IndexType *  src,
const IndexType  numElems 
)
template<typename ValueType , typename IndexType >
void paralution::dense_to_csr ( const int  omp_threads,
const IndexType  nrow,
const IndexType  ncol,
const MatrixDENSE< ValueType > &  src,
MatrixCSR< ValueType, IndexType > *  dst,
IndexType *  nnz 
)
template void paralution::dense_to_csr ( const int  omp_threads,
const int  nrow,
const int  ncol,
const MatrixDENSE< double > &  src,
MatrixCSR< double, int > *  dst,
int *  nnz 
)
template void paralution::dense_to_csr ( const int  omp_threads,
const int  nrow,
const int  ncol,
const MatrixDENSE< float > &  src,
MatrixCSR< float, int > *  dst,
int *  nnz 
)
template void paralution::dense_to_csr ( const int  omp_threads,
const int  nrow,
const int  ncol,
const MatrixDENSE< std::complex< double > > &  src,
MatrixCSR< std::complex< double >, int > *  dst,
int *  nnz 
)
template void paralution::dense_to_csr ( const int  omp_threads,
const int  nrow,
const int  ncol,
const MatrixDENSE< std::complex< float > > &  src,
MatrixCSR< std::complex< float >, int > *  dst,
int *  nnz 
)
template void paralution::dense_to_csr ( const int  omp_threads,
const int  nrow,
const int  ncol,
const MatrixDENSE< int > &  src,
MatrixCSR< int, int > *  dst,
int *  nnz 
)
template<typename ValueType , typename IndexType >
void paralution::dia_to_csr ( const int  omp_threads,
const IndexType  nnz,
const IndexType  nrow,
const IndexType  ncol,
const MatrixDIA< ValueType, IndexType > &  src,
MatrixCSR< ValueType, IndexType > *  dst,
IndexType *  nnz_csr 
)
template void paralution::dia_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixDIA< double, int > &  src,
MatrixCSR< double, int > *  dst,
int *  nnz_csr 
)
template void paralution::dia_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixDIA< float, int > &  src,
MatrixCSR< float, int > *  dst,
int *  nnz_csr 
)
template void paralution::dia_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixDIA< std::complex< double >, int > &  src,
MatrixCSR< std::complex< double >, int > *  dst,
int *  nnz_csr 
)
template void paralution::dia_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixDIA< std::complex< float >, int > &  src,
MatrixCSR< std::complex< float >, int > *  dst,
int *  nnz_csr 
)
template void paralution::dia_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixDIA< int, int > &  src,
MatrixCSR< int, int > *  dst,
int *  nnz_csr 
)
void paralution::disable_accelerator_paralution ( const bool  onoff)

Disable/Enable the accelerator.

template<typename ValueType >
void paralution::dot ( const int  mic_dev,
const ValueType *  vec1,
const ValueType *  vec2,
const int  size,
ValueType &  d 
)
template void paralution::dot< double > ( const int  mic_dev,
const double *  vec1,
const double *  vec2,
const int  size,
double &  d 
)
template void paralution::dot< float > ( const int  mic_dev,
const float *  vec1,
const float *  vec2,
const int  size,
float &  d 
)
template void paralution::dot< int > ( const int  mic_dev,
const int *  vec1,
const int *  vec2,
const int  size,
int &  d 
)
template<typename ValueType , typename IndexType >
void paralution::ell_to_csr ( const int  omp_threads,
const IndexType  nnz,
const IndexType  nrow,
const IndexType  ncol,
const MatrixELL< ValueType, IndexType > &  src,
MatrixCSR< ValueType, IndexType > *  dst,
IndexType *  nnz_csr 
)
template void paralution::ell_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixELL< double, int > &  src,
MatrixCSR< double, int > *  dst,
int *  nnz_csr 
)
template void paralution::ell_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixELL< float, int > &  src,
MatrixCSR< float, int > *  dst,
int *  nnz_csr 
)
template void paralution::ell_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixELL< std::complex< double >, int > &  src,
MatrixCSR< std::complex< double >, int > *  dst,
int *  nnz_csr 
)
template void paralution::ell_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixELL< std::complex< float >, int > &  src,
MatrixCSR< std::complex< float >, int > *  dst,
int *  nnz_csr 
)
template void paralution::ell_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixELL< int, int > &  src,
MatrixCSR< int, int > *  dst,
int *  nnz_csr 
)
template<typename DataType >
void paralution::free_gpu ( DataType **  ptr)
template<typename DataType >
void paralution::free_host ( DataType **  ptr)

Free buffer on the host.

Referenced by paralution::OCLAcceleratorVector< ValueType >::Amax(), paralution::HostMatrixCSR< ValueType >::Assemble(), paralution::HostVector< ValueType >::Assemble(), paralution::OCLAcceleratorVector< ValueType >::Asum(), paralution::MixedPrecisionDC< OperatorTypeH, VectorTypeH, ValueTypeH, OperatorTypeL, VectorTypeL, ValueTypeL >::Build(), paralution::BlockPreconditioner< OperatorType, VectorType, ValueType >::Build(), paralution::SIRA< OperatorTypeH, VectorTypeH, ValueTypeH, OperatorTypeL, VectorTypeL, ValueTypeL >::Build_(), paralution::MultiColored< OperatorType, VectorType, ValueType >::Clear(), paralution::BlockPreconditioner< OperatorType, VectorType, ValueType >::Clear(), paralution::HostMatrixMCSR< ValueType >::Clear(), paralution::HostMatrixCOO< ValueType >::Clear(), paralution::HostMatrixDENSE< ValueType >::Clear(), paralution::HostMatrixELL< ValueType >::Clear(), paralution::HostMatrixDIA< ValueType >::Clear(), paralution::IDR< OperatorType, VectorType, ValueType >::Clear(), paralution::HostMatrixHYB< ValueType >::Clear(), paralution::HostMatrixCSR< ValueType >::Clear(), paralution::HostVector< ValueType >::Clear(), paralution::HostMatrixCSR< ValueType >::CMK(), paralution::OCLAcceleratorMatrixCSR< ValueType >::Compress(), paralution::OCLAcceleratorMatrixELL< ValueType >::ConvertFrom(), csr_to_dia(), csr_to_hyb(), paralution::MultiColored< OperatorType, VectorType, ValueType >::Decompose_(), paralution::OCLAcceleratorVector< ValueType >::Dot(), paralution::OCLAcceleratorVector< ValueType >::DotNonConj(), paralution::SIRA< OperatorTypeH, VectorTypeH, ValueTypeH, OperatorTypeL, VectorTypeL, ValueTypeL >::Eigpair(), export_dealii_vector(), paralution::OCLAcceleratorMatrixCSR< ValueType >::ExtractL(), paralution::OCLAcceleratorMatrixCSR< ValueType >::ExtractLDiagonal(), paralution::OCLAcceleratorMatrixCSR< ValueType >::ExtractSubMatrix(), paralution::OCLAcceleratorMatrixCSR< ValueType >::ExtractU(), paralution::OCLAcceleratorMatrixCSR< ValueType >::ExtractUDiagonal(), paralution::LocalMatrix< ValueType >::free_assembly_data(), paralution::HostMatrixMCSR< ValueType >::ILU0Factorize(), paralution::HostMatrixCSR< ValueType >::ILU0Factorize(), paralution::HostMatrixCSR< ValueType >::ILUpFactorizeNumeric(), paralution::HostMatrixCSR< ValueType >::ILUTFactorize(), paralution::HostMatrixDENSE< ValueType >::Invert(), paralution::OCLAcceleratorMatrixCSR< ValueType >::MaximalIndependentSet(), paralution::HostMatrixCSR< ValueType >::MaximalIndependentSet(), mexFunction(), paralution::OCLAcceleratorMatrixCSR< ValueType >::MultiColoring(), paralution::HostMatrixCSR< ValueType >::MultiColoring(), paralution::OCLAcceleratorVector< ValueType >::Norm(), paralution::HostMatrixCOO< ValueType >::PermuteBackward(), paralution::HostMatrixDENSE< ValueType >::QRSolve(), read_coordinate_stream(), paralution::OCLAcceleratorVector< ValueType >::Reduce(), paralution::IDR< OperatorType, VectorType, ValueType >::SolveNonPrecond_(), paralution::IDR< OperatorType, VectorType, ValueType >::SolvePrecond_(), and paralution::HostMatrixCSR< ValueType >::SPAI().

template void paralution::free_host< char > ( char **  ptr)
template void paralution::free_host< double > ( double **  ptr)
template void paralution::free_host< float > ( float **  ptr)
template void paralution::free_host< std::complex< double > > ( std::complex< double > **  ptr)
template void paralution::free_host< std::complex< float > > ( std::complex< float > **  ptr)
template void paralution::free_host< unsigned int > ( unsigned int **  ptr)
template void paralution::free_mic< char > ( const int  mic_dev,
char **  ptr 
)
template void paralution::free_mic< double > ( const int  mic_dev,
double **  ptr 
)
template void paralution::free_mic< float > ( const int  mic_dev,
float **  ptr 
)
template void paralution::free_mic< int > ( const int  mic_dev,
int **  ptr 
)
template void paralution::free_mic< unsigned int > ( const int  mic_dev,
unsigned int **  ptr 
)
template<typename ValueType , typename IndexType >
void paralution::hyb_to_csr ( const int  omp_threads,
const IndexType  nnz,
const IndexType  nrow,
const IndexType  ncol,
const IndexType  nnz_ell,
const IndexType  nnz_coo,
const MatrixHYB< ValueType, IndexType > &  src,
MatrixCSR< ValueType, IndexType > *  dst,
IndexType *  nnz_csr 
)
template void paralution::hyb_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const int  nnz_ell,
const int  nnz_coo,
const MatrixHYB< double, int > &  src,
MatrixCSR< double, int > *  dst,
int *  nnz_csr 
)
template void paralution::hyb_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const int  nnz_ell,
const int  nnz_coo,
const MatrixHYB< float, int > &  src,
MatrixCSR< float, int > *  dst,
int *  nnz_csr 
)
template void paralution::hyb_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const int  nnz_ell,
const int  nnz_coo,
const MatrixHYB< std::complex< double >, int > &  src,
MatrixCSR< std::complex< double >, int > *  dst,
int *  nnz_csr 
)
template void paralution::hyb_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const int  nnz_ell,
const int  nnz_coo,
const MatrixHYB< std::complex< float >, int > &  src,
MatrixCSR< std::complex< float >, int > *  dst,
int *  nnz_csr 
)
template void paralution::hyb_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const int  nnz_ell,
const int  nnz_coo,
const MatrixHYB< int, int > &  src,
MatrixCSR< int, int > *  dst,
int *  nnz_csr 
)
paralution::if ( interval_end2  ,
num_nonzeros   
)
paralution::if ( interval_begin >=  interval_end)
paralution::if ( thread_lane == 31)
paralution::if ( threadIdx.x == 0)
paralution::if ( )
void paralution::info_paralution ( void  )

Print information about the platform.

Referenced by main(), paralution_fortran_solve_coo(), and paralution_fortran_solve_csr().

void paralution::info_paralution ( const struct Paralution_Backend_Descriptor  backend_descriptor)

Print information about the platform via specific backend descriptor.

int paralution::init_paralution ( void  )

Initialization of the paralution platform.

Referenced by main(), mexFunction(), paralution_fortran_solve_coo(), paralution_fortran_solve_csr(), and paralution_solve().

template<typename ValueType , typename IndexType , unsigned int BLOCK_SIZE>
__global__ void paralution::kernel_amax ( const IndexType  n,
const ValueType *  data,
ValueType *  out,
const IndexType  GROUP_SIZE,
const IndexType  LOCAL_SIZE 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_buffer_addscalar ( const IndexType  n,
const ValueType  scalar,
ValueType *  buff 
)
template<typename IndexType >
__global__ void paralution::kernel_calc_row_nnz ( const IndexType  nrow,
const IndexType *  row_offset,
IndexType *  row_nnz 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_coo_permute ( const IndexType  nnz,
const IndexType *  in_row,
const IndexType *  in_col,
const IndexType *  perm,
IndexType *  out_row,
IndexType *  out_col 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_copy_from_double ( const IndexType  n,
const double *  in,
ValueType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_copy_from_float ( const IndexType  n,
const float *  in,
ValueType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_copy_offset_from ( const IndexType  n,
const IndexType  src_offset,
const IndexType  dst_offset,
const ValueType *  in,
ValueType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_add_csr_same_struct ( const IndexType  nrow,
const IndexType *  out_row_offset,
const IndexType *  out_col,
const IndexType *  in_row_offset,
const IndexType *  in_col,
const ValueType *  in_val,
const ValueType  alpha,
const ValueType  beta,
ValueType *  out_val 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_add_diagonal ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType  alpha,
ValueType *  val 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_add_offdiagonal ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType  alpha,
ValueType *  val 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_add_spmv_scalar ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
const ValueType  scalar,
const ValueType *  in,
ValueType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_compress_copy ( const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
const IndexType  nrow,
const double  drop_off,
const IndexType *  row_offset_new,
IndexType *  col_new,
ValueType *  val_new 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_compress_count_nrow ( const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
const IndexType  nrow,
const double  drop_off,
IndexType *  row_offset_new 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_diagmatmult_l ( const IndexType  nrow,
const IndexType *  row_offset,
const ValueType *  diag,
ValueType *  val 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_diagmatmult_r ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  diag,
ValueType *  val 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_extract_column_vector ( const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
const IndexType  nrow,
const IndexType  idx,
ValueType *  vec 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_extract_diag ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
ValueType *  vec 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_extract_inv_diag ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
ValueType *  vec 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_extract_l_triangular ( const IndexType  nrow,
const IndexType *  src_row_offset,
const IndexType *  src_col,
const ValueType *  src_val,
IndexType *  nnz_per_row,
IndexType *  dst_col,
ValueType *  dst_val 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_extract_row_vector ( const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
const IndexType  row_nnz,
const IndexType  idx,
ValueType *  vec 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_extract_submatrix_copy ( const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
const IndexType  smrow_offset,
const IndexType  smcol_offset,
const IndexType  smrow_size,
const IndexType  smcol_size,
const IndexType *  sm_row_offset,
IndexType *  sm_col,
ValueType *  sm_val 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_extract_submatrix_row_nnz ( const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
const IndexType  smrow_offset,
const IndexType  smcol_offset,
const IndexType  smrow_size,
const IndexType  smcol_size,
IndexType *  row_nnz 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_extract_u_triangular ( const IndexType  nrow,
const IndexType *  src_row_offset,
const IndexType *  src_col,
const ValueType *  src_val,
IndexType *  nnz_per_row,
IndexType *  dst_col,
ValueType *  dst_val 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_lower_nnz_per_row ( const IndexType  nrow,
const IndexType *  src_row_offset,
const IndexType *  src_col,
IndexType *  nnz_per_row 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_replace_column_vector ( const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
const IndexType  nrow,
const IndexType  idx,
const ValueType *  vec,
const IndexType *  offset,
IndexType *  new_col,
ValueType *  new_val 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_replace_column_vector_offset ( const IndexType *  row_offset,
const IndexType *  col,
const IndexType  nrow,
const IndexType  idx,
const ValueType *  vec,
IndexType *  offset 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_scale_diagonal ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType  alpha,
ValueType *  val 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_scale_offdiagonal ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType  alpha,
ValueType *  val 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_slower_nnz_per_row ( const IndexType  nrow,
const IndexType *  src_row_offset,
const IndexType *  src_col,
IndexType *  nnz_per_row 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_spmv_scalar ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
const ValueType *  in,
ValueType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_supper_nnz_per_row ( const IndexType  nrow,
const IndexType *  src_row_offset,
const IndexType *  src_col,
IndexType *  nnz_per_row 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_csr_upper_nnz_per_row ( const IndexType  nrow,
const IndexType *  src_row_offset,
const IndexType *  src_col,
IndexType *  nnz_per_row 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_dense_extract_column_vector ( ValueType *  vec,
const IndexType  idx,
const IndexType  nrow,
const IndexType  ncol,
const ValueType *  mat 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_dense_extract_row_vector ( ValueType *  vec,
const IndexType  idx,
const IndexType  nrow,
const IndexType  ncol,
const ValueType *  mat 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_dense_replace_column_vector ( const ValueType *  vec,
const IndexType  idx,
const IndexType  nrow,
const IndexType  ncol,
ValueType *  mat 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_dense_replace_row_vector ( const ValueType *  vec,
const IndexType  idx,
const IndexType  nrow,
const IndexType  ncol,
ValueType *  mat 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_dia_add_spmv ( const IndexType  num_rows,
const IndexType  num_cols,
const IndexType  num_diags,
const IndexType *  Aoffsets,
const ValueType *  Aval,
const ValueType  scalar,
const ValueType *  x,
ValueType *  y 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_dia_convert ( const IndexType  nrow,
const IndexType  ndiag,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
const IndexType *  diag_map,
ValueType *  dia_val 
)
template<typename IndexType >
__global__ void paralution::kernel_dia_diag_map ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  col,
IndexType *  diag_map 
)
template<typename IndexType >
__global__ void paralution::kernel_dia_fill_offset ( const IndexType  nrow,
const IndexType  ncol,
IndexType *  diag_map,
const IndexType *  offset_map,
IndexType *  offset 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_dia_spmv ( const IndexType  num_rows,
const IndexType  num_cols,
const IndexType  num_diags,
const IndexType *  Aoffsets,
const ValueType *  Aval,
const ValueType *  x,
ValueType *  y 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_ell_add_spmv ( const IndexType  num_rows,
const IndexType  num_cols,
const IndexType  num_cols_per_row,
const IndexType *  Acol,
const ValueType *  Aval,
const ValueType  scalar,
const ValueType *  x,
ValueType *  y 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_ell_csr_to_ell ( const IndexType  nrow,
const IndexType  max_row,
const IndexType *  src_row_offset,
const IndexType *  src_col,
const ValueType *  src_val,
IndexType *  ell_col,
ValueType *  ell_val 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_ell_fill_coo ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
const IndexType *  nnz_coo,
const IndexType *  nnz_ell,
IndexType *  COO_row,
IndexType *  COO_col,
ValueType *  COO_val 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_ell_fill_ell ( const IndexType  nrow,
const IndexType  max_row,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
IndexType *  ELL_col,
ValueType *  ELL_val,
IndexType *  nnz_ell 
)
template<typename ValueType , typename IndexType , unsigned int BLOCK_SIZE>
__global__ void paralution::kernel_ell_max_row ( const IndexType  nrow,
const ValueType *  data,
ValueType *  out,
const IndexType  GROUP_SIZE,
const IndexType  LOCAL_SIZE 
)
template<typename IndexType >
__global__ void paralution::kernel_ell_nnz_coo ( const IndexType  nrow,
const IndexType  max_row,
const IndexType *  row_offset,
IndexType *  nnz_coo 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_ell_spmv ( const IndexType  num_rows,
const IndexType  num_cols,
const IndexType  num_cols_per_row,
const IndexType *  Acol,
const ValueType *  Aval,
const ValueType *  x,
ValueType *  y 
)
template<typename ValueType , typename IndexType , unsigned int BLOCK_SIZE>
__global__ void paralution::kernel_max ( const IndexType  n,
const ValueType *  data,
ValueType *  out,
const IndexType  GROUP_SIZE,
const IndexType  LOCAL_SIZE 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_mcsr_add_spmv_scalar ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
const ValueType  scalar,
const ValueType *  in,
ValueType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_mcsr_spmv_scalar ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  col,
const ValueType *  val,
const ValueType *  in,
ValueType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_permute ( const IndexType  n,
const IndexType *  permute,
const ValueType *  in,
ValueType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_permute_backward ( const IndexType  n,
const IndexType *  permute,
const ValueType *  in,
ValueType *  out 
)
template<typename ValueType , typename IndexType , const IndexType size>
__global__ void paralution::kernel_permute_cols ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  perm_vec,
const IndexType *  row_nnz,
const IndexType *  perm_col,
const ValueType *  perm_data,
IndexType *  col,
ValueType *  data 
)
template<typename IndexType >
__global__ void paralution::kernel_permute_row_nnz ( const IndexType  nrow,
const IndexType *  row_nnz_src,
const IndexType *  perm_vec,
IndexType *  row_nnz_dst 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_permute_rows ( const IndexType  nrow,
const IndexType *  row_offset,
const IndexType *  perm_row_offset,
const IndexType *  col,
const ValueType *  data,
const IndexType *  perm_vec,
const IndexType *  row_nnz,
IndexType *  perm_col,
ValueType *  perm_data 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_pointwisemult ( const IndexType  n,
const ValueType *  x,
ValueType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_pointwisemult2 ( const IndexType  n,
const ValueType *  x,
const ValueType *  y,
ValueType *  out 
)
template<typename IndexType >
__global__ void paralution::kernel_powerd ( const IndexType  n,
const double  power,
double *  out 
)
template<typename IndexType >
__global__ void paralution::kernel_powerf ( const IndexType  n,
const double  power,
float *  out 
)
template<typename IndexType >
__global__ void paralution::kernel_red_extrapolate ( IndexType *  dst,
const IndexType *  srcBorder,
const IndexType *  srcData,
IndexType  numElems 
)
template<typename IndexType , unsigned int BLOCK_SIZE>
__global__ void paralution::kernel_red_partial_sum ( IndexType *  dst,
const IndexType *  src,
const IndexType  numElems 
)
template<typename IndexType >
__global__ void paralution::kernel_red_recurse ( IndexType *  dst,
IndexType *  src,
IndexType  stride,
IndexType  numElems 
)
template<typename ValueType , typename IndexType , unsigned int BLOCK_SIZE>
__global__ void paralution::kernel_reduce ( const IndexType  n,
const ValueType *  data,
ValueType *  out,
const IndexType  GROUP_SIZE,
const IndexType  LOCAL_SIZE 
)
template<typename IndexType >
__global__ void paralution::kernel_reverse_index ( const IndexType  n,
const IndexType *  perm,
IndexType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_scaleadd ( const IndexType  n,
const ValueType  alpha,
const ValueType *  x,
ValueType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_scaleadd2 ( const IndexType  n,
const ValueType  alpha,
const ValueType  beta,
const ValueType  gamma,
const ValueType *  x,
const ValueType *  y,
ValueType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_scaleaddscale ( const IndexType  n,
const ValueType  alpha,
const ValueType  beta,
const ValueType *  x,
ValueType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_scaleaddscale_offset ( const IndexType  n,
const IndexType  src_offset,
const IndexType  dst_offset,
const ValueType  alpha,
const ValueType  beta,
const ValueType *  x,
ValueType *  out 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_set_to_ones ( const IndexType  n,
ValueType *  data 
)
template<typename ValueType , typename IndexType >
__global__ void paralution::kernel_set_to_zeros ( const IndexType  n,
ValueType *  data 
)
template<typename IndexType , typename ValueType >
__global__ void paralution::kernel_spmv_coo_serial ( const IndexType  num_entries,
const IndexType *  I,
const IndexType *  J,
const ValueType *  V,
const ValueType  scalar,
const ValueType *  x,
ValueType *  y 
)
template<typename ValueType , typename IndexType >
void paralution::mcsr_to_csr ( const int  omp_threads,
const IndexType  nnz,
const IndexType  nrow,
const IndexType  ncol,
const MatrixMCSR< ValueType, IndexType > &  src,
MatrixCSR< ValueType, IndexType > *  dst 
)
template void paralution::mcsr_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixMCSR< double, int > &  src,
MatrixCSR< double, int > *  dst 
)
template void paralution::mcsr_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixMCSR< float, int > &  src,
MatrixCSR< float, int > *  dst 
)
template void paralution::mcsr_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixMCSR< std::complex< double >, int > &  src,
MatrixCSR< std::complex< double >, int > *  dst 
)
template void paralution::mcsr_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixMCSR< std::complex< float >, int > &  src,
MatrixCSR< std::complex< float >, int > *  dst 
)
template void paralution::mcsr_to_csr ( const int  omp_threads,
const int  nnz,
const int  nrow,
const int  ncol,
const MatrixMCSR< int, int > &  src,
MatrixCSR< int, int > *  dst 
)
template<typename ValueType >
void paralution::norm ( const int  mic_dev,
const ValueType *  vec,
const int  size,
ValueType &  d 
)
template void paralution::norm< double > ( const int  mic_dev,
const double *  vec,
const int  size,
double &  d 
)
template void paralution::norm< float > ( const int  mic_dev,
const float *  vec,
const int  size,
float &  d 
)
template void paralution::norm< int > ( const int  mic_dev,
const int *  vec,
const int  size,
int &  d 
)
template<typename DataType >
void paralution::ocl_dev2dev ( const int  size,
cl_mem *  src,
cl_mem *  dst,
cl_command_queue  ocl_cmdQueue 
)

Copy object from device to device (intra) memory.

template void paralution::ocl_dev2dev< char > ( const int  size,
cl_mem *  src,
cl_mem *  dst,
cl_command_queue  ocl_cmdQueue 
)
template void paralution::ocl_dev2dev< double > ( const int  size,
cl_mem *  src,
cl_mem *  dst,
cl_command_queue  ocl_cmdQueue 
)
template void paralution::ocl_dev2dev< float > ( const int  size,
cl_mem *  src,
cl_mem *  dst,
cl_command_queue  ocl_cmdQueue 
)
template void paralution::ocl_dev2dev< unsigned int > ( const int  size,
cl_mem *  src,
cl_mem *  dst,
cl_command_queue  ocl_cmdQueue 
)
template<typename DataType >
void paralution::ocl_dev2host ( const int  size,
cl_mem *  src,
DataType *  dst,
cl_command_queue  ocl_cmdQueue 
)
template void paralution::ocl_dev2host< char > ( const int  size,
cl_mem *  src,
char *  dst,
cl_command_queue  ocl_cmdQueue 
)
template void paralution::ocl_dev2host< double > ( const int  size,
cl_mem *  src,
double *  dst,
cl_command_queue  ocl_cmdQueue 
)
template void paralution::ocl_dev2host< float > ( const int  size,
cl_mem *  src,
float *  dst,
cl_command_queue  ocl_cmdQueue 
)
template void paralution::ocl_dev2host< unsigned int > ( const int  size,
cl_mem *  src,
unsigned int *  dst,
cl_command_queue  ocl_cmdQueue 
)
template<typename DataType >
void paralution::ocl_host2dev ( const int  size,
const DataType *  src,
cl_mem *  dst,
cl_command_queue  ocl_cmdQueue 
)
template void paralution::ocl_host2dev< char > ( const int  size,
const char *  src,
cl_mem *  dst,
cl_command_queue  ocl_cmdQueue 
)
template void paralution::ocl_host2dev< double > ( const int  size,
const double *  src,
cl_mem *  dst,
cl_command_queue  ocl_cmdQueue 
)
template void paralution::ocl_host2dev< float > ( const int  size,
const float *  src,
cl_mem *  dst,
cl_command_queue  ocl_cmdQueue 
)
template void paralution::ocl_host2dev< unsigned int > ( const int  size,
const unsigned int *  src,
cl_mem *  dst,
cl_command_queue  ocl_cmdQueue 
)
template<typename DataType >
void paralution::ocl_set_to ( cl_kernel  ocl_kernel,
cl_command_queue  ocl_cmdQueue,
const size_t  localWorkSize,
const size_t  globalWorkSize,
const int  size,
const DataType  val,
cl_mem *  ptr 
)

Set device object to specific values.

template void paralution::ocl_set_to< char > ( cl_kernel  ocl_kernel,
cl_command_queue  ocl_cmdQueue,
const size_t  localWorkSize,
const size_t  globalWorkSize,
const int  size,
const char  val,
cl_mem *  ptr 
)
template void paralution::ocl_set_to< double > ( cl_kernel  ocl_kernel,
cl_command_queue  ocl_cmdQueue,
const size_t  localWorkSize,
const size_t  globalWorkSize,
const int  size,
const double  val,
cl_mem *  ptr 
)
template void paralution::ocl_set_to< float > ( cl_kernel  ocl_kernel,
cl_command_queue  ocl_cmdQueue,
const size_t  localWorkSize,
const size_t  globalWorkSize,
const int  size,
const float  val,
cl_mem *  ptr 
)
template void paralution::ocl_set_to< unsigned int > ( cl_kernel  ocl_kernel,
cl_command_queue  ocl_cmdQueue,
const size_t  localWorkSize,
const size_t  globalWorkSize,
const int  size,
const unsigned int  val,
cl_mem *  ptr 
)
template<typename ValueType >
bool paralution::operator< ( const std::complex< ValueType > &  lhs,
const std::complex< ValueType > &  rhs 
)

Overloaded < operator for complex numbers.

template bool paralution::operator< ( const std::complex< float > &  lhs,
const std::complex< float > &  rhs 
)
template bool paralution::operator< ( const std::complex< double > &  lhs,
const std::complex< double > &  rhs 
)
template<typename ValueType >
bool paralution::operator<= ( const std::complex< ValueType > &  lhs,
const std::complex< ValueType > &  rhs 
)

Overloaded <= operator for complex numbers.

template bool paralution::operator<= ( const std::complex< float > &  lhs,
const std::complex< float > &  rhs 
)
template bool paralution::operator<= ( const std::complex< double > &  lhs,
const std::complex< double > &  rhs 
)
template<typename ValueType >
bool paralution::operator> ( const std::complex< ValueType > &  lhs,
const std::complex< ValueType > &  rhs 
)

Overloaded > operator for complex numbers.

template bool paralution::operator> ( const std::complex< float > &  lhs,
const std::complex< float > &  rhs 
)
template bool paralution::operator> ( const std::complex< double > &  lhs,
const std::complex< double > &  rhs 
)
template<typename ValueType >
bool paralution::operator>= ( const std::complex< ValueType > &  lhs,
const std::complex< ValueType > &  rhs 
)

Overloaded >= operator for complex numbers.

template bool paralution::operator>= ( const std::complex< float > &  lhs,
const std::complex< float > &  rhs 
)
template bool paralution::operator>= ( const std::complex< double > &  lhs,
const std::complex< double > &  rhs 
)
float paralution::paralution_abs ( const float  val)

Return absolute float value.

Referenced by paralution::OCLAcceleratorVector< ValueType >::Amax(), paralution::HostVector< ValueType >::Amax(), paralution::OCLAcceleratorVector< ValueType >::Asum(), paralution::HostVector< ValueType >::Asum(), paralution::HostMatrixCSR< ValueType >::Check(), paralution::HostVector< ValueType >::Check(), paralution::IterationControl::CheckResidual(), paralution::IterationControl::CheckResidualNoCount(), paralution::HostMatrixCSR< ValueType >::Compress(), paralution::LocalMatrix< ValueType >::Compress(), paralution::SIRA< OperatorTypeH, VectorTypeH, ValueTypeH, OperatorTypeL, VectorTypeL, ValueTypeL >::Eigpair(), paralution::HostMatrixCSR< ValueType >::FSAI(), paralution::FGMRES< OperatorType, VectorType, ValueType >::GenerateGivensRotation_(), paralution::GMRES< OperatorType, VectorType, ValueType >::GenerateGivensRotation_(), paralution::HostMatrixCSR< ValueType >::Gershgorin(), paralution::SIRA< OperatorTypeH, VectorTypeH, ValueTypeH, OperatorTypeL, VectorTypeL, ValueTypeL >::Givens(), paralution::HostMatrixCSR< ValueType >::ILUTFactorize(), paralution::IterationControl::InitResidual(), paralution::IterationControl::InitTolerance(), paralution::IterationControl::PrintStatus(), paralution::BaseMultiGrid< OperatorType, VectorType, ValueType >::Solve(), paralution::BiCGStab< OperatorType, VectorType, ValueType >::SolveNonPrecond_(), paralution::CG< OperatorType, VectorType, ValueType >::SolveNonPrecond_(), paralution::CR< OperatorType, VectorType, ValueType >::SolveNonPrecond_(), paralution::Chebyshev< OperatorType, VectorType, ValueType >::SolveNonPrecond_(), paralution::FGMRES< OperatorType, VectorType, ValueType >::SolveNonPrecond_(), paralution::GMRES< OperatorType, VectorType, ValueType >::SolveNonPrecond_(), paralution::IDR< OperatorType, VectorType, ValueType >::SolveNonPrecond_(), paralution::DPCG< OperatorType, VectorType, ValueType >::SolveNonPrecond_(), paralution::CG< OperatorType, VectorType, ValueType >::SolvePrecond_(), paralution::CR< 
OperatorType, VectorType, ValueType >::SolvePrecond_(), paralution::BiCGStab< OperatorType, VectorType, ValueType >::SolvePrecond_(), paralution::Chebyshev< OperatorType, VectorType, ValueType >::SolvePrecond_(), paralution::GMRES< OperatorType, VectorType, ValueType >::SolvePrecond_(), paralution::FGMRES< OperatorType, VectorType, ValueType >::SolvePrecond_(), paralution::IDR< OperatorType, VectorType, ValueType >::SolvePrecond_(), paralution::FixedPoint< OperatorType, VectorType, ValueType >::SolvePrecond_(), and paralution::BaseMultiGrid< OperatorType, VectorType, ValueType >::Vcycle_().

double paralution::paralution_abs ( const double  val)

Return absolute double value.

float paralution::paralution_abs ( const std::complex< float >  val)

Return absolute float value.

double paralution::paralution_abs ( const std::complex< double >  val)

Return absolute double value.

int paralution::paralution_abs ( const int  val)

Return absolute int value.

bool paralution::paralution_get_device_ocl ( const cl_platform_id &  ocl_platform,
cl_device_id **  ocl_devices,
cl_uint *  ocl_numDevices 
)

Referenced by paralution_init_ocl().

template<typename ValueType >
cl_kernel paralution::paralution_get_kernel_ocl ( int  )

Get OpenCL kernel.

template<>
cl_kernel paralution::paralution_get_kernel_ocl< double > ( int  kernel)
template<>
cl_kernel paralution::paralution_get_kernel_ocl< float > ( int  kernel)
template<>
cl_kernel paralution::paralution_get_kernel_ocl< int > ( int  kernel)
bool paralution::paralution_get_platform_ocl ( cl_platform_id **  ocl_platforms,
cl_uint *  ocl_numPlatforms 
)

Referenced by paralution_init_ocl().

void paralution::paralution_gpu_sync ( void  )

Sync the device (for async transfers)

Referenced by _paralution_sync().

void paralution::paralution_info_gpu ( const struct Paralution_Backend_Descriptor  )

Print information about the GPUs in the system.

Referenced by info_paralution().

void paralution::paralution_info_mic ( const struct Paralution_Backend_Descriptor  backend_descriptor)

Print information about the MICs in the system.

Referenced by info_paralution().

void paralution::paralution_info_ocl ( const struct Paralution_Backend_Descriptor  backend_descriptor)

Print information about the GPUs in the system.

Referenced by info_paralution().

bool paralution::paralution_init_gpu ( )

Initialize a GPU (CUDA, CUBLAS, CUSPARSE)

Referenced by init_paralution().

bool paralution::paralution_init_mic ( void  )

Initialize a MIC.

Referenced by init_paralution().

bool paralution::paralution_init_ocl ( void  )

Initialize OpenCL.

Referenced by init_paralution().

bool paralution::paralution_set_kernels_ocl ( cl_kernel *  ocl_kernels)

Referenced by paralution_init_ocl().

void paralution::paralution_set_omp_affinity ( bool  aff)
void paralution::paralution_stop_gpu ( )

Release the GPU resources (CUDA, CUBLAS, CUSPARSE)

Referenced by stop_paralution().

void paralution::paralution_stop_mic ( void  )

Release the MIC accelerator.

Referenced by stop_paralution().

void paralution::paralution_stop_ocl ( void  )
double paralution::paralution_time ( void  )

Return current time in microseconds.

Referenced by _paralution_open_log_file(), and main().

template<typename ValueType >
void paralution::permute ( const int  mic_dev,
const int *  perm,
const ValueType *  in,
const int  size,
ValueType *  out 
)
template void paralution::permute< double > ( const int  mic_dev,
const int *  perm,
const double *  in,
const int  size,
double *  out 
)
template void paralution::permute< float > ( const int  mic_dev,
const int *  perm,
const float *  in,
const int  size,
float *  out 
)
template void paralution::permute< int > ( const int  mic_dev,
const int *  perm,
const int *  in,
const int  size,
int *  out 
)
template<typename ValueType >
void paralution::permuteback ( const int  mic_dev,
const int *  perm,
const ValueType *  in,
const int  size,
ValueType *  out 
)
template void paralution::permuteback< double > ( const int  mic_dev,
const int *  perm,
const double *  in,
const int  size,
double *  out 
)
template void paralution::permuteback< float > ( const int  mic_dev,
const int *  perm,
const float *  in,
const int  size,
float *  out 
)
template void paralution::permuteback< int > ( const int  mic_dev,
const int *  perm,
const int *  in,
const int  size,
int *  out 
)
template<typename ValueType >
void paralution::pointwisemult ( const int  mic_dev,
const ValueType *  vec1,
const int  size,
ValueType *  vec2 
)
template<typename ValueType >
void paralution::pointwisemult2 ( const int  mic_dev,
const ValueType *  vec1,
const ValueType *  vec2,
const int  size,
ValueType *  vec3 
)
template void paralution::pointwisemult2< double > ( const int  mic_dev,
const double *  vec1,
const double *  vec2,
const int  size,
double *  vec3 
)
template void paralution::pointwisemult2< float > ( const int  mic_dev,
const float *  vec1,
const float *  vec2,
const int  size,
float *  vec3 
)
template void paralution::pointwisemult2< int > ( const int  mic_dev,
const int *  vec1,
const int *  vec2,
const int  size,
int *  vec3 
)
template void paralution::pointwisemult< double > ( const int  mic_dev,
const double *  vec1,
const int  size,
double *  vec2 
)
template void paralution::pointwisemult< float > ( const int  mic_dev,
const float *  vec1,
const int  size,
float *  vec2 
)
template void paralution::pointwisemult< int > ( const int  mic_dev,
const int *  vec1,
const int  size,
int *  vec2 
)
template<typename ValueType >
void paralution::power ( const int  mic_dev,
const int  size,
const double  val,
ValueType *  vec 
)
template void paralution::power< double > ( const int  mic_dev,
const int  size,
const double  val,
double *  vec 
)
template void paralution::power< float > ( const int  mic_dev,
const int  size,
const double  val,
float *  vec 
)
template void paralution::power< int > ( const int  mic_dev,
const int  size,
const double  val,
int *  vec 
)
template<typename ValueType >
bool paralution::read_coordinate_stream ( int &  nrow,
int &  ncol,
int &  nnz,
int **  row,
int **  col,
ValueType **  val,
std::ifstream &  input,
matrix_market_banner &  banner 
)

Referenced by read_matrix_mtx().

bool paralution::read_matrix_market_banner ( matrix_market_banner &  banner,
std::ifstream &  input 
)

Referenced by read_matrix_mtx().

template<typename ValueType >
bool paralution::read_matrix_mtx ( int &  nrow,
int &  ncol,
int &  nnz,
int **  row,
int **  col,
ValueType **  val,
const std::string  filename 
)
template bool paralution::read_matrix_mtx ( int &  nrow,
int &  ncol,
int &  nnz,
int **  row,
int **  col,
float **  val,
const std::string  filename 
)
template bool paralution::read_matrix_mtx ( int &  nrow,
int &  ncol,
int &  nnz,
int **  row,
int **  col,
double **  val,
const std::string  filename 
)
template bool paralution::read_matrix_mtx ( int &  nrow,
int &  ncol,
int &  nnz,
int **  row,
int **  col,
std::complex< float > **  val,
const std::string  filename 
)
template bool paralution::read_matrix_mtx ( int &  nrow,
int &  ncol,
int &  nnz,
int **  row,
int **  col,
std::complex< double > **  val,
const std::string  filename 
)
template<typename IndexType >
__device__ IndexType paralution::red_recurse ( IndexType *  src,
IndexType *  srcStart,
IndexType  stride 
)
template<typename ValueType >
void paralution::reduce ( const int  mic_dev,
const ValueType *  vec,
const int  size,
ValueType &  d 
)
template void paralution::reduce< double > ( const int  mic_dev,
const double *  vec,
const int  size,
double &  d 
)
template void paralution::reduce< float > ( const int  mic_dev,
const float *  vec,
const int  size,
float &  d 
)
template void paralution::reduce< int > ( const int  mic_dev,
const int *  vec,
const int  size,
int &  d 
)
template<typename ValueType >
void paralution::scale ( const int  mic_dev,
const ValueType  alpha,
const int  size,
ValueType *  vec 
)
template void paralution::scale< double > ( const int  mic_dev,
const double  alpha,
const int  size,
double *  vec 
)
template void paralution::scale< float > ( const int  mic_dev,
const float  alpha,
const int  size,
float *  vec 
)
template void paralution::scale< int > ( const int  mic_dev,
const int  alpha,
const int  size,
int *  vec 
)
template<typename ValueType >
void paralution::scaleadd ( const int  mic_dev,
const ValueType *  vec1,
const ValueType  alpha,
const int  size,
ValueType *  vec2 
)
template<typename ValueType >
void paralution::scaleadd2 ( const int  mic_dev,
const ValueType *  vec1,
const ValueType *  vec2,
const ValueType  alpha,
const ValueType  beta,
const ValueType  gamma,
const int  size,
ValueType *  vec3 
)
template void paralution::scaleadd2< double > ( const int  mic_dev,
const double *  vec1,
const double *  vec2,
const double  alpha,
const double  beta,
const double  gamma,
const int  size,
double *  vec3 
)
template void paralution::scaleadd2< float > ( const int  mic_dev,
const float *  vec1,
const float *  vec2,
const float  alpha,
const float  beta,
const float  gamma,
const int  size,
float *  vec3 
)
template void paralution::scaleadd2< int > ( const int  mic_dev,
const int *  vec1,
const int *  vec2,
const int  alpha,
const int  beta,
const int  gamma,
const int  size,
int *  vec3 
)
template void paralution::scaleadd< double > ( const int  mic_dev,
const double *  vec1,
const double  alpha,
const int  size,
double *  vec2 
)
template void paralution::scaleadd< float > ( const int  mic_dev,
const float *  vec1,
const float  alpha,
const int  size,
float *  vec2 
)
template void paralution::scaleadd< int > ( const int  mic_dev,
const int *  vec1,
const int  alpha,
const int  size,
int *  vec2 
)
template<typename ValueType >
void paralution::scaleaddscale ( const int  mic_dev,
const ValueType *  vec1,
const ValueType  alpha,
const ValueType  beta,
const int  size,
ValueType *  vec2 
)
template<typename ValueType >
void paralution::scaleaddscale ( const int  mic_dev,
const ValueType *  vec1,
const ValueType  alpha,
const ValueType  beta,
ValueType *  vec2,
const int  src_offset,
const int  dst_offset,
const int  size 
)
template void paralution::scaleaddscale< double > ( const int  mic_dev,
const double *  vec1,
const double  alpha,
const double  beta,
const int  size,
double *  vec2 
)
template void paralution::scaleaddscale< double > ( const int  mic_dev,
const double *  vec1,
const double  alpha,
const double  beta,
double *  vec2,
const int  src_offset,
const int  dst_offset,
const int  size 
)
template void paralution::scaleaddscale< float > ( const int  mic_dev,
const float *  vec1,
const float  alpha,
const float  beta,
const int  size,
float *  vec2 
)
template void paralution::scaleaddscale< float > ( const int  mic_dev,
const float *  vec1,
const float  alpha,
const float  beta,
float *  vec2,
const int  src_offset,
const int  dst_offset,
const int  size 
)
template void paralution::scaleaddscale< int > ( const int  mic_dev,
const int *  vec1,
const int  alpha,
const int  beta,
const int  size,
int *  vec2 
)
template void paralution::scaleaddscale< int > ( const int  mic_dev,
const int *  vec1,
const int  alpha,
const int  beta,
int *  vec2,
const int  src_offset,
const int  dst_offset,
const int  size 
)
template<typename IndexType , typename ValueType >
__device__ void paralution::segreduce_block ( const IndexType *  idx,
ValueType *  val 
)

Referenced by if(), and while().

template<typename IndexType , typename ValueType >
__device__ ValueType paralution::segreduce_warp ( const IndexType  thread_lane,
IndexType  row,
ValueType  val,
IndexType *  rows,
ValueType *  vals 
)
int paralution::set_device_paralution ( int  dev)

Select a device.

void paralution::set_gpu_cuda_paralution ( int  ngpu)

Set a specific GPU device.

Referenced by set_device_paralution().

void paralution::set_ocl_compute_units_paralution ( size_t  cu)

Set OpenCL compute units.

void paralution::set_ocl_paralution ( int  nplatform,
int  ndevice 
)

Set a specific OpenCL platform and device.

void paralution::set_ocl_platform_paralution ( int  platform)

Set a specific OpenCL platform.

void paralution::set_ocl_warp_size_paralution ( int  size)

Set OpenCL warp size.

void paralution::set_ocl_work_group_size_paralution ( size_t  size)

Set OpenCL work group size.

void paralution::set_omp_affinity ( bool  affinity)

Set host affinity (true-on/false-off)

void paralution::set_omp_threads_paralution ( int  nthreads)

Set the number of threads in the platform.

Referenced by main().

void paralution::set_omp_threshold ( const int  threshold)

Set OpenMP threshold size.

template<typename DataType >
void paralution::set_to_one_gpu ( const int  blocksize,
const int  max_threads,
const int  size,
DataType *  ptr 
)
template<typename DataType >
void paralution::set_to_one_mic ( const int  mic_dev,
const int  size,
DataType *  ptr 
)
template void paralution::set_to_one_mic< char > ( const int  mic_dev,
const int  size,
char *  ptr 
)
template void paralution::set_to_one_mic< double > ( const int  mic_dev,
const int  size,
double *  ptr 
)
template void paralution::set_to_one_mic< float > ( const int  mic_dev,
const int  size,
float *  ptr 
)
template void paralution::set_to_one_mic< int > ( const int  mic_dev,
const int  size,
int *  ptr 
)
template void paralution::set_to_one_mic< unsigned int > ( const int  mic_dev,
const int  size,
unsigned int *  ptr 
)
template<typename DataType >
void paralution::set_to_zero_gpu ( const int  blocksize,
const int  max_threads,
const int  size,
DataType *  ptr 
)
template void paralution::set_to_zero_host< char > ( const int  size,
char *  ptr 
)
template void paralution::set_to_zero_host< double > ( const int  size,
double *  ptr 
)
template void paralution::set_to_zero_host< float > ( const int  size,
float *  ptr 
)
template void paralution::set_to_zero_host< int > ( const int  size,
int *  ptr 
)
template void paralution::set_to_zero_host< std::complex< double > > ( const int  size,
std::complex< double > *  ptr 
)
template void paralution::set_to_zero_host< std::complex< float > > ( const int  size,
std::complex< float > *  ptr 
)