opm-simulators/opm/simulators/linalg/bda/opencl/kernels/residual.cl
Jose Eduardo Bueno c7ac97e215 [OpenCL] Moves all OpenCL kernels to *.cl files.
Kernel files are located in opm/simulators/linalg/bda/opencl/kernels.
CMake will combine them for usage in
${PROJECT_BINARY_DIR}/clSources.cpp that becomes part of the library.
2021-12-22 12:43:18 +01:00

51 lines
1.4 KiB
Common Lisp

/// res = rhs - mat * x
///
/// The matrix is read in CSR form: the entries of row r are vals[j] with
/// column index cols[j] for j in [rows[r], rows[r+1]).
/// Each work-group processes one row at a time: its work-items accumulate
/// partial dot products, the partials are reduced in local memory, and the
/// first work-item writes the result. Work-groups then advance by the number
/// of work-groups until all N rows are covered.
///
/// algorithm based on:
/// Optimization of Block Sparse Matrix-Vector Multiplication on Shared-Memory Parallel Architectures,
/// Ryan Eberhardt, Mark Hoemmen, 2016, https://doi.org/10.1109/IPDPSW.2016.42
///
/// vals  nonzero values of the matrix
/// cols  column index of each nonzero value
/// rows  row pointers, read at indices 0..N
/// N     number of rows
/// x     vector multiplied by the matrix, indexed by column index
/// rhs   right-hand side, read at indices 0..N-1
/// out   result vector, written at indices 0..N-1
/// tmp   local scratch space, one double per work-item of the work-group
__kernel void residual(
    __global const double *vals,
    __global const int *cols,
    __global const int *rows,
    const int N,
    __global const double *x,
    __global const double *rhs,
    __global double *out,
    __local double *tmp)
{
    const unsigned int bsize = get_local_size(0);
    // Use the work-group id directly: it is the same for every work-item of a
    // group regardless of global offset, so all work-items of a group agree on
    // `row` and reach the barriers below together.
    const unsigned int idx_b = get_group_id(0);
    const unsigned int idx_t = get_local_id(0);
    const unsigned int num_workgroups = get_num_groups(0);

    int row = idx_b;
    while (row < N) {
        const int rowStart = rows[row];
        const int rowEnd = rows[row + 1];

        // Each work-item sums every bsize-th entry of the row.
        double local_sum = 0.0;
        for (int j = rowStart + idx_t; j < rowEnd; j += bsize) {
            local_sum += vals[j] * x[cols[j]];
        }
        tmp[idx_t] = local_sum;
        barrier(CLK_LOCAL_MEM_FENCE);

        // Tree reduction of the partial sums into tmp[0].
        // The stride is rounded up so that no element is dropped when the
        // number of active slots is odd (work-group size not a power of two);
        // for power-of-two sizes this is the usual halving scheme.
        unsigned int active = bsize;
        while (active > 1) {
            const unsigned int offset = (active + 1) / 2;
            if (idx_t < active / 2) {
                tmp[idx_t] += tmp[idx_t + offset];
            }
            // Also separates the last read of tmp[] here from the writes to
            // tmp[] in the next row iteration.
            barrier(CLK_LOCAL_MEM_FENCE);
            active = offset;
        }

        if (idx_t == 0) {
            out[row] = rhs[row] - tmp[0];
        }

        row += num_workgroups;
    }
}