diff --git a/opm/simulators/linalg/bda/openclKernels.cpp b/opm/simulators/linalg/bda/openclKernels.cpp index df449fc29..74b3ef883 100644 --- a/opm/simulators/linalg/bda/openclKernels.cpp +++ b/opm/simulators/linalg/bda/openclKernels.cpp @@ -68,7 +68,7 @@ unsigned int ceilDivision(const unsigned int A, const unsigned int B) return A / B + (A % B > 0); } -void add_kernel_string(cl::Program::Sources &sources, const std::string &source) { +void add_kernel_source(cl::Program::Sources &sources, const std::string &source) { sources.emplace_back(source); } @@ -83,47 +83,47 @@ void OpenclKernels::init(cl::Context *context, cl::CommandQueue *queue_, std::ve verbosity = verbosity_; cl::Program::Sources sources; - const std::string& axpy_s = get_axpy_string(); - add_kernel_string(sources, axpy_s); - const std::string& scale_s = get_scale_string(); - add_kernel_string(sources, scale_s); - const std::string& vmul_s = get_vmul_string(); - add_kernel_string(sources, vmul_s); - const std::string& dot_1_s = get_dot_1_string(); - add_kernel_string(sources, dot_1_s); - const std::string& norm_s = get_norm_string(); - add_kernel_string(sources, norm_s); - const std::string& custom_s = get_custom_string(); - add_kernel_string(sources, custom_s); - const std::string& move_to_coarse_s = get_move_to_coarse_string(); - add_kernel_string(sources, move_to_coarse_s); - const std::string& move_to_fine_s = get_move_to_fine_string(); - add_kernel_string(sources, move_to_fine_s); - const std::string& spmv_blocked_s = get_blocked_matrix_operation_string(matrix_operation::spmv_op); - add_kernel_string(sources, spmv_blocked_s); - const std::string& spmv_s = get_matrix_operation_string(matrix_operation::spmv_op, true); - add_kernel_string(sources, spmv_s); - const std::string& spmv_noreset_s = get_matrix_operation_string(matrix_operation::spmv_op, false); - add_kernel_string(sources, spmv_noreset_s); - const std::string& residual_blocked_s = get_blocked_matrix_operation_string(matrix_operation::residual_op); - add_kernel_string(sources, residual_blocked_s); - const std::string& residual_s = get_matrix_operation_string(matrix_operation::residual_op); - add_kernel_string(sources, residual_s); + const std::string& axpy_s = get_axpy_source(); + add_kernel_source(sources, axpy_s); + const std::string& scale_s = get_scale_source(); + add_kernel_source(sources, scale_s); + const std::string& vmul_s = get_vmul_source(); + add_kernel_source(sources, vmul_s); + const std::string& dot_1_s = get_dot_1_source(); + add_kernel_source(sources, dot_1_s); + const std::string& norm_s = get_norm_source(); + add_kernel_source(sources, norm_s); + const std::string& custom_s = get_custom_source(); + add_kernel_source(sources, custom_s); + const std::string& move_to_coarse_s = get_move_to_coarse_source(); + add_kernel_source(sources, move_to_coarse_s); + const std::string& move_to_fine_s = get_move_to_fine_source(); + add_kernel_source(sources, move_to_fine_s); + const std::string& spmv_blocked_s = get_blocked_matrix_operation_source(matrix_operation::spmv_op); + add_kernel_source(sources, spmv_blocked_s); + const std::string& spmv_s = get_matrix_operation_source(matrix_operation::spmv_op, true); + add_kernel_source(sources, spmv_s); + const std::string& spmv_noreset_s = get_matrix_operation_source(matrix_operation::spmv_op, false); + add_kernel_source(sources, spmv_noreset_s); + const std::string& residual_blocked_s = get_blocked_matrix_operation_source(matrix_operation::residual_op); + add_kernel_source(sources, residual_blocked_s); + const std::string& residual_s = get_matrix_operation_source(matrix_operation::residual_op); + add_kernel_source(sources, residual_s); #if CHOW_PATEL bool ilu_operate_on_full_matrix = false; #else bool ilu_operate_on_full_matrix = true; #endif - const std::string& ILU_apply1_s = get_ILU_apply1_string(ilu_operate_on_full_matrix); - add_kernel_string(sources, ILU_apply1_s); - const std::string& ILU_apply2_s = get_ILU_apply2_string(ilu_operate_on_full_matrix); - add_kernel_string(sources, ILU_apply2_s); - const std::string& stdwell_apply_s = get_stdwell_apply_string(true); - add_kernel_string(sources, stdwell_apply_s); - const std::string& stdwell_apply_no_reorder_s = get_stdwell_apply_string(false); - add_kernel_string(sources, stdwell_apply_no_reorder_s); - const std::string& ilu_decomp_s = get_ilu_decomp_string(); - add_kernel_string(sources, ilu_decomp_s); + const std::string& ILU_apply1_s = get_ILU_apply1_source(ilu_operate_on_full_matrix); + add_kernel_source(sources, ILU_apply1_s); + const std::string& ILU_apply2_s = get_ILU_apply2_source(ilu_operate_on_full_matrix); + add_kernel_source(sources, ILU_apply2_s); + const std::string& stdwell_apply_s = get_stdwell_apply_source(true); + add_kernel_source(sources, stdwell_apply_s); + const std::string& stdwell_apply_no_reorder_s = get_stdwell_apply_source(false); + add_kernel_source(sources, stdwell_apply_no_reorder_s); + const std::string& ilu_decomp_s = get_ilu_decomp_source(); + add_kernel_source(sources, ilu_decomp_s); cl::Program program = cl::Program(*context, sources); program.build(devices); @@ -468,7 +468,7 @@ void OpenclKernels::apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffe } - std::string OpenclKernels::get_axpy_string() { + std::string OpenclKernels::get_axpy_source() { return R"( __kernel void axpy( __global double *in, @@ -489,7 +489,7 @@ void OpenclKernels::apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffe // scale vector with scalar - std::string OpenclKernels::get_scale_string() { + std::string OpenclKernels::get_scale_source() { return R"( __kernel void scale( __global double *vec, @@ -508,7 +508,7 @@ void OpenclKernels::apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffe } // multiply vector with another vector, element-wise - std::string OpenclKernels::get_vmul_string() { + std::string OpenclKernels::get_vmul_source() { return R"( __kernel void vmul( const double alpha, @@ -530,7 +530,7 @@ void OpenclKernels::apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffe // returns partial sums, instead of the final dot product - std::string OpenclKernels::get_dot_1_string() { + std::string OpenclKernels::get_dot_1_source() { return R"( __kernel void dot_1( __global double *in1, @@ -573,7 +573,7 @@ void OpenclKernels::apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffe // returns partial sums, instead of the final norm // the square root must be computed on CPU - std::string OpenclKernels::get_norm_string() { + std::string OpenclKernels::get_norm_source() { return R"( __kernel void norm( __global double *in, @@ -614,7 +614,7 @@ void OpenclKernels::apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffe // p = (p - omega * v) * beta + r - std::string OpenclKernels::get_custom_string() { + std::string OpenclKernels::get_custom_source() { return R"( __kernel void custom( __global double *p, @@ -642,7 +642,7 @@ void OpenclKernels::apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffe // transform blocked vector to scalar vector using pressure-weights // every workitem handles one blockrow - std::string OpenclKernels::get_move_to_coarse_string() { + std::string OpenclKernels::get_move_to_coarse_source() { return R"( __kernel void move_to_coarse( __global const double *fine_y, @@ -669,7 +669,7 @@ void OpenclKernels::apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffe // add the coarse pressure solution back to the finer, complete solution // every workitem handles one blockrow - std::string OpenclKernels::get_move_to_fine_string() { + std::string OpenclKernels::get_move_to_fine_source() { return R"( __kernel void move_to_fine( __global const double *coarse_x, @@ -692,7 +692,7 @@ void OpenclKernels::apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffe /// either b = mat * x /// or res = rhs - mat * x -std::string OpenclKernels::get_blocked_matrix_operation_string(matrix_operation op) { +std::string OpenclKernels::get_blocked_matrix_operation_source(matrix_operation op) { std::string s; if (op == matrix_operation::spmv_op) { s += "__kernel void spmv_blocked("; @@ -779,7 +779,7 @@ std::string OpenclKernels::get_blocked_matrix_operation_string(matrix_operation /// either b = mat * x /// or res = rhs - mat * x -std::string OpenclKernels::get_matrix_operation_string(matrix_operation op, bool spmv_reset) { +std::string OpenclKernels::get_matrix_operation_source(matrix_operation op, bool spmv_reset) { std::string s; if (op == matrix_operation::spmv_op) { if (spmv_reset) { @@ -853,7 +853,7 @@ std::string OpenclKernels::get_matrix_operation_string(matrix_operation op, bool } - std::string OpenclKernels::get_ILU_apply1_string(bool full_matrix) { + std::string OpenclKernels::get_ILU_apply1_source(bool full_matrix) { std::string s = R"( __kernel void ILU_apply1( __global const double *LUvals, @@ -929,7 +929,7 @@ std::string OpenclKernels::get_matrix_operation_string(matrix_operation op, bool } - std::string OpenclKernels::get_ILU_apply2_string(bool full_matrix) { + std::string OpenclKernels::get_ILU_apply2_source(bool full_matrix) { std::string s = R"( __kernel void ILU_apply2( __global const double *LUvals, @@ -1012,7 +1012,7 @@ std::string OpenclKernels::get_matrix_operation_string(matrix_operation op, bool return s; } - std::string OpenclKernels::get_stdwell_apply_string(bool reorder) { + std::string OpenclKernels::get_stdwell_apply_source(bool reorder) { std::string kernel_name = reorder ? "stdwell_apply" : "stdwell_apply_no_reorder"; std::string s = "__kernel void " + kernel_name + R"(( __global const double *Cnnzs, @@ -1107,7 +1107,7 @@ std::string OpenclKernels::get_matrix_operation_string(matrix_operation op, bool } - std::string OpenclKernels::get_ilu_decomp_string() { + std::string OpenclKernels::get_ilu_decomp_source() { return R"( // a = a - (b * c) diff --git a/opm/simulators/linalg/bda/openclKernels.hpp b/opm/simulators/linalg/bda/openclKernels.hpp index 1aaab6bd5..265c6b631 100644 --- a/opm/simulators/linalg/bda/openclKernels.hpp +++ b/opm/simulators/linalg/bda/openclKernels.hpp @@ -87,34 +87,34 @@ private: /// Generate string with axpy kernel /// a = a + alpha * b - static std::string get_axpy_string(); + static std::string get_axpy_source(); /// Generate string with scale kernel /// a = a * alpha - static std::string get_scale_string(); + static std::string get_scale_source(); /// multiply vector with another vector and a scalar, element-wise /// add result to a third vector - static std::string get_vmul_string(); + static std::string get_vmul_source(); /// returns partial sums, instead of the final dot product /// partial sums are added on CPU - static std::string get_dot_1_string(); + static std::string get_dot_1_source(); /// returns partial sums, instead of the final norm /// the square root must be computed on CPU - static std::string get_norm_string(); + static std::string get_norm_source(); /// Generate string with custom kernel /// This kernel combines some ilubicgstab vector operations into 1 /// p = (p - omega * v) * beta + r - static std::string get_custom_string(); + static std::string get_custom_source(); /// Transform blocked vector to scalar vector using pressure-weights - static std::string get_move_to_coarse_string(); + static std::string get_move_to_coarse_source(); /// Add the coarse pressure solution back to the finer, complete solution - static std::string get_move_to_fine_string(); + static std::string get_move_to_fine_source(); /// b = mat * x /// algorithm based on: @@ -122,32 +122,32 @@ private: /// Ryan Eberhardt, Mark Hoemmen, 2016, https://doi.org/10.1109/IPDPSW.2016.42 /// or /// res = rhs - (mat * x) - static std::string get_blocked_matrix_operation_string(matrix_operation op); - static std::string get_matrix_operation_string(matrix_operation op, bool spmv_reset = true); + static std::string get_blocked_matrix_operation_source(matrix_operation op); + static std::string get_matrix_operation_source(matrix_operation op, bool spmv_reset = true); /// ILU apply part 1: forward substitution /// solves L*x=y where L is a lower triangular sparse blocked matrix /// this L can be it's own BSR matrix (if full_matrix is false), /// or it can be inside a normal, square matrix, in that case diagIndex indicates where the rows of L end /// \param[in] full_matrix whether the kernel should operate on a full (square) matrix or not - static std::string get_ILU_apply1_string(bool full_matrix); + static std::string get_ILU_apply1_source(bool full_matrix); /// ILU apply part 2: backward substitution /// solves U*x=y where U is an upper triangular sparse blocked matrix /// this U can be it's own BSR matrix (if full_matrix is false), /// or it can be inside a normal, square matrix, in that case diagIndex indicates where the rows of U start /// \param[in] full_matrix whether the kernel should operate on a full (square) matrix or not - static std::string get_ILU_apply2_string(bool full_matrix); + static std::string get_ILU_apply2_source(bool full_matrix); /// Generate string with the stdwell_apply kernels /// If reorder is true, the B/Ccols do not correspond with the x/y vector /// the x/y vector is reordered, use toOrder to address that /// \param[in] reorder whether the matrix is reordered or not - static std::string get_stdwell_apply_string(bool reorder); + static std::string get_stdwell_apply_source(bool reorder); /// Generate string with the exact ilu decomposition kernel /// The kernel takes a full BSR matrix and performs inplace ILU decomposition - static std::string get_ilu_decomp_string(); + static std::string get_ilu_decomp_source(); OpenclKernels(){}; // disable instantiation