Updated comments

This commit is contained in:
Tong Dong Qiu 2021-03-03 16:36:27 +01:00
parent 0caae966b8
commit b87e9dad9a
2 changed files with 25 additions and 25 deletions

View File

@ -152,11 +152,6 @@ namespace bda
)";
}
// b = mat * x
// algorithm based on:
// Optimization of Block Sparse Matrix-Vector Multiplication on Shared-MemoryParallel Architectures,
// Ryan Eberhardt, Mark Hoemmen, 2016, https://doi.org/10.1109/IPDPSW.2016.42
std::string get_spmv_blocked_string() {
return R"(
__kernel void spmv_blocked(
@ -227,8 +222,6 @@ namespace bda
// ILU apply part 1: forward substitution
// solves L*x=y where L is a lower triangular sparse blocked matrix
std::string get_ILU_apply1_string(bool full_matrix) {
std::string s = R"(
__kernel void ILU_apply1(
@ -305,8 +298,6 @@ namespace bda
}
// ILU apply part 2: backward substitution
// solves U*x=y where L is a lower triangular sparse blocked matrix
std::string get_ILU_apply2_string(bool full_matrix) {
std::string s = R"(
__kernel void ILU_apply2(
@ -391,10 +382,6 @@ namespace bda
return s;
}
/// Generate string with the stdwell_apply kernels
/// If reorder is true, the B/Ccols do not correspond with the x/y vector
/// the x/y vector is reordered, use toOrder to address that
/// \param[in] reorder whether the matrix is reordered or not
std::string get_stdwell_apply_string(bool reorder) {
std::string kernel_name = reorder ? "stdwell_apply" : "stdwell_apply_no_reorder";
std::string s = "__kernel void " + kernel_name + R"((

View File

@ -44,30 +44,41 @@ using stdwell_apply_no_reorder_kernel_type = cl::make_kernel<cl::Buffer&, cl::Bu
using ilu_decomp_kernel_type = cl::make_kernel<const unsigned int, const unsigned int, cl::Buffer&, cl::Buffer&,
cl::Buffer&, cl::Buffer&, cl::Buffer&, const int, cl::LocalSpaceArg>;
/// Generate string with axpy kernel
/// a = a + alpha * b
std::string get_axpy_string();
// returns partial sums, instead of the final dot product
/// returns partial sums, instead of the final dot product
/// partial sums are added on CPU
std::string get_dot_1_string();
// returns partial sums, instead of the final norm
// the square root must be computed on CPU
/// returns partial sums, instead of the final norm
/// the square root must be computed on CPU
std::string get_norm_string();
// p = (p - omega * v) * beta + r
/// Generate string with custom kernel
/// This kernel combines some ilubicgstab vector operations into 1
/// p = (p - omega * v) * beta + r
std::string get_custom_string();
// b = mat * x
// algorithm based on:
// Optimization of Block Sparse Matrix-Vector Multiplication on Shared-MemoryParallel Architectures,
// Ryan Eberhardt, Mark Hoemmen, 2016, https://doi.org/10.1109/IPDPSW.2016.42
/// b = mat * x
/// algorithm based on:
/// Optimization of Block Sparse Matrix-Vector Multiplication on Shared-Memory Parallel Architectures,
/// Ryan Eberhardt, Mark Hoemmen, 2016, https://doi.org/10.1109/IPDPSW.2016.42
std::string get_spmv_blocked_string();
// ILU apply part 1: forward substitution
// solves L*x=y where L is a lower triangular sparse blocked matrix
/// ILU apply part 1: forward substitution
/// solves L*x=y where L is a lower triangular sparse blocked matrix
/// this L can be its own BSR matrix (if full_matrix is false),
/// or it can be inside a normal, square matrix; in that case diagIndex indicates where the rows of L end
/// \param[in] full_matrix whether the kernel should operate on a full (square) matrix or not
std::string get_ILU_apply1_string(bool full_matrix);
// ILU apply part 2: backward substitution
// solves U*x=y where L is a lower triangular sparse blocked matrix
/// ILU apply part 2: backward substitution
/// solves U*x=y where U is an upper triangular sparse blocked matrix
/// this U can be its own BSR matrix (if full_matrix is false),
/// or it can be inside a normal, square matrix; in that case diagIndex indicates where the rows of U start
/// \param[in] full_matrix whether the kernel should operate on a full (square) matrix or not
std::string get_ILU_apply2_string(bool full_matrix);
/// Generate string with the stdwell_apply kernels
@ -76,6 +87,8 @@ using ilu_decomp_kernel_type = cl::make_kernel<const unsigned int, const unsigne
/// \param[in] reorder whether the matrix is reordered or not
std::string get_stdwell_apply_string(bool reorder);
/// Generate string with the exact ilu decomposition kernel
/// The kernel takes a full BSR matrix and performs inplace ILU decomposition
std::string get_ilu_decomp_string();
} // end namespace bda