Updated comments

2025-02-25 18:55:30 -06:00 · 2021-03-03 16:36:27 +01:00 · 2021-03-03 16:36:27 +01:00 · b87e9dad9a
commit b87e9dad9a
parent 0caae966b8
2 changed files with 25 additions and 25 deletions
--- a/opm/simulators/linalg/bda/openclKernels.cpp
+++ b/opm/simulators/linalg/bda/openclKernels.cpp
@@ -152,11 +152,6 @@ namespace bda
        )";
    }

-
-    // b = mat * x
-    // algorithm based on:
-    // Optimization of Block Sparse Matrix-Vector Multiplication on Shared-MemoryParallel Architectures,
-    // Ryan Eberhardt, Mark Hoemmen, 2016, https://doi.org/10.1109/IPDPSW.2016.42
    std::string get_spmv_blocked_string() {
        return R"(
        __kernel void spmv_blocked(
@@ -227,8 +222,6 @@ namespace bda



-    // ILU apply part 1: forward substitution
-    // solves L*x=y where L is a lower triangular sparse blocked matrix
    std::string get_ILU_apply1_string(bool full_matrix) {
        std::string s = R"(
            __kernel void ILU_apply1(
@@ -305,8 +298,6 @@ namespace bda
    }


-    // ILU apply part 2: backward substitution
-    // solves U*x=y where L is a lower triangular sparse blocked matrix
    std::string get_ILU_apply2_string(bool full_matrix) {
        std::string s = R"(
            __kernel void ILU_apply2(
@@ -391,10 +382,6 @@ namespace bda
        return s;
    }

-    /// Generate string with the stdwell_apply kernels
-    /// If reorder is true, the B/Ccols do not correspond with the x/y vector
-    /// the x/y vector is reordered, use toOrder to address that
-    /// \param[in] reorder   whether the matrix is reordered or not
    std::string get_stdwell_apply_string(bool reorder) {
        std::string kernel_name = reorder ? "stdwell_apply" : "stdwell_apply_no_reorder";
        std::string s = "__kernel void " + kernel_name + R"((
--- a/opm/simulators/linalg/bda/openclKernels.hpp
+++ b/opm/simulators/linalg/bda/openclKernels.hpp
@@ -44,30 +44,41 @@ using stdwell_apply_no_reorder_kernel_type = cl::make_kernel<cl::Buffer&, cl::Bu
 using ilu_decomp_kernel_type = cl::make_kernel<const unsigned int, const unsigned int, cl::Buffer&, cl::Buffer&,
                                               cl::Buffer&, cl::Buffer&, cl::Buffer&, const int, cl::LocalSpaceArg>;

+    /// Generate string with axpy kernel
+    /// a = a + alpha * b
    std::string get_axpy_string();

-    // returns partial sums, instead of the final dot product
+    /// returns partial sums, instead of the final dot product
+    /// partial sums are added on CPU
    std::string get_dot_1_string();

-    // returns partial sums, instead of the final norm
-    // the square root must be computed on CPU
+    /// returns partial sums, instead of the final norm
+    /// the square root must be computed on CPU
    std::string get_norm_string();

-    // p = (p - omega * v) * beta + r
+    /// Generate string with custom kernel
+    /// This kernel combines some ilubicgstab vector operations into 1
+    /// p = (p - omega * v) * beta + r
    std::string get_custom_string();

-    // b = mat * x
-    // algorithm based on:
-    // Optimization of Block Sparse Matrix-Vector Multiplication on Shared-MemoryParallel Architectures,
-    // Ryan Eberhardt, Mark Hoemmen, 2016, https://doi.org/10.1109/IPDPSW.2016.42
+    /// b = mat * x
+    /// algorithm based on:
+    /// Optimization of Block Sparse Matrix-Vector Multiplication on Shared-Memory Parallel Architectures,
+    /// Ryan Eberhardt, Mark Hoemmen, 2016, https://doi.org/10.1109/IPDPSW.2016.42
    std::string get_spmv_blocked_string();

-    // ILU apply part 1: forward substitution
-    // solves L*x=y where L is a lower triangular sparse blocked matrix
+    /// ILU apply part 1: forward substitution
+    /// solves L*x=y where L is a lower triangular sparse blocked matrix
+    /// this L can be its own BSR matrix (if full_matrix is false),
+    /// or it can be inside a normal, square matrix; in that case diagIndex indicates where the rows of L end
+    /// \param[in] full_matrix   whether the kernel should operate on a full (square) matrix or not
    std::string get_ILU_apply1_string(bool full_matrix);

-    // ILU apply part 2: backward substitution
-    // solves U*x=y where L is a lower triangular sparse blocked matrix
+    /// ILU apply part 2: backward substitution
+    /// solves U*x=y where U is an upper triangular sparse blocked matrix
+    /// this U can be its own BSR matrix (if full_matrix is false),
+    /// or it can be inside a normal, square matrix; in that case diagIndex indicates where the rows of U start
+    /// \param[in] full_matrix   whether the kernel should operate on a full (square) matrix or not
    std::string get_ILU_apply2_string(bool full_matrix);

    /// Generate string with the stdwell_apply kernels
@@ -76,6 +87,8 @@ using ilu_decomp_kernel_type = cl::make_kernel<const unsigned int, const unsigne
    /// \param[in] reorder   whether the matrix is reordered or not
    std::string get_stdwell_apply_string(bool reorder);

+    /// Generate string with the exact ilu decomposition kernel
+    /// The kernel takes a full BSR matrix and performs inplace ILU decomposition
    std::string get_ilu_decomp_string();

 } // end namespace bda