Minor changes:

- removed unused header inclusion
- updated comments
- removed unnecessary cast
2025-02-25 18:55:30 -06:00 · 2022-10-13 14:29:39 +02:00
parent 860c209bac
commit 09e262bbfd
5 changed files with 29 additions and 11 deletions
--- a/opm/simulators/linalg/bda/opencl/BILU0.cpp
+++ b/opm/simulators/linalg/bda/opencl/BILU0.cpp
@@ -24,7 +24,6 @@
 #include <opm/common/ErrorMacros.hpp>
 #include <dune/common/timer.hh>

-#include <opm/simulators/linalg/bda/BdaSolver.hpp>
 #include <opm/simulators/linalg/bda/opencl/BILU0.hpp>
 #include <opm/simulators/linalg/bda/opencl/ChowPatelIlu.hpp>
 #include <opm/simulators/linalg/bda/opencl/openclKernels.hpp>
@@ -152,8 +151,9 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat
    if (opencl_ilu_parallel) {
        err |= queue->enqueueWriteBuffer(s.rowIndices, CL_FALSE, 0, Nb * sizeof(unsigned), fromOrder.data(), nullptr, &events[2]);
    } else {
-        // rowsPerColorPrefix is misused here
-        // s.rowIndices[i] == i must hold
+        // fromOrder is not initialized, so use something else to fill s.rowIndices
+        // s.rowIndices[i] == i must hold, since every rowidx is mapped to itself (i.e. no actual mapping)
+        // rowsPerColorPrefix is misused here, it contains an increasing sequence (0, 1, 2, ...)
        err |= queue->enqueueWriteBuffer(s.rowIndices, CL_FALSE, 0, Nb * sizeof(unsigned), rowsPerColorPrefix.data(), nullptr, &events[2]);
    }

--- a/opm/simulators/linalg/bda/opencl/BILU0.hpp
+++ b/opm/simulators/linalg/bda/opencl/BILU0.hpp
@@ -68,10 +68,12 @@ private:
    bool opencl_ilu_parallel;

    typedef struct {
-        cl::Buffer invDiagVals;
-        cl::Buffer diagIndex;
-        cl::Buffer rowsPerColor;
-        cl::Buffer rowIndices;
+        cl::Buffer invDiagVals;    // nnz values of diagonal blocks of the matrix, inverted
+        cl::Buffer diagIndex;      // index of diagonal block of each row, used to differentiate between lower and upper triangular part
+        cl::Buffer rowsPerColor;   // number of rows for every color
+        cl::Buffer rowIndices;     // mapping every row to another index
+                                   // after mapping, all rows that are processed in parallel are contiguous
+                                   // equal to the contents of fromOrder
 #if CHOW_PATEL
        cl::Buffer Lvals, Lcols, Lrows;
        cl::Buffer Uvals, Ucols, Urows;
--- a/opm/simulators/linalg/bda/opencl/openclSolverBackend.cpp
+++ b/opm/simulators/linalg/bda/opencl/openclSolverBackend.cpp
@@ -476,7 +476,7 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
    int sum = 0;
    for (int i = 0; i < Nb; ++i) {
        int size_row = mat->rowPointers[i + 1] - mat->rowPointers[i];
-        memcpy(vals_contiguous.data() + sum, reinterpret_cast<double*>(mat->nnzValues) + sum, size_row * sizeof(double) * block_size * block_size);
+        memcpy(vals_contiguous.data() + sum, mat->nnzValues + sum, size_row * sizeof(double) * block_size * block_size);
        sum += size_row * block_size * block_size;
    }
    err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous.data(), nullptr, &events[0]);
@@ -513,7 +513,7 @@ void openclSolverBackend<block_size>::update_system_on_gpu() {
    int sum = 0;
    for (int i = 0; i < Nb; ++i) {
        int size_row = mat->rowPointers[i + 1] - mat->rowPointers[i];
-        memcpy(vals_contiguous.data() + sum, reinterpret_cast<double*>(mat->nnzValues) + sum, size_row * sizeof(double) * block_size * block_size);
+        memcpy(vals_contiguous.data() + sum, mat->nnzValues + sum, size_row * sizeof(double) * block_size * block_size);
        sum += size_row * block_size * block_size;
    }
    err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous.data(), nullptr, &events[0]);
--- a/tests/test_cusparseSolver.cpp
+++ b/tests/test_cusparseSolver.cpp
@@ -113,7 +113,15 @@ testCusparseSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix, V
    auto wellContribs = Opm::WellContributions::create("cusparse", false);
    std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
    try {
-        bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode, fpga_bitstream, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_parallel, linsolver);
+        bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
+                                                                               fpga_bitstream,
+                                                                               linear_solver_verbosity,
+                                                                               maxit,
+                                                                               tolerance,
+                                                                               platformID,
+                                                                               deviceID,
+                                                                               opencl_ilu_parallel,
+                                                                               linsolver);
        auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory
                            // matrix created by readMatrixMarket() did not have contiguous memory
        bridge->solve_system(&mat2, &mat2, /*numJacobiBlocks=*/0, rhs, *wellContribs, result);
--- a/tests/test_openclSolver.cpp
+++ b/tests/test_openclSolver.cpp
@@ -111,7 +111,15 @@ testOpenclSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix, Vec
    auto wellContribs = Opm::WellContributions::create("opencl", false);
    std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
    try {
-        bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode, fpga_bitstream, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_parallel, linsolver);
+        bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
+                                                                               fpga_bitstream,
+                                                                               linear_solver_verbosity,
+                                                                               maxit,
+                                                                               tolerance,
+                                                                               platformID,
+                                                                               deviceID,
+                                                                               opencl_ilu_parallel,
+                                                                               linsolver);
    } catch (const std::logic_error& error) {
        BOOST_WARN_MESSAGE(true, error.what());
        throw PlatformInitException(error.what());