OpenclMatrix: template Scalar type

2025-02-25 18:55:30 -06:00 · 2024-04-15 16:33:20 +02:00 · 2024-04-15 16:33:20 +02:00 · be59203179
commit be59203179
parent 5fbd7635cd
4 changed files with 32 additions and 18 deletions
--- a/opm/simulators/linalg/bda/opencl/CPR.cpp
+++ b/opm/simulators/linalg/bda/opencl/CPR.cpp
@ -187,7 +187,7 @@ void CPR<block_size>::init_opencl_buffers() {
    }
    d_weights = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
    d_rs = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
-    d_mat = std::make_unique<OpenclMatrix>(context.get(), Nb, Nb, nnzb, block_size);
+    d_mat = std::make_unique<OpenclMatrix<double>>(context.get(), Nb, Nb, nnzb, block_size);
    d_coarse_y = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * Nb);
    d_coarse_x = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * Nb);
 }
@ -453,9 +453,10 @@ void CPR<block_size>::analyzeAggregateMaps() {


 template <unsigned int block_size>
-void CPR<block_size>::amg_cycle_gpu(const int level, cl::Buffer &y, cl::Buffer &x) {
-    OpenclMatrix *A = &d_Amatrices[level];
-    OpenclMatrix *R = &d_Rmatrices[level];
+void CPR<block_size>::amg_cycle_gpu(const int level, cl::Buffer& y, cl::Buffer& x)
+{
+    OpenclMatrix<double>* A = &d_Amatrices[level];
+    OpenclMatrix<double>* R = &d_Rmatrices[level];
    int Ncur = A->Nb;

    if (level == num_levels - 1) {
--- a/opm/simulators/linalg/bda/opencl/CPR.hpp
+++ b/opm/simulators/linalg/bda/opencl/CPR.hpp
@ -60,7 +60,7 @@ private:
    int num_levels;
    std::vector<double> weights, coarse_vals, coarse_x, coarse_y;
    std::vector<Matrix<double>> Amatrices, Rmatrices; // scalar matrices that represent the AMG hierarchy
-    std::vector<OpenclMatrix> d_Amatrices, d_Rmatrices; // scalar matrices that represent the AMG hierarchy
+    std::vector<OpenclMatrix<double>> d_Amatrices, d_Rmatrices; // scalar matrices that represent the AMG hierarchy
    std::vector<std::vector<int> > PcolIndices; // prolongation does not need a full matrix, only store colIndices
    std::vector<cl::Buffer> d_PcolIndices;
    std::vector<std::vector<double> > invDiags; // inverse of diagonal of Amatrices
@ -68,7 +68,7 @@ private:
    std::vector<cl::Buffer> d_t, d_f, d_u; // intermediate vectors used during amg cycle
    std::unique_ptr<cl::Buffer> d_rs;      // use before extracting the pressure
    std::unique_ptr<cl::Buffer> d_weights; // the quasiimpes weights, used to extract pressure
-    std::unique_ptr<OpenclMatrix> d_mat;   // stores blocked matrix
+    std::unique_ptr<OpenclMatrix<double>> d_mat;   // stores blocked matrix
    std::unique_ptr<cl::Buffer> d_coarse_y, d_coarse_x; // stores the scalar vectors
    std::once_flag opencl_buffers_allocated;  // only allocate OpenCL Buffers once

--- a/opm/simulators/linalg/bda/opencl/OpenclMatrix.cpp
+++ b/opm/simulators/linalg/bda/opencl/OpenclMatrix.cpp
@ -31,12 +31,19 @@ namespace Opm
 namespace Accelerator
 {

-void OpenclMatrix::upload(cl::CommandQueue *queue, double *vals, int *cols, int *rows) {
+template<class Scalar>
+void OpenclMatrix<Scalar>::upload(cl::CommandQueue* queue,
+                                  Scalar* vals, int* cols, int* rows)
+{
    std::vector<cl::Event> events(3);

-    cl_int err = queue->enqueueWriteBuffer(nnzValues, CL_FALSE, 0, sizeof(double) * block_size * block_size * nnzbs, vals, nullptr, &events[0]);
-    err |= queue->enqueueWriteBuffer(colIndices, CL_FALSE, 0, sizeof(int) * nnzbs, cols, nullptr, &events[1]);
-    err |= queue->enqueueWriteBuffer(rowPointers, CL_FALSE, 0, sizeof(int) * (Nb + 1), rows, nullptr, &events[2]);
+    cl_int err = queue->enqueueWriteBuffer(nnzValues, CL_FALSE, 0,
+                                           sizeof(Scalar) * block_size * block_size * nnzbs,
+                                           vals, nullptr, &events[0]);
+    err |= queue->enqueueWriteBuffer(colIndices, CL_FALSE, 0, sizeof(int) * nnzbs,
+                                     cols, nullptr, &events[1]);
+    err |= queue->enqueueWriteBuffer(rowPointers, CL_FALSE, 0, sizeof(int) * (Nb + 1),
+                                     rows, nullptr, &events[2]);

    cl::WaitForEvents(events);
    events.clear();
@ -46,7 +53,8 @@ void OpenclMatrix::upload(cl::CommandQueue *queue, double *vals, int *cols, int
    }
 }

-void OpenclMatrix::upload(cl::CommandQueue* queue, Matrix<double>* matrix)
+template<class Scalar>
+void OpenclMatrix<Scalar>::upload(cl::CommandQueue* queue, Matrix<Scalar>* matrix)
 {
    if (block_size != 1) {
        OPM_THROW(std::logic_error, "Error trying to upload a BlockedMatrix to OpenclMatrix with different block_size");
@ -55,7 +63,8 @@ void OpenclMatrix::upload(cl::CommandQueue* queue, Matrix<double>* matrix)
    upload(queue, matrix->nnzValues.data(), matrix->colIndices.data(), matrix->rowPointers.data());
 }

-void OpenclMatrix::upload(cl::CommandQueue* queue, BlockedMatrix<double>* matrix)
+template<class Scalar>
+void OpenclMatrix<Scalar>::upload(cl::CommandQueue* queue, BlockedMatrix<Scalar>* matrix)
 {
    if (matrix->block_size != block_size) {
        OPM_THROW(std::logic_error, "Error trying to upload a BlockedMatrix to OpenclMatrix with different block_size");
@ -64,5 +73,7 @@ void OpenclMatrix::upload(cl::CommandQueue* queue, BlockedMatrix<double>* matrix
    upload(queue, matrix->nnzValues, matrix->colIndices, matrix->rowPointers);
 }

+template class OpenclMatrix<double>;
+
 } // namespace Accelerator
 } // namespace Opm
--- a/opm/simulators/linalg/bda/opencl/OpenclMatrix.hpp
+++ b/opm/simulators/linalg/bda/opencl/OpenclMatrix.hpp
@ -34,23 +34,25 @@ template<class Scalar> class BlockedMatrix;

 /// This class template resembles a csr matrix; values are stored as Scalar (only double is explicitly instantiated)
 /// The matrix data is stored in OpenCL Buffers
-class OpenclMatrix {
+template<class Scalar>
+class OpenclMatrix
+{
 public:
-
    OpenclMatrix(cl::Context *context, int Nb_, int Mb_, int nnzbs_, unsigned int block_size_)
    : Nb(Nb_),
      Mb(Mb_),
      nnzbs(nnzbs_),
      block_size(block_size_)
    {
-        nnzValues = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * block_size * block_size * nnzbs);
+        nnzValues = cl::Buffer(*context, CL_MEM_READ_WRITE,
+                               sizeof(Scalar) * block_size * block_size * nnzbs);
        colIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzbs);
        rowPointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
    }

-    void upload(cl::CommandQueue *queue, double *vals, int *cols, int *rows);
-    void upload(cl::CommandQueue* queue, Matrix<double>* matrix);
-    void upload(cl::CommandQueue* queue, BlockedMatrix<double>* matrix);
+    void upload(cl::CommandQueue* queue, Scalar* vals, int* cols, int* rows);
+    void upload(cl::CommandQueue* queue, Matrix<Scalar>* matrix);
+    void upload(cl::CommandQueue* queue, BlockedMatrix<Scalar>* matrix);

    cl::Buffer nnzValues;
    cl::Buffer colIndices;