From e0a4d271ea1ea45ce19d959c92ef16e0f2555b62 Mon Sep 17 00:00:00 2001 From: Tong Dong Qiu Date: Tue, 30 Nov 2021 16:05:58 +0100 Subject: [PATCH] Formatting changes --- opm/simulators/linalg/bda/BILU0.cpp | 412 ++++++++++++++-------------- opm/simulators/linalg/bda/BILU0.hpp | 120 ++++---- 2 files changed, 267 insertions(+), 265 deletions(-) diff --git a/opm/simulators/linalg/bda/BILU0.cpp b/opm/simulators/linalg/bda/BILU0.cpp index 332797d50..c61db9de2 100644 --- a/opm/simulators/linalg/bda/BILU0.cpp +++ b/opm/simulators/linalg/bda/BILU0.cpp @@ -47,6 +47,7 @@ BILU0::BILU0(ILUReorder opencl_ilu_reorder_, int verbosity_) : #endif } + template BILU0::~BILU0() { @@ -61,247 +62,248 @@ void BILU0::init(int Nb, int nnzb, std::shared_ptr& con queue = queue_.get(); } - template - bool BILU0::analyze_matrix(BlockedMatrix *mat) - { - const unsigned int bs = block_size; - this->N = mat->Nb * block_size; - this->Nb = mat->Nb; - this->nnz = mat->nnzbs * block_size * block_size; - this->nnzb = mat->nnzbs; +template +bool BILU0::analyze_matrix(BlockedMatrix *mat) +{ + const unsigned int bs = block_size; - int *CSCRowIndices = nullptr; - int *CSCColPointers = nullptr; + this->N = mat->Nb * block_size; + this->Nb = mat->Nb; + this->nnz = mat->nnzbs * block_size * block_size; + this->nnzb = mat->nnzbs; - if (opencl_ilu_reorder == ILUReorder::NONE) { - LUmat = std::make_unique(*mat); - } else { - toOrder.resize(Nb); - fromOrder.resize(Nb); - CSCRowIndices = new int[nnzb]; - CSCColPointers = new int[Nb + 1]; - rmat = std::make_shared(mat->Nb, mat->nnzbs, block_size); - LUmat = std::make_unique(*rmat); + int *CSCRowIndices = nullptr; + int *CSCColPointers = nullptr; - Timer t_convert; - csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb); - if(verbosity >= 3){ - std::ostringstream out; - out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s"; - OpmLog::info(out.str()); - } + if (opencl_ilu_reorder == ILUReorder::NONE) { + LUmat = std::make_unique(*mat); + } else { + toOrder.resize(Nb); + fromOrder.resize(Nb); + CSCRowIndices = new int[nnzb]; + CSCColPointers = new int[Nb + 1]; + rmat = std::make_shared(mat->Nb, mat->nnzbs, block_size); + LUmat = std::make_unique(*rmat); + + Timer t_convert; + csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb); + if (verbosity >= 3) { + std::ostringstream out; + out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s"; + OpmLog::info(out.str()); } + } - Timer t_analysis; - std::ostringstream out; - if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) { - out << "BILU0 reordering strategy: " << "level_scheduling\n"; - findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor); - } else if (opencl_ilu_reorder == ILUReorder::GRAPH_COLORING) { - out << "BILU0 reordering strategy: " << "graph_coloring\n"; - findGraphColoring(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor); - } else if (opencl_ilu_reorder == ILUReorder::NONE) { - out << "BILU0 reordering strategy: none\n"; - // numColors = 1; - // rowsPerColor.emplace_back(Nb); - numColors = Nb; - for(int i = 0; i < Nb; ++i){ - rowsPerColor.emplace_back(1); - } - } else { - OPM_THROW(std::logic_error, "Error ilu reordering strategy not set correctly\n"); - } - if(verbosity >= 1){ - out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors\n"; + Timer t_analysis; + std::ostringstream out; + if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) { + out << "BILU0 reordering strategy: " << "level_scheduling\n"; + findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor); + } else if (opencl_ilu_reorder == ILUReorder::GRAPH_COLORING) { + out << "BILU0 reordering strategy: " << "graph_coloring\n"; + findGraphColoring(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor); + } else if (opencl_ilu_reorder == ILUReorder::NONE) { + out << "BILU0 reordering strategy: none\n"; + // numColors = 1; + // rowsPerColor.emplace_back(Nb); + numColors = Nb; + for (int i = 0; i < Nb; ++i) { + rowsPerColor.emplace_back(1); } + } else { + OPM_THROW(std::logic_error, "Error ilu reordering strategy not set correctly\n"); + } + if (verbosity >= 1) { + out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors\n"; + } #if CHOW_PATEL - out << "BILU0 CHOW_PATEL: " << CHOW_PATEL << ", CHOW_PATEL_GPU: " << CHOW_PATEL_GPU; + out << "BILU0 CHOW_PATEL: " << CHOW_PATEL << ", CHOW_PATEL_GPU: " << CHOW_PATEL_GPU; #endif - OpmLog::info(out.str()); + OpmLog::info(out.str()); - if (opencl_ilu_reorder != ILUReorder::NONE) { - delete[] CSCRowIndices; - delete[] CSCColPointers; - } + if (opencl_ilu_reorder != ILUReorder::NONE) { + delete[] CSCRowIndices; + delete[] CSCColPointers; + } - diagIndex.resize(mat->Nb); - invDiagVals = new double[mat->Nb * bs * bs]; + diagIndex.resize(mat->Nb); + invDiagVals = new double[mat->Nb * bs * bs]; #if CHOW_PATEL - Lmat = std::make_unique(mat->Nb, (mat->nnzbs - mat->Nb) / 2); - Umat = std::make_unique(mat->Nb, (mat->nnzbs - mat->Nb) / 2); + Lmat = std::make_unique(mat->Nb, (mat->nnzbs - mat->Nb) / 2); + Umat = std::make_unique(mat->Nb, (mat->nnzbs - mat->Nb) / 2); #endif - LUmat->nnzValues = new double[mat->nnzbs * bs * bs]; + LUmat->nnzValues = new double[mat->nnzbs * bs * bs]; - s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb); - s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1)); - s.diagIndex = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LUmat->Nb); + s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb); + s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1)); + s.diagIndex = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LUmat->Nb); #if CHOW_PATEL - s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs); - s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs); - s.Lrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Lmat->Nb + 1)); - s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs); - s.Ucols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs); - s.Urows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Lmat->Nb + 1)); + s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs); + s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs); + s.Lrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Lmat->Nb + 1)); + s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs); + s.Ucols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs); + s.Urows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Lmat->Nb + 1)); #else - s.LUvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * LUmat->nnzbs); - s.LUcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LUmat->nnzbs); - s.LUrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (LUmat->Nb + 1)); + s.LUvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * LUmat->nnzbs); + s.LUcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LUmat->nnzbs); + s.LUrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (LUmat->Nb + 1)); #endif - events.resize(2); - err = queue->enqueueWriteBuffer(s.invDiagVals, CL_FALSE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals, nullptr, &events[0]); + events.resize(2); + err = queue->enqueueWriteBuffer(s.invDiagVals, CL_FALSE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals, nullptr, &events[0]); - rowsPerColorPrefix.resize(numColors + 1); // resize initializes value 0.0 - for (int i = 0; i < numColors; ++i) { - rowsPerColorPrefix[i+1] = rowsPerColorPrefix[i] + rowsPerColor[i]; - } - err |= queue->enqueueWriteBuffer(s.rowsPerColor, CL_FALSE, 0, (numColors + 1) * sizeof(int), rowsPerColorPrefix.data(), nullptr, &events[1]); + rowsPerColorPrefix.resize(numColors + 1); // resize initializes value 0.0 + for (int i = 0; i < numColors; ++i) { + rowsPerColorPrefix[i + 1] = rowsPerColorPrefix[i] + rowsPerColor[i]; + } + err |= queue->enqueueWriteBuffer(s.rowsPerColor, CL_FALSE, 0, (numColors + 1) * sizeof(int), rowsPerColorPrefix.data(), nullptr, &events[1]); - cl::WaitForEvents(events); - events.clear(); - if (err != CL_SUCCESS) { - // enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL - OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error"); - } + cl::WaitForEvents(events); + events.clear(); + if (err != CL_SUCCESS) { + // enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL + OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error"); + } return true; } // end init() +template +bool BILU0::create_preconditioner(BlockedMatrix *mat) +{ + const unsigned int bs = block_size; + auto *m = mat; - template - bool BILU0::create_preconditioner(BlockedMatrix *mat) - { - const unsigned int bs = block_size; - auto *m = mat; - - if (opencl_ilu_reorder != ILUReorder::NONE) { - m = rmat.get(); - Timer t_reorder; - reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rmat.get()); - - if (verbosity >= 3){ - std::ostringstream out; - out << "BILU0 reorder matrix: " << t_reorder.stop() << " s"; - OpmLog::info(out.str()); - } - } - - // TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp - // this copy can have mat or rmat ->nnzValues as origin, depending on the reorder strategy - Timer t_copy; - memcpy(LUmat->nnzValues, m->nnzValues, sizeof(double) * bs * bs * m->nnzbs); - - if (verbosity >= 3){ - std::ostringstream out; - out << "BILU0 memcpy: " << t_copy.stop() << " s"; - OpmLog::info(out.str()); - } - -#if CHOW_PATEL - chowPatelIlu.decomposition(queue, context, - LUmat.get(), Lmat.get(), Umat.get(), - invDiagVals, diagIndex, - s.diagIndex, s.invDiagVals, - s.Lvals, s.Lcols, s.Lrows, - s.Uvals, s.Ucols, s.Urows); -#else - Timer t_copyToGpu; - - events.resize(1); - err = queue->enqueueWriteBuffer(s.LUvals, CL_FALSE, 0, LUmat->nnzbs * bs * bs * sizeof(double), LUmat->nnzValues, nullptr, &events[0]); - - std::call_once(pattern_uploaded, [&](){ - // find the positions of each diagonal block - // must be done after reordering - for (int row = 0; row < Nb; ++row) { - int rowStart = LUmat->rowPointers[row]; - int rowEnd = LUmat->rowPointers[row+1]; - - auto candidate = std::find(LUmat->colIndices + rowStart, LUmat->colIndices + rowEnd, row); - assert(candidate != LUmat->colIndices + rowEnd); - diagIndex[row] = candidate - LUmat->colIndices; - } - events.resize(4); - err |= queue->enqueueWriteBuffer(s.diagIndex, CL_FALSE, 0, Nb * sizeof(int), diagIndex.data(), nullptr, &events[1]); - err |= queue->enqueueWriteBuffer(s.LUcols, CL_FALSE, 0, LUmat->nnzbs * sizeof(int), LUmat->colIndices, nullptr, &events[2]); - err |= queue->enqueueWriteBuffer(s.LUrows, CL_FALSE, 0, (LUmat->Nb + 1) * sizeof(int), LUmat->rowPointers, nullptr, &events[3]); - }); - - cl::WaitForEvents(events); - events.clear(); - if (err != CL_SUCCESS) { - // enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL - OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error"); - } + if (opencl_ilu_reorder != ILUReorder::NONE) { + m = rmat.get(); + Timer t_reorder; + reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rmat.get()); if (verbosity >= 3) { std::ostringstream out; - out << "BILU0 copy to GPU: " << t_copyToGpu.stop() << " s"; - OpmLog::info(out.str()); - } - - Timer t_decomposition; - std::ostringstream out; - cl::Event event; - for (int color = 0; color < numColors; ++color) { - const unsigned int firstRow = rowsPerColorPrefix[color]; - const unsigned int lastRow = rowsPerColorPrefix[color+1]; - if (verbosity >= 4) { - out << "color " << color << ": " << firstRow << " - " << lastRow << " = " << lastRow - firstRow << "\n"; - } - OpenclKernels::ILU_decomp(firstRow, lastRow, s.LUvals, s.LUcols, s.LUrows, s.diagIndex, s.invDiagVals, Nb, block_size); - } - - if (verbosity >= 3) { - out << "BILU0 decomposition: " << t_decomposition.stop() << " s"; - OpmLog::info(out.str()); - } -#endif // CHOW_PATEL - - return true; - } // end create_preconditioner() - - // kernels are blocking on an NVIDIA GPU, so waiting for events is not needed - // however, if individual kernel calls are timed, waiting for events is needed - // behavior on other GPUs is untested - template - void BILU0::apply(const cl::Buffer& y, cl::Buffer& x) - { - const double relaxation = 0.9; - cl::Event event; - Timer t_apply; - - for(int color = 0; color < numColors; ++color){ -#if CHOW_PATEL - OpenclKernels::ILU_apply1(s.Lvals, s.Lcols, s.Lrows, s.diagIndex, y, x, s.rowsPerColor, color, Nb, block_size); -#else - OpenclKernels::ILU_apply1(s.LUvals, s.LUcols, s.LUrows, s.diagIndex, y, x, s.rowsPerColor, color, Nb, block_size); -#endif - } - - for(int color = numColors-1; color >= 0; --color){ -#if CHOW_PATEL - OpenclKernels::ILU_apply2(s.Uvals, s.Ucols, s.Urows, s.diagIndex, s.invDiagVals, x, s.rowsPerColor, color, Nb, block_size); -#else - OpenclKernels::ILU_apply2(s.LUvals, s.LUcols, s.LUrows, s.diagIndex, s.invDiagVals, x, s.rowsPerColor, color, Nb, block_size); -#endif - } - - // apply relaxation - OpenclKernels::scale(x, relaxation, N); - - if (verbosity >= 4) { - std::ostringstream out; - out << "BILU0 apply: " << t_apply.stop() << " s"; + out << "BILU0 reorder matrix: " << t_reorder.stop() << " s"; OpmLog::info(out.str()); } } + // TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp + // this copy can have mat or rmat ->nnzValues as origin, depending on the reorder strategy + Timer t_copy; + memcpy(LUmat->nnzValues, m->nnzValues, sizeof(double) * bs * bs * m->nnzbs); + + if (verbosity >= 3) { + std::ostringstream out; + out << "BILU0 memcpy: " << t_copy.stop() << " s"; + OpmLog::info(out.str()); + } + +#if CHOW_PATEL + chowPatelIlu.decomposition(queue, context, + LUmat.get(), Lmat.get(), Umat.get(), + invDiagVals, diagIndex, + s.diagIndex, s.invDiagVals, + s.Lvals, s.Lcols, s.Lrows, + s.Uvals, s.Ucols, s.Urows); +#else + Timer t_copyToGpu; + + events.resize(1); + err = queue->enqueueWriteBuffer(s.LUvals, CL_FALSE, 0, LUmat->nnzbs * bs * bs * sizeof(double), LUmat->nnzValues, nullptr, &events[0]); + + std::call_once(pattern_uploaded, [&]() { + // find the positions of each diagonal block + // must be done after reordering + for (int row = 0; row < Nb; ++row) { + int rowStart = LUmat->rowPointers[row]; + int rowEnd = LUmat->rowPointers[row + 1]; + + auto candidate = std::find(LUmat->colIndices + rowStart, LUmat->colIndices + rowEnd, row); + assert(candidate != LUmat->colIndices + rowEnd); + diagIndex[row] = candidate - LUmat->colIndices; + } + events.resize(4); + err |= queue->enqueueWriteBuffer(s.diagIndex, CL_FALSE, 0, Nb * sizeof(int), diagIndex.data(), nullptr, &events[1]); + err |= queue->enqueueWriteBuffer(s.LUcols, CL_FALSE, 0, LUmat->nnzbs * sizeof(int), LUmat->colIndices, nullptr, &events[2]); + err |= queue->enqueueWriteBuffer(s.LUrows, CL_FALSE, 0, (LUmat->Nb + 1) * sizeof(int), LUmat->rowPointers, nullptr, &events[3]); + }); + + cl::WaitForEvents(events); + events.clear(); + if (err != CL_SUCCESS) { + // enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL + OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error"); + } + + if (verbosity >= 3) { + std::ostringstream out; + out << "BILU0 copy to GPU: " << t_copyToGpu.stop() << " s"; + OpmLog::info(out.str()); + } + + Timer t_decomposition; + std::ostringstream out; + cl::Event event; + for (int color = 0; color < numColors; ++color) { + const unsigned int firstRow = rowsPerColorPrefix[color]; + const unsigned int lastRow = rowsPerColorPrefix[color + 1]; + if (verbosity >= 4) { + out << "color " << color << ": " << firstRow << " - " << lastRow << " = " << lastRow - firstRow << "\n"; + } + OpenclKernels::ILU_decomp(firstRow, lastRow, s.LUvals, s.LUcols, s.LUrows, s.diagIndex, s.invDiagVals, Nb, block_size); + } + + if (verbosity >= 3) { + out << "BILU0 decomposition: " << t_decomposition.stop() << " s"; + OpmLog::info(out.str()); + } +#endif // CHOW_PATEL + + return true; +} // end create_preconditioner() + + +// kernels are blocking on an NVIDIA GPU, so waiting for events is not needed +// however, if individual kernel calls are timed, waiting for events is needed +// behavior on other GPUs is untested +template +void BILU0::apply(const cl::Buffer& y, cl::Buffer& x) +{ + const double relaxation = 0.9; + cl::Event event; + Timer t_apply; + + for (int color = 0; color < numColors; ++color) { +#if CHOW_PATEL + OpenclKernels::ILU_apply1(s.Lvals, s.Lcols, s.Lrows, s.diagIndex, y, x, s.rowsPerColor, color, Nb, block_size); +#else + OpenclKernels::ILU_apply1(s.LUvals, s.LUcols, s.LUrows, s.diagIndex, y, x, s.rowsPerColor, color, Nb, block_size); +#endif + } + + for (int color = numColors - 1; color >= 0; --color) { +#if CHOW_PATEL + OpenclKernels::ILU_apply2(s.Uvals, s.Ucols, s.Urows, s.diagIndex, s.invDiagVals, x, s.rowsPerColor, color, Nb, block_size); +#else + OpenclKernels::ILU_apply2(s.LUvals, s.LUcols, s.LUrows, s.diagIndex, s.invDiagVals, x, s.rowsPerColor, color, Nb, block_size); +#endif + } + + // apply relaxation + OpenclKernels::scale(x, relaxation, N); + + if (verbosity >= 4) { + std::ostringstream out; + out << "BILU0 apply: " << t_apply.stop() << " s"; + OpmLog::info(out.str()); + } +} + #define INSTANTIATE_BDA_FUNCTIONS(n) \ diff --git a/opm/simulators/linalg/bda/BILU0.hpp b/opm/simulators/linalg/bda/BILU0.hpp index a9dcc9bb0..9561b39e5 100644 --- a/opm/simulators/linalg/bda/BILU0.hpp +++ b/opm/simulators/linalg/bda/BILU0.hpp @@ -36,90 +36,90 @@ namespace Opm namespace Accelerator { - /// This class implements a Blocked ILU0 preconditioner - /// The decomposition is done on CPU, and reorders the rows of the matrix - template - class BILU0 : public Preconditioner - { - typedef Preconditioner Base; +/// This class implements a Blocked ILU0 preconditioner +/// The decomposition is done on CPU, and reorders the rows of the matrix +template +class BILU0 : public Preconditioner +{ + typedef Preconditioner Base; - using Base::N; - using Base::Nb; - using Base::nnz; - using Base::nnzb; - using Base::verbosity; + using Base::N; + using Base::Nb; + using Base::nnz; + using Base::nnzb; + using Base::verbosity; - private: - std::unique_ptr LUmat = nullptr; - std::shared_ptr rmat = nullptr; // only used with PAR_SIM +private: + std::unique_ptr LUmat = nullptr; + std::shared_ptr rmat = nullptr; // only used with PAR_SIM #if CHOW_PATEL - std::unique_ptr Lmat = nullptr, Umat = nullptr; + std::unique_ptr Lmat = nullptr, Umat = nullptr; #endif - double *invDiagVals = nullptr; - std::vector diagIndex; - std::vector rowsPerColor; // color i contains rowsPerColor[i] rows, which are processed in parallel - std::vector rowsPerColorPrefix; // the prefix sum of rowsPerColor - std::vector toOrder, fromOrder; - int numColors; - std::once_flag pattern_uploaded; + double *invDiagVals = nullptr; + std::vector diagIndex; + std::vector rowsPerColor; // color i contains rowsPerColor[i] rows, which are processed in parallel + std::vector rowsPerColorPrefix; // the prefix sum of rowsPerColor + std::vector toOrder, fromOrder; + int numColors; + std::once_flag pattern_uploaded; - ILUReorder opencl_ilu_reorder; + ILUReorder opencl_ilu_reorder; - typedef struct { - cl::Buffer invDiagVals; - cl::Buffer diagIndex; - cl::Buffer rowsPerColor; + typedef struct { + cl::Buffer invDiagVals; + cl::Buffer diagIndex; + cl::Buffer rowsPerColor; #if CHOW_PATEL - cl::Buffer Lvals, Lcols, Lrows; - cl::Buffer Uvals, Ucols, Urows; + cl::Buffer Lvals, Lcols, Lrows; + cl::Buffer Uvals, Ucols, Urows; #else - cl::Buffer LUvals, LUcols, LUrows; + cl::Buffer LUvals, LUcols, LUrows; #endif - } GPU_storage; + } GPU_storage; - GPU_storage s; - cl::Context *context; - cl::CommandQueue *queue; - std::vector events; - cl_int err; + GPU_storage s; + cl::Context *context; + cl::CommandQueue *queue; + std::vector events; + cl_int err; #if CHOW_PATEL - ChowPatelIlu chowPatelIlu; + ChowPatelIlu chowPatelIlu; #endif - public: +public: - BILU0(ILUReorder opencl_ilu_reorder, int verbosity); + BILU0(ILUReorder opencl_ilu_reorder, int verbosity); - ~BILU0(); + ~BILU0(); - void init(int Nb, int nnzb, std::shared_ptr& context, std::shared_ptr& queue) override; + void init(int Nb, int nnzb, std::shared_ptr& context, std::shared_ptr& queue) override; - // analysis, find reordering if specified - bool analyze_matrix(BlockedMatrix *mat) override; + // analysis, find reordering if specified + bool analyze_matrix(BlockedMatrix *mat) override; - // ilu_decomposition - bool create_preconditioner(BlockedMatrix *mat) override; + // ilu_decomposition + bool create_preconditioner(BlockedMatrix *mat) override; - // apply preconditioner, x = prec(y) - void apply(const cl::Buffer& y, cl::Buffer& x) override; + // apply preconditioner, x = prec(y) + void apply(const cl::Buffer& y, cl::Buffer& x) override; - int* getToOrder() override - { - return toOrder.data(); - } + int* getToOrder() override + { + return toOrder.data(); + } - int* getFromOrder() override - { - return fromOrder.data(); - } + int* getFromOrder() override + { + return fromOrder.data(); + } - BlockedMatrix* getRMat() override - { - return rmat.get(); - } + BlockedMatrix* getRMat() override + { + return rmat.get(); + } - }; +}; } // namespace Accelerator } // namespace Opm