Change C to C++: replace raw new[]/delete[] arrays with std::vector

Also fix the wrong size of vals_contiguous (N instead of nnz) when COPY_ROW_BY_ROW is active
This commit is contained in:
Tong Dong Qiu 2021-12-01 12:00:20 +01:00
parent e0a4d271ea
commit 94ea2dcd30
4 changed files with 17 additions and 37 deletions

View File

@ -48,13 +48,6 @@ BILU0<block_size>::BILU0(ILUReorder opencl_ilu_reorder_, int verbosity_) :
}
template <unsigned int block_size>
BILU0<block_size>::~BILU0()
{
delete[] invDiagVals;
}
template <unsigned int block_size>
void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_)
{
@ -73,21 +66,21 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
this->nnz = mat->nnzbs * block_size * block_size;
this->nnzb = mat->nnzbs;
int *CSCRowIndices = nullptr;
int *CSCColPointers = nullptr;
std::vector<int> CSCRowIndices;
std::vector<int> CSCColPointers;
if (opencl_ilu_reorder == ILUReorder::NONE) {
LUmat = std::make_unique<BlockedMatrix>(*mat);
} else {
toOrder.resize(Nb);
fromOrder.resize(Nb);
CSCRowIndices = new int[nnzb];
CSCColPointers = new int[Nb + 1];
CSCRowIndices.resize(nnzb);
CSCColPointers.resize(Nb + 1);
rmat = std::make_shared<BlockedMatrix>(mat->Nb, mat->nnzbs, block_size);
LUmat = std::make_unique<BlockedMatrix>(*rmat);
Timer t_convert;
csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb);
csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), mat->Nb);
if (verbosity >= 3) {
std::ostringstream out;
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s";
@ -99,10 +92,10 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
std::ostringstream out;
if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) {
out << "BILU0 reordering strategy: " << "level_scheduling\n";
findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
} else if (opencl_ilu_reorder == ILUReorder::GRAPH_COLORING) {
out << "BILU0 reordering strategy: " << "graph_coloring\n";
findGraphColoring<block_size>(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
findGraphColoring<block_size>(mat->colIndices, mat->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
} else if (opencl_ilu_reorder == ILUReorder::NONE) {
out << "BILU0 reordering strategy: none\n";
// numColors = 1;
@ -122,22 +115,14 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
#endif
OpmLog::info(out.str());
if (opencl_ilu_reorder != ILUReorder::NONE) {
delete[] CSCRowIndices;
delete[] CSCColPointers;
}
diagIndex.resize(mat->Nb);
invDiagVals = new double[mat->Nb * bs * bs];
invDiagVals.resize(mat->Nb * bs * bs);
#if CHOW_PATEL
Lmat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
Umat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
#endif
LUmat->nnzValues = new double[mat->nnzbs * bs * bs];
s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb);
s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1));
s.diagIndex = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LUmat->Nb);
@ -155,7 +140,7 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
#endif
events.resize(2);
err = queue->enqueueWriteBuffer(s.invDiagVals, CL_FALSE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals, nullptr, &events[0]);
err = queue->enqueueWriteBuffer(s.invDiagVals, CL_FALSE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals.data(), nullptr, &events[0]);
rowsPerColorPrefix.resize(numColors + 1); // resize initializes value 0.0
for (int i = 0; i < numColors; ++i) {
@ -206,7 +191,7 @@ bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
#if CHOW_PATEL
chowPatelIlu.decomposition(queue, context,
LUmat.get(), Lmat.get(), Umat.get(),
invDiagVals, diagIndex,
invDiagVals.data(), diagIndex,
s.diagIndex, s.invDiagVals,
s.Lvals, s.Lcols, s.Lrows,
s.Uvals, s.Ucols, s.Urows);

View File

@ -55,7 +55,7 @@ private:
#if CHOW_PATEL
std::unique_ptr<BlockedMatrix> Lmat = nullptr, Umat = nullptr;
#endif
double *invDiagVals = nullptr;
std::vector<double> invDiagVals;
std::vector<int> diagIndex;
std::vector<int> rowsPerColor; // color i contains rowsPerColor[i] rows, which are processed in parallel
std::vector<int> rowsPerColorPrefix; // the prefix sum of rowsPerColor
@ -91,8 +91,6 @@ public:
BILU0(ILUReorder opencl_ilu_reorder, int verbosity);
~BILU0();
void init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
// analysis, find reordering if specified

View File

@ -391,7 +391,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
prec->init(Nb, nnzb, context, queue);
#if COPY_ROW_BY_ROW
vals_contiguous = new double[N];
vals_contiguous.resize(nnz);
#endif
mat.reset(new BlockedMatrix(Nb, nnzb, block_size, vals, cols, rows));
@ -437,9 +437,6 @@ void openclSolverBackend<block_size>::finalize() {
if (opencl_ilu_reorder != ILUReorder::NONE) {
delete[] rb;
}
#if COPY_ROW_BY_ROW
delete[] vals_contiguous;
#endif
} // end finalize()
template <unsigned int block_size>
@ -451,10 +448,10 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
int sum = 0;
for (int i = 0; i < Nb; ++i) {
int size_row = rmat->rowPointers[i + 1] - rmat->rowPointers[i];
memcpy(vals_contiguous + sum, reinterpret_cast<double*>(rmat->nnzValues) + sum, size_row * sizeof(double) * block_size * block_size);
memcpy(vals_contiguous.data() + sum, reinterpret_cast<double*>(rmat->nnzValues) + sum, size_row * sizeof(double) * block_size * block_size);
sum += size_row * block_size * block_size;
}
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous, nullptr, &events[0]);
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous.data(), nullptr, &events[0]);
#else
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, rmat->nnzValues, nullptr, &events[0]);
#endif
@ -491,10 +488,10 @@ void openclSolverBackend<block_size>::update_system_on_gpu() {
int sum = 0;
for (int i = 0; i < Nb; ++i) {
int size_row = rmat->rowPointers[i + 1] - rmat->rowPointers[i];
memcpy(vals_contiguous + sum, reinterpret_cast<double*>(rmat->nnzValues) + sum, size_row * sizeof(double) * block_size * block_size);
memcpy(vals_contiguous.data() + sum, reinterpret_cast<double*>(rmat->nnzValues) + sum, size_row * sizeof(double) * block_size * block_size);
sum += size_row * block_size * block_size;
}
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous, nullptr, &events[0]);
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous.data(), nullptr, &events[0]);
#else
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, rmat->nnzValues, nullptr, &events[0]);
#endif

View File

@ -59,7 +59,7 @@ class openclSolverBackend : public BdaSolver<block_size>
private:
double *rb = nullptr; // reordered b vector, if the matrix is reordered, rb is newly allocated, otherwise it just points to b
double *vals_contiguous = nullptr; // only used if COPY_ROW_BY_ROW is true in openclSolverBackend.cpp
std::vector<double> vals_contiguous; // only used if COPY_ROW_BY_ROW is true in openclSolverBackend.cpp
// OpenCL variables must be reusable, they are initialized in initialize()
cl::Buffer d_Avals, d_Acols, d_Arows; // (reordered) matrix in BSR format on GPU