mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Change C-style arrays (raw new[]/delete[]) to C++ std::vector
Also fix the wrong allocation size of vals_contiguous (N instead of nnz) when COPY_ROW_BY_ROW is active
This commit is contained in:
parent
e0a4d271ea
commit
94ea2dcd30
@ -48,13 +48,6 @@ BILU0<block_size>::BILU0(ILUReorder opencl_ilu_reorder_, int verbosity_) :
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
|
||||||
BILU0<block_size>::~BILU0()
|
|
||||||
{
|
|
||||||
delete[] invDiagVals;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_)
|
void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context_, std::shared_ptr<cl::CommandQueue>& queue_)
|
||||||
{
|
{
|
||||||
@ -73,21 +66,21 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
|
|||||||
this->nnz = mat->nnzbs * block_size * block_size;
|
this->nnz = mat->nnzbs * block_size * block_size;
|
||||||
this->nnzb = mat->nnzbs;
|
this->nnzb = mat->nnzbs;
|
||||||
|
|
||||||
int *CSCRowIndices = nullptr;
|
std::vector<int> CSCRowIndices;
|
||||||
int *CSCColPointers = nullptr;
|
std::vector<int> CSCColPointers;
|
||||||
|
|
||||||
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
||||||
LUmat = std::make_unique<BlockedMatrix>(*mat);
|
LUmat = std::make_unique<BlockedMatrix>(*mat);
|
||||||
} else {
|
} else {
|
||||||
toOrder.resize(Nb);
|
toOrder.resize(Nb);
|
||||||
fromOrder.resize(Nb);
|
fromOrder.resize(Nb);
|
||||||
CSCRowIndices = new int[nnzb];
|
CSCRowIndices.resize(nnzb);
|
||||||
CSCColPointers = new int[Nb + 1];
|
CSCColPointers.resize(Nb + 1);
|
||||||
rmat = std::make_shared<BlockedMatrix>(mat->Nb, mat->nnzbs, block_size);
|
rmat = std::make_shared<BlockedMatrix>(mat->Nb, mat->nnzbs, block_size);
|
||||||
LUmat = std::make_unique<BlockedMatrix>(*rmat);
|
LUmat = std::make_unique<BlockedMatrix>(*rmat);
|
||||||
|
|
||||||
Timer t_convert;
|
Timer t_convert;
|
||||||
csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb);
|
csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), mat->Nb);
|
||||||
if (verbosity >= 3) {
|
if (verbosity >= 3) {
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s";
|
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s";
|
||||||
@ -99,10 +92,10 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
|
|||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) {
|
if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) {
|
||||||
out << "BILU0 reordering strategy: " << "level_scheduling\n";
|
out << "BILU0 reordering strategy: " << "level_scheduling\n";
|
||||||
findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
|
findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
|
||||||
} else if (opencl_ilu_reorder == ILUReorder::GRAPH_COLORING) {
|
} else if (opencl_ilu_reorder == ILUReorder::GRAPH_COLORING) {
|
||||||
out << "BILU0 reordering strategy: " << "graph_coloring\n";
|
out << "BILU0 reordering strategy: " << "graph_coloring\n";
|
||||||
findGraphColoring<block_size>(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
|
findGraphColoring<block_size>(mat->colIndices, mat->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
|
||||||
} else if (opencl_ilu_reorder == ILUReorder::NONE) {
|
} else if (opencl_ilu_reorder == ILUReorder::NONE) {
|
||||||
out << "BILU0 reordering strategy: none\n";
|
out << "BILU0 reordering strategy: none\n";
|
||||||
// numColors = 1;
|
// numColors = 1;
|
||||||
@ -122,22 +115,14 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
|
|||||||
#endif
|
#endif
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
|
|
||||||
|
|
||||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
|
||||||
delete[] CSCRowIndices;
|
|
||||||
delete[] CSCColPointers;
|
|
||||||
}
|
|
||||||
|
|
||||||
diagIndex.resize(mat->Nb);
|
diagIndex.resize(mat->Nb);
|
||||||
invDiagVals = new double[mat->Nb * bs * bs];
|
invDiagVals.resize(mat->Nb * bs * bs);
|
||||||
|
|
||||||
#if CHOW_PATEL
|
#if CHOW_PATEL
|
||||||
Lmat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
Lmat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||||
Umat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
Umat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
LUmat->nnzValues = new double[mat->nnzbs * bs * bs];
|
|
||||||
|
|
||||||
s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb);
|
s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb);
|
||||||
s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1));
|
s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1));
|
||||||
s.diagIndex = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LUmat->Nb);
|
s.diagIndex = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LUmat->Nb);
|
||||||
@ -155,7 +140,7 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
events.resize(2);
|
events.resize(2);
|
||||||
err = queue->enqueueWriteBuffer(s.invDiagVals, CL_FALSE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals, nullptr, &events[0]);
|
err = queue->enqueueWriteBuffer(s.invDiagVals, CL_FALSE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals.data(), nullptr, &events[0]);
|
||||||
|
|
||||||
rowsPerColorPrefix.resize(numColors + 1); // resize initializes value 0.0
|
rowsPerColorPrefix.resize(numColors + 1); // resize initializes value 0.0
|
||||||
for (int i = 0; i < numColors; ++i) {
|
for (int i = 0; i < numColors; ++i) {
|
||||||
@ -206,7 +191,7 @@ bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
|
|||||||
#if CHOW_PATEL
|
#if CHOW_PATEL
|
||||||
chowPatelIlu.decomposition(queue, context,
|
chowPatelIlu.decomposition(queue, context,
|
||||||
LUmat.get(), Lmat.get(), Umat.get(),
|
LUmat.get(), Lmat.get(), Umat.get(),
|
||||||
invDiagVals, diagIndex,
|
invDiagVals.data(), diagIndex,
|
||||||
s.diagIndex, s.invDiagVals,
|
s.diagIndex, s.invDiagVals,
|
||||||
s.Lvals, s.Lcols, s.Lrows,
|
s.Lvals, s.Lcols, s.Lrows,
|
||||||
s.Uvals, s.Ucols, s.Urows);
|
s.Uvals, s.Ucols, s.Urows);
|
||||||
|
@ -55,7 +55,7 @@ private:
|
|||||||
#if CHOW_PATEL
|
#if CHOW_PATEL
|
||||||
std::unique_ptr<BlockedMatrix> Lmat = nullptr, Umat = nullptr;
|
std::unique_ptr<BlockedMatrix> Lmat = nullptr, Umat = nullptr;
|
||||||
#endif
|
#endif
|
||||||
double *invDiagVals = nullptr;
|
std::vector<double> invDiagVals;
|
||||||
std::vector<int> diagIndex;
|
std::vector<int> diagIndex;
|
||||||
std::vector<int> rowsPerColor; // color i contains rowsPerColor[i] rows, which are processed in parallel
|
std::vector<int> rowsPerColor; // color i contains rowsPerColor[i] rows, which are processed in parallel
|
||||||
std::vector<int> rowsPerColorPrefix; // the prefix sum of rowsPerColor
|
std::vector<int> rowsPerColorPrefix; // the prefix sum of rowsPerColor
|
||||||
@ -91,8 +91,6 @@ public:
|
|||||||
|
|
||||||
BILU0(ILUReorder opencl_ilu_reorder, int verbosity);
|
BILU0(ILUReorder opencl_ilu_reorder, int verbosity);
|
||||||
|
|
||||||
~BILU0();
|
|
||||||
|
|
||||||
void init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
|
void init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
|
||||||
|
|
||||||
// analysis, find reordering if specified
|
// analysis, find reordering if specified
|
||||||
|
@ -391,7 +391,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
|||||||
prec->init(Nb, nnzb, context, queue);
|
prec->init(Nb, nnzb, context, queue);
|
||||||
|
|
||||||
#if COPY_ROW_BY_ROW
|
#if COPY_ROW_BY_ROW
|
||||||
vals_contiguous = new double[N];
|
vals_contiguous.resize(nnz);
|
||||||
#endif
|
#endif
|
||||||
mat.reset(new BlockedMatrix(Nb, nnzb, block_size, vals, cols, rows));
|
mat.reset(new BlockedMatrix(Nb, nnzb, block_size, vals, cols, rows));
|
||||||
|
|
||||||
@ -437,9 +437,6 @@ void openclSolverBackend<block_size>::finalize() {
|
|||||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||||
delete[] rb;
|
delete[] rb;
|
||||||
}
|
}
|
||||||
#if COPY_ROW_BY_ROW
|
|
||||||
delete[] vals_contiguous;
|
|
||||||
#endif
|
|
||||||
} // end finalize()
|
} // end finalize()
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
@ -451,10 +448,10 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
|||||||
int sum = 0;
|
int sum = 0;
|
||||||
for (int i = 0; i < Nb; ++i) {
|
for (int i = 0; i < Nb; ++i) {
|
||||||
int size_row = rmat->rowPointers[i + 1] - rmat->rowPointers[i];
|
int size_row = rmat->rowPointers[i + 1] - rmat->rowPointers[i];
|
||||||
memcpy(vals_contiguous + sum, reinterpret_cast<double*>(rmat->nnzValues) + sum, size_row * sizeof(double) * block_size * block_size);
|
memcpy(vals_contiguous.data() + sum, reinterpret_cast<double*>(rmat->nnzValues) + sum, size_row * sizeof(double) * block_size * block_size);
|
||||||
sum += size_row * block_size * block_size;
|
sum += size_row * block_size * block_size;
|
||||||
}
|
}
|
||||||
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous, nullptr, &events[0]);
|
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous.data(), nullptr, &events[0]);
|
||||||
#else
|
#else
|
||||||
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, rmat->nnzValues, nullptr, &events[0]);
|
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, rmat->nnzValues, nullptr, &events[0]);
|
||||||
#endif
|
#endif
|
||||||
@ -491,10 +488,10 @@ void openclSolverBackend<block_size>::update_system_on_gpu() {
|
|||||||
int sum = 0;
|
int sum = 0;
|
||||||
for (int i = 0; i < Nb; ++i) {
|
for (int i = 0; i < Nb; ++i) {
|
||||||
int size_row = rmat->rowPointers[i + 1] - rmat->rowPointers[i];
|
int size_row = rmat->rowPointers[i + 1] - rmat->rowPointers[i];
|
||||||
memcpy(vals_contiguous + sum, reinterpret_cast<double*>(rmat->nnzValues) + sum, size_row * sizeof(double) * block_size * block_size);
|
memcpy(vals_contiguous.data() + sum, reinterpret_cast<double*>(rmat->nnzValues) + sum, size_row * sizeof(double) * block_size * block_size);
|
||||||
sum += size_row * block_size * block_size;
|
sum += size_row * block_size * block_size;
|
||||||
}
|
}
|
||||||
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous, nullptr, &events[0]);
|
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous.data(), nullptr, &events[0]);
|
||||||
#else
|
#else
|
||||||
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, rmat->nnzValues, nullptr, &events[0]);
|
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, rmat->nnzValues, nullptr, &events[0]);
|
||||||
#endif
|
#endif
|
||||||
|
@ -59,7 +59,7 @@ class openclSolverBackend : public BdaSolver<block_size>
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
double *rb = nullptr; // reordered b vector, if the matrix is reordered, rb is newly allocated, otherwise it just points to b
|
double *rb = nullptr; // reordered b vector, if the matrix is reordered, rb is newly allocated, otherwise it just points to b
|
||||||
double *vals_contiguous = nullptr; // only used if COPY_ROW_BY_ROW is true in openclSolverBackend.cpp
|
std::vector<double> vals_contiguous; // only used if COPY_ROW_BY_ROW is true in openclSolverBackend.cpp
|
||||||
|
|
||||||
// OpenCL variables must be reusable, they are initialized in initialize()
|
// OpenCL variables must be reusable, they are initialized in initialize()
|
||||||
cl::Buffer d_Avals, d_Acols, d_Arows; // (reordered) matrix in BSR format on GPU
|
cl::Buffer d_Avals, d_Acols, d_Arows; // (reordered) matrix in BSR format on GPU
|
||||||
|
Loading…
Reference in New Issue
Block a user