mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Formatting changes
This commit is contained in:
parent
0881089406
commit
e0a4d271ea
@ -47,6 +47,7 @@ BILU0<block_size>::BILU0(ILUReorder opencl_ilu_reorder_, int verbosity_) :
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
BILU0<block_size>::~BILU0()
|
||||
{
|
||||
@ -61,247 +62,248 @@ void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& con
|
||||
queue = queue_.get();
|
||||
}
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
|
||||
{
|
||||
const unsigned int bs = block_size;
|
||||
|
||||
this->N = mat->Nb * block_size;
|
||||
this->Nb = mat->Nb;
|
||||
this->nnz = mat->nnzbs * block_size * block_size;
|
||||
this->nnzb = mat->nnzbs;
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
|
||||
{
|
||||
const unsigned int bs = block_size;
|
||||
|
||||
int *CSCRowIndices = nullptr;
|
||||
int *CSCColPointers = nullptr;
|
||||
this->N = mat->Nb * block_size;
|
||||
this->Nb = mat->Nb;
|
||||
this->nnz = mat->nnzbs * block_size * block_size;
|
||||
this->nnzb = mat->nnzbs;
|
||||
|
||||
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
||||
LUmat = std::make_unique<BlockedMatrix>(*mat);
|
||||
} else {
|
||||
toOrder.resize(Nb);
|
||||
fromOrder.resize(Nb);
|
||||
CSCRowIndices = new int[nnzb];
|
||||
CSCColPointers = new int[Nb + 1];
|
||||
rmat = std::make_shared<BlockedMatrix>(mat->Nb, mat->nnzbs, block_size);
|
||||
LUmat = std::make_unique<BlockedMatrix>(*rmat);
|
||||
int *CSCRowIndices = nullptr;
|
||||
int *CSCColPointers = nullptr;
|
||||
|
||||
Timer t_convert;
|
||||
csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb);
|
||||
if(verbosity >= 3){
|
||||
std::ostringstream out;
|
||||
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
||||
LUmat = std::make_unique<BlockedMatrix>(*mat);
|
||||
} else {
|
||||
toOrder.resize(Nb);
|
||||
fromOrder.resize(Nb);
|
||||
CSCRowIndices = new int[nnzb];
|
||||
CSCColPointers = new int[Nb + 1];
|
||||
rmat = std::make_shared<BlockedMatrix>(mat->Nb, mat->nnzbs, block_size);
|
||||
LUmat = std::make_unique<BlockedMatrix>(*rmat);
|
||||
|
||||
Timer t_convert;
|
||||
csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb);
|
||||
if (verbosity >= 3) {
|
||||
std::ostringstream out;
|
||||
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
}
|
||||
|
||||
Timer t_analysis;
|
||||
std::ostringstream out;
|
||||
if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) {
|
||||
out << "BILU0 reordering strategy: " << "level_scheduling\n";
|
||||
findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
|
||||
} else if (opencl_ilu_reorder == ILUReorder::GRAPH_COLORING) {
|
||||
out << "BILU0 reordering strategy: " << "graph_coloring\n";
|
||||
findGraphColoring<block_size>(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
|
||||
} else if (opencl_ilu_reorder == ILUReorder::NONE) {
|
||||
out << "BILU0 reordering strategy: none\n";
|
||||
// numColors = 1;
|
||||
// rowsPerColor.emplace_back(Nb);
|
||||
numColors = Nb;
|
||||
for(int i = 0; i < Nb; ++i){
|
||||
rowsPerColor.emplace_back(1);
|
||||
}
|
||||
} else {
|
||||
OPM_THROW(std::logic_error, "Error ilu reordering strategy not set correctly\n");
|
||||
}
|
||||
if(verbosity >= 1){
|
||||
out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors\n";
|
||||
Timer t_analysis;
|
||||
std::ostringstream out;
|
||||
if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) {
|
||||
out << "BILU0 reordering strategy: " << "level_scheduling\n";
|
||||
findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
|
||||
} else if (opencl_ilu_reorder == ILUReorder::GRAPH_COLORING) {
|
||||
out << "BILU0 reordering strategy: " << "graph_coloring\n";
|
||||
findGraphColoring<block_size>(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
|
||||
} else if (opencl_ilu_reorder == ILUReorder::NONE) {
|
||||
out << "BILU0 reordering strategy: none\n";
|
||||
// numColors = 1;
|
||||
// rowsPerColor.emplace_back(Nb);
|
||||
numColors = Nb;
|
||||
for (int i = 0; i < Nb; ++i) {
|
||||
rowsPerColor.emplace_back(1);
|
||||
}
|
||||
} else {
|
||||
OPM_THROW(std::logic_error, "Error ilu reordering strategy not set correctly\n");
|
||||
}
|
||||
if (verbosity >= 1) {
|
||||
out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors\n";
|
||||
}
|
||||
#if CHOW_PATEL
|
||||
out << "BILU0 CHOW_PATEL: " << CHOW_PATEL << ", CHOW_PATEL_GPU: " << CHOW_PATEL_GPU;
|
||||
out << "BILU0 CHOW_PATEL: " << CHOW_PATEL << ", CHOW_PATEL_GPU: " << CHOW_PATEL_GPU;
|
||||
#endif
|
||||
OpmLog::info(out.str());
|
||||
OpmLog::info(out.str());
|
||||
|
||||
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
delete[] CSCRowIndices;
|
||||
delete[] CSCColPointers;
|
||||
}
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
delete[] CSCRowIndices;
|
||||
delete[] CSCColPointers;
|
||||
}
|
||||
|
||||
diagIndex.resize(mat->Nb);
|
||||
invDiagVals = new double[mat->Nb * bs * bs];
|
||||
diagIndex.resize(mat->Nb);
|
||||
invDiagVals = new double[mat->Nb * bs * bs];
|
||||
|
||||
#if CHOW_PATEL
|
||||
Lmat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||
Umat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||
Lmat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||
Umat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||
#endif
|
||||
|
||||
LUmat->nnzValues = new double[mat->nnzbs * bs * bs];
|
||||
LUmat->nnzValues = new double[mat->nnzbs * bs * bs];
|
||||
|
||||
s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb);
|
||||
s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1));
|
||||
s.diagIndex = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LUmat->Nb);
|
||||
s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb);
|
||||
s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1));
|
||||
s.diagIndex = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LUmat->Nb);
|
||||
#if CHOW_PATEL
|
||||
s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs);
|
||||
s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs);
|
||||
s.Lrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Lmat->Nb + 1));
|
||||
s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs);
|
||||
s.Ucols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs);
|
||||
s.Urows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Lmat->Nb + 1));
|
||||
s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs);
|
||||
s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs);
|
||||
s.Lrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Lmat->Nb + 1));
|
||||
s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs);
|
||||
s.Ucols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs);
|
||||
s.Urows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Lmat->Nb + 1));
|
||||
#else
|
||||
s.LUvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * LUmat->nnzbs);
|
||||
s.LUcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LUmat->nnzbs);
|
||||
s.LUrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (LUmat->Nb + 1));
|
||||
s.LUvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * LUmat->nnzbs);
|
||||
s.LUcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LUmat->nnzbs);
|
||||
s.LUrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (LUmat->Nb + 1));
|
||||
#endif
|
||||
|
||||
events.resize(2);
|
||||
err = queue->enqueueWriteBuffer(s.invDiagVals, CL_FALSE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals, nullptr, &events[0]);
|
||||
events.resize(2);
|
||||
err = queue->enqueueWriteBuffer(s.invDiagVals, CL_FALSE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals, nullptr, &events[0]);
|
||||
|
||||
rowsPerColorPrefix.resize(numColors + 1); // resize initializes value 0.0
|
||||
for (int i = 0; i < numColors; ++i) {
|
||||
rowsPerColorPrefix[i+1] = rowsPerColorPrefix[i] + rowsPerColor[i];
|
||||
}
|
||||
err |= queue->enqueueWriteBuffer(s.rowsPerColor, CL_FALSE, 0, (numColors + 1) * sizeof(int), rowsPerColorPrefix.data(), nullptr, &events[1]);
|
||||
rowsPerColorPrefix.resize(numColors + 1); // resize initializes value 0.0
|
||||
for (int i = 0; i < numColors; ++i) {
|
||||
rowsPerColorPrefix[i + 1] = rowsPerColorPrefix[i] + rowsPerColor[i];
|
||||
}
|
||||
err |= queue->enqueueWriteBuffer(s.rowsPerColor, CL_FALSE, 0, (numColors + 1) * sizeof(int), rowsPerColorPrefix.data(), nullptr, &events[1]);
|
||||
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
if (err != CL_SUCCESS) {
|
||||
// enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
|
||||
OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error");
|
||||
}
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
if (err != CL_SUCCESS) {
|
||||
// enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
|
||||
OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error");
|
||||
}
|
||||
|
||||
return true;
|
||||
} // end init()
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
|
||||
{
|
||||
const unsigned int bs = block_size;
|
||||
auto *m = mat;
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
|
||||
{
|
||||
const unsigned int bs = block_size;
|
||||
auto *m = mat;
|
||||
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
m = rmat.get();
|
||||
Timer t_reorder;
|
||||
reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rmat.get());
|
||||
|
||||
if (verbosity >= 3){
|
||||
std::ostringstream out;
|
||||
out << "BILU0 reorder matrix: " << t_reorder.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
|
||||
// this copy can have mat or rmat ->nnzValues as origin, depending on the reorder strategy
|
||||
Timer t_copy;
|
||||
memcpy(LUmat->nnzValues, m->nnzValues, sizeof(double) * bs * bs * m->nnzbs);
|
||||
|
||||
if (verbosity >= 3){
|
||||
std::ostringstream out;
|
||||
out << "BILU0 memcpy: " << t_copy.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
|
||||
#if CHOW_PATEL
|
||||
chowPatelIlu.decomposition(queue, context,
|
||||
LUmat.get(), Lmat.get(), Umat.get(),
|
||||
invDiagVals, diagIndex,
|
||||
s.diagIndex, s.invDiagVals,
|
||||
s.Lvals, s.Lcols, s.Lrows,
|
||||
s.Uvals, s.Ucols, s.Urows);
|
||||
#else
|
||||
Timer t_copyToGpu;
|
||||
|
||||
events.resize(1);
|
||||
err = queue->enqueueWriteBuffer(s.LUvals, CL_FALSE, 0, LUmat->nnzbs * bs * bs * sizeof(double), LUmat->nnzValues, nullptr, &events[0]);
|
||||
|
||||
std::call_once(pattern_uploaded, [&](){
|
||||
// find the positions of each diagonal block
|
||||
// must be done after reordering
|
||||
for (int row = 0; row < Nb; ++row) {
|
||||
int rowStart = LUmat->rowPointers[row];
|
||||
int rowEnd = LUmat->rowPointers[row+1];
|
||||
|
||||
auto candidate = std::find(LUmat->colIndices + rowStart, LUmat->colIndices + rowEnd, row);
|
||||
assert(candidate != LUmat->colIndices + rowEnd);
|
||||
diagIndex[row] = candidate - LUmat->colIndices;
|
||||
}
|
||||
events.resize(4);
|
||||
err |= queue->enqueueWriteBuffer(s.diagIndex, CL_FALSE, 0, Nb * sizeof(int), diagIndex.data(), nullptr, &events[1]);
|
||||
err |= queue->enqueueWriteBuffer(s.LUcols, CL_FALSE, 0, LUmat->nnzbs * sizeof(int), LUmat->colIndices, nullptr, &events[2]);
|
||||
err |= queue->enqueueWriteBuffer(s.LUrows, CL_FALSE, 0, (LUmat->Nb + 1) * sizeof(int), LUmat->rowPointers, nullptr, &events[3]);
|
||||
});
|
||||
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
if (err != CL_SUCCESS) {
|
||||
// enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
|
||||
OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error");
|
||||
}
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
m = rmat.get();
|
||||
Timer t_reorder;
|
||||
reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rmat.get());
|
||||
|
||||
if (verbosity >= 3) {
|
||||
std::ostringstream out;
|
||||
out << "BILU0 copy to GPU: " << t_copyToGpu.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
|
||||
Timer t_decomposition;
|
||||
std::ostringstream out;
|
||||
cl::Event event;
|
||||
for (int color = 0; color < numColors; ++color) {
|
||||
const unsigned int firstRow = rowsPerColorPrefix[color];
|
||||
const unsigned int lastRow = rowsPerColorPrefix[color+1];
|
||||
if (verbosity >= 4) {
|
||||
out << "color " << color << ": " << firstRow << " - " << lastRow << " = " << lastRow - firstRow << "\n";
|
||||
}
|
||||
OpenclKernels::ILU_decomp(firstRow, lastRow, s.LUvals, s.LUcols, s.LUrows, s.diagIndex, s.invDiagVals, Nb, block_size);
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
out << "BILU0 decomposition: " << t_decomposition.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
#endif // CHOW_PATEL
|
||||
|
||||
return true;
|
||||
} // end create_preconditioner()
|
||||
|
||||
// kernels are blocking on an NVIDIA GPU, so waiting for events is not needed
|
||||
// however, if individual kernel calls are timed, waiting for events is needed
|
||||
// behavior on other GPUs is untested
|
||||
template <unsigned int block_size>
|
||||
void BILU0<block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
|
||||
{
|
||||
const double relaxation = 0.9;
|
||||
cl::Event event;
|
||||
Timer t_apply;
|
||||
|
||||
for(int color = 0; color < numColors; ++color){
|
||||
#if CHOW_PATEL
|
||||
OpenclKernels::ILU_apply1(s.Lvals, s.Lcols, s.Lrows, s.diagIndex, y, x, s.rowsPerColor, color, Nb, block_size);
|
||||
#else
|
||||
OpenclKernels::ILU_apply1(s.LUvals, s.LUcols, s.LUrows, s.diagIndex, y, x, s.rowsPerColor, color, Nb, block_size);
|
||||
#endif
|
||||
}
|
||||
|
||||
for(int color = numColors-1; color >= 0; --color){
|
||||
#if CHOW_PATEL
|
||||
OpenclKernels::ILU_apply2(s.Uvals, s.Ucols, s.Urows, s.diagIndex, s.invDiagVals, x, s.rowsPerColor, color, Nb, block_size);
|
||||
#else
|
||||
OpenclKernels::ILU_apply2(s.LUvals, s.LUcols, s.LUrows, s.diagIndex, s.invDiagVals, x, s.rowsPerColor, color, Nb, block_size);
|
||||
#endif
|
||||
}
|
||||
|
||||
// apply relaxation
|
||||
OpenclKernels::scale(x, relaxation, N);
|
||||
|
||||
if (verbosity >= 4) {
|
||||
std::ostringstream out;
|
||||
out << "BILU0 apply: " << t_apply.stop() << " s";
|
||||
out << "BILU0 reorder matrix: " << t_reorder.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
|
||||
// this copy can have mat or rmat ->nnzValues as origin, depending on the reorder strategy
|
||||
Timer t_copy;
|
||||
memcpy(LUmat->nnzValues, m->nnzValues, sizeof(double) * bs * bs * m->nnzbs);
|
||||
|
||||
if (verbosity >= 3) {
|
||||
std::ostringstream out;
|
||||
out << "BILU0 memcpy: " << t_copy.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
|
||||
#if CHOW_PATEL
|
||||
chowPatelIlu.decomposition(queue, context,
|
||||
LUmat.get(), Lmat.get(), Umat.get(),
|
||||
invDiagVals, diagIndex,
|
||||
s.diagIndex, s.invDiagVals,
|
||||
s.Lvals, s.Lcols, s.Lrows,
|
||||
s.Uvals, s.Ucols, s.Urows);
|
||||
#else
|
||||
Timer t_copyToGpu;
|
||||
|
||||
events.resize(1);
|
||||
err = queue->enqueueWriteBuffer(s.LUvals, CL_FALSE, 0, LUmat->nnzbs * bs * bs * sizeof(double), LUmat->nnzValues, nullptr, &events[0]);
|
||||
|
||||
std::call_once(pattern_uploaded, [&]() {
|
||||
// find the positions of each diagonal block
|
||||
// must be done after reordering
|
||||
for (int row = 0; row < Nb; ++row) {
|
||||
int rowStart = LUmat->rowPointers[row];
|
||||
int rowEnd = LUmat->rowPointers[row + 1];
|
||||
|
||||
auto candidate = std::find(LUmat->colIndices + rowStart, LUmat->colIndices + rowEnd, row);
|
||||
assert(candidate != LUmat->colIndices + rowEnd);
|
||||
diagIndex[row] = candidate - LUmat->colIndices;
|
||||
}
|
||||
events.resize(4);
|
||||
err |= queue->enqueueWriteBuffer(s.diagIndex, CL_FALSE, 0, Nb * sizeof(int), diagIndex.data(), nullptr, &events[1]);
|
||||
err |= queue->enqueueWriteBuffer(s.LUcols, CL_FALSE, 0, LUmat->nnzbs * sizeof(int), LUmat->colIndices, nullptr, &events[2]);
|
||||
err |= queue->enqueueWriteBuffer(s.LUrows, CL_FALSE, 0, (LUmat->Nb + 1) * sizeof(int), LUmat->rowPointers, nullptr, &events[3]);
|
||||
});
|
||||
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
if (err != CL_SUCCESS) {
|
||||
// enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
|
||||
OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error");
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
std::ostringstream out;
|
||||
out << "BILU0 copy to GPU: " << t_copyToGpu.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
|
||||
Timer t_decomposition;
|
||||
std::ostringstream out;
|
||||
cl::Event event;
|
||||
for (int color = 0; color < numColors; ++color) {
|
||||
const unsigned int firstRow = rowsPerColorPrefix[color];
|
||||
const unsigned int lastRow = rowsPerColorPrefix[color + 1];
|
||||
if (verbosity >= 4) {
|
||||
out << "color " << color << ": " << firstRow << " - " << lastRow << " = " << lastRow - firstRow << "\n";
|
||||
}
|
||||
OpenclKernels::ILU_decomp(firstRow, lastRow, s.LUvals, s.LUcols, s.LUrows, s.diagIndex, s.invDiagVals, Nb, block_size);
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
out << "BILU0 decomposition: " << t_decomposition.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
#endif // CHOW_PATEL
|
||||
|
||||
return true;
|
||||
} // end create_preconditioner()
|
||||
|
||||
|
||||
// kernels are blocking on an NVIDIA GPU, so waiting for events is not needed
|
||||
// however, if individual kernel calls are timed, waiting for events is needed
|
||||
// behavior on other GPUs is untested
|
||||
template <unsigned int block_size>
|
||||
void BILU0<block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
|
||||
{
|
||||
const double relaxation = 0.9;
|
||||
cl::Event event;
|
||||
Timer t_apply;
|
||||
|
||||
for (int color = 0; color < numColors; ++color) {
|
||||
#if CHOW_PATEL
|
||||
OpenclKernels::ILU_apply1(s.Lvals, s.Lcols, s.Lrows, s.diagIndex, y, x, s.rowsPerColor, color, Nb, block_size);
|
||||
#else
|
||||
OpenclKernels::ILU_apply1(s.LUvals, s.LUcols, s.LUrows, s.diagIndex, y, x, s.rowsPerColor, color, Nb, block_size);
|
||||
#endif
|
||||
}
|
||||
|
||||
for (int color = numColors - 1; color >= 0; --color) {
|
||||
#if CHOW_PATEL
|
||||
OpenclKernels::ILU_apply2(s.Uvals, s.Ucols, s.Urows, s.diagIndex, s.invDiagVals, x, s.rowsPerColor, color, Nb, block_size);
|
||||
#else
|
||||
OpenclKernels::ILU_apply2(s.LUvals, s.LUcols, s.LUrows, s.diagIndex, s.invDiagVals, x, s.rowsPerColor, color, Nb, block_size);
|
||||
#endif
|
||||
}
|
||||
|
||||
// apply relaxation
|
||||
OpenclKernels::scale(x, relaxation, N);
|
||||
|
||||
if (verbosity >= 4) {
|
||||
std::ostringstream out;
|
||||
out << "BILU0 apply: " << t_apply.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||
|
@ -36,90 +36,90 @@ namespace Opm
|
||||
namespace Accelerator
|
||||
{
|
||||
|
||||
/// This class implements a Blocked ILU0 preconditioner
|
||||
/// The decomposition is done on CPU, and reorders the rows of the matrix
|
||||
template <unsigned int block_size>
|
||||
class BILU0 : public Preconditioner<block_size>
|
||||
{
|
||||
typedef Preconditioner<block_size> Base;
|
||||
/// This class implements a Blocked ILU0 preconditioner
|
||||
/// The decomposition is done on CPU, and reorders the rows of the matrix
|
||||
template <unsigned int block_size>
|
||||
class BILU0 : public Preconditioner<block_size>
|
||||
{
|
||||
typedef Preconditioner<block_size> Base;
|
||||
|
||||
using Base::N;
|
||||
using Base::Nb;
|
||||
using Base::nnz;
|
||||
using Base::nnzb;
|
||||
using Base::verbosity;
|
||||
using Base::N;
|
||||
using Base::Nb;
|
||||
using Base::nnz;
|
||||
using Base::nnzb;
|
||||
using Base::verbosity;
|
||||
|
||||
private:
|
||||
std::unique_ptr<BlockedMatrix> LUmat = nullptr;
|
||||
std::shared_ptr<BlockedMatrix> rmat = nullptr; // only used with PAR_SIM
|
||||
private:
|
||||
std::unique_ptr<BlockedMatrix> LUmat = nullptr;
|
||||
std::shared_ptr<BlockedMatrix> rmat = nullptr; // only used with PAR_SIM
|
||||
#if CHOW_PATEL
|
||||
std::unique_ptr<BlockedMatrix> Lmat = nullptr, Umat = nullptr;
|
||||
std::unique_ptr<BlockedMatrix> Lmat = nullptr, Umat = nullptr;
|
||||
#endif
|
||||
double *invDiagVals = nullptr;
|
||||
std::vector<int> diagIndex;
|
||||
std::vector<int> rowsPerColor; // color i contains rowsPerColor[i] rows, which are processed in parallel
|
||||
std::vector<int> rowsPerColorPrefix; // the prefix sum of rowsPerColor
|
||||
std::vector<int> toOrder, fromOrder;
|
||||
int numColors;
|
||||
std::once_flag pattern_uploaded;
|
||||
double *invDiagVals = nullptr;
|
||||
std::vector<int> diagIndex;
|
||||
std::vector<int> rowsPerColor; // color i contains rowsPerColor[i] rows, which are processed in parallel
|
||||
std::vector<int> rowsPerColorPrefix; // the prefix sum of rowsPerColor
|
||||
std::vector<int> toOrder, fromOrder;
|
||||
int numColors;
|
||||
std::once_flag pattern_uploaded;
|
||||
|
||||
ILUReorder opencl_ilu_reorder;
|
||||
ILUReorder opencl_ilu_reorder;
|
||||
|
||||
typedef struct {
|
||||
cl::Buffer invDiagVals;
|
||||
cl::Buffer diagIndex;
|
||||
cl::Buffer rowsPerColor;
|
||||
typedef struct {
|
||||
cl::Buffer invDiagVals;
|
||||
cl::Buffer diagIndex;
|
||||
cl::Buffer rowsPerColor;
|
||||
#if CHOW_PATEL
|
||||
cl::Buffer Lvals, Lcols, Lrows;
|
||||
cl::Buffer Uvals, Ucols, Urows;
|
||||
cl::Buffer Lvals, Lcols, Lrows;
|
||||
cl::Buffer Uvals, Ucols, Urows;
|
||||
#else
|
||||
cl::Buffer LUvals, LUcols, LUrows;
|
||||
cl::Buffer LUvals, LUcols, LUrows;
|
||||
#endif
|
||||
} GPU_storage;
|
||||
} GPU_storage;
|
||||
|
||||
GPU_storage s;
|
||||
cl::Context *context;
|
||||
cl::CommandQueue *queue;
|
||||
std::vector<cl::Event> events;
|
||||
cl_int err;
|
||||
GPU_storage s;
|
||||
cl::Context *context;
|
||||
cl::CommandQueue *queue;
|
||||
std::vector<cl::Event> events;
|
||||
cl_int err;
|
||||
|
||||
#if CHOW_PATEL
|
||||
ChowPatelIlu<block_size> chowPatelIlu;
|
||||
ChowPatelIlu<block_size> chowPatelIlu;
|
||||
#endif
|
||||
|
||||
public:
|
||||
public:
|
||||
|
||||
BILU0(ILUReorder opencl_ilu_reorder, int verbosity);
|
||||
BILU0(ILUReorder opencl_ilu_reorder, int verbosity);
|
||||
|
||||
~BILU0();
|
||||
~BILU0();
|
||||
|
||||
void init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
|
||||
void init(int Nb, int nnzb, std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
|
||||
|
||||
// analysis, find reordering if specified
|
||||
bool analyze_matrix(BlockedMatrix *mat) override;
|
||||
// analysis, find reordering if specified
|
||||
bool analyze_matrix(BlockedMatrix *mat) override;
|
||||
|
||||
// ilu_decomposition
|
||||
bool create_preconditioner(BlockedMatrix *mat) override;
|
||||
// ilu_decomposition
|
||||
bool create_preconditioner(BlockedMatrix *mat) override;
|
||||
|
||||
// apply preconditioner, x = prec(y)
|
||||
void apply(const cl::Buffer& y, cl::Buffer& x) override;
|
||||
// apply preconditioner, x = prec(y)
|
||||
void apply(const cl::Buffer& y, cl::Buffer& x) override;
|
||||
|
||||
int* getToOrder() override
|
||||
{
|
||||
return toOrder.data();
|
||||
}
|
||||
int* getToOrder() override
|
||||
{
|
||||
return toOrder.data();
|
||||
}
|
||||
|
||||
int* getFromOrder() override
|
||||
{
|
||||
return fromOrder.data();
|
||||
}
|
||||
int* getFromOrder() override
|
||||
{
|
||||
return fromOrder.data();
|
||||
}
|
||||
|
||||
BlockedMatrix* getRMat() override
|
||||
{
|
||||
return rmat.get();
|
||||
}
|
||||
BlockedMatrix* getRMat() override
|
||||
{
|
||||
return rmat.get();
|
||||
}
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace Accelerator
|
||||
} // namespace Opm
|
||||
|
Loading…
Reference in New Issue
Block a user