mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Reordering is now actually skipped when NONE is chosen.
Also made reordering optional for openclSolver.
This commit is contained in:
parent
71692ff52b
commit
f26e58c6ca
@ -53,9 +53,12 @@ namespace bda
|
||||
{
|
||||
delete[] invDiagVals;
|
||||
delete[] diagIndex;
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
delete[] toOrder;
|
||||
delete[] fromOrder;
|
||||
}
|
||||
delete[] LUmat->nnzValues;
|
||||
}
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::init(BlockedMatrix<block_size> *mat)
|
||||
@ -67,11 +70,18 @@ namespace bda
|
||||
this->nnz = mat->nnzbs * block_size * block_size;
|
||||
this->nnzbs = mat->nnzbs;
|
||||
|
||||
int *CSCRowIndices = nullptr;
|
||||
int *CSCColPointers = nullptr;
|
||||
|
||||
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
||||
LUmat = std::make_unique<BlockedMatrix<block_size> >(*mat);
|
||||
} else {
|
||||
toOrder = new int[Nb];
|
||||
fromOrder = new int[Nb];
|
||||
|
||||
int *CSCRowIndices = new int[nnzbs];
|
||||
int *CSCColPointers = new int[Nb + 1];
|
||||
CSCRowIndices = new int[nnzbs];
|
||||
CSCColPointers = new int[Nb + 1];
|
||||
rmat = std::make_shared<BlockedMatrix<block_size> >(mat->Nb, mat->nnzbs);
|
||||
LUmat = std::make_unique<BlockedMatrix<block_size> >(*rmat);
|
||||
|
||||
Timer t_convert;
|
||||
csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb);
|
||||
@ -80,10 +90,9 @@ namespace bda
|
||||
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
}
|
||||
|
||||
Timer t_analysis;
|
||||
rmat = std::make_shared<BlockedMatrix<block_size> >(mat->Nb, mat->nnzbs);
|
||||
LUmat = std::make_unique<BlockedMatrix<block_size> >(*rmat);
|
||||
std::ostringstream out;
|
||||
if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) {
|
||||
out << "BILU0 reordering strategy: " << "level_scheduling\n";
|
||||
@ -93,15 +102,11 @@ namespace bda
|
||||
findGraphColoring<block_size>(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder, fromOrder, rowsPerColor);
|
||||
} else if (opencl_ilu_reorder == ILUReorder::NONE) {
|
||||
out << "BILU0 reordering strategy: none\n";
|
||||
numColors = 1;
|
||||
rowsPerColor.emplace_back(Nb);
|
||||
// numColors = Nb;
|
||||
// for(int i = 0; i < Nb; ++i){
|
||||
// rowsPerColor.emplace_back(1);
|
||||
// }
|
||||
// numColors = 1;
|
||||
// rowsPerColor.emplace_back(Nb);
|
||||
numColors = Nb;
|
||||
for(int i = 0; i < Nb; ++i){
|
||||
toOrder[i] = i;
|
||||
fromOrder[i] = i;
|
||||
rowsPerColor.emplace_back(1);
|
||||
}
|
||||
} else {
|
||||
OPM_THROW(std::logic_error, "Error ilu reordering strategy not set correctly\n");
|
||||
@ -114,8 +119,10 @@ namespace bda
|
||||
}
|
||||
OpmLog::info(out.str());
|
||||
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
delete[] CSCRowIndices;
|
||||
delete[] CSCColPointers;
|
||||
}
|
||||
|
||||
diagIndex = new int[mat->Nb];
|
||||
invDiagVals = new double[mat->Nb * bs * bs];
|
||||
@ -385,6 +392,10 @@ namespace bda
|
||||
bool BILU0<block_size>::create_preconditioner(BlockedMatrix<block_size> *mat)
|
||||
{
|
||||
const unsigned int bs = block_size;
|
||||
auto *m = mat;
|
||||
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
m = rmat.get();
|
||||
Timer t_reorder;
|
||||
reorderBlockedMatrixByPattern<block_size>(mat, toOrder, fromOrder, rmat.get());
|
||||
|
||||
@ -393,10 +404,12 @@ namespace bda
|
||||
out << "BILU0 reorder matrix: " << t_reorder.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
|
||||
// this copy can have mat or rmat ->nnzValues as origin, depending on the reorder strategy
|
||||
Timer t_copy;
|
||||
memcpy(LUmat->nnzValues, rmat->nnzValues, sizeof(double) * bs * bs * rmat->nnzbs);
|
||||
memcpy(LUmat->nnzValues, m->nnzValues, sizeof(double) * bs * bs * m->nnzbs);
|
||||
|
||||
if (verbosity >= 3){
|
||||
std::ostringstream out;
|
||||
|
@ -126,6 +126,7 @@ public:
|
||||
/// Since the rows of the matrix are reordered, the columnindices of the matrixdata is incorrect
|
||||
/// Those indices need to be mapped via toOrder
|
||||
/// \param[in] toOrder array with mappings
|
||||
/// \param[in] reorder whether reordering is actually used or not
|
||||
void setReordering(int *toOrder, bool reorder);
|
||||
};
|
||||
|
||||
|
@ -44,7 +44,7 @@ using Opm::OpmLog;
|
||||
using Dune::Timer;
|
||||
|
||||
template <unsigned int block_size>
|
||||
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_, ILUReorder opencl_ilu_reorder) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_) {
|
||||
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_, ILUReorder opencl_ilu_reorder_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_), opencl_ilu_reorder(opencl_ilu_reorder_) {
|
||||
prec = new Preconditioner(opencl_ilu_reorder, verbosity_);
|
||||
|
||||
cl_int err = CL_SUCCESS;
|
||||
@ -485,7 +485,6 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
||||
prec->setOpenCLContext(context.get());
|
||||
prec->setOpenCLQueue(queue.get());
|
||||
|
||||
rb = new double[N];
|
||||
tmp = new double[N];
|
||||
#if COPY_ROW_BY_ROW
|
||||
vals_contiguous = new double[N];
|
||||
@ -508,7 +507,12 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
||||
d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
|
||||
d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
|
||||
|
||||
bool reorder = (opencl_ilu_reorder != ILUReorder::NONE);
|
||||
if (reorder) {
|
||||
rb = new double[N];
|
||||
d_toOrder = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Nb);
|
||||
}
|
||||
wcontainer->init(wellContribs, N, Nb, reorder);
|
||||
|
||||
// queue.enqueueNDRangeKernel() is a blocking/synchronous call, at least for NVIDIA
|
||||
// cl::make_kernel<> myKernel(); myKernel(args, arg1, arg2); is also blocking
|
||||
@ -544,7 +548,9 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
||||
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::finalize() {
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
delete[] rb;
|
||||
}
|
||||
delete[] tmp;
|
||||
#if COPY_ROW_BY_ROW
|
||||
delete[] vals_contiguous;
|
||||
@ -572,7 +578,9 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
||||
queue->enqueueWriteBuffer(d_Acols, CL_TRUE, 0, sizeof(int) * nnzb, rmat->colIndices);
|
||||
queue->enqueueWriteBuffer(d_Arows, CL_TRUE, 0, sizeof(int) * (Nb + 1), rmat->rowPointers);
|
||||
queue->enqueueWriteBuffer(d_b, CL_TRUE, 0, sizeof(double) * N, rb);
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
queue->enqueueWriteBuffer(d_toOrder, CL_TRUE, 0, sizeof(int) * Nb, toOrder);
|
||||
}
|
||||
queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &event);
|
||||
event.wait();
|
||||
|
||||
@ -624,9 +632,13 @@ bool openclSolverBackend<block_size>::analyse_matrix() {
|
||||
int lmem_per_work_group = work_group_size * sizeof(double);
|
||||
prec->setKernelParameters(work_group_size, total_work_items, lmem_per_work_group);
|
||||
|
||||
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
||||
rmat = mat.get();
|
||||
} else {
|
||||
toOrder = prec->getToOrder();
|
||||
fromOrder = prec->getFromOrder();
|
||||
rmat = prec->getRMat();
|
||||
}
|
||||
|
||||
if (verbosity > 2) {
|
||||
std::ostringstream out;
|
||||
@ -645,7 +657,11 @@ void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
|
||||
Timer t;
|
||||
|
||||
mat->nnzValues = vals;
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
reorderBlockedVectorByPattern<block_size>(mat->Nb, b, fromOrder, rb);
|
||||
} else {
|
||||
rb = b;
|
||||
}
|
||||
|
||||
if (verbosity > 2) {
|
||||
std::ostringstream out;
|
||||
@ -703,8 +719,12 @@ template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::get_result(double *x) {
|
||||
Timer t;
|
||||
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, rb);
|
||||
reorderBlockedVectorByPattern<block_size>(mat->Nb, rb, toOrder, x);
|
||||
} else {
|
||||
queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, x);
|
||||
}
|
||||
|
||||
if (verbosity > 2) {
|
||||
std::ostringstream out;
|
||||
|
@ -51,7 +51,7 @@ class openclSolverBackend : public BdaSolver<block_size>
|
||||
using Base::initialized;
|
||||
|
||||
private:
|
||||
double *rb = nullptr; // reordered b vector, the matrix is reordered, so b must also be
|
||||
double *rb = nullptr; // reordered b vector, if the matrix is reordered, rb is newly allocated, otherwise it just points to b
|
||||
double *vals_contiguous = nullptr; // only used if COPY_ROW_BY_ROW is true in openclSolverBackend.cpp
|
||||
|
||||
// OpenCL variables must be reusable, they are initialized in initialize()
|
||||
@ -59,7 +59,7 @@ private:
|
||||
cl::Buffer d_x, d_b, d_rb, d_r, d_rw, d_p; // vectors, used during linear solve
|
||||
cl::Buffer d_pw, d_s, d_t, d_v; // vectors, used during linear solve
|
||||
cl::Buffer d_tmp; // used as tmp GPU buffer for dot() and norm()
|
||||
cl::Buffer d_toOrder;
|
||||
cl::Buffer d_toOrder; // only used when reordering is used
|
||||
double *tmp = nullptr; // used as tmp CPU buffer for dot() and norm()
|
||||
|
||||
// shared pointers are also passed to other objects
|
||||
@ -81,7 +81,8 @@ private:
|
||||
int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings
|
||||
bool analysis_done = false;
|
||||
std::unique_ptr<BlockedMatrix<block_size> > mat = nullptr; // original matrix
|
||||
BlockedMatrix<block_size> *rmat = nullptr; // reordered matrix, used for spmv
|
||||
BlockedMatrix<block_size> *rmat = nullptr; // reordered matrix (or original if no reordering), used for spmv
|
||||
ILUReorder opencl_ilu_reorder; // reordering strategy
|
||||
|
||||
/// Divide A by B, and round up: return (int)ceil(A/B)
|
||||
/// \param[in] A dividend
|
||||
|
Loading…
Reference in New Issue
Block a user