/*
Copyright 2019 Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef BILU0_HPP
#define BILU0_HPP
#include
#include
namespace bda
{
/// This class implements a Blocked ILU0 preconditioner.
/// The decomposition is done on CPU, and reorders the rows of the matrix.
/// The preconditioner is applied to OpenCL buffers (see apply()).
///
/// Usage order suggested by the comments on the public methods: init() performs
/// the analysis, create_preconditioner() performs the ILU decomposition and
/// apply() applies the result. The OpenCL context, queue, kernels and kernel
/// parameters are handed in through the set*() methods; presumably they must be
/// set before apply() is called - confirm against the implementation.
template
class BILU0
{
private:
// --- matrix dimensions ---
int N; // number of rows of the matrix
int Nb; // number of blockrows of the matrix
int nnz; // number of nonzeroes of the matrix (scalar)
int nnzbs; // number of blocks of the matrix
// Matrices owned by this object. Judging by the names these hold the L factor,
// the U factor and the combined LU matrix - confirm in the implementation.
std::unique_ptr > Lmat = nullptr, Umat = nullptr, LUmat = nullptr;
// Shared matrix handed out (non-owning) by getRMat(); stays nullptr until assigned.
std::shared_ptr > rmat = nullptr; // only used with PAR_SIM
// Raw pointers to host-side arrays; presumably the inverted diagonal values and the
// position of each diagonal entry. Allocation and release are not visible in this
// header - check the constructor/destructor/init() for ownership.
double *invDiagVals = nullptr;
int *diagIndex = nullptr;
std::vector rowsPerColor; // color i contains rowsPerColor[i] rows, which are processed in parallel
// Reordering arrays exposed through getToOrder()/getFromOrder(); presumably the
// permutation between the original and the reordered row numbering (direction of
// each mapping is defined in the implementation - TODO confirm).
int *toOrder = nullptr, *fromOrder = nullptr;
int numColors; // presumably the number of entries in rowsPerColor - confirm
int verbosity; // verbosity level, passed to the constructor
// Level Scheduling respects the dependencies in the original matrix, and behaves like Dune and cusparse
// Graph Coloring is more aggressive and is likely to increase the number of linearizations and linear iterations to converge significantly, but can still be faster on GPU because it results in more parallelism
// Both flags default to false; presumably one is enabled depending on the
// ilu_reorder_strategy given to the constructor.
bool level_scheduling = false, graph_coloring = false;
// Bundle of OpenCL buffers. The member names mirror the host-side data above
// (L/U values, L/U column and row arrays, inverted diagonal values, rows per color),
// so these are presumably the device-side copies - confirm in the implementation.
typedef struct {
cl::Buffer Lvals, Uvals, invDiagVals;
cl::Buffer Lcols, Lrows;
cl::Buffer Ucols, Urows;
cl::Buffer rowsPerColor;
} GPU_storage;
// Pointers to the two OpenCL kernel functors, see setKernels().
// Not initialized here; the pointees are supplied from outside.
cl::make_kernel *ILU_apply1;
cl::make_kernel *ILU_apply2;
GPU_storage s; // the OpenCL buffers used by this preconditioner
// OpenCL context and command queue, see setOpenCLContext()/setOpenCLQueue().
// Not initialized here; the pointees are supplied from outside.
cl::Context *context;
cl::CommandQueue *queue;
// Kernel launch parameters, see setKernelParameters(). Note that these members are
// int while the setter takes unsigned int arguments of the same names.
int work_group_size = 0;
int total_work_items = 0;
int lmem_per_work_group = 0;
// Starts out false; presumably set once the sparsity pattern has been copied to the
// OpenCL buffers so that it is not uploaded again - confirm in the implementation.
bool pattern_uploaded = false;
public:
/// \param[in] ilu_reorder_strategy  name of the reordering strategy; presumably selects
///                                  level scheduling or graph coloring (accepted values
///                                  are defined in the implementation)
/// \param[in] verbosity             verbosity level
BILU0(std::string ilu_reorder_strategy, int verbosity);
~BILU0();
// analysis
// NOTE(review): the meaning of the bool return value is not visible in this header;
// presumably true on success.
bool init(BlockedMatrix *mat);
// ilu_decomposition
// NOTE(review): the meaning of the bool return value is not visible in this header;
// presumably true on success.
bool create_preconditioner(BlockedMatrix *mat);
// apply preconditioner, y = prec(x)
// x is the input buffer and y the output buffer, both OpenCL buffers
void apply(cl::Buffer& x, cl::Buffer& y);
// Provide the OpenCL context; passed as a raw pointer, so presumably not owned here.
void setOpenCLContext(cl::Context *context);
// Provide the OpenCL command queue; passed as a raw pointer, so presumably not owned here.
void setOpenCLQueue(cl::CommandQueue *queue);
// Provide the launch parameters for the kernels: work group size, total number of
// work items and local memory per work group.
void setKernelParameters(const unsigned int work_group_size, const unsigned int total_work_items, const unsigned int lmem_per_work_group);
// Provide the two kernel functors used to apply the preconditioner.
void setKernels(
cl::make_kernel *ILU_apply1,
cl::make_kernel *ILU_apply2
);
// Returns the toOrder array pointer (nullptr until it has been assigned).
int* getToOrder()
{
return toOrder;
}
// Returns the fromOrder array pointer (nullptr until it has been assigned).
int* getFromOrder()
{
return fromOrder;
}
// Returns a non-owning pointer to the matrix held in rmat
// (nullptr while rmat is empty).
BlockedMatrix* getRMat()
{
return rmat.get();
}
};
} // end namespace bda
#endif