/*
  Copyright 2019 Equinor ASA

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef BILU0_HPP
#define BILU0_HPP

// NOTE(review): the targets of the two #include directives below are missing in
// this copy of the file (angle-bracketed text appears to have been stripped
// during extraction). Restore them from the upstream header. The declarations
// in this file rely on BlockedMatrix, the OpenCL C++ bindings (cl::Buffer,
// cl::Context, cl::CommandQueue, cl::make_kernel), std::unique_ptr,
// std::shared_ptr, std::vector and std::string.
#include
#include

namespace bda
{

/// This class implements a Blocked ILU0 preconditioner.
/// The decomposition is done on CPU, and reorders the rows of the matrix.
///
/// Intended call sequence, as far as the declarations show: construct, hand over
/// the OpenCL context/queue/kernels through the set*() members, run init()
/// (analysis), run create_preconditioner() (decomposition), then apply().
/// NOTE(review): the ordering requirements are inferred from the inline comments;
/// confirm against the implementation file.
///
/// NOTE(review): the template parameter list of this class and every template
/// argument list used inside it (std::unique_ptr, std::shared_ptr, std::vector,
/// BlockedMatrix, cl::make_kernel) are missing in this copy. They must be
/// restored from the upstream header before this file can compile.
template class BILU0
{

private:
    // NOTE(review): N, Nb, nnz, nnzbs, numColors and verbosity have no in-class
    // initializer; they are indeterminate unless the constructor or init() sets them.
    int N;       // number of rows of the matrix
    int Nb;      // number of blockrows of the matrix
    int nnz;     // number of nonzeroes of the matrix (scalar)
    int nnzbs;   // number of blocks of the matrix

    // Host-side matrices owned by this object.
    // Presumably the L factor, the U factor and the combined LU decomposition -- confirm.
    std::unique_ptr > Lmat = nullptr, Umat = nullptr, LUmat = nullptr;
    // Matrix exposed to callers through getRMat().
    std::shared_ptr > rmat = nullptr; // only used with PAR_SIM

    // NOTE(review): raw pointers; whether they are allocated and freed by this
    // class (constructor/init()/destructor) is not visible in this header.
    double *invDiagVals = nullptr;   // presumably the inverted diagonal blocks -- confirm
    int *diagIndex = nullptr;        // presumably the position of each diagonal block -- confirm
    std::vector rowsPerColor;  // color i contains rowsPerColor[i] rows, which are processed in parallel
    int *toOrder = nullptr, *fromOrder = nullptr;   // row reordering maps, exposed via getToOrder()/getFromOrder()
    int numColors;   // presumably the number of entries in rowsPerColor -- confirm
    int verbosity;   // presumably set from the constructor argument of the same name -- confirm

    // Level Scheduling respects the dependencies in the original matrix, and behaves like Dune and cusparse
    // Graph Coloring is more aggressive and is likely to increase the number of linearizations and linear iterations to converge significantly, but can still be faster on GPU because it results in more parallelism
    // NOTE(review): presumably exactly one of these flags is selected from the
    // ilu_reorder_strategy constructor argument -- confirm.
    bool level_scheduling = false, graph_coloring = false;

    // Bundle of OpenCL buffers. The member names mirror the host-side data
    // (L/U values, column and row arrays, inverted diagonal, rows per color).
    typedef struct {
        cl::Buffer Lvals, Uvals, invDiagVals;
        cl::Buffer Lcols, Lrows;
        cl::Buffer Ucols, Urows;
        cl::Buffer rowsPerColor;
    } GPU_storage;

    // Pointers to the two kernel functors used by the preconditioner.
    // NOTE(review): the cl::make_kernel argument lists are missing in this copy;
    // the pointers have no initializer and are presumably assigned by setKernels().
    cl::make_kernel *ILU_apply1;
    cl::make_kernel *ILU_apply2;

    GPU_storage s;   // the device buffers of this preconditioner

    // OpenCL handles; no initializer here, presumably assigned by
    // setOpenCLContext()/setOpenCLQueue() and not owned by this class -- confirm.
    cl::Context *context;
    cl::CommandQueue *queue;

    // Kernel launch configuration; zero until setKernelParameters() is called (presumably).
    int work_group_size = 0;
    int total_work_items = 0;
    int lmem_per_work_group = 0;

    // Presumably records whether the sparsity pattern has already been copied to the device -- confirm.
    bool pattern_uploaded = false;

public:

    /// \param ilu_reorder_strategy  name of the reordering strategy; the accepted
    ///                              values are defined in the implementation file
    /// \param verbosity             verbosity level
    BILU0(std::string ilu_reorder_strategy, int verbosity);

    ~BILU0();

    // analysis
    // NOTE(review): the meaning of the bool return value is not visible here; presumably true on success.
    bool init(BlockedMatrix *mat);

    // ilu_decomposition
    // NOTE(review): the meaning of the bool return value is not visible here; presumably true on success.
    bool create_preconditioner(BlockedMatrix *mat);

    // apply preconditioner, y = prec(x)
    void apply(cl::Buffer& x, cl::Buffer& y);

    // Setters for the OpenCL objects and launch parameters this class works with.
    // Pointers are passed raw; the caller presumably keeps ownership -- confirm.
    void setOpenCLContext(cl::Context *context);
    void setOpenCLQueue(cl::CommandQueue *queue);
    void setKernelParameters(const unsigned int work_group_size, const unsigned int total_work_items, const unsigned int lmem_per_work_group);
    void setKernels(
        cl::make_kernel *ILU_apply1,
        cl::make_kernel *ILU_apply2
    );

    // Return the reordering arrays as raw pointers (nullptr until they have been assigned).
    int* getToOrder()
    {
        return toOrder;
    }

    int* getFromOrder()
    {
        return fromOrder;
    }

    // Return a non-owning pointer to rmat (nullptr while rmat is empty).
    BlockedMatrix* getRMat()
    {
        return rmat.get();
    }

};

} // end namespace bda

#endif