/* Copyright 2022 Equinor ASA This file is part of the Open Porous Media project (OPM). OPM is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OPM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OPM. If not, see . */ #ifndef BISAI_HPP #define BISAI_HPP #include #include #include #include namespace Opm::Accelerator { template class BlockedMatrix; /// This class implements a Blocked version of the Incomplete Sparse Approximate Inverse (ISAI) preconditioner. /// Inspired by the paper "Incomplete Sparse Approximate Inverses for Parallel Preconditioning" by Anzt et. al. template class BISAI : public Preconditioner { using Base = Preconditioner; using Base::N; using Base::Nb; using Base::nnz; using Base::nnzb; using Base::verbosity; using Base::context; using Base::queue; using Base::events; using Base::err; private: std::once_flag initialize; std::vector colPointers; std::vector rowIndices; std::vector diagIndex; std::vector csrToCscOffsetMap; std::vector invLvals; std::vector invUvals; cl::Buffer d_colPointers; cl::Buffer d_rowIndices; cl::Buffer d_csrToCscOffsetMap; cl::Buffer d_diagIndex; cl::Buffer d_LUvals; cl::Buffer d_invDiagVals; cl::Buffer d_invLvals; cl::Buffer d_invUvals; cl::Buffer d_invL_x; bool opencl_ilu_parallel; std::unique_ptr> bilu0; /// Struct that holds the structure of the small subsystems for each column struct subsystemStructure { /// This vector holds the cumulative sum for the number of non-zero blocks for each subsystem. /// Works similarly to row and column pointers for the CSR and CSC matrix representations. std::vector subsystemPointers; /// This vector holds the indices of the non-zero blocks for the target subsystem. These blocks are /// the ones that are present in the shadow set of the non-zero blocks of column j of the main matrix, /// as described in section 2.3 of the paper. The amount of non-zero blocks for j-th subsystem is /// given by subsystemPointers[j+1] - subsystemPointers[j]. std::vector nzIndices; /// This vector holds the indices of the already known values of the right hand sides of the subsystems. /// Its purpose is to aid in the parallel solution of the subsystems. std::vector knownRhsIndices; /// This vector holds the indices of the unknown values of the right hand sides of the subsystems. std::vector unknownRhsIndices; }; /// GPU version of subsystemStructure struct subsystemStructureGPU { cl::Buffer subsystemPointers; cl::Buffer nzIndices; cl::Buffer knownRhsIndices; cl::Buffer unknownRhsIndices; } ; subsystemStructure lower, upper; subsystemStructureGPU d_lower, d_upper; /// An approximate inverse for L is computed by solving a small lower triangular system for each column of the main matrix. /// This function finds the structure of each of these subsystems and fills the 'lower' struct. void buildLowerSubsystemsStructures(); /// An approximate inverse for U is computed by solving a small upper triangular system for each column of the main matrix. /// This function finds the structure of each of theses subsystems and fills the 'upper' struct. void buildUpperSubsystemsStructures(); public: BISAI(bool opencl_ilu_parallel, int verbosity); // set own Opencl variables, but also that of the bilu0 preconditioner void setOpencl(std::shared_ptr& context, std::shared_ptr& queue) override; // analysis, extract parallelism bool analyze_matrix(BlockedMatrix* mat) override; bool analyze_matrix(BlockedMatrix* mat, BlockedMatrix* jacMat) override; // ilu_decomposition bool create_preconditioner(BlockedMatrix* mat) override; bool create_preconditioner(BlockedMatrix* mat, BlockedMatrix* jacMat) override; // apply preconditioner, x = prec(y) void apply(const cl::Buffer& y, cl::Buffer& x) override; }; /// Similar function to csrPatternToCsc. It gives an offset map from CSR to CSC instead of the full CSR to CSC conversion. /// The map works as follows: if an element 'e' of the matrix is in the i-th position in the CSR representation, it will be /// in the csrToCscOffsetMap[i]-th position in the CSC representation. std::vector buildCsrToCscOffsetMap(std::vector colPointers, std::vector rowIndices); } // namespace Opm::Accelerator #endif