/* Copyright 2019 Equinor ASA This file is part of the Open Porous Media project (OPM). OPM is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OPM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OPM. If not, see . */ #include #include #include #include #include #include #if HAVE_CUDA #include #endif #if HAVE_OPENCL #include #endif #if HAVE_FPGA #include #endif #if HAVE_AMGCL #include #endif typedef Dune::InverseOperatorResult InverseOperatorResult; namespace Opm { using bda::BdaResult; using bda::BdaSolver; using bda::SolverStatus; using bda::ILUReorder; template BdaBridge::BdaBridge(std::string accelerator_mode_, [[maybe_unused]] std::string fpga_bitstream, int linear_solver_verbosity, int maxit, double tolerance, [[maybe_unused]] unsigned int platformID, unsigned int deviceID, [[maybe_unused]] std::string opencl_ilu_reorder) : verbosity(linear_solver_verbosity), accelerator_mode(accelerator_mode_) { if (accelerator_mode.compare("cusparse") == 0) { #if HAVE_CUDA use_gpu = true; backend.reset(new bda::cusparseSolverBackend(linear_solver_verbosity, maxit, tolerance, deviceID)); #else OPM_THROW(std::logic_error, "Error cusparseSolver was chosen, but CUDA was not found by CMake"); #endif } else if (accelerator_mode.compare("opencl") == 0) { #if HAVE_OPENCL use_gpu = true; ILUReorder ilu_reorder; if (opencl_ilu_reorder == "") { ilu_reorder = bda::ILUReorder::GRAPH_COLORING; // default when not selected by user } else if (opencl_ilu_reorder == "level_scheduling") { ilu_reorder = bda::ILUReorder::LEVEL_SCHEDULING; } else if (opencl_ilu_reorder == "graph_coloring") { ilu_reorder = bda::ILUReorder::GRAPH_COLORING; } else if (opencl_ilu_reorder == "none") { ilu_reorder = bda::ILUReorder::NONE; } else { OPM_THROW(std::logic_error, "Error invalid argument for --opencl-ilu-reorder, usage: '--opencl-ilu-reorder=[level_scheduling|graph_coloring]'"); } backend.reset(new bda::openclSolverBackend(linear_solver_verbosity, maxit, tolerance, platformID, deviceID, ilu_reorder)); #else OPM_THROW(std::logic_error, "Error openclSolver was chosen, but OpenCL was not found by CMake"); #endif } else if (accelerator_mode.compare("fpga") == 0) { #if HAVE_FPGA use_fpga = true; ILUReorder ilu_reorder; if (opencl_ilu_reorder == "") { ilu_reorder = bda::ILUReorder::LEVEL_SCHEDULING; // default when not selected by user } else if (opencl_ilu_reorder == "level_scheduling") { ilu_reorder = bda::ILUReorder::LEVEL_SCHEDULING; } else if (opencl_ilu_reorder == "graph_coloring") { ilu_reorder = bda::ILUReorder::GRAPH_COLORING; } else { OPM_THROW(std::logic_error, "Error invalid argument for --opencl-ilu-reorder, usage: '--opencl-ilu-reorder=[level_scheduling|graph_coloring]'"); } backend.reset(new bda::FpgaSolverBackend(fpga_bitstream, linear_solver_verbosity, maxit, tolerance, ilu_reorder)); #else OPM_THROW(std::logic_error, "Error fpgaSolver was chosen, but FPGA was not enabled by CMake"); #endif } else if (accelerator_mode.compare("amgcl") == 0) { #if HAVE_AMGCL use_gpu = true; // should be replaced by a 'use_bridge' boolean backend.reset(new bda::amgclSolverBackend(linear_solver_verbosity, maxit, tolerance, platformID, deviceID)); #else OPM_THROW(std::logic_error, "Error amgclSolver was chosen, but amgcl was not found by CMake"); #endif } else if (accelerator_mode.compare("none") == 0) { use_gpu = false; use_fpga = false; } else { OPM_THROW(std::logic_error, "Error unknown value for parameter 'AcceleratorMode', should be passed like '--accelerator-mode=[none|cusparse|opencl|fpga|amgcl]"); } } template int checkZeroDiagonal(BridgeMatrix& mat) { static std::vector diag_indices; // contains offsets of the diagonal nnzs int numZeros = 0; const int dim = 3; // might be replaced with mat[0][0].N() or BridgeMatrix::block_type::size() const double zero_replace = 1e-15; if (diag_indices.size() == 0) { int N = mat.N(); diag_indices.reserve(N); for (typename BridgeMatrix::iterator r = mat.begin(); r != mat.end(); ++r) { auto diag = r->find(r.index()); // diag is an iterator assert(diag.index() == r.index()); for (int rr = 0; rr < dim; ++rr) { auto& val = (*diag)[rr][rr]; // reference to easily change the value if (val == 0.0) { // could be replaced by '< 1e-30' or similar val = zero_replace; ++numZeros; } } diag_indices.emplace_back(diag.offset()); } }else{ for (typename BridgeMatrix::iterator r = mat.begin(); r != mat.end(); ++r) { typename BridgeMatrix::size_type offset = diag_indices[r.index()]; auto& diag_block = r->getptr()[offset]; // diag_block is a reference to MatrixBlock, located on column r of row r for (int rr = 0; rr < dim; ++rr) { auto& val = diag_block[rr][rr]; if (val == 0.0) { // could be replaced by '< 1e-30' or similar val = zero_replace; ++numZeros; } } } } return numZeros; } // iterate sparsity pattern from Matrix and put colIndices and rowPointers in arrays // sparsity pattern should stay the same // this could be removed if Dune::BCRSMatrix features an API call that returns colIndices and rowPointers template void getSparsityPattern(BridgeMatrix& mat, std::vector &h_rows, std::vector &h_cols) { int sum_nnzs = 0; // convert colIndices and rowPointers if (h_rows.size() == 0) { h_rows.emplace_back(0); for (typename BridgeMatrix::const_iterator r = mat.begin(); r != mat.end(); ++r) { int size_row = 0; for (auto c = r->begin(); c != r->end(); ++c) { h_cols.emplace_back(c.index()); size_row++; } sum_nnzs += size_row; h_rows.emplace_back(sum_nnzs); } // h_rows and h_cols could be changed to 'unsigned int', but cusparse expects 'int' if (static_cast(h_rows[mat.N()]) != mat.nonzeroes()) { OPM_THROW(std::logic_error, "Error size of rows do not sum to number of nonzeroes in BdaBridge::getSparsityPattern()"); } } } // end getSparsityPattern() template void BdaBridge::solve_system([[maybe_unused]] BridgeMatrix* mat, [[maybe_unused]] BridgeVector& b, [[maybe_unused]] WellContributions& wellContribs, [[maybe_unused]] InverseOperatorResult& res) { if (use_gpu || use_fpga) { BdaResult result; result.converged = false; static std::vector h_rows; static std::vector h_cols; const int dim = (*mat)[0][0].N(); const int Nb = mat->N(); const int N = Nb * dim; const int nnzb = (h_rows.empty()) ? mat->nonzeroes() : h_rows.back(); const int nnz = nnzb * dim * dim; if (dim != 3) { OpmLog::warning("BdaSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program"); use_gpu = use_fpga = false; return; } if (h_rows.capacity() == 0) { h_rows.reserve(Nb+1); h_cols.reserve(nnzb); getSparsityPattern(*mat, h_rows, h_cols); } Dune::Timer t_zeros; int numZeros = checkZeroDiagonal(*mat); if (verbosity >= 2) { std::ostringstream out; out << "Checking zeros took: " << t_zeros.stop() << " s, found " << numZeros << " zeros"; OpmLog::info(out.str()); } ///////////////////////// // actually solve // assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, the chosen BdaSolver is expected to perform undefined behaviour SolverStatus status = backend->solve_system(N, nnz, dim, static_cast(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast(&(b[0][0])), wellContribs, result); switch(status) { case SolverStatus::BDA_SOLVER_SUCCESS: //OpmLog::info("BdaSolver converged"); break; case SolverStatus::BDA_SOLVER_ANALYSIS_FAILED: OpmLog::warning("BdaSolver could not analyse level information of matrix, perhaps there is still a 0.0 on the diagonal of a block on the diagonal"); break; case SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED: OpmLog::warning("BdaSolver could not create preconditioner, perhaps there is still a 0.0 on the diagonal of a block on the diagonal"); break; default: OpmLog::warning("BdaSolver returned unknown status code"); } res.iterations = result.iterations; res.reduction = result.reduction; res.converged = result.converged; res.conv_rate = result.conv_rate; res.elapsed = result.elapsed; } else { res.converged = false; } } template void BdaBridge::get_result([[maybe_unused]] BridgeVector& x) { if (use_gpu || use_fpga) { backend->get_result(static_cast(&(x[0][0]))); } } template void BdaBridge::initWellContributions([[maybe_unused]] WellContributions& wellContribs) { if(accelerator_mode.compare("opencl") == 0){ #if HAVE_OPENCL const auto openclBackend = static_cast*>(backend.get()); wellContribs.setOpenCLEnv(openclBackend->context.get(), openclBackend->queue.get()); #else OPM_THROW(std::logic_error, "Error openclSolver was chosen, but OpenCL was not found by CMake"); #endif } } // the tests use Dune::FieldMatrix, Flow uses Opm::MatrixBlock #define INSTANTIATE_BDA_FUNCTIONS(n) \ template class BdaBridge, std::allocator > >, \ Dune::BlockVector, std::allocator > >, \ n>; \ \ template class BdaBridge, std::allocator > >, \ Dune::BlockVector, std::allocator > >, \ n>; INSTANTIATE_BDA_FUNCTIONS(1); INSTANTIATE_BDA_FUNCTIONS(2); INSTANTIATE_BDA_FUNCTIONS(3); INSTANTIATE_BDA_FUNCTIONS(4); #undef INSTANTIATE_BDA_FUNCTIONS } // namespace Opm