mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Removed GRAPH_COLORING opencl reordering option.
Simplified and renamed opencl_ilu_reorder parameter.
This commit is contained in:
parent
e327142088
commit
42410d408f
@ -130,7 +130,7 @@ struct OpenclPlatformId {
|
|||||||
using type = UndefinedProperty;
|
using type = UndefinedProperty;
|
||||||
};
|
};
|
||||||
template<class TypeTag, class MyTypeTag>
|
template<class TypeTag, class MyTypeTag>
|
||||||
struct OpenclIluReorder {
|
struct OpenclIluParallel {
|
||||||
using type = UndefinedProperty;
|
using type = UndefinedProperty;
|
||||||
};
|
};
|
||||||
template<class TypeTag, class MyTypeTag>
|
template<class TypeTag, class MyTypeTag>
|
||||||
@ -232,8 +232,8 @@ struct OpenclPlatformId<TypeTag, TTag::FlowIstlSolverParams> {
|
|||||||
static constexpr int value = 0;
|
static constexpr int value = 0;
|
||||||
};
|
};
|
||||||
template<class TypeTag>
|
template<class TypeTag>
|
||||||
struct OpenclIluReorder<TypeTag, TTag::FlowIstlSolverParams> {
|
struct OpenclIluParallel<TypeTag, TTag::FlowIstlSolverParams> {
|
||||||
static constexpr auto value = ""; // note: default value is chosen depending on the solver used
|
static constexpr bool value = true; // note: false should only be used in debug
|
||||||
};
|
};
|
||||||
template<class TypeTag>
|
template<class TypeTag>
|
||||||
struct FpgaBitstream<TypeTag, TTag::FlowIstlSolverParams> {
|
struct FpgaBitstream<TypeTag, TTag::FlowIstlSolverParams> {
|
||||||
@ -269,7 +269,7 @@ namespace Opm
|
|||||||
int cpr_max_ell_iter_;
|
int cpr_max_ell_iter_;
|
||||||
int cpr_reuse_setup_;
|
int cpr_reuse_setup_;
|
||||||
int cpr_reuse_interval_;
|
int cpr_reuse_interval_;
|
||||||
std::string opencl_ilu_reorder_;
|
bool opencl_ilu_parallel_;
|
||||||
std::string fpga_bitstream_;
|
std::string fpga_bitstream_;
|
||||||
|
|
||||||
template <class TypeTag>
|
template <class TypeTag>
|
||||||
@ -296,7 +296,7 @@ namespace Opm
|
|||||||
accelerator_mode_ = EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode);
|
accelerator_mode_ = EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode);
|
||||||
bda_device_id_ = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId);
|
bda_device_id_ = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId);
|
||||||
opencl_platform_id_ = EWOMS_GET_PARAM(TypeTag, int, OpenclPlatformId);
|
opencl_platform_id_ = EWOMS_GET_PARAM(TypeTag, int, OpenclPlatformId);
|
||||||
opencl_ilu_reorder_ = EWOMS_GET_PARAM(TypeTag, std::string, OpenclIluReorder);
|
opencl_ilu_parallel_ = EWOMS_GET_PARAM(TypeTag, bool, OpenclIluParallel);
|
||||||
fpga_bitstream_ = EWOMS_GET_PARAM(TypeTag, std::string, FpgaBitstream);
|
fpga_bitstream_ = EWOMS_GET_PARAM(TypeTag, std::string, FpgaBitstream);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -323,7 +323,7 @@ namespace Opm
|
|||||||
EWOMS_REGISTER_PARAM(TypeTag, std::string, AcceleratorMode, "Use GPU (cusparseSolver or openclSolver) or FPGA (fpgaSolver) as the linear solver, usage: '--accelerator-mode=[none|cusparse|opencl|fpga|amgcl]'");
|
EWOMS_REGISTER_PARAM(TypeTag, std::string, AcceleratorMode, "Use GPU (cusparseSolver or openclSolver) or FPGA (fpgaSolver) as the linear solver, usage: '--accelerator-mode=[none|cusparse|opencl|fpga|amgcl]'");
|
||||||
EWOMS_REGISTER_PARAM(TypeTag, int, BdaDeviceId, "Choose device ID for cusparseSolver or openclSolver, use 'nvidia-smi' or 'clinfo' to determine valid IDs");
|
EWOMS_REGISTER_PARAM(TypeTag, int, BdaDeviceId, "Choose device ID for cusparseSolver or openclSolver, use 'nvidia-smi' or 'clinfo' to determine valid IDs");
|
||||||
EWOMS_REGISTER_PARAM(TypeTag, int, OpenclPlatformId, "Choose platform ID for openclSolver, use 'clinfo' to determine valid platform IDs");
|
EWOMS_REGISTER_PARAM(TypeTag, int, OpenclPlatformId, "Choose platform ID for openclSolver, use 'clinfo' to determine valid platform IDs");
|
||||||
EWOMS_REGISTER_PARAM(TypeTag, std::string, OpenclIluReorder, "Choose the reordering strategy for ILU for openclSolver and fpgaSolver, usage: '--opencl-ilu-reorder=[level_scheduling|graph_coloring], level_scheduling behaves like Dune and cusparse, graph_coloring is more aggressive and likely to be faster, but is random-based and generally increases the number of linear solves and linear iterations significantly.");
|
EWOMS_REGISTER_PARAM(TypeTag, bool, OpenclIluParallel, "Parallelize ILU decomposition and application on GPU. Default: true");
|
||||||
EWOMS_REGISTER_PARAM(TypeTag, std::string, FpgaBitstream, "Specify the bitstream file for fpgaSolver (including path), usage: '--fpga-bitstream=<filename>'");
|
EWOMS_REGISTER_PARAM(TypeTag, std::string, FpgaBitstream, "Specify the bitstream file for fpgaSolver (including path), usage: '--fpga-bitstream=<filename>'");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -347,7 +347,7 @@ namespace Opm
|
|||||||
accelerator_mode_ = "none";
|
accelerator_mode_ = "none";
|
||||||
bda_device_id_ = 0;
|
bda_device_id_ = 0;
|
||||||
opencl_platform_id_ = 0;
|
opencl_platform_id_ = 0;
|
||||||
opencl_ilu_reorder_ = ""; // note: the default value is chosen depending on the solver used
|
opencl_ilu_parallel_ = true;
|
||||||
fpga_bitstream_ = "";
|
fpga_bitstream_ = "";
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -181,12 +181,12 @@ BdaSolverInfo(const std::string& accelerator_mode,
|
|||||||
const double tolerance,
|
const double tolerance,
|
||||||
const int platformID,
|
const int platformID,
|
||||||
const int deviceID,
|
const int deviceID,
|
||||||
const std::string& opencl_ilu_reorder,
|
const bool opencl_ilu_parallel,
|
||||||
const std::string& linsolver)
|
const std::string& linsolver)
|
||||||
: bridge_(std::make_unique<Bridge>(accelerator_mode, fpga_bitstream,
|
: bridge_(std::make_unique<Bridge>(accelerator_mode, fpga_bitstream,
|
||||||
linear_solver_verbosity, maxit,
|
linear_solver_verbosity, maxit,
|
||||||
tolerance, platformID, deviceID,
|
tolerance, platformID, deviceID,
|
||||||
opencl_ilu_reorder, linsolver))
|
opencl_ilu_parallel, linsolver))
|
||||||
, accelerator_mode_(accelerator_mode)
|
, accelerator_mode_(accelerator_mode)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
@ -127,7 +127,7 @@ struct BdaSolverInfo
|
|||||||
const double tolerance,
|
const double tolerance,
|
||||||
const int platformID,
|
const int platformID,
|
||||||
const int deviceID,
|
const int deviceID,
|
||||||
const std::string& opencl_ilu_reorder,
|
const bool opencl_ilu_parallel,
|
||||||
const std::string& linsolver);
|
const std::string& linsolver);
|
||||||
|
|
||||||
~BdaSolverInfo();
|
~BdaSolverInfo();
|
||||||
@ -259,7 +259,7 @@ std::unique_ptr<Matrix> blockJacobiAdjacency(const Grid& grid,
|
|||||||
const int deviceID = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId);
|
const int deviceID = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId);
|
||||||
const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter);
|
const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter);
|
||||||
const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
|
const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
|
||||||
const std::string opencl_ilu_reorder = EWOMS_GET_PARAM(TypeTag, std::string, OpenclIluReorder);
|
const bool opencl_ilu_parallel = EWOMS_GET_PARAM(TypeTag, bool, OpenclIluParallel);
|
||||||
const int linear_solver_verbosity = parameters_.linear_solver_verbosity_;
|
const int linear_solver_verbosity = parameters_.linear_solver_verbosity_;
|
||||||
std::string fpga_bitstream = EWOMS_GET_PARAM(TypeTag, std::string, FpgaBitstream);
|
std::string fpga_bitstream = EWOMS_GET_PARAM(TypeTag, std::string, FpgaBitstream);
|
||||||
std::string linsolver = EWOMS_GET_PARAM(TypeTag, std::string, LinearSolver);
|
std::string linsolver = EWOMS_GET_PARAM(TypeTag, std::string, LinearSolver);
|
||||||
@ -270,7 +270,7 @@ std::unique_ptr<Matrix> blockJacobiAdjacency(const Grid& grid,
|
|||||||
tolerance,
|
tolerance,
|
||||||
platformID,
|
platformID,
|
||||||
deviceID,
|
deviceID,
|
||||||
opencl_ilu_reorder,
|
opencl_ilu_parallel,
|
||||||
linsolver);
|
linsolver);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
@ -53,7 +53,6 @@ namespace Opm
|
|||||||
using Opm::Accelerator::BdaResult;
|
using Opm::Accelerator::BdaResult;
|
||||||
using Opm::Accelerator::BdaSolver;
|
using Opm::Accelerator::BdaSolver;
|
||||||
using Opm::Accelerator::SolverStatus;
|
using Opm::Accelerator::SolverStatus;
|
||||||
using Opm::Accelerator::ILUReorder;
|
|
||||||
|
|
||||||
template <class BridgeMatrix, class BridgeVector, int block_size>
|
template <class BridgeMatrix, class BridgeVector, int block_size>
|
||||||
BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string accelerator_mode_,
|
BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string accelerator_mode_,
|
||||||
@ -62,7 +61,7 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
|
|||||||
double tolerance,
|
double tolerance,
|
||||||
[[maybe_unused]] unsigned int platformID,
|
[[maybe_unused]] unsigned int platformID,
|
||||||
unsigned int deviceID,
|
unsigned int deviceID,
|
||||||
[[maybe_unused]] std::string opencl_ilu_reorder,
|
[[maybe_unused]] bool opencl_ilu_parallel,
|
||||||
[[maybe_unused]] std::string linsolver)
|
[[maybe_unused]] std::string linsolver)
|
||||||
: verbosity(linear_solver_verbosity), accelerator_mode(accelerator_mode_)
|
: verbosity(linear_solver_verbosity), accelerator_mode(accelerator_mode_)
|
||||||
{
|
{
|
||||||
@ -76,36 +75,14 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
|
|||||||
} else if (accelerator_mode.compare("opencl") == 0) {
|
} else if (accelerator_mode.compare("opencl") == 0) {
|
||||||
#if HAVE_OPENCL
|
#if HAVE_OPENCL
|
||||||
use_gpu = true;
|
use_gpu = true;
|
||||||
ILUReorder ilu_reorder;
|
backend.reset(new Opm::Accelerator::openclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_parallel, linsolver));
|
||||||
if (opencl_ilu_reorder == "") {
|
|
||||||
ilu_reorder = Opm::Accelerator::ILUReorder::GRAPH_COLORING; // default when not selected by user
|
|
||||||
} else if (opencl_ilu_reorder == "level_scheduling") {
|
|
||||||
ilu_reorder = Opm::Accelerator::ILUReorder::LEVEL_SCHEDULING;
|
|
||||||
} else if (opencl_ilu_reorder == "graph_coloring") {
|
|
||||||
ilu_reorder = Opm::Accelerator::ILUReorder::GRAPH_COLORING;
|
|
||||||
} else if (opencl_ilu_reorder == "none") {
|
|
||||||
ilu_reorder = Opm::Accelerator::ILUReorder::NONE;
|
|
||||||
} else {
|
|
||||||
OPM_THROW(std::logic_error, "Error invalid argument for --opencl-ilu-reorder, usage: '--opencl-ilu-reorder=[level_scheduling|graph_coloring]'");
|
|
||||||
}
|
|
||||||
backend.reset(new Opm::Accelerator::openclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID, ilu_reorder, linsolver));
|
|
||||||
#else
|
#else
|
||||||
OPM_THROW(std::logic_error, "Error openclSolver was chosen, but OpenCL was not found by CMake");
|
OPM_THROW(std::logic_error, "Error openclSolver was chosen, but OpenCL was not found by CMake");
|
||||||
#endif
|
#endif
|
||||||
} else if (accelerator_mode.compare("fpga") == 0) {
|
} else if (accelerator_mode.compare("fpga") == 0) {
|
||||||
#if HAVE_FPGA
|
#if HAVE_FPGA
|
||||||
use_fpga = true;
|
use_fpga = true;
|
||||||
ILUReorder ilu_reorder;
|
backend.reset(new Opm::Accelerator::FpgaSolverBackend<block_size>(fpga_bitstream, linear_solver_verbosity, maxit, tolerance, opencl_ilu_parallel));
|
||||||
if (opencl_ilu_reorder == "") {
|
|
||||||
ilu_reorder = Opm::Accelerator::ILUReorder::LEVEL_SCHEDULING; // default when not selected by user
|
|
||||||
} else if (opencl_ilu_reorder == "level_scheduling") {
|
|
||||||
ilu_reorder = Opm::Accelerator::ILUReorder::LEVEL_SCHEDULING;
|
|
||||||
} else if (opencl_ilu_reorder == "graph_coloring") {
|
|
||||||
ilu_reorder = Opm::Accelerator::ILUReorder::GRAPH_COLORING;
|
|
||||||
} else {
|
|
||||||
OPM_THROW(std::logic_error, "Error invalid argument for --opencl-ilu-reorder, usage: '--opencl-ilu-reorder=[level_scheduling|graph_coloring]'");
|
|
||||||
}
|
|
||||||
backend.reset(new Opm::Accelerator::FpgaSolverBackend<block_size>(fpga_bitstream, linear_solver_verbosity, maxit, tolerance, ilu_reorder));
|
|
||||||
#else
|
#else
|
||||||
OPM_THROW(std::logic_error, "Error fpgaSolver was chosen, but FPGA was not enabled by CMake");
|
OPM_THROW(std::logic_error, "Error fpgaSolver was chosen, but FPGA was not enabled by CMake");
|
||||||
#endif
|
#endif
|
||||||
|
@ -23,8 +23,6 @@
|
|||||||
#include "dune/istl/solver.hh" // for struct InverseOperatorResult
|
#include "dune/istl/solver.hh" // for struct InverseOperatorResult
|
||||||
|
|
||||||
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
|
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
|
||||||
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
|
|
||||||
#include <opm/simulators/linalg/bda/ILUReorder.hpp>
|
|
||||||
|
|
||||||
namespace Opm
|
namespace Opm
|
||||||
{
|
{
|
||||||
@ -32,7 +30,6 @@ namespace Opm
|
|||||||
class WellContributions;
|
class WellContributions;
|
||||||
|
|
||||||
typedef Dune::InverseOperatorResult InverseOperatorResult;
|
typedef Dune::InverseOperatorResult InverseOperatorResult;
|
||||||
using Opm::Accelerator::ILUReorder;
|
|
||||||
|
|
||||||
/// BdaBridge acts as an interface between opm-simulators and the BdaSolvers
|
/// BdaBridge acts as an interface between opm-simulators and the BdaSolvers
|
||||||
template <class BridgeMatrix, class BridgeVector, int block_size>
|
template <class BridgeMatrix, class BridgeVector, int block_size>
|
||||||
@ -60,10 +57,10 @@ public:
|
|||||||
/// \param[in] tolerance required relative tolerance for BdaSolver
|
/// \param[in] tolerance required relative tolerance for BdaSolver
|
||||||
/// \param[in] platformID the OpenCL platform ID to be used
|
/// \param[in] platformID the OpenCL platform ID to be used
|
||||||
/// \param[in] deviceID the device ID to be used by the cusparse- and openclSolvers, too high values could cause runtime errors
|
/// \param[in] deviceID the device ID to be used by the cusparse- and openclSolvers, too high values could cause runtime errors
|
||||||
/// \param[in] opencl_ilu_reorder select either level_scheduling or graph_coloring, see ILUReorder.hpp for explanation
|
/// \param[in] opencl_ilu_parallel whether to parallelize the ILU decomposition and application in OpenCL
|
||||||
/// \param[in] linsolver copy of cmdline argument --linear-solver
|
/// \param[in] linsolver indicating the preconditioner, equal to the --linear-solver cmdline argument
|
||||||
BdaBridge(std::string accelerator_mode, std::string fpga_bitstream, int linear_solver_verbosity, int maxit, double tolerance,
|
BdaBridge(std::string accelerator_mode, std::string fpga_bitstream, int linear_solver_verbosity, int maxit, double tolerance,
|
||||||
unsigned int platformID, unsigned int deviceID, std::string opencl_ilu_reorder, std::string linsolver);
|
unsigned int platformID, unsigned int deviceID, bool opencl_ilu_parallel, std::string linsolver);
|
||||||
|
|
||||||
|
|
||||||
/// Solve linear system, A*x = b
|
/// Solve linear system, A*x = b
|
||||||
|
@ -1,39 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright 2020 Equinor ASA
|
|
||||||
|
|
||||||
This file is part of the Open Porous Media project (OPM).
|
|
||||||
|
|
||||||
OPM is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
OPM is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef ILUREORDER_HEADER_INCLUDED
|
|
||||||
#define ILUREORDER_HEADER_INCLUDED
|
|
||||||
|
|
||||||
namespace Opm
|
|
||||||
{
|
|
||||||
namespace Accelerator
|
|
||||||
{
|
|
||||||
// Level Scheduling respects the dependencies in the original matrix, and behaves like Dune and cusparse
|
|
||||||
// Graph Coloring is more aggressive and is likely to significantly increase the number of linearizations and linear iterations needed to converge, but can still be faster on GPU because it results in more parallelism
|
|
||||||
|
|
||||||
enum class ILUReorder {
|
|
||||||
LEVEL_SCHEDULING,
|
|
||||||
GRAPH_COLORING,
|
|
||||||
NONE
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace Accelerator
|
|
||||||
} // namespace Opm
|
|
||||||
|
|
||||||
#endif
|
|
@ -40,8 +40,8 @@ using Opm::OpmLog;
|
|||||||
using Dune::Timer;
|
using Dune::Timer;
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
BILU0<block_size>::BILU0(ILUReorder opencl_ilu_reorder_, int verbosity_) :
|
BILU0<block_size>::BILU0(bool opencl_ilu_parallel_, int verbosity_) :
|
||||||
Preconditioner<block_size>(verbosity_), opencl_ilu_reorder(opencl_ilu_reorder_)
|
Preconditioner<block_size>(verbosity_), opencl_ilu_parallel(opencl_ilu_parallel_)
|
||||||
{
|
{
|
||||||
#if CHOW_PATEL
|
#if CHOW_PATEL
|
||||||
chowPatelIlu.setVerbosity(verbosity);
|
chowPatelIlu.setVerbosity(verbosity);
|
||||||
@ -71,9 +71,7 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat
|
|||||||
|
|
||||||
auto *matToDecompose = jacMat ? jacMat : mat; // decompose jacMat if valid, otherwise decompose mat
|
auto *matToDecompose = jacMat ? jacMat : mat; // decompose jacMat if valid, otherwise decompose mat
|
||||||
|
|
||||||
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
if (opencl_ilu_parallel) {
|
||||||
LUmat = std::make_unique<BlockedMatrix>(*mat);
|
|
||||||
} else {
|
|
||||||
toOrder.resize(Nb);
|
toOrder.resize(Nb);
|
||||||
fromOrder.resize(Nb);
|
fromOrder.resize(Nb);
|
||||||
CSCRowIndices.resize(matToDecompose->nnzbs);
|
CSCRowIndices.resize(matToDecompose->nnzbs);
|
||||||
@ -88,28 +86,26 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat
|
|||||||
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s";
|
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
LUmat = std::make_unique<BlockedMatrix>(*matToDecompose);
|
||||||
}
|
}
|
||||||
|
|
||||||
Timer t_analysis;
|
Timer t_analysis;
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) {
|
if (opencl_ilu_parallel) {
|
||||||
out << "BILU0 reordering strategy: " << "level_scheduling\n";
|
out << "opencl_ilu_parallel: true (level_scheduling)\n";
|
||||||
findLevelScheduling(matToDecompose->colIndices, matToDecompose->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
|
findLevelScheduling(matToDecompose->colIndices, matToDecompose->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
|
||||||
} else if (opencl_ilu_reorder == ILUReorder::GRAPH_COLORING) {
|
} else {
|
||||||
out << "BILU0 reordering strategy: " << "graph_coloring\n";
|
out << "opencl_ilu_parallel: false\n";
|
||||||
findGraphColoring<block_size>(matToDecompose->colIndices, matToDecompose->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), Nb, Nb, Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
|
|
||||||
} else if (opencl_ilu_reorder == ILUReorder::NONE) {
|
|
||||||
out << "BILU0 reordering strategy: none\n";
|
|
||||||
// numColors = 1;
|
// numColors = 1;
|
||||||
// rowsPerColor.emplace_back(Nb);
|
// rowsPerColor.emplace_back(Nb);
|
||||||
numColors = Nb;
|
numColors = Nb;
|
||||||
for(int i = 0; i < Nb; ++i){
|
for(int i = 0; i < Nb; ++i){
|
||||||
rowsPerColor.emplace_back(1);
|
rowsPerColor.emplace_back(1);
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
OPM_THROW(std::logic_error, "Error ilu reordering strategy not set correctly\n");
|
|
||||||
}
|
}
|
||||||
if(verbosity >= 1){
|
|
||||||
|
if (verbosity >= 1) {
|
||||||
out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors\n";
|
out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -152,7 +148,14 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat
|
|||||||
}
|
}
|
||||||
|
|
||||||
err |= queue->enqueueWriteBuffer(s.rowsPerColor, CL_FALSE, 0, (numColors + 1) * sizeof(int), rowsPerColorPrefix.data(), nullptr, &events[1]);
|
err |= queue->enqueueWriteBuffer(s.rowsPerColor, CL_FALSE, 0, (numColors + 1) * sizeof(int), rowsPerColorPrefix.data(), nullptr, &events[1]);
|
||||||
err |= queue->enqueueWriteBuffer(s.rowIndices, CL_FALSE, 0, Nb * sizeof(unsigned), fromOrder.data(), nullptr, &events[2]);
|
|
||||||
|
if (opencl_ilu_parallel) {
|
||||||
|
err |= queue->enqueueWriteBuffer(s.rowIndices, CL_FALSE, 0, Nb * sizeof(unsigned), fromOrder.data(), nullptr, &events[2]);
|
||||||
|
} else {
|
||||||
|
// rowsPerColorPrefix is misused here
|
||||||
|
// s.rowIndices[i] == i must hold
|
||||||
|
err |= queue->enqueueWriteBuffer(s.rowIndices, CL_FALSE, 0, Nb * sizeof(unsigned), rowsPerColorPrefix.data(), nullptr, &events[2]);
|
||||||
|
}
|
||||||
|
|
||||||
cl::WaitForEvents(events);
|
cl::WaitForEvents(events);
|
||||||
events.clear();
|
events.clear();
|
||||||
|
@ -23,7 +23,6 @@
|
|||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
|
||||||
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
|
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
|
||||||
#include <opm/simulators/linalg/bda/ILUReorder.hpp>
|
|
||||||
|
|
||||||
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
|
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
|
||||||
#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
|
#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
|
||||||
@ -65,7 +64,7 @@ private:
|
|||||||
int numColors;
|
int numColors;
|
||||||
std::once_flag pattern_uploaded;
|
std::once_flag pattern_uploaded;
|
||||||
|
|
||||||
ILUReorder opencl_ilu_reorder;
|
bool opencl_ilu_parallel;
|
||||||
|
|
||||||
std::vector<int> reordermappingNonzeroes; // maps nonzero blocks to new location in reordered matrix
|
std::vector<int> reordermappingNonzeroes; // maps nonzero blocks to new location in reordered matrix
|
||||||
std::vector<int> jacReordermappingNonzeroes; // same but for jacMatrix
|
std::vector<int> jacReordermappingNonzeroes; // same but for jacMatrix
|
||||||
@ -91,7 +90,7 @@ private:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
BILU0(ILUReorder opencl_ilu_reorder, int verbosity);
|
BILU0(bool opencl_ilu_parallel, int verbosity);
|
||||||
|
|
||||||
// analysis, find reordering if specified
|
// analysis, find reordering if specified
|
||||||
bool analyze_matrix(BlockedMatrix *mat) override;
|
bool analyze_matrix(BlockedMatrix *mat) override;
|
||||||
|
@ -42,13 +42,13 @@ using Opm::OpmLog;
|
|||||||
using Dune::Timer;
|
using Dune::Timer;
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
BISAI<block_size>::BISAI(ILUReorder opencl_ilu_reorder_, int verbosity_) :
|
BISAI<block_size>::BISAI(bool opencl_ilu_parallel_, int verbosity_) :
|
||||||
Preconditioner<block_size>(verbosity_)
|
Preconditioner<block_size>(verbosity_)
|
||||||
{
|
{
|
||||||
#if CHOW_PATEL
|
#if CHOW_PATEL
|
||||||
OPM_THROW(std::logic_error, "Error --linear-solver=isai cannot be used if ChowPatelIlu is used, probably defined by CMake\n");
|
OPM_THROW(std::logic_error, "Error --linear-solver=isai cannot be used if ChowPatelIlu is used, probably defined by CMake\n");
|
||||||
#endif
|
#endif
|
||||||
bilu0 = std::make_unique<BILU0<block_size> >(opencl_ilu_reorder_, verbosity_);
|
bilu0 = std::make_unique<BILU0<block_size> >(opencl_ilu_parallel_, verbosity_);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
|
@ -70,7 +70,7 @@ private:
|
|||||||
cl::Buffer d_invUvals;
|
cl::Buffer d_invUvals;
|
||||||
cl::Buffer d_invL_x;
|
cl::Buffer d_invL_x;
|
||||||
|
|
||||||
ILUReorder opencl_ilu_reorder;
|
bool opencl_ilu_parallel;
|
||||||
std::unique_ptr<BILU0<block_size> > bilu0;
|
std::unique_ptr<BILU0<block_size> > bilu0;
|
||||||
|
|
||||||
/// Struct that holds the structure of the small subsystems for each column
|
/// Struct that holds the structure of the small subsystems for each column
|
||||||
@ -110,7 +110,7 @@ private:
|
|||||||
void buildUpperSubsystemsStructures();
|
void buildUpperSubsystemsStructures();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
BISAI(ILUReorder opencl_ilu_reorder, int verbosity);
|
BISAI(bool opencl_ilu_parallel, int verbosity);
|
||||||
|
|
||||||
// set own Opencl variables, but also that of the bilu0 preconditioner
|
// set own Opencl variables, but also that of the bilu0 preconditioner
|
||||||
void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
|
void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue) override;
|
||||||
|
@ -44,10 +44,10 @@ using Opm::OpmLog;
|
|||||||
using Dune::Timer;
|
using Dune::Timer;
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
CPR<block_size>::CPR(int verbosity_, ILUReorder opencl_ilu_reorder_) :
|
CPR<block_size>::CPR(int verbosity_, bool opencl_ilu_parallel_) :
|
||||||
Preconditioner<block_size>(verbosity_), opencl_ilu_reorder(opencl_ilu_reorder_)
|
Preconditioner<block_size>(verbosity_), opencl_ilu_parallel(opencl_ilu_parallel_)
|
||||||
{
|
{
|
||||||
bilu0 = std::make_unique<BILU0<block_size> >(opencl_ilu_reorder, verbosity_);
|
bilu0 = std::make_unique<BILU0<block_size> >(opencl_ilu_parallel, verbosity_);
|
||||||
diagIndices.resize(1);
|
diagIndices.resize(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -71,12 +71,7 @@ bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_) {
|
|||||||
this->nnz = nnzb * block_size * block_size;
|
this->nnz = nnzb * block_size * block_size;
|
||||||
|
|
||||||
bool success = bilu0->analyze_matrix(mat_);
|
bool success = bilu0->analyze_matrix(mat_);
|
||||||
|
mat = mat_;
|
||||||
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
|
||||||
mat = mat_;
|
|
||||||
} else {
|
|
||||||
mat = bilu0->getRMat();
|
|
||||||
}
|
|
||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -88,12 +83,8 @@ bool CPR<block_size>::analyze_matrix(BlockedMatrix *mat_, BlockedMatrix *jacMat)
|
|||||||
this->nnz = nnzb * block_size * block_size;
|
this->nnz = nnzb * block_size * block_size;
|
||||||
|
|
||||||
bool success = bilu0->analyze_matrix(mat_, jacMat);
|
bool success = bilu0->analyze_matrix(mat_, jacMat);
|
||||||
|
mat = mat_;
|
||||||
|
|
||||||
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
|
||||||
mat = mat_;
|
|
||||||
} else {
|
|
||||||
mat = bilu0->getRMat();
|
|
||||||
}
|
|
||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34,7 +34,6 @@
|
|||||||
#include <opm/simulators/linalg/bda/opencl/BILU0.hpp>
|
#include <opm/simulators/linalg/bda/opencl/BILU0.hpp>
|
||||||
#include <opm/simulators/linalg/bda/Matrix.hpp>
|
#include <opm/simulators/linalg/bda/Matrix.hpp>
|
||||||
#include <opm/simulators/linalg/bda/opencl/OpenclMatrix.hpp>
|
#include <opm/simulators/linalg/bda/opencl/OpenclMatrix.hpp>
|
||||||
#include <opm/simulators/linalg/bda/ILUReorder.hpp>
|
|
||||||
#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
|
#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
|
||||||
|
|
||||||
#include <opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp>
|
#include <opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp>
|
||||||
@ -98,7 +97,7 @@ private:
|
|||||||
unsigned num_post_smooth_steps; // number of Jacobi smooth steps after prolongation
|
unsigned num_post_smooth_steps; // number of Jacobi smooth steps after prolongation
|
||||||
|
|
||||||
std::unique_ptr<openclSolverBackend<1> > coarse_solver; // coarse solver is scalar
|
std::unique_ptr<openclSolverBackend<1> > coarse_solver; // coarse solver is scalar
|
||||||
ILUReorder opencl_ilu_reorder; // reordering strategy for ILU0 in coarse solver
|
bool opencl_ilu_parallel; // whether to parallelize the ILU0 decomposition and application, forwarded to bilu0
|
||||||
|
|
||||||
// Analyze the AMG hierarchy built by Dune
|
// Analyze the AMG hierarchy built by Dune
|
||||||
void analyzeHierarchy();
|
void analyzeHierarchy();
|
||||||
@ -122,7 +121,7 @@ private:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
CPR(int verbosity, ILUReorder opencl_ilu_reorder);
|
CPR(int verbosity, bool opencl_ilu_parallel);
|
||||||
|
|
||||||
bool analyze_matrix(BlockedMatrix *mat) override;
|
bool analyze_matrix(BlockedMatrix *mat) override;
|
||||||
bool analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
|
bool analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat) override;
|
||||||
|
@ -40,13 +40,13 @@ void Preconditioner<block_size>::setOpencl(std::shared_ptr<cl::Context>& context
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
std::unique_ptr<Preconditioner<block_size> > Preconditioner<block_size>::create(PreconditionerType type, int verbosity, ILUReorder opencl_ilu_reorder) {
|
std::unique_ptr<Preconditioner<block_size> > Preconditioner<block_size>::create(PreconditionerType type, int verbosity, bool opencl_ilu_parallel) {
|
||||||
if (type == PreconditionerType::BILU0) {
|
if (type == PreconditionerType::BILU0) {
|
||||||
return std::make_unique<Opm::Accelerator::BILU0<block_size> >(opencl_ilu_reorder, verbosity);
|
return std::make_unique<Opm::Accelerator::BILU0<block_size> >(opencl_ilu_parallel, verbosity);
|
||||||
} else if (type == PreconditionerType::CPR) {
|
} else if (type == PreconditionerType::CPR) {
|
||||||
return std::make_unique<Opm::Accelerator::CPR<block_size> >(verbosity, opencl_ilu_reorder);
|
return std::make_unique<Opm::Accelerator::CPR<block_size> >(verbosity, opencl_ilu_parallel);
|
||||||
} else if (type == PreconditionerType::BISAI) {
|
} else if (type == PreconditionerType::BISAI) {
|
||||||
return std::make_unique<Opm::Accelerator::BISAI<block_size> >(opencl_ilu_reorder, verbosity);
|
return std::make_unique<Opm::Accelerator::BISAI<block_size> >(opencl_ilu_parallel, verbosity);
|
||||||
} else {
|
} else {
|
||||||
OPM_THROW(std::logic_error, "Invalid PreconditionerType");
|
OPM_THROW(std::logic_error, "Invalid PreconditionerType");
|
||||||
}
|
}
|
||||||
@ -63,11 +63,12 @@ bool Preconditioner<block_size>::create_preconditioner(BlockedMatrix *mat, [[may
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template std::unique_ptr<Preconditioner<n> > Preconditioner<n>::create(PreconditionerType, int, ILUReorder); \
|
template std::unique_ptr<Preconditioner<n> > Preconditioner<n>::create(PreconditionerType, int, bool); \
|
||||||
template void Preconditioner<n>::setOpencl(std::shared_ptr<cl::Context>&, std::shared_ptr<cl::CommandQueue>&); \
|
template void Preconditioner<n>::setOpencl(std::shared_ptr<cl::Context>&, std::shared_ptr<cl::CommandQueue>&); \
|
||||||
template bool Preconditioner<n>::analyze_matrix(BlockedMatrix *, BlockedMatrix *); \
|
template bool Preconditioner<n>::analyze_matrix(BlockedMatrix *, BlockedMatrix *); \
|
||||||
template bool Preconditioner<n>::create_preconditioner(BlockedMatrix *, BlockedMatrix *);
|
template bool Preconditioner<n>::create_preconditioner(BlockedMatrix *, BlockedMatrix *);
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_BDA_FUNCTIONS(1);
|
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||||
INSTANTIATE_BDA_FUNCTIONS(2);
|
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||||
INSTANTIATE_BDA_FUNCTIONS(3);
|
INSTANTIATE_BDA_FUNCTIONS(3);
|
||||||
|
@ -21,7 +21,6 @@
|
|||||||
#define OPM_PRECONDITIONER_HEADER_INCLUDED
|
#define OPM_PRECONDITIONER_HEADER_INCLUDED
|
||||||
|
|
||||||
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
|
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
|
||||||
#include <opm/simulators/linalg/bda/ILUReorder.hpp>
|
|
||||||
|
|
||||||
namespace Opm
|
namespace Opm
|
||||||
{
|
{
|
||||||
@ -58,7 +57,7 @@ public:
|
|||||||
BISAI
|
BISAI
|
||||||
};
|
};
|
||||||
|
|
||||||
static std::unique_ptr<Preconditioner> create(PreconditionerType type, int verbosity, ILUReorder opencl_ilu_reorder);
|
static std::unique_ptr<Preconditioner> create(PreconditionerType type, int verbosity, bool opencl_ilu_parallel);
|
||||||
|
|
||||||
virtual ~Preconditioner() = default;
|
virtual ~Preconditioner() = default;
|
||||||
|
|
||||||
|
@ -47,7 +47,7 @@ using Opm::OpmLog;
|
|||||||
using Dune::Timer;
|
using Dune::Timer;
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_, ILUReorder opencl_ilu_reorder_, std::string linsolver) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_), opencl_ilu_reorder(opencl_ilu_reorder_) {
|
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_, bool opencl_ilu_parallel_, std::string linsolver) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_), opencl_ilu_parallel(opencl_ilu_parallel_) {
|
||||||
|
|
||||||
bool use_cpr, use_isai;
|
bool use_cpr, use_isai;
|
||||||
|
|
||||||
@ -68,11 +68,11 @@ openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_,
|
|||||||
|
|
||||||
using PreconditionerType = typename Preconditioner<block_size>::PreconditionerType;
|
using PreconditionerType = typename Preconditioner<block_size>::PreconditionerType;
|
||||||
if (use_cpr) {
|
if (use_cpr) {
|
||||||
prec = Preconditioner<block_size>::create(PreconditionerType::CPR, verbosity, opencl_ilu_reorder);
|
prec = Preconditioner<block_size>::create(PreconditionerType::CPR, verbosity, opencl_ilu_parallel);
|
||||||
} else if (use_isai) {
|
} else if (use_isai) {
|
||||||
prec = Preconditioner<block_size>::create(PreconditionerType::BISAI, verbosity, opencl_ilu_reorder);
|
prec = Preconditioner<block_size>::create(PreconditionerType::BISAI, verbosity, opencl_ilu_parallel);
|
||||||
} else {
|
} else {
|
||||||
prec = Preconditioner<block_size>::create(PreconditionerType::BILU0, verbosity, opencl_ilu_reorder);
|
prec = Preconditioner<block_size>::create(PreconditionerType::BILU0, verbosity, opencl_ilu_parallel);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
@ -219,11 +219,11 @@ openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, ILUReorder opencl_ilu_reorder_) :
|
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, bool opencl_ilu_parallel_) :
|
||||||
BdaSolver<block_size>(verbosity_, maxit_, tolerance_), opencl_ilu_reorder(opencl_ilu_reorder_)
|
BdaSolver<block_size>(verbosity_, maxit_, tolerance_), opencl_ilu_parallel(opencl_ilu_parallel_)
|
||||||
{
|
{
|
||||||
// prec = std::make_unique<BILU0<block_size> >(opencl_ilu_reorder, verbosity_);
|
// prec = std::make_unique<BILU0<block_size> >(opencl_ilu_parallel, verbosity_);
|
||||||
// cpr = std::make_unique<CPR<block_size> >(verbosity_, opencl_ilu_reorder, /*use_amg=*/false);
|
// cpr = std::make_unique<CPR<block_size> >(verbosity_, opencl_ilu_parallel, /*use_amg=*/false);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
@ -456,8 +456,7 @@ void openclSolverBackend<block_size>::initialize(std::shared_ptr<BlockedMatrix>
|
|||||||
d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
|
d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
|
||||||
d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
|
d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
|
||||||
|
|
||||||
bool reorder = (opencl_ilu_reorder != ILUReorder::NONE);
|
if (opencl_ilu_parallel) {
|
||||||
if (reorder) {
|
|
||||||
d_toOrder = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Nb);
|
d_toOrder = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Nb);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -496,7 +495,7 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
|||||||
err |= queue->enqueueWriteBuffer(d_Arows, CL_TRUE, 0, sizeof(int) * (Nb + 1), rmat->rowPointers, nullptr, &events[2]);
|
err |= queue->enqueueWriteBuffer(d_Arows, CL_TRUE, 0, sizeof(int) * (Nb + 1), rmat->rowPointers, nullptr, &events[2]);
|
||||||
err |= queue->enqueueWriteBuffer(d_b, CL_TRUE, 0, sizeof(double) * N, h_b, nullptr, &events[3]);
|
err |= queue->enqueueWriteBuffer(d_b, CL_TRUE, 0, sizeof(double) * N, h_b, nullptr, &events[3]);
|
||||||
err |= queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &events[4]);
|
err |= queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &events[4]);
|
||||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
if (opencl_ilu_parallel) {
|
||||||
events.resize(6);
|
events.resize(6);
|
||||||
queue->enqueueWriteBuffer(d_toOrder, CL_TRUE, 0, sizeof(int) * Nb, toOrder, nullptr, &events[5]);
|
queue->enqueueWriteBuffer(d_toOrder, CL_TRUE, 0, sizeof(int) * Nb, toOrder, nullptr, &events[5]);
|
||||||
}
|
}
|
||||||
@ -559,15 +558,15 @@ bool openclSolverBackend<block_size>::analyze_matrix() {
|
|||||||
else
|
else
|
||||||
success = prec->analyze_matrix(mat.get());
|
success = prec->analyze_matrix(mat.get());
|
||||||
|
|
||||||
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
if (opencl_ilu_parallel) {
|
||||||
rmat = mat.get();
|
|
||||||
} else {
|
|
||||||
// toOrder = bilu0->getToOrder();
|
// toOrder = bilu0->getToOrder();
|
||||||
// fromOrder = bilu0->getFromOrder();
|
// fromOrder = bilu0->getFromOrder();
|
||||||
// rmat = bilu0->getRMat();
|
// rmat = bilu0->getRMat();
|
||||||
toOrder = prec->getToOrder();
|
toOrder = prec->getToOrder();
|
||||||
fromOrder = prec->getFromOrder();
|
fromOrder = prec->getFromOrder();
|
||||||
rmat = prec->getRMat();
|
rmat = prec->getRMat();
|
||||||
|
} else {
|
||||||
|
rmat = mat.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -694,8 +693,8 @@ SolverStatus openclSolverBackend<block_size>::solve_system(std::shared_ptr<Block
|
|||||||
|
|
||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template openclSolverBackend<n>::openclSolverBackend( \
|
template openclSolverBackend<n>::openclSolverBackend( \
|
||||||
int, int, double, unsigned int, unsigned int, ILUReorder, std::string); \
|
int, int, double, unsigned int, unsigned int, bool, std::string); \
|
||||||
template openclSolverBackend<n>::openclSolverBackend(int, int, double, ILUReorder); \
|
template openclSolverBackend<n>::openclSolverBackend(int, int, double, bool); \
|
||||||
template void openclSolverBackend<n>::setOpencl(std::shared_ptr<cl::Context>&, std::shared_ptr<cl::CommandQueue>&);
|
template void openclSolverBackend<n>::setOpencl(std::shared_ptr<cl::Context>&, std::shared_ptr<cl::CommandQueue>&);
|
||||||
|
|
||||||
INSTANTIATE_BDA_FUNCTIONS(1);
|
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||||
|
@ -23,7 +23,6 @@
|
|||||||
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
|
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
|
||||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||||
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
|
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
|
||||||
#include <opm/simulators/linalg/bda/ILUReorder.hpp>
|
|
||||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||||
|
|
||||||
#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
|
#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
|
||||||
@ -73,7 +72,7 @@ private:
|
|||||||
std::shared_ptr<BlockedMatrix> mat = nullptr; // original matrix
|
std::shared_ptr<BlockedMatrix> mat = nullptr; // original matrix
|
||||||
std::shared_ptr<BlockedMatrix> jacMat = nullptr; // matrix for preconditioner
|
std::shared_ptr<BlockedMatrix> jacMat = nullptr; // matrix for preconditioner
|
||||||
BlockedMatrix *rmat = nullptr; // reordered matrix (or original if no reordering), used for spmv
|
BlockedMatrix *rmat = nullptr; // reordered matrix (or original if no reordering), used for spmv
|
||||||
ILUReorder opencl_ilu_reorder; // reordering strategy
|
bool opencl_ilu_parallel; // reordering strategy
|
||||||
std::vector<cl::Event> events;
|
std::vector<cl::Event> events;
|
||||||
cl_int err;
|
cl_int err;
|
||||||
|
|
||||||
@ -175,14 +174,14 @@ public:
|
|||||||
/// \param[in] tolerance required relative tolerance for openclSolver
|
/// \param[in] tolerance required relative tolerance for openclSolver
|
||||||
/// \param[in] platformID the OpenCL platform to be used
|
/// \param[in] platformID the OpenCL platform to be used
|
||||||
/// \param[in] deviceID the device to be used
|
/// \param[in] deviceID the device to be used
|
||||||
/// \param[in] opencl_ilu_reorder select either level_scheduling or graph_coloring, see Reorder.hpp for explanation
|
/// \param[in] opencl_ilu_parallel whether to parallelize the ILU decomposition and application in OpenCL
|
||||||
/// \param[in] linsolver indicating the preconditioner, equal to the --linear-solver cmdline argument
|
/// \param[in] linsolver indicating the preconditioner, equal to the --linear-solver cmdline argument
|
||||||
/// only ilu0, cpr_quasiimpes and isai are supported
|
/// only ilu0, cpr_quasiimpes and isai are supported
|
||||||
openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID,
|
openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID,
|
||||||
ILUReorder opencl_ilu_reorder, std::string linsolver);
|
bool opencl_ilu_parallel, std::string linsolver);
|
||||||
|
|
||||||
/// For the CPR coarse solver
|
/// For the CPR coarse solver
|
||||||
openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, ILUReorder opencl_ilu_reorder);
|
openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, bool opencl_ilu_parallel);
|
||||||
|
|
||||||
/// Solve linear system, A*x = b, matrix A must be in blocked-CSR format
|
/// Solve linear system, A*x = b, matrix A must be in blocked-CSR format
|
||||||
/// \param[in] matrix matrix A
|
/// \param[in] matrix matrix A
|
||||||
|
@ -100,7 +100,7 @@ testCusparseSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix, V
|
|||||||
const int linear_solver_verbosity = prm.get<int>("verbosity");
|
const int linear_solver_verbosity = prm.get<int>("verbosity");
|
||||||
const int maxit = prm.get<int>("maxiter");
|
const int maxit = prm.get<int>("maxiter");
|
||||||
const double tolerance = prm.get<double>("tol");
|
const double tolerance = prm.get<double>("tol");
|
||||||
const std::string opencl_ilu_reorder("none"); // unused
|
const bool opencl_ilu_parallel(true); // unused
|
||||||
const int platformID = 0; // unused
|
const int platformID = 0; // unused
|
||||||
const int deviceID = 0;
|
const int deviceID = 0;
|
||||||
const std::string accelerator_mode("cusparse");
|
const std::string accelerator_mode("cusparse");
|
||||||
@ -113,7 +113,7 @@ testCusparseSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix, V
|
|||||||
auto wellContribs = Opm::WellContributions::create("cusparse", false);
|
auto wellContribs = Opm::WellContributions::create("cusparse", false);
|
||||||
std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
|
std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
|
||||||
try {
|
try {
|
||||||
bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode, fpga_bitstream, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_reorder, linsolver);
|
bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode, fpga_bitstream, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_parallel, linsolver);
|
||||||
auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory
|
auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory
|
||||||
// matrix created by readMatrixMarket() did not have contiguous memory
|
// matrix created by readMatrixMarket() did not have contiguous memory
|
||||||
bridge->solve_system(&mat2, &mat2, /*numJacobiBlocks=*/0, rhs, *wellContribs, result);
|
bridge->solve_system(&mat2, &mat2, /*numJacobiBlocks=*/0, rhs, *wellContribs, result);
|
||||||
|
@ -99,7 +99,7 @@ testOpenclSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix, Vec
|
|||||||
const int linear_solver_verbosity = prm.get<int>("verbosity");
|
const int linear_solver_verbosity = prm.get<int>("verbosity");
|
||||||
const int maxit = prm.get<int>("maxiter");
|
const int maxit = prm.get<int>("maxiter");
|
||||||
const double tolerance = prm.get<double>("tol");
|
const double tolerance = prm.get<double>("tol");
|
||||||
const std::string opencl_ilu_reorder("none");
|
const bool opencl_ilu_parallel(true);
|
||||||
const int platformID = 0;
|
const int platformID = 0;
|
||||||
const int deviceID = 0;
|
const int deviceID = 0;
|
||||||
const std::string accelerator_mode("opencl");
|
const std::string accelerator_mode("opencl");
|
||||||
@ -111,7 +111,7 @@ testOpenclSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix, Vec
|
|||||||
auto wellContribs = Opm::WellContributions::create("opencl", false);
|
auto wellContribs = Opm::WellContributions::create("opencl", false);
|
||||||
std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
|
std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
|
||||||
try {
|
try {
|
||||||
bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode, fpga_bitstream, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_reorder, linsolver);
|
bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode, fpga_bitstream, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_parallel, linsolver);
|
||||||
} catch (const std::logic_error& error) {
|
} catch (const std::logic_error& error) {
|
||||||
BOOST_WARN_MESSAGE(true, error.what());
|
BOOST_WARN_MESSAGE(true, error.what());
|
||||||
throw PlatformInitException(error.what());
|
throw PlatformInitException(error.what());
|
||||||
|
Loading…
Reference in New Issue
Block a user