mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
add mixed precision option to gpudilu class
This commit is contained in:
@@ -349,9 +349,10 @@ struct StandardPreconditioners {
|
|||||||
F::addCreator("GPUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
|
F::addCreator("GPUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
|
||||||
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
||||||
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
||||||
|
const bool store_factorization_as_float = prm.get<bool>("store_factorization_as_float", false);
|
||||||
using field_type = typename V::field_type;
|
using field_type = typename V::field_type;
|
||||||
using GpuDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
using GpuDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||||
auto gpuDILU = std::make_shared<GpuDILU>(op.getmat(), split_matrix, tune_gpu_kernels);
|
auto gpuDILU = std::make_shared<GpuDILU>(op.getmat(), split_matrix, tune_gpu_kernels, store_factorization_as_float);
|
||||||
|
|
||||||
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuDILU>>(gpuDILU);
|
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuDILU>>(gpuDILU);
|
||||||
auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
|
auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
|
||||||
@@ -631,14 +632,16 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
|
|||||||
F::addCreator("GPUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
|
F::addCreator("GPUDILU", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
|
||||||
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
||||||
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
||||||
|
const bool store_factorization_as_float = prm.get<bool>("store_factorization_as_float", false);
|
||||||
using field_type = typename V::field_type;
|
using field_type = typename V::field_type;
|
||||||
using GPUDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
using GPUDILU = typename gpuistl::GpuDILU<M, gpuistl::GpuVector<field_type>, gpuistl::GpuVector<field_type>>;
|
||||||
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GPUDILU>>(std::make_shared<GPUDILU>(op.getmat(), split_matrix, tune_gpu_kernels));
|
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GPUDILU>>(std::make_shared<GPUDILU>(op.getmat(), split_matrix, tune_gpu_kernels, store_factorization_as_float));
|
||||||
});
|
});
|
||||||
|
|
||||||
F::addCreator("GPUDILUFloat", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
|
F::addCreator("GPUDILUFloat", [](const O& op, [[maybe_unused]] const P& prm, const std::function<V()>&, std::size_t) {
|
||||||
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
const bool split_matrix = prm.get<bool>("split_matrix", true);
|
||||||
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
const bool tune_gpu_kernels = prm.get<bool>("tune_gpu_kernels", true);
|
||||||
|
const bool store_factorization_as_float = prm.get<bool>("store_factorization_as_float", false);
|
||||||
|
|
||||||
using block_type = typename V::block_type;
|
using block_type = typename V::block_type;
|
||||||
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
|
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
|
||||||
@@ -647,7 +650,7 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
|
|||||||
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, GpuDILU>;
|
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, GpuDILU>;
|
||||||
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
|
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
|
||||||
auto converted = std::make_shared<Converter>(op.getmat());
|
auto converted = std::make_shared<Converter>(op.getmat());
|
||||||
auto adapted = std::make_shared<Adapter>(std::make_shared<GpuDILU>(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels));
|
auto adapted = std::make_shared<Adapter>(std::make_shared<GpuDILU>(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels, store_factorization_as_float));
|
||||||
converted->setUnderlyingPreconditioner(adapted);
|
converted->setUnderlyingPreconditioner(adapted);
|
||||||
return converted;
|
return converted;
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ namespace Opm::gpuistl
|
|||||||
{
|
{
|
||||||
|
|
||||||
template <class M, class X, class Y, int l>
|
template <class M, class X, class Y, int l>
|
||||||
GpuDILU<M, X, Y, l>::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels)
|
GpuDILU<M, X, Y, l>::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels, bool storeFactorizationAsFloat)
|
||||||
: m_cpuMatrix(A)
|
: m_cpuMatrix(A)
|
||||||
, m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER))
|
, m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER))
|
||||||
, m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets))
|
, m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets))
|
||||||
@@ -52,6 +52,7 @@ GpuDILU<M, X, Y, l>::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels)
|
|||||||
, m_gpuDInv(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
|
, m_gpuDInv(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
|
||||||
, m_splitMatrix(splitMatrix)
|
, m_splitMatrix(splitMatrix)
|
||||||
, m_tuneThreadBlockSizes(tuneKernels)
|
, m_tuneThreadBlockSizes(tuneKernels)
|
||||||
|
, m_storeFactorizationAsFloat(storeFactorizationAsFloat)
|
||||||
|
|
||||||
{
|
{
|
||||||
// TODO: Should in some way verify that this matrix is symmetric; perhaps only do it in debug mode?
|
// TODO: Should in some way verify that this matrix is symmetric; perhaps only do it in debug mode?
|
||||||
@@ -80,6 +81,14 @@ GpuDILU<M, X, Y, l>::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels)
|
|||||||
m_gpuMatrixReordered = detail::createReorderedMatrix<M, field_type, GpuSparseMatrix<field_type>>(
|
m_gpuMatrixReordered = detail::createReorderedMatrix<M, field_type, GpuSparseMatrix<field_type>>(
|
||||||
m_cpuMatrix, m_reorderedToNatural);
|
m_cpuMatrix, m_reorderedToNatural);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Mixed precision: when requested, allocate float-typed storage (FloatMat / FloatVec)
// alongside the field_type storage for the lower part, upper part and diagonal.
if (m_storeFactorizationAsFloat) {
    // The float matrices reuse the row/column indices of the split lower/upper
    // matrices, so this mode is only valid when the matrix has been split.
    // Previously the throw was unconditional, which made the allocations below
    // unreachable and the option unusable even with split_matrix=true.
    if (!m_splitMatrix) {
        OPM_THROW(std::runtime_error, "Matrix must be split when storing as float.");
    }

    // Same sparsity pattern as the field_type lower/upper matrices; only the
    // indices and block size are passed here.
    // NOTE(review): values are presumably filled in later (e.g. by
    // computeDiagAndMoveReorderedData) — confirm.
    m_gpuMatrixReorderedLowerFloat = std::make_unique<FloatMat>(
        m_gpuMatrixReorderedLower->getRowIndices(), m_gpuMatrixReorderedLower->getColumnIndices(), blocksize_);
    m_gpuMatrixReorderedUpperFloat = std::make_unique<FloatMat>(
        m_gpuMatrixReorderedUpper->getRowIndices(), m_gpuMatrixReorderedUpper->getColumnIndices(), blocksize_);

    // One blockSize x blockSize block per matrix row.
    m_gpuMatrixReorderedDiagFloat
        = std::make_unique<FloatVec>(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize());
}
|
||||||
|
|
||||||
computeDiagAndMoveReorderedData(m_moveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
|
computeDiagAndMoveReorderedData(m_moveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
|
||||||
|
|
||||||
if (m_tuneThreadBlockSizes) {
|
if (m_tuneThreadBlockSizes) {
|
||||||
|
|||||||
@@ -53,6 +53,8 @@ public:
|
|||||||
using field_type = typename X::field_type;
|
using field_type = typename X::field_type;
|
||||||
//! \brief The GPU matrix type
|
//! \brief The GPU matrix type
|
||||||
using CuMat = GpuSparseMatrix<field_type>;
|
using CuMat = GpuSparseMatrix<field_type>;
|
||||||
|
using FloatMat = GpuSparseMatrix<float>;
|
||||||
|
using FloatVec = GpuVector<float>;
|
||||||
|
|
||||||
//! \brief Constructor.
|
//! \brief Constructor.
|
||||||
//!
|
//!
|
||||||
@@ -60,7 +62,7 @@ public:
|
|||||||
//! \param A The matrix to operate on.
|
//! \param A The matrix to operate on.
|
||||||
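//! \param splitMatrix Whether matrix splitting is enabled (the diagonal is then also stored separately).
//! \param tuneKernels Whether to tune the threadblock sizes.
|
//! \param splitMatrix Whether matrix splitting is enabled (the diagonal is then also stored separately).
//! \param tuneKernels Whether to tune the threadblock sizes.
//! \param storeFactorizationAsFloat Whether to store the factorization as float (mixed precision).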
|
||||||
//!
|
//!
|
||||||
explicit GpuDILU(const M& A, bool splitMatrix, bool tuneKernels);
|
explicit GpuDILU(const M& A, bool splitMatrix, bool tuneKernels, bool storeFactorizationAsFloat);
|
||||||
|
|
||||||
//! \brief Prepare the preconditioner.
|
//! \brief Prepare the preconditioner.
|
||||||
//! \note Does nothing at the time being.
|
//! \note Does nothing at the time being.
|
||||||
@@ -127,6 +129,10 @@ private:
|
|||||||
std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper;
|
std::unique_ptr<CuMat> m_gpuMatrixReorderedUpper;
|
||||||
//! \brief If matrix splitting is enabled, we also store the diagonal separately
|
//! \brief If matrix splitting is enabled, we also store the diagonal separately
|
||||||
std::unique_ptr<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
|
std::unique_ptr<GpuVector<field_type>> m_gpuMatrixReorderedDiag;
|
||||||
|
//! \brief If mixed precision is enabled, store a float matrix
|
||||||
|
std::unique_ptr<FloatMat> m_gpuMatrixReorderedLowerFloat;
|
||||||
|
std::unique_ptr<FloatMat> m_gpuMatrixReorderedUpperFloat;
|
||||||
|
std::unique_ptr<FloatVec> m_gpuMatrixReorderedDiagFloat;
|
||||||
//! row conversion from natural to reordered matrix indices stored on the GPU
|
//! row conversion from natural to reordered matrix indices stored on the GPU
|
||||||
GpuVector<int> m_gpuNaturalToReorder;
|
GpuVector<int> m_gpuNaturalToReorder;
|
||||||
//! row conversion from reordered to natural matrix indices stored on the GPU
|
//! row conversion from reordered to natural matrix indices stored on the GPU
|
||||||
@@ -137,6 +143,8 @@ private:
|
|||||||
bool m_splitMatrix;
|
bool m_splitMatrix;
|
||||||
//! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards
|
//! \brief Bool storing whether or not we will tune the threadblock sizes. Only used for AMD cards
|
||||||
bool m_tuneThreadBlockSizes;
|
bool m_tuneThreadBlockSizes;
|
||||||
|
//! \brief Bool storing whether or not we will store the factorization as float. Only used for mixed precision
|
||||||
|
bool m_storeFactorizationAsFloat;
|
||||||
//! \brief variables storing the threadblocksizes to use if using the tuned sizes and AMD cards
|
//! \brief variables storing the threadblocksizes to use if using the tuned sizes and AMD cards
|
||||||
//! The default value of -1 indicates that we have not calibrated and selected a value yet
|
//! The default value of -1 indicates that we have not calibrated and selected a value yet
|
||||||
int m_upperSolveThreadBlockSize = -1;
|
int m_upperSolveThreadBlockSize = -1;
|
||||||
|
|||||||
@@ -211,7 +211,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApply)
|
|||||||
|
|
||||||
// Initialize preconditioner objects
|
// Initialize preconditioner objects
|
||||||
Dune::MultithreadDILU<Sp1x1BlockMatrix, B1x1Vec, B1x1Vec> cpudilu(matA);
|
Dune::MultithreadDILU<Sp1x1BlockMatrix, B1x1Vec, B1x1Vec> cpudilu(matA);
|
||||||
auto gpudilu = GpuDilu1x1(matA, true, true);
|
auto gpudilu = GpuDilu1x1(matA, true, true, false);
|
||||||
|
|
||||||
// Use the apply
|
// Use the apply
|
||||||
gpudilu.apply(d_output, d_input);
|
gpudilu.apply(d_output, d_input);
|
||||||
@@ -235,7 +235,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApplyBlocked)
|
|||||||
|
|
||||||
// init matrix with 2x2 blocks
|
// init matrix with 2x2 blocks
|
||||||
Sp2x2BlockMatrix matA = get2x2BlockTestMatrix();
|
Sp2x2BlockMatrix matA = get2x2BlockTestMatrix();
|
||||||
auto gpudilu = GpuDilu2x2(matA, true, true);
|
auto gpudilu = GpuDilu2x2(matA, true, true, false);
|
||||||
Dune::MultithreadDILU<Sp2x2BlockMatrix, B2x2Vec, B2x2Vec> cpudilu(matA);
|
Dune::MultithreadDILU<Sp2x2BlockMatrix, B2x2Vec, B2x2Vec> cpudilu(matA);
|
||||||
|
|
||||||
// create input/output buffers for the apply
|
// create input/output buffers for the apply
|
||||||
@@ -275,7 +275,7 @@ BOOST_AUTO_TEST_CASE(TestDiluInitAndUpdateLarge)
|
|||||||
{
|
{
|
||||||
// create gpu dilu preconditioner
|
// create gpu dilu preconditioner
|
||||||
Sp1x1BlockMatrix matA = get1x1BlockTestMatrix();
|
Sp1x1BlockMatrix matA = get1x1BlockTestMatrix();
|
||||||
auto gpudilu = GpuDilu1x1(matA, true, true);
|
auto gpudilu = GpuDilu1x1(matA, true, true, false);
|
||||||
|
|
||||||
matA[0][0][0][0] = 11.0;
|
matA[0][0][0][0] = 11.0;
|
||||||
matA[0][1][0][0] = 12.0;
|
matA[0][1][0][0] = 12.0;
|
||||||
|
|||||||
Reference in New Issue
Block a user