mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
333 lines
9.2 KiB
C++
333 lines
9.2 KiB
C++
/*
|
|
Copyright 2024 SINTEF AS
|
|
|
|
This file is part of the Open Porous Media project (OPM).
|
|
|
|
OPM is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
OPM is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
#include <config.h>
|
|
|
|
#define BOOST_TEST_MODULE TestGpuDILU
|
|
|
|
#include <boost/test/unit_test.hpp>
|
|
#include <dune/common/fmatrix.hh>
|
|
#include <dune/istl/bcrsmatrix.hh>
|
|
#include <memory>
|
|
#include <opm/simulators/linalg/DILU.hpp>
|
|
#include <opm/simulators/linalg/gpuistl/GpuDILU.hpp>
|
|
#include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
|
|
#include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
|
|
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
|
|
#include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
|
|
#include <random>
|
|
#include <vector>
|
|
|
|
|
|
using T = double;
|
|
using FM1x1 = Dune::FieldMatrix<T, 1, 1>;
|
|
using FM2x2 = Dune::FieldMatrix<T, 2, 2>;
|
|
using B1x1Vec = Dune::BlockVector<Dune::FieldVector<double, 1>>;
|
|
using B2x2Vec = Dune::BlockVector<Dune::FieldVector<double, 2>>;
|
|
using Sp1x1BlockMatrix = Dune::BCRSMatrix<FM1x1>;
|
|
using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>;
|
|
using CuMatrix = Opm::gpuistl::GpuSparseMatrix<T>;
|
|
using CuIntVec = Opm::gpuistl::GpuVector<int>;
|
|
using CuFloatingPointVec = Opm::gpuistl::GpuVector<T>;
|
|
using GpuDilu1x1 = Opm::gpuistl::GpuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
|
|
using GpuDilu2x2 = Opm::gpuistl::GpuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
|
|
|
|
Sp1x1BlockMatrix
get1x1BlockTestMatrix()
{
    /*
       Builds the 6x6 scalar (1x1 block) test matrix used by the DILU tests.

       matA:
       1  2  0  3  0  0
       4  5  0  6  0  7
       0  0  8  0  0  0
       9 10  0 11 12  0
       0  0  0 13 14  0
       0 15  0  0  0 16

       Expected reordering (rows permuted to 0, 2, 1, 3, 5, 4):
       1  2  0  3  0  0
       0  0  8  0  0  0
       4  5  0  6  0  7
       9 10  0 11 12  0
       0 15  0  0  0 16
       0  0  0 13 14  0

       Expected lowerTriangularReorderedMatrix:
       0  0  0  0  0  0
       0  0  0  0  0  0
       4  0  0  0  0  0
       9 10  0  0  0  0
       0 15  0  0  0  0
       0  0  0 13  0  0

       Expected upperTriangularReorderedMatrix:
       0  2  0  3  0  0
       0  0  0  0  0  0
       0  0  0  6  0  7
       0  0  0  0 12  0
       0  0  0  0  0  0
       0  0  0  0  0  0
    */

    using Index = Sp1x1BlockMatrix::size_type;

    // One structurally non-zero scalar entry of matA.
    struct Entry {
        Index row;
        Index col;
        T value;
    };

    // Single source of truth for both the sparsity pattern and the values,
    // so the two can never get out of sync.
    const std::vector<Entry> entries = {
        {0, 0, 1.0},  {0, 1, 2.0},  {0, 3, 3.0},
        {1, 0, 4.0},  {1, 1, 5.0},  {1, 3, 6.0},  {1, 5, 7.0},
        {2, 2, 8.0},
        {3, 0, 9.0},  {3, 1, 10.0}, {3, 3, 11.0}, {3, 4, 12.0},
        {4, 3, 13.0}, {4, 4, 14.0},
        {5, 1, 15.0}, {5, 5, 16.0},
    };

    const Index N = 6;

    // Create the Dune A matrix: first the sparsity pattern, row by row ...
    Sp1x1BlockMatrix matA(N, N, entries.size(), Sp1x1BlockMatrix::row_wise);
    for (auto row = matA.createbegin(); row != matA.createend(); ++row) {
        for (const auto& entry : entries) {
            if (entry.row == row.index()) {
                row.insert(entry.col);
            }
        }
    }

    // ... then the values.
    for (const auto& entry : entries) {
        matA[entry.row][entry.col][0][0] = entry.value;
    }

    return matA;
}
|
|
|
|
Sp2x2BlockMatrix
get2x2BlockTestMatrix()
{
    /*
       Builds a 3x3 block matrix with 2x2 blocks (6x6 scalars) for the
       blocked DILU test. Every block is structurally present; scalar
       entries not listed below are zero.

       matA:
       1  2    0  3    0  0
       4  5    0  6    0  7

       0  0    1  0    0  0
       9 10    0  1   12  0

       0  0    0 13   14  0
       0 15    0  0    0 16
    */

    using Index = Sp2x2BlockMatrix::size_type;

    const Index N = 3;
    const Index nonZeroes = N * N; // dense block pattern

    // Create the Dune A matrix with all N*N blocks in the sparsity pattern
    Sp2x2BlockMatrix matA(N, N, nonZeroes, Sp2x2BlockMatrix::row_wise);
    for (auto row = matA.createbegin(); row != matA.createend(); ++row) {
        for (Index col = 0; col < N; ++col) {
            row.insert(col);
        }
    }

    // Only some scalars of each block are assigned below; zero everything
    // explicitly instead of relying on how the blocks were default-constructed.
    matA = 0.0;

    // block row 0
    matA[0][0][0][0] = 1.0;
    matA[0][0][0][1] = 2.0;
    matA[0][0][1][0] = 4.0;
    matA[0][0][1][1] = 5.0;
    matA[0][1][0][1] = 3.0;
    matA[0][1][1][1] = 6.0;
    matA[0][2][1][1] = 7.0;

    // block row 1
    matA[1][0][1][0] = 9.0;
    matA[1][0][1][1] = 10.0;
    matA[1][1][0][0] = 1.0;
    matA[1][1][1][1] = 1.0;
    matA[1][2][1][0] = 12.0;

    // block row 2
    matA[2][0][1][1] = 15.0;
    matA[2][1][0][1] = 13.0;
    matA[2][2][0][0] = 14.0;
    matA[2][2][1][1] = 16.0;

    return matA;
}
|
|
|
|
// Applies the GPU DILU and the CPU (Dune::MultithreadDILU) preconditioner to
// the same 1x1-block matrix and input vector and checks that the results agree.
BOOST_AUTO_TEST_CASE(TestDiluApply)
{
    Sp1x1BlockMatrix matA = get1x1BlockTestMatrix();

    // One input vector, shared by the GPU and the CPU path.
    std::vector<T> input = {1.1, 1.2, 1.3, 1.4, 1.5, 1.6};
    std::vector<T> output(input.size());

    CuFloatingPointVec d_input(input);
    CuFloatingPointVec d_output(output);

    // Host input is filled from the same data so the two paths cannot diverge.
    B1x1Vec h_input(input.size());
    for (std::size_t i = 0; i < input.size(); ++i) {
        h_input[i] = input[i];
    }
    B1x1Vec h_output(input.size());

    // Initialize preconditioner objects
    Dune::MultithreadDILU<Sp1x1BlockMatrix, B1x1Vec, B1x1Vec> cpudilu(matA);
    auto gpudilu = GpuDilu1x1(matA, true, true);

    // Use the apply
    gpudilu.apply(d_output, d_input);
    cpudilu.apply(h_output, h_input);

    // put results in std::vector
    std::vector<T> cpudilures;
    cpudilures.reserve(input.size());
    for (const auto& e : h_output) {
        cpudilures.push_back(e[0]);
    }
    const auto cudilures = d_output.asStdVector();

    // The element-wise loop below is only meaningful (and only in bounds)
    // when both results have the same length, so fail hard otherwise.
    BOOST_REQUIRE_EQUAL(cudilures.size(), cpudilures.size());

    // check that GpuDilu results matches that of CPU dilu
    for (std::size_t i = 0; i < cudilures.size(); ++i) {
        BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
    }
}
|
|
|
|
// Same comparison as TestDiluApply, but on the matrix with 2x2 blocks: the GPU
// result (flat vector) must match the CPU result flattened block by block.
BOOST_AUTO_TEST_CASE(TestDiluApplyBlocked)
{
    // init matrix with 2x2 blocks
    Sp2x2BlockMatrix matA = get2x2BlockTestMatrix();
    auto gpudilu = GpuDilu2x2(matA, true, true);
    Dune::MultithreadDILU<Sp2x2BlockMatrix, B2x2Vec, B2x2Vec> cpudilu(matA);

    // create input/output buffers for the apply
    std::vector<T> input = {1.1, 1.2, 1.3, 1.4, 1.5, 1.6};
    std::vector<T> output(input.size());
    CuFloatingPointVec d_input(input);
    CuFloatingPointVec d_output(output);

    // Host input is the same data, laid out as consecutive blocks of two scalars.
    constexpr std::size_t blockSize = 2;
    B2x2Vec h_input(input.size() / blockSize);
    for (std::size_t i = 0; i < input.size(); ++i) {
        h_input[i / blockSize][i % blockSize] = input[i];
    }
    B2x2Vec h_output(input.size() / blockSize);

    // call apply with cpu and gpu dilu
    cpudilu.apply(h_output, h_input);
    gpudilu.apply(d_output, d_input);

    const auto cudilures = d_output.asStdVector();

    // flatten the blocked CPU result into a plain std::vector
    std::vector<T> cpudilures;
    cpudilures.reserve(input.size());
    for (const auto& block : h_output) {
        for (const auto& e : block) {
            cpudilures.push_back(e);
        }
    }

    // The element-wise loop below is only meaningful (and only in bounds)
    // when both results have the same length, so fail hard otherwise.
    BOOST_REQUIRE_EQUAL(cudilures.size(), cpudilures.size());

    // check that the values are close
    for (std::size_t i = 0; i < cudilures.size(); ++i) {
        BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
    }
}
|
|
|
|
// Builds the GPU DILU from the original matrix values, overwrites the values of
// matA in place, calls update() and then checks the GPU apply against a CPU
// DILU that is constructed from the already-modified matrix.
BOOST_AUTO_TEST_CASE(TestDiluInitAndUpdateLarge)
{
    // create gpu dilu preconditioner
    Sp1x1BlockMatrix matA = get1x1BlockTestMatrix();
    auto gpudilu = GpuDilu1x1(matA, true, true);

    // overwrite every non-zero entry; the sparsity pattern is unchanged
    matA[0][0][0][0] = 11.0;
    matA[0][1][0][0] = 12.0;
    matA[0][3][0][0] = 13.0;

    matA[1][0][0][0] = 14.0;
    matA[1][1][0][0] = 15.0;
    matA[1][3][0][0] = 16.0;
    matA[1][5][0][0] = 17.0;

    matA[2][2][0][0] = 18.0;

    matA[3][0][0][0] = 19.0;
    matA[3][1][0][0] = 110.0;
    matA[3][3][0][0] = 111.0;
    matA[3][4][0][0] = 112.0;

    matA[4][3][0][0] = 113.0;
    matA[4][4][0][0] = 114.0;

    matA[5][1][0][0] = 115.0;
    matA[5][5][0][0] = 116.0;

    // make sure the preconditioner is updated with the new matrix values
    gpudilu.update();
    // create a cpu dilu preconditioner on the matrix that is definitely updated
    Dune::MultithreadDILU<Sp1x1BlockMatrix, B1x1Vec, B1x1Vec> cpudilu(matA);

    // One input vector, shared by the GPU and the CPU path.
    std::vector<T> input = {1.1, 1.2, 1.3, 1.4, 1.5, 1.6};
    std::vector<T> output(input.size());

    CuFloatingPointVec d_input(input);
    CuFloatingPointVec d_output(output);

    // Host input is filled from the same data so the two paths cannot diverge.
    B1x1Vec h_input(input.size());
    for (std::size_t i = 0; i < input.size(); ++i) {
        h_input[i] = input[i];
    }
    B1x1Vec h_output(input.size());

    // run an apply to see effect of update
    gpudilu.apply(d_output, d_input);
    cpudilu.apply(h_output, h_input);

    // put results in std::vector
    std::vector<T> cpudilures;
    cpudilures.reserve(input.size());
    for (const auto& e : h_output) {
        cpudilures.push_back(e[0]);
    }
    const auto cudilures = d_output.asStdVector();

    // The element-wise loop below is only meaningful (and only in bounds)
    // when both results have the same length, so fail hard otherwise.
    BOOST_REQUIRE_EQUAL(cudilures.size(), cpudilures.size());

    // check that GpuDilu results matches that of CPU dilu
    for (std::size_t i = 0; i < cudilures.size(); ++i) {
        BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
    }
}
|