refactor cuvector

This commit is contained in:
Tobias Meyer Andersen
2024-08-22 15:20:20 +02:00
parent 3aa1767548
commit fba1858f42
28 changed files with 234 additions and 234 deletions

View File

@@ -27,7 +27,7 @@
#include <opm/simulators/linalg/DILU.hpp>
#include <opm/simulators/linalg/cuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <random>
@@ -42,8 +42,8 @@ using B2x2Vec = Dune::BlockVector<Dune::FieldVector<double, 2>>;
using Sp1x1BlockMatrix = Dune::BCRSMatrix<FM1x1>;
using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>;
using CuMatrix = Opm::gpuistl::GpuSparseMatrix<T>;
using CuIntVec = Opm::gpuistl::CuVector<int>;
using CuFloatingPointVec = Opm::gpuistl::CuVector<T>;
using CuIntVec = Opm::gpuistl::GpuVector<int>;
using CuFloatingPointVec = Opm::gpuistl::GpuVector<T>;
using GpuDilu1x1 = Opm::gpuistl::GpuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
using GpuDilu2x2 = Opm::gpuistl::GpuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;

View File

@@ -25,7 +25,7 @@
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
@@ -49,7 +49,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(GPUJACApplyBlocksize2, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(GPUJACApplyBlocksize1, T, NumericTypes)
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuJac = Opm::gpuistl::GpuJac<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {

View File

@@ -27,7 +27,7 @@
#include <dune/istl/owneroverlapcopy.hh>
#include <memory>
#include <opm/simulators/linalg/cuistl/GpuOwnerOverlapCopy.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/set_device.hpp>
#include <random>
@@ -62,7 +62,7 @@ BOOST_AUTO_TEST_CASE(TestProject)
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::gpuistl::CuVector<double>(xCPU);
auto xGPU = Opm::gpuistl::GpuVector<double>(xCPU);
auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);
@@ -94,7 +94,7 @@ BOOST_AUTO_TEST_CASE(TestDot)
indexInfo.addRemoteIndex(std::make_tuple(0, 2, Dune::OwnerOverlapCopyAttributeSet::copy));
auto ownerOverlapCopy = Dune::OwnerOverlapCopyCommunication<int>(indexInfo, MPI_COMM_WORLD);
auto xCPU = std::vector<double> {{1.0, 2.0, 3.0}};
auto xGPU = Opm::gpuistl::CuVector<double>(xCPU);
auto xGPU = Opm::gpuistl::GpuVector<double>(xCPU);
auto gpuComm = std::make_shared<Opm::gpuistl::GPUObliviousMPISender<double, 1, Dune::OwnerOverlapCopyCommunication<int>>>(ownerOverlapCopy);

View File

@@ -28,7 +28,7 @@
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/preconditioners.hh>
#include <opm/simulators/linalg/cuistl/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
@@ -63,7 +63,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
using M = Dune::FieldMatrix<T, 1, 1>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 1>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
using M = Dune::FieldMatrix<T, 2, 2>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 2>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::GpuVector<T>, Opm::gpuistl::GpuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {

View File

@@ -24,7 +24,7 @@
#include <dune/istl/bcrsmatrix.hh>
#include <memory>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <random>
@@ -149,8 +149,8 @@ BOOST_AUTO_TEST_CASE(RandomSparsityMatrix)
std::vector<double> inputDataX(N * dim, 0.0);
inputDataX[component] = 1.0;
std::vector<double> inputDataY(N * dim, .25);
auto inputVectorX = Opm::gpuistl::CuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::gpuistl::CuVector<double>(inputDataY.data(), inputDataY.size());
auto inputVectorX = Opm::gpuistl::GpuVector<double>(inputDataX.data(), inputDataX.size());
auto inputVectorY = Opm::gpuistl::GpuVector<double>(inputDataY.data(), inputDataY.size());
Vector xHost(N), yHost(N);
yHost = inputDataY[0];
inputVectorX.copyToHost(xHost);

View File

@@ -18,13 +18,13 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuVector
#define BOOST_TEST_MODULE TestGpuVector
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <random>
@@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
{
auto someDataOnCPU = std::vector<double>({1.0, 2.0, 42.0, 59.9451743, 10.7132692});
auto dataOnGPU = ::Opm::gpuistl::CuVector<double>(someDataOnCPU);
auto dataOnGPU = ::Opm::gpuistl::GpuVector<double>(someDataOnCPU);
// Multiply by 4.0:
dataOnGPU *= 4.0;
@@ -50,14 +50,14 @@ BOOST_AUTO_TEST_CASE(TestDocumentedUsage)
BOOST_AUTO_TEST_CASE(TestConstructionSize)
{
const int numberOfElements = 1234;
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(numberOfElements);
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(numberOfElements);
BOOST_CHECK_EQUAL(numberOfElements, vectorOnGPU.dim());
}
BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
std::vector<double> buffer(data.size(), 0.0);
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@@ -68,7 +68,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostConstructor)
BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
vectorOnGPU.copyFromHost(data.data(), data.size());
std::vector<double> buffer(data.size(), 0.0);
@@ -80,7 +80,7 @@ BOOST_AUTO_TEST_CASE(TestCopyFromHostFunction)
BOOST_AUTO_TEST_CASE(TestCopyFromBvector)
{
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 2>> {{{42, 43}, {44, 45}, {46, 47}}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(blockVector.dim());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(blockVector.dim());
vectorOnGPU.copyFromHost(blockVector);
std::vector<double> buffer(vectorOnGPU.dim());
vectorOnGPU.copyToHost(buffer.data(), buffer.size());
@@ -93,7 +93,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto blockVector = Dune::BlockVector<Dune::FieldVector<double, 3>>(3);
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(blockVector);
@@ -103,7 +103,7 @@ BOOST_AUTO_TEST_CASE(TestCopyToBvector)
BOOST_AUTO_TEST_CASE(TestDataPointer)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7, 8, 9}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
std::vector<double> buffer(data.size(), 0.0);
OPM_CUDA_SAFE_CALL(cudaMemcpy(buffer.data(), vectorOnGPU.data(), sizeof(double) * data.size(), cudaMemcpyDeviceToHost));
@@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE(TestDataPointer)
BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
BOOST_CHECK_EQUAL(data.size(), vectorOnGPU.dim());
const double scalar = 42.25;
vectorOnGPU *= scalar;
@@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(TestCopyScalarMultiply)
BOOST_AUTO_TEST_CASE(TestTwoNorm)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
auto twoNorm = vectorOnGPU.two_norm();
double correctAnswer = 0.0;
@@ -143,8 +143,8 @@ BOOST_AUTO_TEST_CASE(TestDot)
{
std::vector<double> dataA {{1, 2, 3, 4, 5, 6, 7}};
std::vector<double> dataB {{8, 9, 10, 11, 12, 13, 14}};
auto vectorOnGPUA = Opm::gpuistl::CuVector<double>(dataA.data(), dataA.size());
auto vectorOnGPUB = Opm::gpuistl::CuVector<double>(dataB.data(), dataB.size());
auto vectorOnGPUA = Opm::gpuistl::GpuVector<double>(dataA.data(), dataA.size());
auto vectorOnGPUB = Opm::gpuistl::GpuVector<double>(dataB.data(), dataB.size());
auto dot = vectorOnGPUA.dot(vectorOnGPUB);
double correctAnswer = 0.0;
@@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE(TestDot)
BOOST_AUTO_TEST_CASE(Assigment)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU = 10.0;
vectorOnGPU.copyToHost(data.data(), data.size());
@@ -171,9 +171,9 @@ BOOST_AUTO_TEST_CASE(Assigment)
BOOST_AUTO_TEST_CASE(CopyAssignment)
{
std::vector<double> data {{1, 2, 3, 4, 5, 6, 7}};
auto vectorOnGPU = Opm::gpuistl::CuVector<double>(data.data(), data.size());
auto vectorOnGPU = Opm::gpuistl::GpuVector<double>(data.data(), data.size());
vectorOnGPU.copyToHost(data.data(), data.size());
auto vectorOnGPUB = Opm::gpuistl::CuVector<double>(data.size());
auto vectorOnGPUB = Opm::gpuistl::GpuVector<double>(data.size());
vectorOnGPUB = 4.0;
vectorOnGPUB = vectorOnGPU;
@@ -185,7 +185,7 @@ BOOST_AUTO_TEST_CASE(CopyAssignment)
BOOST_AUTO_TEST_CASE(RandomVectors)
{
using GVector = Opm::gpuistl::CuVector<double>;
using GVector = Opm::gpuistl::GpuVector<double>;
std::srand(0);
std::mt19937 generator;
std::uniform_real_distribution<double> distribution(-100.0, 100.0);
@@ -268,7 +268,7 @@ BOOST_AUTO_TEST_CASE(RandomVectors)
indexSet.push_back(i);
}
}
auto indexSetGPU = Opm::gpuistl::CuVector<int>(indexSet);
auto indexSetGPU = Opm::gpuistl::GpuVector<int>(indexSet);
aGPU.setZeroAtIndexSet(indexSetGPU);
auto projectedA = aGPU.asStdVector();

View File

@@ -24,7 +24,7 @@
#include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/GpuSparseMatrix.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
@@ -86,7 +86,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith3By3Blocks, T, Numeric
B[1][1][2][2] = -1.0;
Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::GpuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 3>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());
@@ -160,7 +160,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith2By2Blocks, T, Numeric
B[1][1][1][1] = -1.0;
Opm::gpuistl::GpuSparseMatrix<T> m = Opm::gpuistl::GpuSparseMatrix<T>::fromMatrix(B);
Opm::gpuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::GpuVector<T> dInvDiag(blocksize * blocksize * N);
Opm::gpuistl::detail::JAC::invertDiagonalAndFlatten<T, 2>(
m.getNonZeroValues().data(), m.getRowIndices().data(), m.getColumnIndices().data(), N, dInvDiag.data());

View File

@@ -18,13 +18,13 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuVectorOperations
#define BOOST_TEST_MODULE TestGpuVectorOperations
#include <boost/mpl/list.hpp>
#include <boost/test/unit_test.hpp>
#include <cuda_runtime.h>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/GpuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
@@ -47,9 +47,9 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf3By3BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0});
std::vector<T> hostVecVector({3.0, 2.0, 1.0});
std::vector<T> hostDstVector({0, 0, 0});
Opm::gpuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::GpuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::GpuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::GpuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
@@ -81,9 +81,9 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVecto
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0});
std::vector<T> hostVecVector({1.0, 3.0, 2.0, 4.0});
std::vector<T> hostDstVector({0, 0, 0, 0});
Opm::gpuistl::CuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::CuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::CuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::GpuVector<T> deviceBlockVector(hostBlockVector);
Opm::gpuistl::GpuVector<T> deviceVecVector(hostVecVector);
Opm::gpuistl::GpuVector<T> deviceDstVector(hostDstVector);
Opm::gpuistl::detail::weightedDiagMV(
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());