mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Merge pull request #4273 from Tongdongq/add-test
Add test for cusparseSolver
This commit is contained in:
@@ -162,6 +162,7 @@ endif()
|
|||||||
|
|
||||||
if(CUDA_FOUND)
|
if(CUDA_FOUND)
|
||||||
set(HAVE_CUDA 1)
|
set(HAVE_CUDA 1)
|
||||||
|
set(COMPILE_BDA_BRIDGE 1)
|
||||||
include_directories(${CUDA_INCLUDE_DIRS})
|
include_directories(${CUDA_INCLUDE_DIRS})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
@@ -173,6 +174,7 @@ if(OpenCL_FOUND)
|
|||||||
find_file(CL2_HPP CL/cl2.hpp HINTS ${OpenCL_INCLUDE_DIRS})
|
find_file(CL2_HPP CL/cl2.hpp HINTS ${OpenCL_INCLUDE_DIRS})
|
||||||
if(CL2_HPP)
|
if(CL2_HPP)
|
||||||
set(HAVE_OPENCL 1)
|
set(HAVE_OPENCL 1)
|
||||||
|
set(COMPILE_BDA_BRIDGE 1)
|
||||||
include_directories(${OpenCL_INCLUDE_DIRS})
|
include_directories(${OpenCL_INCLUDE_DIRS})
|
||||||
find_file(OPENCL_HPP CL/opencl.hpp HINTS ${OpenCL_INCLUDE_DIRS})
|
find_file(OPENCL_HPP CL/opencl.hpp HINTS ${OpenCL_INCLUDE_DIRS})
|
||||||
if(OPENCL_HPP)
|
if(OPENCL_HPP)
|
||||||
@@ -206,6 +208,7 @@ endif()
|
|||||||
find_package(amgcl)
|
find_package(amgcl)
|
||||||
if(amgcl_FOUND)
|
if(amgcl_FOUND)
|
||||||
set(HAVE_AMGCL 1)
|
set(HAVE_AMGCL 1)
|
||||||
|
set(COMPILE_BDA_BRIDGE 1)
|
||||||
# Linking to target angcl::amgcl drags in OpenMP and -fopenmp as a compile
|
# Linking to target angcl::amgcl drags in OpenMP and -fopenmp as a compile
|
||||||
# flag. With that nvcc fails as it does not that flag.
|
# flag. With that nvcc fails as it does not that flag.
|
||||||
# Hence we set AMGCL_INCLUDE_DIRS.
|
# Hence we set AMGCL_INCLUDE_DIRS.
|
||||||
@@ -217,6 +220,7 @@ if(OpenCL_FOUND)
|
|||||||
find_package(VexCL)
|
find_package(VexCL)
|
||||||
if(VexCL_FOUND)
|
if(VexCL_FOUND)
|
||||||
set(HAVE_VEXCL 1)
|
set(HAVE_VEXCL 1)
|
||||||
|
set(COMPILE_BDA_BRIDGE 1)
|
||||||
# generator expressions in vexcl do not seem to work and therefore
|
# generator expressions in vexcl do not seem to work and therefore
|
||||||
# we cannot use the imported target. Hence we exract the needed info
|
# we cannot use the imported target. Hence we exract the needed info
|
||||||
# from the targets
|
# from the targets
|
||||||
@@ -235,6 +239,7 @@ endif()
|
|||||||
find_package(rocalution)
|
find_package(rocalution)
|
||||||
if(ROCALUTION_FOUND)
|
if(ROCALUTION_FOUND)
|
||||||
set(HAVE_ROCALUTION 1)
|
set(HAVE_ROCALUTION 1)
|
||||||
|
set(COMPILE_BDA_BRIDGE 1)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
|
@@ -148,7 +148,7 @@ endif()
|
|||||||
if(ROCALUTION_FOUND)
|
if(ROCALUTION_FOUND)
|
||||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocalutionSolverBackend.cpp)
|
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocalutionSolverBackend.cpp)
|
||||||
endif()
|
endif()
|
||||||
if(CUDA_FOUND OR OPENCL_FOUND OR HAVE_FPGA OR amgcl_FOUND OR ROCALUTION_FOUND)
|
if(COMPILE_BDA_BRIDGE)
|
||||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
|
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
|
||||||
endif()
|
endif()
|
||||||
if(amgcl_FOUND)
|
if(amgcl_FOUND)
|
||||||
|
@@ -5,6 +5,7 @@ set (opm-simulators_CONFIG_VAR
|
|||||||
HAVE_EWOMS
|
HAVE_EWOMS
|
||||||
HAVE_MPI
|
HAVE_MPI
|
||||||
HAVE_PETSC
|
HAVE_PETSC
|
||||||
|
COMPILE_BDA_BRIDGE
|
||||||
HAVE_CUDA
|
HAVE_CUDA
|
||||||
HAVE_OPENCL
|
HAVE_OPENCL
|
||||||
HAVE_OPENCL_HPP
|
HAVE_OPENCL_HPP
|
||||||
|
@@ -31,7 +31,7 @@
|
|||||||
#include <opm/simulators/linalg/ParallelIstlInformation.hpp>
|
#include <opm/simulators/linalg/ParallelIstlInformation.hpp>
|
||||||
#include <opm/simulators/utils/ParallelCommunication.hpp>
|
#include <opm/simulators/utils/ParallelCommunication.hpp>
|
||||||
|
|
||||||
#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
|
#if COMPILE_BDA_BRIDGE
|
||||||
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
|
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
|
||||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||||
#endif
|
#endif
|
||||||
@@ -175,7 +175,7 @@ void FlexibleSolverInfo<Matrix,Vector,Comm>::create(const Matrix& matrix,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
|
#if COMPILE_BDA_BRIDGE
|
||||||
template<class Matrix, class Vector>
|
template<class Matrix, class Vector>
|
||||||
BdaSolverInfo<Matrix,Vector>::
|
BdaSolverInfo<Matrix,Vector>::
|
||||||
BdaSolverInfo(const std::string& accelerator_mode,
|
BdaSolverInfo(const std::string& accelerator_mode,
|
||||||
@@ -329,7 +329,7 @@ copyMatToBlockJac(const Matrix& mat, Matrix& blockJac)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif // COMPILE_BDA_BRIDGE
|
||||||
|
|
||||||
template<int Dim>
|
template<int Dim>
|
||||||
using BM = Dune::BCRSMatrix<MatrixBlock<double,Dim,Dim>>;
|
using BM = Dune::BCRSMatrix<MatrixBlock<double,Dim,Dim>>;
|
||||||
@@ -346,7 +346,7 @@ using CommunicationType = Dune::CollectiveCommunication<int>;
|
|||||||
template void makeOverlapRowsInvalid<BM<Dim>>(BM<Dim>&, const std::vector<int>&); \
|
template void makeOverlapRowsInvalid<BM<Dim>>(BM<Dim>&, const std::vector<int>&); \
|
||||||
template struct FlexibleSolverInfo<BM<Dim>,BV<Dim>,CommunicationType>;
|
template struct FlexibleSolverInfo<BM<Dim>,BV<Dim>,CommunicationType>;
|
||||||
|
|
||||||
#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
|
#if COMPILE_BDA_BRIDGE
|
||||||
|
|
||||||
#define INSTANCE_GRID(Dim, Grid) \
|
#define INSTANCE_GRID(Dim, Grid) \
|
||||||
template void BdaSolverInfo<BM<Dim>,BV<Dim>>:: \
|
template void BdaSolverInfo<BM<Dim>,BV<Dim>>:: \
|
||||||
@@ -376,7 +376,7 @@ using CommunicationType = Dune::CollectiveCommunication<int>;
|
|||||||
#else
|
#else
|
||||||
#define INSTANCE(Dim) \
|
#define INSTANCE(Dim) \
|
||||||
INSTANCE_FLEX(Dim)
|
INSTANCE_FLEX(Dim)
|
||||||
#endif
|
#endif // COMPILE_BDA_BRIDGE
|
||||||
|
|
||||||
INSTANCE(1)
|
INSTANCE(1)
|
||||||
INSTANCE(2)
|
INSTANCE(2)
|
||||||
|
@@ -84,7 +84,7 @@ public:
|
|||||||
namespace Opm
|
namespace Opm
|
||||||
{
|
{
|
||||||
|
|
||||||
#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
|
#if COMPILE_BDA_BRIDGE
|
||||||
template<class Matrix, class Vector, int block_size> class BdaBridge;
|
template<class Matrix, class Vector, int block_size> class BdaBridge;
|
||||||
class WellContributions;
|
class WellContributions;
|
||||||
#endif
|
#endif
|
||||||
@@ -113,7 +113,7 @@ struct FlexibleSolverInfo
|
|||||||
size_t interiorCellNum_ = 0;
|
size_t interiorCellNum_ = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
|
#if COMPILE_BDA_BRIDGE
|
||||||
template<class Matrix, class Vector>
|
template<class Matrix, class Vector>
|
||||||
struct BdaSolverInfo
|
struct BdaSolverInfo
|
||||||
{
|
{
|
||||||
@@ -245,7 +245,7 @@ std::unique_ptr<Matrix> blockJacobiAdjacency(const Grid& grid,
|
|||||||
EWOMS_PARAM_IS_SET(TypeTag, int, LinearSolverMaxIter),
|
EWOMS_PARAM_IS_SET(TypeTag, int, LinearSolverMaxIter),
|
||||||
EWOMS_PARAM_IS_SET(TypeTag, int, CprMaxEllIter));
|
EWOMS_PARAM_IS_SET(TypeTag, int, CprMaxEllIter));
|
||||||
|
|
||||||
#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
|
#if COMPILE_BDA_BRIDGE
|
||||||
{
|
{
|
||||||
std::string accelerator_mode = EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode);
|
std::string accelerator_mode = EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode);
|
||||||
if ((simulator_.vanguard().grid().comm().size() > 1) && (accelerator_mode != "none")) {
|
if ((simulator_.vanguard().grid().comm().size() > 1) && (accelerator_mode != "none")) {
|
||||||
@@ -383,7 +383,7 @@ std::unique_ptr<Matrix> blockJacobiAdjacency(const Grid& grid,
|
|||||||
// Solve system.
|
// Solve system.
|
||||||
Dune::InverseOperatorResult result;
|
Dune::InverseOperatorResult result;
|
||||||
|
|
||||||
#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
|
#if COMPILE_BDA_BRIDGE
|
||||||
std::function<void(WellContributions&)> getContribs =
|
std::function<void(WellContributions&)> getContribs =
|
||||||
[this](WellContributions& w)
|
[this](WellContributions& w)
|
||||||
{
|
{
|
||||||
@@ -555,7 +555,7 @@ std::unique_ptr<Matrix> blockJacobiAdjacency(const Grid& grid,
|
|||||||
Matrix* matrix_;
|
Matrix* matrix_;
|
||||||
Vector *rhs_;
|
Vector *rhs_;
|
||||||
|
|
||||||
#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
|
#if COMPILE_BDA_BRIDGE
|
||||||
std::unique_ptr<detail::BdaSolverInfo<Matrix, Vector>> bdaBridge;
|
std::unique_ptr<detail::BdaSolverInfo<Matrix, Vector>> bdaBridge;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@@ -52,7 +52,31 @@ const cusparseDirection_t order = CUSPARSE_DIRECTION_ROW;
|
|||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
cusparseSolverBackend<block_size>::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int deviceID_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, deviceID_) {}
|
cusparseSolverBackend<block_size>::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int deviceID_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, deviceID_) {
|
||||||
|
|
||||||
|
// initialize CUDA device, stream and libraries
|
||||||
|
cudaSetDevice(deviceID);
|
||||||
|
cudaCheckLastError("Could not get device");
|
||||||
|
struct cudaDeviceProp props;
|
||||||
|
cudaGetDeviceProperties(&props, deviceID);
|
||||||
|
cudaCheckLastError("Could not get device properties");
|
||||||
|
std::ostringstream out;
|
||||||
|
out << "Name GPU: " << props.name << ", Compute Capability: " << props.major << "." << props.minor;
|
||||||
|
OpmLog::info(out.str());
|
||||||
|
|
||||||
|
cudaStreamCreate(&stream);
|
||||||
|
cudaCheckLastError("Could not create stream");
|
||||||
|
|
||||||
|
cublasCreate(&cublasHandle);
|
||||||
|
cudaCheckLastError("Could not create cublasHandle");
|
||||||
|
cusparseCreate(&cusparseHandle);
|
||||||
|
cudaCheckLastError("Could not create cusparseHandle");
|
||||||
|
|
||||||
|
cublasSetStream(cublasHandle, stream);
|
||||||
|
cudaCheckLastError("Could not set stream to cublas");
|
||||||
|
cusparseSetStream(cusparseHandle, stream);
|
||||||
|
cudaCheckLastError("Could not set stream to cusparse");
|
||||||
|
}
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
cusparseSolverBackend<block_size>::~cusparseSolverBackend() {
|
cusparseSolverBackend<block_size>::~cusparseSolverBackend() {
|
||||||
@@ -209,25 +233,6 @@ void cusparseSolverBackend<block_size>::initialize(std::shared_ptr<BlockedMatrix
|
|||||||
out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n";
|
out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
|
|
||||||
cudaSetDevice(deviceID);
|
|
||||||
cudaCheckLastError("Could not get device");
|
|
||||||
struct cudaDeviceProp props;
|
|
||||||
cudaGetDeviceProperties(&props, deviceID);
|
|
||||||
cudaCheckLastError("Could not get device properties");
|
|
||||||
out.str("");
|
|
||||||
out.clear();
|
|
||||||
out << "Name GPU: " << props.name << ", Compute Capability: " << props.major << "." << props.minor;
|
|
||||||
OpmLog::info(out.str());
|
|
||||||
|
|
||||||
cudaStreamCreate(&stream);
|
|
||||||
cudaCheckLastError("Could not create stream");
|
|
||||||
|
|
||||||
cublasCreate(&cublasHandle);
|
|
||||||
cudaCheckLastError("Could not create cublasHandle");
|
|
||||||
|
|
||||||
cusparseCreate(&cusparseHandle);
|
|
||||||
cudaCheckLastError("Could not create cusparseHandle");
|
|
||||||
|
|
||||||
cudaMalloc((void**)&d_x, sizeof(double) * N);
|
cudaMalloc((void**)&d_x, sizeof(double) * N);
|
||||||
cudaMalloc((void**)&d_b, sizeof(double) * N);
|
cudaMalloc((void**)&d_b, sizeof(double) * N);
|
||||||
cudaMalloc((void**)&d_r, sizeof(double) * N);
|
cudaMalloc((void**)&d_r, sizeof(double) * N);
|
||||||
@@ -251,11 +256,6 @@ void cusparseSolverBackend<block_size>::initialize(std::shared_ptr<BlockedMatrix
|
|||||||
}
|
}
|
||||||
cudaCheckLastError("Could not allocate enough memory on GPU");
|
cudaCheckLastError("Could not allocate enough memory on GPU");
|
||||||
|
|
||||||
cublasSetStream(cublasHandle, stream);
|
|
||||||
cudaCheckLastError("Could not set stream to cublas");
|
|
||||||
cusparseSetStream(cusparseHandle, stream);
|
|
||||||
cudaCheckLastError("Could not set stream to cusparse");
|
|
||||||
|
|
||||||
#if COPY_ROW_BY_ROW
|
#if COPY_ROW_BY_ROW
|
||||||
cudaMallocHost((void**)&vals_contiguous, sizeof(double) * nnz);
|
cudaMallocHost((void**)&vals_contiguous, sizeof(double) * nnz);
|
||||||
cudaCheckLastError("Could not allocate pinned memory");
|
cudaCheckLastError("Could not allocate pinned memory");
|
||||||
|
@@ -52,6 +52,11 @@ rocalutionSolverBackend<block_size>::rocalutionSolverBackend(int verbosity_, int
|
|||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
rocalutionSolverBackend<block_size>::~rocalutionSolverBackend() {
|
rocalutionSolverBackend<block_size>::~rocalutionSolverBackend() {
|
||||||
|
// normally, these rocalution variables are destroyed after the destructor automatically,
|
||||||
|
// but sometimes it segfaults, both with test_rocalutionSolver and with an actual case
|
||||||
|
// release both variables here to prevent that segfault
|
||||||
|
roc_prec.release();
|
||||||
|
roc_solver.release();
|
||||||
rocalution::stop_rocalution();
|
rocalution::stop_rocalution();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -60,7 +60,6 @@ private:
|
|||||||
int *tmp_colindices; // store matrix on host, this pointer is given to and freed by rocalution
|
int *tmp_colindices; // store matrix on host, this pointer is given to and freed by rocalution
|
||||||
double *tmp_nnzvalues; // store matrix on host, this pointer is given to and freed by rocalution
|
double *tmp_nnzvalues; // store matrix on host, this pointer is given to and freed by rocalution
|
||||||
|
|
||||||
// must be declared in this order, to prevent a segfault during the test
|
|
||||||
std::unique_ptr<rocalution::ILU<rocalution::LocalMatrix<double>, rocalution::LocalVector<double>, double> > roc_prec;
|
std::unique_ptr<rocalution::ILU<rocalution::LocalMatrix<double>, rocalution::LocalVector<double>, double> > roc_prec;
|
||||||
std::unique_ptr<rocalution::BiCGStab<rocalution::LocalMatrix<double>, rocalution::LocalVector<double>, double> > roc_solver;
|
std::unique_ptr<rocalution::BiCGStab<rocalution::LocalMatrix<double>, rocalution::LocalVector<double>, double> > roc_solver;
|
||||||
|
|
||||||
|
@@ -87,24 +87,18 @@ getDuneSolution(Matrix<bz>& matrix, Vector<bz>& rhs)
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <int bz>
|
template <int bz>
|
||||||
Dune::BlockVector<Dune::FieldVector<double, bz>>
|
void
|
||||||
testCusparseSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix, Vector<bz>& rhs)
|
createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >& bridge)
|
||||||
{
|
{
|
||||||
|
|
||||||
const int linear_solver_verbosity = prm.get<int>("verbosity");
|
const int linear_solver_verbosity = prm.get<int>("verbosity");
|
||||||
const int maxit = prm.get<int>("maxiter");
|
const int maxit = prm.get<int>("maxiter");
|
||||||
const double tolerance = prm.get<double>("tol");
|
const double tolerance = prm.get<double>("tol");
|
||||||
const bool opencl_ilu_parallel(true); // unused
|
const bool opencl_ilu_parallel(true);
|
||||||
const int platformID = 0; // unused
|
const int platformID = 0;
|
||||||
const int deviceID = 0;
|
const int deviceID = 0;
|
||||||
const std::string accelerator_mode("cusparse");
|
const std::string accelerator_mode("cusparse");
|
||||||
const std::string linsolver("ilu0");
|
const std::string linsolver("ilu0");
|
||||||
Dune::InverseOperatorResult result;
|
|
||||||
|
|
||||||
Vector<bz> x(rhs.size());
|
|
||||||
|
|
||||||
auto wellContribs = Opm::WellContributions::create("cusparse", false);
|
|
||||||
std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
|
|
||||||
try {
|
try {
|
||||||
bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
|
bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
|
||||||
linear_solver_verbosity,
|
linear_solver_verbosity,
|
||||||
@@ -114,12 +108,6 @@ testCusparseSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix, V
|
|||||||
deviceID,
|
deviceID,
|
||||||
opencl_ilu_parallel,
|
opencl_ilu_parallel,
|
||||||
linsolver);
|
linsolver);
|
||||||
auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory
|
|
||||||
// matrix created by readMatrixMarket() did not have contiguous memory
|
|
||||||
bridge->solve_system(&mat2, &mat2, /*numJacobiBlocks=*/0, rhs, *wellContribs, result);
|
|
||||||
bridge->get_result(x);
|
|
||||||
|
|
||||||
return x;
|
|
||||||
} catch (const std::logic_error& error) {
|
} catch (const std::logic_error& error) {
|
||||||
BOOST_WARN_MESSAGE(true, error.what());
|
BOOST_WARN_MESSAGE(true, error.what());
|
||||||
if (strstr(error.what(), "Could not get device") != nullptr)
|
if (strstr(error.what(), "Could not get device") != nullptr)
|
||||||
@@ -129,6 +117,38 @@ testCusparseSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix, V
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int bz>
|
||||||
|
Dune::BlockVector<Dune::FieldVector<double, bz>>
|
||||||
|
testCusparseSolver(Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
|
||||||
|
{
|
||||||
|
Dune::InverseOperatorResult result;
|
||||||
|
Vector<bz> x(rhs.size());
|
||||||
|
auto wellContribs = Opm::WellContributions::create("cusparse", false);
|
||||||
|
auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory
|
||||||
|
// matrix created by readMatrixMarket() did not have contiguous memory
|
||||||
|
bridge.solve_system(&mat2, &mat2, /*numJacobiBlocks=*/0, rhs, *wellContribs, result);
|
||||||
|
bridge.get_result(x);
|
||||||
|
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int bz>
|
||||||
|
Dune::BlockVector<Dune::FieldVector<double, bz>>
|
||||||
|
testCusparseSolverJacobi(Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
|
||||||
|
{
|
||||||
|
Dune::InverseOperatorResult result;
|
||||||
|
Vector<bz> x(rhs.size());
|
||||||
|
auto wellContribs = Opm::WellContributions::create("cusparse", false);
|
||||||
|
auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory
|
||||||
|
// matrix created by readMatrixMarket() did not have contiguous memory
|
||||||
|
auto mat3 = matrix; // another deep copy, to make sure Jacobi matrix memory is different
|
||||||
|
// the sparsity pattern and values are actually the same
|
||||||
|
bridge.solve_system(&mat2, &mat3, /*numJacobiBlocks=*/2, rhs, *wellContribs, result);
|
||||||
|
bridge.get_result(x);
|
||||||
|
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
namespace pt = boost::property_tree;
|
namespace pt = boost::property_tree;
|
||||||
|
|
||||||
void test3(const pt::ptree& prm)
|
void test3(const pt::ptree& prm)
|
||||||
@@ -136,17 +156,31 @@ void test3(const pt::ptree& prm)
|
|||||||
const int bz = 3;
|
const int bz = 3;
|
||||||
Matrix<bz> matrix;
|
Matrix<bz> matrix;
|
||||||
Vector<bz> rhs;
|
Vector<bz> rhs;
|
||||||
|
std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
|
||||||
readLinearSystem("matr33.txt", "rhs3.txt", matrix, rhs);
|
readLinearSystem("matr33.txt", "rhs3.txt", matrix, rhs);
|
||||||
Vector<bz> rhs2 = rhs; // deep copy, getDuneSolution() changes values in rhs vector
|
Vector<bz> rhs2 = rhs; // deep copy, getDuneSolution() changes values in rhs vector
|
||||||
auto duneSolution = getDuneSolution<bz>(matrix, rhs);
|
auto duneSolution = getDuneSolution<bz>(matrix, rhs);
|
||||||
auto sol = testCusparseSolver<bz>(prm, matrix, rhs2);
|
|
||||||
|
|
||||||
|
createBridge(prm, bridge); // create bridge with openclSolver
|
||||||
|
// should create bridge only once
|
||||||
|
|
||||||
|
// test cusparseSolver without Jacobi matrix
|
||||||
|
auto sol = testCusparseSolver<bz>(*bridge, matrix, rhs2);
|
||||||
BOOST_REQUIRE_EQUAL(sol.size(), duneSolution.size());
|
BOOST_REQUIRE_EQUAL(sol.size(), duneSolution.size());
|
||||||
for (size_t i = 0; i < sol.size(); ++i) {
|
for (size_t i = 0; i < sol.size(); ++i) {
|
||||||
for (int row = 0; row < bz; ++row) {
|
for (int row = 0; row < bz; ++row) {
|
||||||
BOOST_CHECK_CLOSE(sol[i][row], duneSolution[i][row], 1e-3);
|
BOOST_CHECK_CLOSE(sol[i][row], duneSolution[i][row], 1e-3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// test cusparseSolver with Jacobi matrix
|
||||||
|
auto solJacobi = testCusparseSolverJacobi<bz>(*bridge, matrix, rhs2);
|
||||||
|
BOOST_REQUIRE_EQUAL(solJacobi.size(), duneSolution.size());
|
||||||
|
for (size_t i = 0; i < solJacobi.size(); ++i) {
|
||||||
|
for (int row = 0; row < bz; ++row) {
|
||||||
|
BOOST_CHECK_CLOSE(solJacobi[i][row], duneSolution[i][row], 1e-3);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -116,22 +116,22 @@ createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::BdaBri
|
|||||||
|
|
||||||
template <int bz>
|
template <int bz>
|
||||||
Dune::BlockVector<Dune::FieldVector<double, bz>>
|
Dune::BlockVector<Dune::FieldVector<double, bz>>
|
||||||
testOpenclSolver(std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
|
testOpenclSolver(Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
|
||||||
{
|
{
|
||||||
Dune::InverseOperatorResult result;
|
Dune::InverseOperatorResult result;
|
||||||
Vector<bz> x(rhs.size());
|
Vector<bz> x(rhs.size());
|
||||||
auto wellContribs = Opm::WellContributions::create("opencl", false);
|
auto wellContribs = Opm::WellContributions::create("opencl", false);
|
||||||
auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory
|
auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory
|
||||||
// matrix created by readMatrixMarket() did not have contiguous memory
|
// matrix created by readMatrixMarket() did not have contiguous memory
|
||||||
bridge->solve_system(&mat2, &mat2, /*numJacobiBlocks=*/0, rhs, *wellContribs, result);
|
bridge.solve_system(&mat2, &mat2, /*numJacobiBlocks=*/0, rhs, *wellContribs, result);
|
||||||
bridge->get_result(x);
|
bridge.get_result(x);
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int bz>
|
template <int bz>
|
||||||
Dune::BlockVector<Dune::FieldVector<double, bz>>
|
Dune::BlockVector<Dune::FieldVector<double, bz>>
|
||||||
testOpenclSolverJacobi(std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
|
testOpenclSolverJacobi(Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz>& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
|
||||||
{
|
{
|
||||||
Dune::InverseOperatorResult result;
|
Dune::InverseOperatorResult result;
|
||||||
Vector<bz> x(rhs.size());
|
Vector<bz> x(rhs.size());
|
||||||
@@ -140,8 +140,8 @@ testOpenclSolverJacobi(std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz
|
|||||||
// matrix created by readMatrixMarket() did not have contiguous memory
|
// matrix created by readMatrixMarket() did not have contiguous memory
|
||||||
auto mat3 = matrix; // another deep copy, to make sure Jacobi matrix memory is different
|
auto mat3 = matrix; // another deep copy, to make sure Jacobi matrix memory is different
|
||||||
// the sparsity pattern and values are actually the same
|
// the sparsity pattern and values are actually the same
|
||||||
bridge->solve_system(&mat2, &mat3, /*numJacobiBlocks=*/2, rhs, *wellContribs, result);
|
bridge.solve_system(&mat2, &mat3, /*numJacobiBlocks=*/2, rhs, *wellContribs, result);
|
||||||
bridge->get_result(x);
|
bridge.get_result(x);
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
@@ -162,7 +162,7 @@ void test3(const pt::ptree& prm)
|
|||||||
// should create bridge only once
|
// should create bridge only once
|
||||||
|
|
||||||
// test openclSolver without Jacobi matrix
|
// test openclSolver without Jacobi matrix
|
||||||
auto sol = testOpenclSolver<bz>(bridge, matrix, rhs2);
|
auto sol = testOpenclSolver<bz>(*bridge, matrix, rhs2);
|
||||||
BOOST_REQUIRE_EQUAL(sol.size(), duneSolution.size());
|
BOOST_REQUIRE_EQUAL(sol.size(), duneSolution.size());
|
||||||
for (size_t i = 0; i < sol.size(); ++i) {
|
for (size_t i = 0; i < sol.size(); ++i) {
|
||||||
for (int row = 0; row < bz; ++row) {
|
for (int row = 0; row < bz; ++row) {
|
||||||
@@ -171,7 +171,7 @@ void test3(const pt::ptree& prm)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// test openclSolver with Jacobi matrix
|
// test openclSolver with Jacobi matrix
|
||||||
auto solJacobi = testOpenclSolverJacobi<bz>(bridge, matrix, rhs2);
|
auto solJacobi = testOpenclSolverJacobi<bz>(*bridge, matrix, rhs2);
|
||||||
BOOST_REQUIRE_EQUAL(solJacobi.size(), duneSolution.size());
|
BOOST_REQUIRE_EQUAL(solJacobi.size(), duneSolution.size());
|
||||||
for (size_t i = 0; i < solJacobi.size(); ++i) {
|
for (size_t i = 0; i < solJacobi.size(); ++i) {
|
||||||
for (int row = 0; row < bz; ++row) {
|
for (int row = 0; row < bz; ++row) {
|
||||||
|
Reference in New Issue
Block a user