Changed structure of cusparseSolver to fit into master branch

This commit is contained in:
T.D. (Tongdong) Qiu 2019-12-04 16:59:58 +01:00
parent 4db2e7ca4e
commit 69033ca7f2
9 changed files with 49 additions and 48 deletions

View File

@ -297,5 +297,7 @@ endif()
if(CUDA_FOUND)
target_link_libraries( flow ${CUDA_cublas_LIBRARY} )
target_link_libraries( flow ${CUDA_cusparse_LIBRARY} )
target_link_libraries( ebos ${CUDA_cublas_LIBRARY} )
target_link_libraries( ebos ${CUDA_cusparse_LIBRARY} )
endif()

View File

@ -29,6 +29,7 @@ list (APPEND MAIN_SOURCE_FILES
opm/simulators/flow/MissingFeatures.cpp
opm/simulators/linalg/ExtractParallelGridInformationToISTL.cpp
opm/simulators/linalg/setupPropertyTree.cpp
opm/simulators/linalg/bda/BdaBridge.cpp
opm/simulators/timestepping/TimeStepControl.cpp
opm/simulators/timestepping/AdaptiveSimulatorTimer.cpp
opm/simulators/timestepping/SimulatorTimer.cpp
@ -42,7 +43,7 @@ list (APPEND MAIN_SOURCE_FILES
)
if(CUDA_FOUND)
list (APPEND MAIN_SOURCE_FILES opm/bda/cusparseSolverBackend.cu)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cusparseSolverBackend.cu)
endif()
# originally generated with the command:

View File

@ -4,6 +4,7 @@
Copyright 2015 Dr. Blatt - HPC-Simulation-Software & Services
Copyright 2015 NTNU
Copyright 2015 Statoil AS
Copyright 2019 Big Data Accelerate
This file is part of the Open Porous Media project (OPM).
@ -67,6 +68,7 @@ NEW_PROP_TAG(CprMaxEllIter);
NEW_PROP_TAG(CprEllSolvetype);
NEW_PROP_TAG(CprReuseSetup);
NEW_PROP_TAG(LinearSolverConfigurationJsonFile);
NEW_PROP_TAG(UseGpu);
SET_SCALAR_PROP(FlowIstlSolverParams, LinearSolverReduction, 1e-2);
SET_SCALAR_PROP(FlowIstlSolverParams, IluRelaxation, 0.9);
@ -92,6 +94,7 @@ SET_INT_PROP(FlowIstlSolverParams, CprMaxEllIter, 20);
SET_INT_PROP(FlowIstlSolverParams, CprEllSolvetype, 0);
SET_INT_PROP(FlowIstlSolverParams, CprReuseSetup, 0);
SET_STRING_PROP(FlowIstlSolverParams, LinearSolverConfigurationJsonFile, "none");
SET_BOOL_PROP(FlowIstlSolverParams, UseGpu, false);
@ -163,6 +166,7 @@ namespace Opm
std::string system_strategy_;
bool scale_linear_system_;
std::string linear_solver_configuration_json_file_;
bool use_gpu_;
template <class TypeTag>
void init()
@ -190,6 +194,7 @@ namespace Opm
cpr_ell_solvetype_ = EWOMS_GET_PARAM(TypeTag, int, CprEllSolvetype);
cpr_reuse_setup_ = EWOMS_GET_PARAM(TypeTag, int, CprReuseSetup);
linear_solver_configuration_json_file_ = EWOMS_GET_PARAM(TypeTag, std::string, LinearSolverConfigurationJsonFile);
use_gpu_ = EWOMS_GET_PARAM(TypeTag, bool, UseGpu);
}
template <class TypeTag>
@ -217,6 +222,7 @@ namespace Opm
EWOMS_REGISTER_PARAM(TypeTag, int, CprEllSolvetype, "Solver type of elliptic pressure solve (0: bicgstab, 1: cg, 2: only amg preconditioner)");
EWOMS_REGISTER_PARAM(TypeTag, int, CprReuseSetup, "Reuse Amg Setup");
EWOMS_REGISTER_PARAM(TypeTag, std::string, LinearSolverConfigurationJsonFile, "Filename of JSON configuration for flexible linear solver system.");
EWOMS_REGISTER_PARAM(TypeTag, bool, UseGpu, "Use GPU cusparseSolver as the linear solver");
}
FlowLinearSolverParameters() { reset(); }
@ -238,6 +244,7 @@ namespace Opm
ilu_milu_ = MILU_VARIANT::ILU;
ilu_redblack_ = false;
ilu_reorder_sphere_ = true;
use_gpu_ = false;
}
};

View File

@ -46,6 +46,8 @@
#include <opm/common/utility/platform_dependent/reenable_warnings.h>
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
BEGIN_PROPERTIES
NEW_TYPE_TAG(FlowIstlSolver, INHERITS_FROM(FlowIstlSolverParams));
@ -223,13 +225,10 @@ protected:
enum { pressureVarIndex = Indices::pressureSwitchIdx };
static const int numEq = Indices::numEq;
<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp
=======
#if HAVE_CUDA
BdaBridge *bdaBridge;
#endif
>>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp
public:
typedef Dune::AssembledLinearOperator< Matrix, Vector, Vector > AssembledLinearOperatorType;
@ -247,15 +246,13 @@ protected:
converged_(false)
{
parameters_.template init<TypeTag>();
<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp
=======
#if HAVE_CUDA
const bool use_gpu = EWOMS_GET_PARAM(TypeTag, bool, UseGpu);
const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter);
const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
const bool matrix_add_well_contributions = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions);
if(use_gpu && !matrix_add_well_contributions){
std::cerr << "Error cannot use GPU solver if command line parameter --matrix-add-well-contributions is false, due to the changing sparsity pattern" << std::endl;
std::cerr << "Error cannot use GPU solver if command line parameter --matrix-add-well-contributions is false, because the GPU solver performs a standard bicgstab" << std::endl;
exit(1);
}
bdaBridge = new BdaBridge(use_gpu, maxit, tolerance);
@ -266,20 +263,16 @@ protected:
exit(1);
}
#endif
>>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp
extractParallelGridInformationToISTL(simulator_.vanguard().grid(), parallelInformation_);
detail::findOverlapRowsAndColumns(simulator_.vanguard().grid(),overlapRowAndColumns_);
}
<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp
=======
~ISTLSolverEbos(){
#if HAVE_CUDA
delete bdaBridge;
#endif
}
>>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp
// nothing to clean here
void eraseMatrix() {
matrix_for_preconditioner_.reset();
@ -472,10 +465,6 @@ protected:
else
#endif
{
<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp
// Construct preconditioner.
auto precond = constructPrecond(linearOperator, parallelInformation_arg);
=======
// tries to solve linear system
// solve_system() does nothing if Dune is selected
#if HAVE_CUDA
@ -490,13 +479,12 @@ protected:
solve(linearOperator, x, istlb, *sp, *precond, result);
} // end Dune call
#else
// Construct preconditioner.
auto precond = constructPrecond(linearOperator, parallelInformation_arg);
solve(linearOperator, x, istlb, *sp, *precond, result);
#endif
>>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp
// Solve.
solve(linearOperator, x, istlb, *sp, *precond, result);
#endif
}
}

View File

@ -20,8 +20,8 @@
#include <config.h>
#include <memory>
#include <opm/bda/BdaBridge.hpp>
#include <opm/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#define PRINT_TIMERS_BRIDGE_BRIDGE 0
@ -210,25 +210,24 @@ void BdaBridge::get_result(BridgeVector &x){
#endif
}
#if HAVE_CUDA
template void BdaBridge::solve_system< \
Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 2, 2>, std::allocator<Ewoms::MatrixBlock<double, 2, 2> > > , \
Dune::BCRSMatrix<Opm::MatrixBlock<double, 2, 2>, std::allocator<Opm::MatrixBlock<double, 2, 2> > > , \
Dune::BlockVector<Dune::FieldVector<double, 2>, std::allocator<Dune::FieldVector<double, 2> > > > \
(Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 2, 2>, std::allocator<Ewoms::MatrixBlock<double, 2, 2> > > *mat, \
(Dune::BCRSMatrix<Opm::MatrixBlock<double, 2, 2>, std::allocator<Opm::MatrixBlock<double, 2, 2> > > *mat, \
Dune::BlockVector<Dune::FieldVector<double, 2>, std::allocator<Dune::FieldVector<double, 2> > > &b, \
InverseOperatorResult &res);
template void BdaBridge::solve_system< \
Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 3, 3>, std::allocator<Ewoms::MatrixBlock<double, 3, 3> > > , \
Dune::BCRSMatrix<Opm::MatrixBlock<double, 3, 3>, std::allocator<Opm::MatrixBlock<double, 3, 3> > > , \
Dune::BlockVector<Dune::FieldVector<double, 3>, std::allocator<Dune::FieldVector<double, 3> > > > \
(Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 3, 3>, std::allocator<Ewoms::MatrixBlock<double, 3, 3> > > *mat, \
(Dune::BCRSMatrix<Opm::MatrixBlock<double, 3, 3>, std::allocator<Opm::MatrixBlock<double, 3, 3> > > *mat, \
Dune::BlockVector<Dune::FieldVector<double, 3>, std::allocator<Dune::FieldVector<double, 3> > > &b, \
InverseOperatorResult &res);
template void BdaBridge::solve_system< \
Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 4, 4>, std::allocator<Ewoms::MatrixBlock<double, 4, 4> > > , \
Dune::BCRSMatrix<Opm::MatrixBlock<double, 4, 4>, std::allocator<Opm::MatrixBlock<double, 4, 4> > > , \
Dune::BlockVector<Dune::FieldVector<double, 4>, std::allocator<Dune::FieldVector<double, 4> > > > \
(Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 4, 4>, std::allocator<Ewoms::MatrixBlock<double, 4, 4> > > *mat, \
(Dune::BCRSMatrix<Opm::MatrixBlock<double, 4, 4>, std::allocator<Opm::MatrixBlock<double, 4, 4> > > *mat, \
Dune::BlockVector<Dune::FieldVector<double, 4>, std::allocator<Dune::FieldVector<double, 4> > > &b, \
InverseOperatorResult &res);
@ -244,7 +243,7 @@ Dune::BlockVector<Dune::FieldVector<double, 3>, std::allocator<Dune::FieldVector
template void BdaBridge::get_result< \
Dune::BlockVector<Dune::FieldVector<double, 4>, std::allocator<Dune::FieldVector<double, 4> > > > \
(Dune::BlockVector<Dune::FieldVector<double, 4>, std::allocator<Dune::FieldVector<double, 4> > > &x);
#endif
}

View File

@ -24,10 +24,10 @@
#include "dune/istl/solver.hh" // for struct InverseOperatorResult
#include "dune/istl/bcrsmatrix.hh"
#include <ewoms/linear/matrixblock.hh>
#include <opm/simulators/linalg/matrixblock.hh>
#if HAVE_CUDA
#include <opm/bda/cusparseSolverBackend.hpp>
#include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
#endif
namespace Opm

View File

@ -20,7 +20,7 @@
#ifndef CUDA_HEADER_H
#define CUDA_HEADER_H
#include <stdio.h>
#include <iostream>
typedef double Block[9];

View File

@ -27,9 +27,9 @@
#include <iostream>
#include <sys/time.h>
#include <opm/bda/cusparseSolverBackend.hpp>
#include <opm/bda/BdaResult.hpp>
#include <opm/bda/cuda_header.h>
#include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/cuda_header.h>
#include "cublas_v2.h"
#include "cusparse_v2.h"
@ -89,11 +89,11 @@ namespace Opm
printf("Tolerance: %.0e, nnzb: %d\n", tolerance, nnzb);
#endif
for (it = 0.5; it < maxit; it+=0.5){
for(it = 0.5; it < maxit; it+=0.5){
rhop = rho;
cublasDdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho);
if (it > 1){
if(it > 1){
beta = (rho/rhop) * (alpha/omega);
nomega = -omega;
cublasDaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1);
@ -116,15 +116,17 @@ namespace Opm
cublasDdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1);
alpha = rho / tmp1;
nalpha = -(alpha);
nalpha = -alpha;
cublasDaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1);
cublasDaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1);
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
if (norm < tolerance * norm_0 && it > minit){
if(norm < tolerance * norm_0 && it > minit){
break;
}
it += 0.5;
// apply ilu0
cusparseDbsrsv2_solve(cusparseHandle, order, \
operation, Nb, nnzb, &one, \
@ -141,18 +143,18 @@ namespace Opm
cublasDdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1);
cublasDdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2);
omega = tmp1 / tmp2;
nomega = -(omega);
nomega = -omega;
cublasDaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1);
cublasDaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1);
cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
if (norm < tolerance * norm_0 && it > minit){
if(norm < tolerance * norm_0 && it > minit){
break;
}
#if VERBOSE_BACKEND
if(i % 1 == 0){
if((int)it % 10 == 0){
printf("it: %.1f, norm: %.5e\n", it, norm);
}
#endif
@ -161,16 +163,17 @@ namespace Opm
t_total2 = second();
#if PRINT_TIMERS_BACKEND
printf("Total solve time: %.6f s\n", t_total2-t_total1);
#endif
#if VERBOSE_BACKEND
printf("Iterations: %.1f\n", it);
printf("Final norm: %.5e\n", norm);
#endif
res.iterations = std::min(it, (float)maxit);
res.reduction = norm/norm_0;
res.conv_rate = static_cast<double>(pow(res.reduction,1.0/it));
res.elapsed = t_total2-t_total1;
res.converged = (it != (maxit + 0.5));
#if VERBOSE_BACKEND
printf("Iterations: %.1f\n", it);
printf("Final norm: %.5e\n", norm);
printf("GPU converged: %d\n", res.converged);
#endif
return res.converged;
}
@ -395,6 +398,7 @@ namespace Opm
cusparseDbsrilu02(cusparseHandle, order, \
Nb, nnzb, descr_M, d_mVals, d_mRows, d_mCols, \
BLOCK_SIZE, info_M, policy, d_buffer);
int structural_zero;
cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
if(CUSPARSE_STATUS_ZERO_PIVOT == status){

View File

@ -24,7 +24,7 @@
#include "cublas_v2.h"
#include "cusparse_v2.h"
#include "opm/bda/BdaResult.hpp"
#include "opm/simulators/linalg/bda/BdaResult.hpp"
namespace Opm
{
@ -72,10 +72,10 @@ public:
void finalize();
void copy_system_to_gpu(double *vals, int *rows, int *cols, double *f);
void copy_system_to_gpu(double *vals, int *rows, int *cols, double *b);
// don't copy rowpointers and colindices, they stay the same
void update_system_on_gpu(double *vals, double *f);
void update_system_on_gpu(double *vals, double *b);
void reset_prec_on_gpu();