diff --git a/CMakeLists.txt b/CMakeLists.txt index b7200f297..08e57abcc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -297,5 +297,7 @@ endif() if(CUDA_FOUND) target_link_libraries( flow ${CUDA_cublas_LIBRARY} ) target_link_libraries( flow ${CUDA_cusparse_LIBRARY} ) + target_link_libraries( ebos ${CUDA_cublas_LIBRARY} ) + target_link_libraries( ebos ${CUDA_cusparse_LIBRARY} ) endif() diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 955e325da..2b711fb9d 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -29,6 +29,7 @@ list (APPEND MAIN_SOURCE_FILES opm/simulators/flow/MissingFeatures.cpp opm/simulators/linalg/ExtractParallelGridInformationToISTL.cpp opm/simulators/linalg/setupPropertyTree.cpp + opm/simulators/linalg/bda/BdaBridge.cpp opm/simulators/timestepping/TimeStepControl.cpp opm/simulators/timestepping/AdaptiveSimulatorTimer.cpp opm/simulators/timestepping/SimulatorTimer.cpp @@ -42,7 +43,7 @@ list (APPEND MAIN_SOURCE_FILES ) if(CUDA_FOUND) - list (APPEND MAIN_SOURCE_FILES opm/bda/cusparseSolverBackend.cu) + list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cusparseSolverBackend.cu) endif() # originally generated with the command: diff --git a/opm/simulators/linalg/FlowLinearSolverParameters.hpp b/opm/simulators/linalg/FlowLinearSolverParameters.hpp index 79f79d3a9..ab8efbce5 100644 --- a/opm/simulators/linalg/FlowLinearSolverParameters.hpp +++ b/opm/simulators/linalg/FlowLinearSolverParameters.hpp @@ -4,6 +4,7 @@ Copyright 2015 Dr. Blatt - HPC-Simulation-Software & Services Copyright 2015 NTNU Copyright 2015 Statoil AS + Copyright 2019 Big Data Accelerate This file is part of the Open Porous Media project (OPM). @@ -67,6 +68,7 @@ NEW_PROP_TAG(CprMaxEllIter); NEW_PROP_TAG(CprEllSolvetype); NEW_PROP_TAG(CprReuseSetup); NEW_PROP_TAG(LinearSolverConfigurationJsonFile); +NEW_PROP_TAG(UseGpu); SET_SCALAR_PROP(FlowIstlSolverParams, LinearSolverReduction, 1e-2); SET_SCALAR_PROP(FlowIstlSolverParams, IluRelaxation, 0.9); @@ -92,6 +94,7 @@ SET_INT_PROP(FlowIstlSolverParams, CprMaxEllIter, 20); SET_INT_PROP(FlowIstlSolverParams, CprEllSolvetype, 0); SET_INT_PROP(FlowIstlSolverParams, CprReuseSetup, 0); SET_STRING_PROP(FlowIstlSolverParams, LinearSolverConfigurationJsonFile, "none"); +SET_BOOL_PROP(FlowIstlSolverParams, UseGpu, false); @@ -163,6 +166,7 @@ namespace Opm std::string system_strategy_; bool scale_linear_system_; std::string linear_solver_configuration_json_file_; + bool use_gpu_; template void init() @@ -190,6 +194,7 @@ namespace Opm cpr_ell_solvetype_ = EWOMS_GET_PARAM(TypeTag, int, CprEllSolvetype); cpr_reuse_setup_ = EWOMS_GET_PARAM(TypeTag, int, CprReuseSetup); linear_solver_configuration_json_file_ = EWOMS_GET_PARAM(TypeTag, std::string, LinearSolverConfigurationJsonFile); + use_gpu_ = EWOMS_GET_PARAM(TypeTag, bool, UseGpu); } template @@ -217,6 +222,7 @@ namespace Opm EWOMS_REGISTER_PARAM(TypeTag, int, CprEllSolvetype, "Solver type of elliptic pressure solve (0: bicgstab, 1: cg, 2: only amg preconditioner)"); EWOMS_REGISTER_PARAM(TypeTag, int, CprReuseSetup, "Reuse Amg Setup"); EWOMS_REGISTER_PARAM(TypeTag, std::string, LinearSolverConfigurationJsonFile, "Filename of JSON configuration for flexible linear solver system."); + EWOMS_REGISTER_PARAM(TypeTag, bool, UseGpu, "Use GPU cusparseSolver as the linear solver"); } FlowLinearSolverParameters() { reset(); } @@ -238,6 +244,7 @@ namespace Opm ilu_milu_ = MILU_VARIANT::ILU; ilu_redblack_ = false; ilu_reorder_sphere_ = true; + use_gpu_ = false; } }; diff --git a/opm/simulators/linalg/ISTLSolverEbos.hpp b/opm/simulators/linalg/ISTLSolverEbos.hpp index ec038eb8d..1bbf039b1 100644 --- a/opm/simulators/linalg/ISTLSolverEbos.hpp +++ b/opm/simulators/linalg/ISTLSolverEbos.hpp @@ -46,6 +46,8 @@ #include +#include + BEGIN_PROPERTIES NEW_TYPE_TAG(FlowIstlSolver, INHERITS_FROM(FlowIstlSolverParams)); @@ -223,13 +225,10 @@ protected: enum { pressureVarIndex = Indices::pressureSwitchIdx }; static const int numEq = Indices::numEq; -<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp -======= #if HAVE_CUDA BdaBridge *bdaBridge; #endif ->>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp public: typedef Dune::AssembledLinearOperator< Matrix, Vector, Vector > AssembledLinearOperatorType; @@ -247,15 +246,13 @@ protected: converged_(false) { parameters_.template init(); -<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp -======= #if HAVE_CUDA const bool use_gpu = EWOMS_GET_PARAM(TypeTag, bool, UseGpu); const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter); const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction); const bool matrix_add_well_contributions = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions); if(use_gpu && !matrix_add_well_contributions){ - std::cerr << "Error cannot use GPU solver if command line parameter --matrix-add-well-contributions is false, due to the changing sparsity pattern" << std::endl; + std::cerr << "Error cannot use GPU solver if command line parameter --matrix-add-well-contributions is false, because the GPU solver performs a standard bicgstab" << std::endl; exit(1); } bdaBridge = new BdaBridge(use_gpu, maxit, tolerance); @@ -266,20 +263,16 @@ protected: exit(1); } #endif ->>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp extractParallelGridInformationToISTL(simulator_.vanguard().grid(), parallelInformation_); detail::findOverlapRowsAndColumns(simulator_.vanguard().grid(),overlapRowAndColumns_); } -<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp -======= ~ISTLSolverEbos(){ #if HAVE_CUDA delete bdaBridge; #endif } ->>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp // nothing to clean here void eraseMatrix() { matrix_for_preconditioner_.reset(); @@ -472,10 +465,6 @@ protected: else #endif { -<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp - // Construct preconditioner. - auto precond = constructPrecond(linearOperator, parallelInformation_arg); -======= // tries to solve linear system // solve_system() does nothing if Dune is selected #if HAVE_CUDA @@ -490,13 +479,12 @@ protected: solve(linearOperator, x, istlb, *sp, *precond, result); } // end Dune call #else + // Construct preconditioner. auto precond = constructPrecond(linearOperator, parallelInformation_arg); - solve(linearOperator, x, istlb, *sp, *precond, result); -#endif ->>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp // Solve. solve(linearOperator, x, istlb, *sp, *precond, result); +#endif } } diff --git a/opm/simulators/linalg/bda/BdaBridge.cpp b/opm/simulators/linalg/bda/BdaBridge.cpp index 893922e69..64b395a04 100644 --- a/opm/simulators/linalg/bda/BdaBridge.cpp +++ b/opm/simulators/linalg/bda/BdaBridge.cpp @@ -20,8 +20,8 @@ #include #include -#include -#include +#include +#include #define PRINT_TIMERS_BRIDGE_BRIDGE 0 @@ -210,25 +210,24 @@ void BdaBridge::get_result(BridgeVector &x){ #endif } -#if HAVE_CUDA template void BdaBridge::solve_system< \ -Dune::BCRSMatrix, std::allocator > > , \ +Dune::BCRSMatrix, std::allocator > > , \ Dune::BlockVector, std::allocator > > > \ -(Dune::BCRSMatrix, std::allocator > > *mat, \ +(Dune::BCRSMatrix, std::allocator > > *mat, \ Dune::BlockVector, std::allocator > > &b, \ InverseOperatorResult &res); template void BdaBridge::solve_system< \ -Dune::BCRSMatrix, std::allocator > > , \ +Dune::BCRSMatrix, std::allocator > > , \ Dune::BlockVector, std::allocator > > > \ -(Dune::BCRSMatrix, std::allocator > > *mat, \ +(Dune::BCRSMatrix, std::allocator > > *mat, \ Dune::BlockVector, std::allocator > > &b, \ InverseOperatorResult &res); template void BdaBridge::solve_system< \ -Dune::BCRSMatrix, std::allocator > > , \ +Dune::BCRSMatrix, std::allocator > > , \ Dune::BlockVector, std::allocator > > > \ -(Dune::BCRSMatrix, std::allocator > > *mat, \ +(Dune::BCRSMatrix, std::allocator > > *mat, \ Dune::BlockVector, std::allocator > > &b, \ InverseOperatorResult &res); @@ -244,7 +243,7 @@ Dune::BlockVector, std::allocator, std::allocator > > > \ (Dune::BlockVector, std::allocator > > &x); -#endif + } diff --git a/opm/simulators/linalg/bda/BdaBridge.hpp b/opm/simulators/linalg/bda/BdaBridge.hpp index d0259f20c..e593a6cec 100644 --- a/opm/simulators/linalg/bda/BdaBridge.hpp +++ b/opm/simulators/linalg/bda/BdaBridge.hpp @@ -24,10 +24,10 @@ #include "dune/istl/solver.hh" // for struct InverseOperatorResult #include "dune/istl/bcrsmatrix.hh" -#include +#include #if HAVE_CUDA -#include +#include #endif namespace Opm diff --git a/opm/simulators/linalg/bda/cuda_header.h b/opm/simulators/linalg/bda/cuda_header.h index 072c90eb4..81b8ad78c 100644 --- a/opm/simulators/linalg/bda/cuda_header.h +++ b/opm/simulators/linalg/bda/cuda_header.h @@ -20,7 +20,7 @@ #ifndef CUDA_HEADER_H #define CUDA_HEADER_H -#include +#include typedef double Block[9]; diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.cu b/opm/simulators/linalg/bda/cusparseSolverBackend.cu index decc9cec3..86b904cd4 100644 --- a/opm/simulators/linalg/bda/cusparseSolverBackend.cu +++ b/opm/simulators/linalg/bda/cusparseSolverBackend.cu @@ -27,9 +27,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include "cublas_v2.h" #include "cusparse_v2.h" @@ -89,11 +89,11 @@ namespace Opm printf("Tolerance: %.0e, nnzb: %d\n", tolerance, nnzb); #endif - for (it = 0.5; it < maxit; it+=0.5){ + for(it = 0.5; it < maxit; it+=0.5){ rhop = rho; cublasDdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho); - if (it > 1){ + if(it > 1){ beta = (rho/rhop) * (alpha/omega); nomega = -omega; cublasDaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1); @@ -116,15 +116,17 @@ namespace Opm cublasDdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1); alpha = rho / tmp1; - nalpha = -(alpha); + nalpha = -alpha; cublasDaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1); cublasDaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1); cublasDnrm2(cublasHandle, n, d_r, 1, &norm); - if (norm < tolerance * norm_0 && it > minit){ + if(norm < tolerance * norm_0 && it > minit){ break; } + it += 0.5; + // apply ilu0 cusparseDbsrsv2_solve(cusparseHandle, order, \ operation, Nb, nnzb, &one, \ @@ -141,18 +143,18 @@ namespace Opm cublasDdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1); cublasDdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2); omega = tmp1 / tmp2; - nomega = -(omega); + nomega = -omega; cublasDaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1); cublasDaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1); cublasDnrm2(cublasHandle, n, d_r, 1, &norm); - if (norm < tolerance * norm_0 && it > minit){ + if(norm < tolerance * norm_0 && it > minit){ break; } #if VERBOSE_BACKEND - if(i % 1 == 0){ + if((int)it % 10 == 0){ printf("it: %.1f, norm: %.5e\n", it, norm); } #endif @@ -161,16 +163,17 @@ namespace Opm t_total2 = second(); #if PRINT_TIMERS_BACKEND printf("Total solve time: %.6f s\n", t_total2-t_total1); -#endif -#if VERBOSE_BACKEND - printf("Iterations: %.1f\n", it); - printf("Final norm: %.5e\n", norm); #endif res.iterations = std::min(it, (float)maxit); res.reduction = norm/norm_0; res.conv_rate = static_cast(pow(res.reduction,1.0/it)); res.elapsed = t_total2-t_total1; res.converged = (it != (maxit + 0.5)); +#if VERBOSE_BACKEND + printf("Iterations: %.1f\n", it); + printf("Final norm: %.5e\n", norm); + printf("GPU converged: %d\n", res.converged); +#endif return res.converged; } @@ -395,6 +398,7 @@ namespace Opm cusparseDbsrilu02(cusparseHandle, order, \ Nb, nnzb, descr_M, d_mVals, d_mRows, d_mCols, \ BLOCK_SIZE, info_M, policy, d_buffer); + int structural_zero; cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero); if(CUSPARSE_STATUS_ZERO_PIVOT == status){ diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp index 29bcc041a..7f7b9a9ba 100644 --- a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp +++ b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp @@ -24,7 +24,7 @@ #include "cublas_v2.h" #include "cusparse_v2.h" -#include "opm/bda/BdaResult.hpp" +#include "opm/simulators/linalg/bda/BdaResult.hpp" namespace Opm { @@ -72,10 +72,10 @@ public: void finalize(); - void copy_system_to_gpu(double *vals, int *rows, int *cols, double *f); + void copy_system_to_gpu(double *vals, int *rows, int *cols, double *b); // don't copy rowpointers and colindices, they stay the same - void update_system_on_gpu(double *vals, double *f); + void update_system_on_gpu(double *vals, double *b); void reset_prec_on_gpu();