Changed structure of cusparseSolver to fit into master branch

2025-02-25 18:55:30 -06:00 · 2019-12-04 16:59:58 +01:00 · 2019-12-04 16:59:58 +01:00 · 69033ca7f2
commit 69033ca7f2
parent 4db2e7ca4e
9 changed files with 49 additions and 48 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -297,5 +297,7 @@ endif()
 if(CUDA_FOUND)
  target_link_libraries( flow ${CUDA_cublas_LIBRARY} )
  target_link_libraries( flow ${CUDA_cusparse_LIBRARY} )
+  target_link_libraries( ebos ${CUDA_cublas_LIBRARY} )
+  target_link_libraries( ebos ${CUDA_cusparse_LIBRARY} )
 endif()

--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@@ -29,6 +29,7 @@ list (APPEND MAIN_SOURCE_FILES
  opm/simulators/flow/MissingFeatures.cpp
  opm/simulators/linalg/ExtractParallelGridInformationToISTL.cpp
  opm/simulators/linalg/setupPropertyTree.cpp
+  opm/simulators/linalg/bda/BdaBridge.cpp
  opm/simulators/timestepping/TimeStepControl.cpp
  opm/simulators/timestepping/AdaptiveSimulatorTimer.cpp
  opm/simulators/timestepping/SimulatorTimer.cpp
@@ -42,7 +43,7 @@ list (APPEND MAIN_SOURCE_FILES
  )

 if(CUDA_FOUND)
-  list (APPEND MAIN_SOURCE_FILES opm/bda/cusparseSolverBackend.cu)
+  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cusparseSolverBackend.cu)
 endif()

 # originally generated with the command:
--- a/opm/simulators/linalg/FlowLinearSolverParameters.hpp
+++ b/opm/simulators/linalg/FlowLinearSolverParameters.hpp
@@ -4,6 +4,7 @@
  Copyright 2015 Dr. Blatt - HPC-Simulation-Software & Services
  Copyright 2015 NTNU
  Copyright 2015 Statoil AS
+  Copyright 2019 Big Data Accelerate

  This file is part of the Open Porous Media project (OPM).

@@ -67,6 +68,7 @@ NEW_PROP_TAG(CprMaxEllIter);
 NEW_PROP_TAG(CprEllSolvetype);
 NEW_PROP_TAG(CprReuseSetup);
 NEW_PROP_TAG(LinearSolverConfigurationJsonFile);
+NEW_PROP_TAG(UseGpu);

 SET_SCALAR_PROP(FlowIstlSolverParams, LinearSolverReduction, 1e-2);
 SET_SCALAR_PROP(FlowIstlSolverParams, IluRelaxation, 0.9);
@@ -92,6 +94,7 @@ SET_INT_PROP(FlowIstlSolverParams, CprMaxEllIter, 20);
 SET_INT_PROP(FlowIstlSolverParams, CprEllSolvetype, 0);
 SET_INT_PROP(FlowIstlSolverParams, CprReuseSetup, 0);
 SET_STRING_PROP(FlowIstlSolverParams, LinearSolverConfigurationJsonFile, "none");
+SET_BOOL_PROP(FlowIstlSolverParams, UseGpu, false);



@@ -163,6 +166,7 @@ namespace Opm
        std::string system_strategy_;
        bool scale_linear_system_;
        std::string linear_solver_configuration_json_file_;
+        bool use_gpu_;

        template <class TypeTag>
        void init()
@@ -190,6 +194,7 @@ namespace Opm
            cpr_ell_solvetype_  =  EWOMS_GET_PARAM(TypeTag, int, CprEllSolvetype);
            cpr_reuse_setup_  =  EWOMS_GET_PARAM(TypeTag, int, CprReuseSetup);
            linear_solver_configuration_json_file_ = EWOMS_GET_PARAM(TypeTag, std::string, LinearSolverConfigurationJsonFile);
+            use_gpu_ = EWOMS_GET_PARAM(TypeTag, bool, UseGpu);
        }

        template <class TypeTag>
@@ -217,6 +222,7 @@ namespace Opm
            EWOMS_REGISTER_PARAM(TypeTag, int, CprEllSolvetype, "Solver type of elliptic pressure solve (0: bicgstab, 1: cg, 2: only amg preconditioner)");
            EWOMS_REGISTER_PARAM(TypeTag, int, CprReuseSetup, "Reuse Amg Setup");
            EWOMS_REGISTER_PARAM(TypeTag, std::string, LinearSolverConfigurationJsonFile, "Filename of JSON configuration for flexible linear solver system.");
+            EWOMS_REGISTER_PARAM(TypeTag, bool, UseGpu, "Use GPU cusparseSolver as the linear solver");
        }

        FlowLinearSolverParameters() { reset(); }
@@ -238,6 +244,7 @@ namespace Opm
            ilu_milu_                 = MILU_VARIANT::ILU;
            ilu_redblack_             = false;
            ilu_reorder_sphere_       = true;
+            use_gpu_                  = false;
        }
    };

--- a/opm/simulators/linalg/ISTLSolverEbos.hpp
+++ b/opm/simulators/linalg/ISTLSolverEbos.hpp
@@ -46,6 +46,8 @@

 #include <opm/common/utility/platform_dependent/reenable_warnings.h>

+#include <opm/simulators/linalg/bda/BdaBridge.hpp>
+
 BEGIN_PROPERTIES

 NEW_TYPE_TAG(FlowIstlSolver, INHERITS_FROM(FlowIstlSolverParams));
@@ -223,13 +225,10 @@ protected:
        enum { pressureVarIndex = Indices::pressureSwitchIdx };
        static const int numEq = Indices::numEq;

-<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp
-=======
 #if HAVE_CUDA
        BdaBridge *bdaBridge;
 #endif

->>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp
    public:
        typedef Dune::AssembledLinearOperator< Matrix, Vector, Vector > AssembledLinearOperatorType;

@@ -247,15 +246,13 @@ protected:
              converged_(false)
        {
            parameters_.template init<TypeTag>();
-<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp
-=======
 #if HAVE_CUDA
            const bool use_gpu = EWOMS_GET_PARAM(TypeTag, bool, UseGpu);
            const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter);
            const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
            const bool matrix_add_well_contributions = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions);
            if(use_gpu && !matrix_add_well_contributions){
-                std::cerr << "Error cannot use GPU solver if command line parameter --matrix-add-well-contributions is false, due to the changing sparsity pattern" << std::endl;
+                std::cerr << "Error cannot use GPU solver if command line parameter --matrix-add-well-contributions is false, because the GPU solver performs a standard bicgstab" << std::endl;
                exit(1);
            }
            bdaBridge = new BdaBridge(use_gpu, maxit, tolerance);
@@ -266,20 +263,16 @@ protected:
                exit(1);
            }
 #endif
->>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp
            extractParallelGridInformationToISTL(simulator_.vanguard().grid(), parallelInformation_);
            detail::findOverlapRowsAndColumns(simulator_.vanguard().grid(),overlapRowAndColumns_);
        }

-<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp
-=======
        ~ISTLSolverEbos(){
 #if HAVE_CUDA
            delete bdaBridge;
 #endif
        }

->>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp
        // nothing to clean here
        void eraseMatrix() {
            matrix_for_preconditioner_.reset();
@@ -472,10 +465,6 @@ protected:
            else
 #endif
            {
-<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp
-                // Construct preconditioner.
-                auto precond = constructPrecond(linearOperator, parallelInformation_arg);
-=======
                // tries to solve linear system
                // solve_system() does nothing if Dune is selected
 #if HAVE_CUDA
@@ -490,13 +479,12 @@ protected:
                    solve(linearOperator, x, istlb, *sp, *precond, result);
                } // end Dune call
 #else
+                // Construct preconditioner.
                auto precond = constructPrecond(linearOperator, parallelInformation_arg);
-                solve(linearOperator, x, istlb, *sp, *precond, result);
-#endif
->>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp

                // Solve.
                solve(linearOperator, x, istlb, *sp, *precond, result);
+#endif
            }
        }

--- a/opm/simulators/linalg/bda/BdaBridge.cpp
+++ b/opm/simulators/linalg/bda/BdaBridge.cpp
@@ -20,8 +20,8 @@
 #include <config.h>
 #include <memory>

-#include <opm/bda/BdaBridge.hpp>
-#include <opm/bda/BdaResult.hpp>
+#include <opm/simulators/linalg/bda/BdaBridge.hpp>
+#include <opm/simulators/linalg/bda/BdaResult.hpp>

 #define PRINT_TIMERS_BRIDGE_BRIDGE 0

@@ -210,25 +210,24 @@ void BdaBridge::get_result(BridgeVector &x){
 #endif
 }

-#if HAVE_CUDA
 template void BdaBridge::solve_system< \
-Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 2, 2>, std::allocator<Ewoms::MatrixBlock<double, 2, 2> > > , \
+Dune::BCRSMatrix<Opm::MatrixBlock<double, 2, 2>, std::allocator<Opm::MatrixBlock<double, 2, 2> > > , \
 Dune::BlockVector<Dune::FieldVector<double, 2>, std::allocator<Dune::FieldVector<double, 2> > > > \
-(Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 2, 2>, std::allocator<Ewoms::MatrixBlock<double, 2, 2> > > *mat, \
+(Dune::BCRSMatrix<Opm::MatrixBlock<double, 2, 2>, std::allocator<Opm::MatrixBlock<double, 2, 2> > > *mat, \
 	Dune::BlockVector<Dune::FieldVector<double, 2>, std::allocator<Dune::FieldVector<double, 2> > > &b, \
 	InverseOperatorResult &res);

 template void BdaBridge::solve_system< \
-Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 3, 3>, std::allocator<Ewoms::MatrixBlock<double, 3, 3> > > , \
+Dune::BCRSMatrix<Opm::MatrixBlock<double, 3, 3>, std::allocator<Opm::MatrixBlock<double, 3, 3> > > , \
 Dune::BlockVector<Dune::FieldVector<double, 3>, std::allocator<Dune::FieldVector<double, 3> > > > \
-(Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 3, 3>, std::allocator<Ewoms::MatrixBlock<double, 3, 3> > > *mat, \
+(Dune::BCRSMatrix<Opm::MatrixBlock<double, 3, 3>, std::allocator<Opm::MatrixBlock<double, 3, 3> > > *mat, \
 	Dune::BlockVector<Dune::FieldVector<double, 3>, std::allocator<Dune::FieldVector<double, 3> > > &b, \
 	InverseOperatorResult &res);

 template void BdaBridge::solve_system< \
-Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 4, 4>, std::allocator<Ewoms::MatrixBlock<double, 4, 4> > > , \
+Dune::BCRSMatrix<Opm::MatrixBlock<double, 4, 4>, std::allocator<Opm::MatrixBlock<double, 4, 4> > > , \
 Dune::BlockVector<Dune::FieldVector<double, 4>, std::allocator<Dune::FieldVector<double, 4> > > > \
-(Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 4, 4>, std::allocator<Ewoms::MatrixBlock<double, 4, 4> > > *mat, \
+(Dune::BCRSMatrix<Opm::MatrixBlock<double, 4, 4>, std::allocator<Opm::MatrixBlock<double, 4, 4> > > *mat, \
 	Dune::BlockVector<Dune::FieldVector<double, 4>, std::allocator<Dune::FieldVector<double, 4> > > &b, \
 	InverseOperatorResult &res);

@@ -244,7 +243,7 @@ Dune::BlockVector<Dune::FieldVector<double, 3>, std::allocator<Dune::FieldVector
 template void BdaBridge::get_result< \
 Dune::BlockVector<Dune::FieldVector<double, 4>, std::allocator<Dune::FieldVector<double, 4> > > > \
 (Dune::BlockVector<Dune::FieldVector<double, 4>, std::allocator<Dune::FieldVector<double, 4> > > &x);
-#endif
+


 }
--- a/opm/simulators/linalg/bda/BdaBridge.hpp
+++ b/opm/simulators/linalg/bda/BdaBridge.hpp
@@ -24,10 +24,10 @@
 #include "dune/istl/solver.hh" // for struct InverseOperatorResult

 #include "dune/istl/bcrsmatrix.hh"
-#include <ewoms/linear/matrixblock.hh>
+#include <opm/simulators/linalg/matrixblock.hh>

 #if HAVE_CUDA
-#include <opm/bda/cusparseSolverBackend.hpp>
+#include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
 #endif

 namespace Opm
--- a/opm/simulators/linalg/bda/cuda_header.h
+++ b/opm/simulators/linalg/bda/cuda_header.h
@@ -20,7 +20,7 @@
 #ifndef CUDA_HEADER_H
 #define CUDA_HEADER_H

-#include <stdio.h>
+#include <iostream>

 typedef double Block[9];

--- a/opm/simulators/linalg/bda/cusparseSolverBackend.cu
+++ b/opm/simulators/linalg/bda/cusparseSolverBackend.cu
@@ -27,9 +27,9 @@
 #include <iostream>
 #include <sys/time.h>

-#include <opm/bda/cusparseSolverBackend.hpp>
-#include <opm/bda/BdaResult.hpp>
-#include <opm/bda/cuda_header.h>
+#include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
+#include <opm/simulators/linalg/bda/BdaResult.hpp>
+#include <opm/simulators/linalg/bda/cuda_header.h>

 #include "cublas_v2.h"
 #include "cusparse_v2.h"
@@ -89,11 +89,11 @@ namespace Opm
 		printf("Tolerance: %.0e, nnzb: %d\n", tolerance, nnzb);
 #endif

-		for (it = 0.5; it < maxit; it+=0.5){
+		for(it = 0.5; it < maxit; it+=0.5){
 			rhop = rho;
 			cublasDdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho);

-			if (it > 1){
+			if(it > 1){
 				beta = (rho/rhop) * (alpha/omega);
 				nomega = -omega;
 				cublasDaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1);
@@ -116,15 +116,17 @@ namespace Opm

 			cublasDdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1);
 			alpha = rho / tmp1;
-			nalpha = -(alpha);
+			nalpha = -alpha;
 			cublasDaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1);
 			cublasDaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1);
 			cublasDnrm2(cublasHandle, n, d_r, 1, &norm);

-			if (norm < tolerance * norm_0 && it > minit){
+			if(norm < tolerance * norm_0 && it > minit){
 				break;
 			}

+			it += 0.5;
+
 			// apply ilu0
 			cusparseDbsrsv2_solve(cusparseHandle, order, \
 				operation, Nb, nnzb, &one, \
@@ -141,18 +143,18 @@ namespace Opm
 			cublasDdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1);
 			cublasDdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2);
 			omega = tmp1 / tmp2;
-			nomega = -(omega);
+			nomega = -omega;
 			cublasDaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1);
 			cublasDaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1);

 			cublasDnrm2(cublasHandle, n, d_r, 1, &norm);


-			if (norm < tolerance * norm_0 && it > minit){
+			if(norm < tolerance * norm_0 && it > minit){
 				break;
 			}
 #if VERBOSE_BACKEND
-			if(i % 1 == 0){
+			if((int)it % 10 == 0){
 				printf("it: %.1f, norm: %.5e\n", it, norm);
 			}
 #endif
@@ -161,16 +163,17 @@ namespace Opm
 		t_total2 = second();
 #if PRINT_TIMERS_BACKEND
 		printf("Total solve time: %.6f s\n", t_total2-t_total1);
-#endif
-#if VERBOSE_BACKEND
-		printf("Iterations: %.1f\n", it);
-		printf("Final norm: %.5e\n", norm);
 #endif
 		res.iterations = std::min(it, (float)maxit);
 		res.reduction = norm/norm_0;
 		res.conv_rate  = static_cast<double>(pow(res.reduction,1.0/it));
 		res.elapsed = t_total2-t_total1;
 		res.converged = (it != (maxit + 0.5));
+#if VERBOSE_BACKEND
+		printf("Iterations: %.1f\n", it);
+		printf("Final norm: %.5e\n", norm);
+		printf("GPU converged: %d\n", res.converged);
+#endif
 		return res.converged;
 	}

@@ -395,6 +398,7 @@ namespace Opm
 		cusparseDbsrilu02(cusparseHandle, order, \
 			Nb, nnzb, descr_M, d_mVals, d_mRows, d_mCols, \
 			BLOCK_SIZE, info_M, policy, d_buffer);
+
 		int structural_zero;
 		cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
 		if(CUSPARSE_STATUS_ZERO_PIVOT == status){
--- a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp
@@ -24,7 +24,7 @@
 #include "cublas_v2.h"
 #include "cusparse_v2.h"

-#include "opm/bda/BdaResult.hpp"
+#include "opm/simulators/linalg/bda/BdaResult.hpp"

 namespace Opm
 {
@@ -72,10 +72,10 @@ public:

    void finalize();

-    void copy_system_to_gpu(double *vals, int *rows, int *cols, double *f);
+    void copy_system_to_gpu(double *vals, int *rows, int *cols, double *b);

    // don't copy rowpointers and colindices, they stay the same
-    void update_system_on_gpu(double *vals, double *f);
+    void update_system_on_gpu(double *vals, double *b);

    void reset_prec_on_gpu();