diff --git a/CMakeLists.txt b/CMakeLists.txt
index b7200f297..08e57abcc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -297,5 +297,7 @@ endif()
 if(CUDA_FOUND)
   target_link_libraries( flow ${CUDA_cublas_LIBRARY} )
   target_link_libraries( flow ${CUDA_cusparse_LIBRARY} )
+  target_link_libraries( ebos ${CUDA_cublas_LIBRARY} )
+  target_link_libraries( ebos ${CUDA_cusparse_LIBRARY} )
 endif()
 
diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake
index 955e325da..2b711fb9d 100644
--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@@ -29,6 +29,7 @@ list (APPEND MAIN_SOURCE_FILES
   opm/simulators/flow/MissingFeatures.cpp
   opm/simulators/linalg/ExtractParallelGridInformationToISTL.cpp
   opm/simulators/linalg/setupPropertyTree.cpp
+  opm/simulators/linalg/bda/BdaBridge.cpp
   opm/simulators/timestepping/TimeStepControl.cpp
   opm/simulators/timestepping/AdaptiveSimulatorTimer.cpp
   opm/simulators/timestepping/SimulatorTimer.cpp
@@ -42,7 +43,7 @@ list (APPEND MAIN_SOURCE_FILES
   )
 
 if(CUDA_FOUND)
-  list (APPEND MAIN_SOURCE_FILES opm/bda/cusparseSolverBackend.cu)
+  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cusparseSolverBackend.cu)
 endif()
 
 # originally generated with the command:
diff --git a/opm/simulators/linalg/FlowLinearSolverParameters.hpp b/opm/simulators/linalg/FlowLinearSolverParameters.hpp
index 79f79d3a9..ab8efbce5 100644
--- a/opm/simulators/linalg/FlowLinearSolverParameters.hpp
+++ b/opm/simulators/linalg/FlowLinearSolverParameters.hpp
@@ -4,6 +4,7 @@
   Copyright 2015 Dr. Blatt - HPC-Simulation-Software & Services
   Copyright 2015 NTNU
   Copyright 2015 Statoil AS
+  Copyright 2019 Big Data Accelerate
 
   This file is part of the Open Porous Media project (OPM).
 
@@ -67,6 +68,7 @@ NEW_PROP_TAG(CprMaxEllIter);
 NEW_PROP_TAG(CprEllSolvetype);
 NEW_PROP_TAG(CprReuseSetup);
 NEW_PROP_TAG(LinearSolverConfigurationJsonFile);
+NEW_PROP_TAG(UseGpu);
 
 SET_SCALAR_PROP(FlowIstlSolverParams, LinearSolverReduction, 1e-2);
 SET_SCALAR_PROP(FlowIstlSolverParams, IluRelaxation, 0.9);
@@ -92,6 +94,7 @@ SET_INT_PROP(FlowIstlSolverParams, CprMaxEllIter, 20);
 SET_INT_PROP(FlowIstlSolverParams, CprEllSolvetype, 0);
 SET_INT_PROP(FlowIstlSolverParams, CprReuseSetup, 0);
 SET_STRING_PROP(FlowIstlSolverParams, LinearSolverConfigurationJsonFile, "none");
+SET_BOOL_PROP(FlowIstlSolverParams, UseGpu, false);
 
 
 
@@ -163,6 +166,7 @@ namespace Opm
         std::string system_strategy_;
         bool scale_linear_system_;
         std::string linear_solver_configuration_json_file_;
+        bool use_gpu_;
 
         template <class TypeTag>
         void init()
@@ -190,6 +194,7 @@ namespace Opm
             cpr_ell_solvetype_  =  EWOMS_GET_PARAM(TypeTag, int, CprEllSolvetype);
             cpr_reuse_setup_  =  EWOMS_GET_PARAM(TypeTag, int, CprReuseSetup);
             linear_solver_configuration_json_file_ = EWOMS_GET_PARAM(TypeTag, std::string, LinearSolverConfigurationJsonFile);
+            use_gpu_ = EWOMS_GET_PARAM(TypeTag, bool, UseGpu);
         }
 
         template <class TypeTag>
@@ -217,6 +222,7 @@ namespace Opm
             EWOMS_REGISTER_PARAM(TypeTag, int, CprEllSolvetype, "Solver type of elliptic pressure solve (0: bicgstab, 1: cg, 2: only amg preconditioner)");
             EWOMS_REGISTER_PARAM(TypeTag, int, CprReuseSetup, "Reuse Amg Setup");
             EWOMS_REGISTER_PARAM(TypeTag, std::string, LinearSolverConfigurationJsonFile, "Filename of JSON configuration for flexible linear solver system.");
+            EWOMS_REGISTER_PARAM(TypeTag, bool, UseGpu, "Use GPU cusparseSolver as the linear solver");
         }
 
         FlowLinearSolverParameters() { reset(); }
@@ -238,6 +244,7 @@ namespace Opm
             ilu_milu_                 = MILU_VARIANT::ILU;
             ilu_redblack_             = false;
             ilu_reorder_sphere_       = true;
+            use_gpu_                  = false;
         }
     };
 
diff --git a/opm/simulators/linalg/ISTLSolverEbos.hpp b/opm/simulators/linalg/ISTLSolverEbos.hpp
index ec038eb8d..1bbf039b1 100644
--- a/opm/simulators/linalg/ISTLSolverEbos.hpp
+++ b/opm/simulators/linalg/ISTLSolverEbos.hpp
@@ -46,6 +46,8 @@
 
 #include <opm/common/utility/platform_dependent/reenable_warnings.h>
 
+#include <opm/simulators/linalg/bda/BdaBridge.hpp>
+
 BEGIN_PROPERTIES
 
 NEW_TYPE_TAG(FlowIstlSolver, INHERITS_FROM(FlowIstlSolverParams));
@@ -223,13 +225,10 @@ protected:
         enum { pressureVarIndex = Indices::pressureSwitchIdx };
         static const int numEq = Indices::numEq;
 
-<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp
-=======
 #if HAVE_CUDA
         BdaBridge *bdaBridge;
 #endif
 
->>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp
     public:
         typedef Dune::AssembledLinearOperator< Matrix, Vector, Vector > AssembledLinearOperatorType;
 
@@ -247,15 +246,13 @@ protected:
               converged_(false)
         {
             parameters_.template init<TypeTag>();
-<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp
-=======
 #if HAVE_CUDA
             const bool use_gpu = EWOMS_GET_PARAM(TypeTag, bool, UseGpu);
             const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter);
             const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
             const bool matrix_add_well_contributions = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions);
             if(use_gpu && !matrix_add_well_contributions){
-                std::cerr << "Error cannot use GPU solver if command line parameter --matrix-add-well-contributions is false, due to the changing sparsity pattern" << std::endl;
+                std::cerr << "Error cannot use GPU solver if command line parameter --matrix-add-well-contributions is false, because the GPU solver performs a standard bicgstab" << std::endl;
                 exit(1);
             }
             bdaBridge = new BdaBridge(use_gpu, maxit, tolerance);
@@ -266,20 +263,16 @@ protected:
                 exit(1);
             }
 #endif
->>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp
             extractParallelGridInformationToISTL(simulator_.vanguard().grid(), parallelInformation_);
             detail::findOverlapRowsAndColumns(simulator_.vanguard().grid(),overlapRowAndColumns_);
         }
 
-<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp
-=======
         ~ISTLSolverEbos(){
 #if HAVE_CUDA
             delete bdaBridge;
 #endif
         }
 
->>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp
         // nothing to clean here
         void eraseMatrix() {
             matrix_for_preconditioner_.reset();
@@ -472,10 +465,6 @@ protected:
             else
 #endif
             {
-<<<<<<< HEAD:opm/simulators/linalg/ISTLSolverEbos.hpp
-                // Construct preconditioner.
-                auto precond = constructPrecond(linearOperator, parallelInformation_arg);
-=======
                 // tries to solve linear system
                 // solve_system() does nothing if Dune is selected
 #if HAVE_CUDA
@@ -490,13 +479,12 @@ protected:
                     solve(linearOperator, x, istlb, *sp, *precond, result);
                 } // end Dune call
 #else
+                // Construct preconditioner.
                 auto precond = constructPrecond(linearOperator, parallelInformation_arg);
-                solve(linearOperator, x, istlb, *sp, *precond, result);
-#endif
->>>>>>> 200e000... Changed cusparseSolver. Use find_package(CUDA) instead of setting a flag manually. Use HAVE_CUDA in sources to disable the BdaBridge when no GPU can be found anyway.:opm/autodiff/ISTLSolverEbos.hpp
 
                 // Solve.
                 solve(linearOperator, x, istlb, *sp, *precond, result);
+#endif
             }
         }
 
diff --git a/opm/simulators/linalg/bda/BdaBridge.cpp b/opm/simulators/linalg/bda/BdaBridge.cpp
index 893922e69..64b395a04 100644
--- a/opm/simulators/linalg/bda/BdaBridge.cpp
+++ b/opm/simulators/linalg/bda/BdaBridge.cpp
@@ -20,8 +20,8 @@
 #include <config.h>
 #include <memory>
 
-#include <opm/bda/BdaBridge.hpp>
-#include <opm/bda/BdaResult.hpp>
+#include <opm/simulators/linalg/bda/BdaBridge.hpp>
+#include <opm/simulators/linalg/bda/BdaResult.hpp>
 
 #define PRINT_TIMERS_BRIDGE_BRIDGE 0
 
@@ -210,25 +210,24 @@ void BdaBridge::get_result(BridgeVector &x){
 #endif
 }
 
-#if HAVE_CUDA
 template void BdaBridge::solve_system< \
-Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 2, 2>, std::allocator<Ewoms::MatrixBlock<double, 2, 2> > > , \
+Dune::BCRSMatrix<Opm::MatrixBlock<double, 2, 2>, std::allocator<Opm::MatrixBlock<double, 2, 2> > > , \
 Dune::BlockVector<Dune::FieldVector<double, 2>, std::allocator<Dune::FieldVector<double, 2> > > > \
-(Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 2, 2>, std::allocator<Ewoms::MatrixBlock<double, 2, 2> > > *mat, \
+(Dune::BCRSMatrix<Opm::MatrixBlock<double, 2, 2>, std::allocator<Opm::MatrixBlock<double, 2, 2> > > *mat, \
 	Dune::BlockVector<Dune::FieldVector<double, 2>, std::allocator<Dune::FieldVector<double, 2> > > &b, \
 	InverseOperatorResult &res);
 
 template void BdaBridge::solve_system< \
-Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 3, 3>, std::allocator<Ewoms::MatrixBlock<double, 3, 3> > > , \
+Dune::BCRSMatrix<Opm::MatrixBlock<double, 3, 3>, std::allocator<Opm::MatrixBlock<double, 3, 3> > > , \
 Dune::BlockVector<Dune::FieldVector<double, 3>, std::allocator<Dune::FieldVector<double, 3> > > > \
-(Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 3, 3>, std::allocator<Ewoms::MatrixBlock<double, 3, 3> > > *mat, \
+(Dune::BCRSMatrix<Opm::MatrixBlock<double, 3, 3>, std::allocator<Opm::MatrixBlock<double, 3, 3> > > *mat, \
 	Dune::BlockVector<Dune::FieldVector<double, 3>, std::allocator<Dune::FieldVector<double, 3> > > &b, \
 	InverseOperatorResult &res);
 
 template void BdaBridge::solve_system< \
-Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 4, 4>, std::allocator<Ewoms::MatrixBlock<double, 4, 4> > > , \
+Dune::BCRSMatrix<Opm::MatrixBlock<double, 4, 4>, std::allocator<Opm::MatrixBlock<double, 4, 4> > > , \
 Dune::BlockVector<Dune::FieldVector<double, 4>, std::allocator<Dune::FieldVector<double, 4> > > > \
-(Dune::BCRSMatrix<Ewoms::MatrixBlock<double, 4, 4>, std::allocator<Ewoms::MatrixBlock<double, 4, 4> > > *mat, \
+(Dune::BCRSMatrix<Opm::MatrixBlock<double, 4, 4>, std::allocator<Opm::MatrixBlock<double, 4, 4> > > *mat, \
 	Dune::BlockVector<Dune::FieldVector<double, 4>, std::allocator<Dune::FieldVector<double, 4> > > &b, \
 	InverseOperatorResult &res);
 
@@ -244,7 +243,7 @@ Dune::BlockVector<Dune::FieldVector<double, 3>, std::allocator<Dune::FieldVector
 template void BdaBridge::get_result< \
 Dune::BlockVector<Dune::FieldVector<double, 4>, std::allocator<Dune::FieldVector<double, 4> > > > \
 (Dune::BlockVector<Dune::FieldVector<double, 4>, std::allocator<Dune::FieldVector<double, 4> > > &x);
-#endif
+
 
 
 }
diff --git a/opm/simulators/linalg/bda/BdaBridge.hpp b/opm/simulators/linalg/bda/BdaBridge.hpp
index d0259f20c..e593a6cec 100644
--- a/opm/simulators/linalg/bda/BdaBridge.hpp
+++ b/opm/simulators/linalg/bda/BdaBridge.hpp
@@ -24,10 +24,10 @@
 #include "dune/istl/solver.hh" // for struct InverseOperatorResult
 
 #include "dune/istl/bcrsmatrix.hh"
-#include <ewoms/linear/matrixblock.hh>
+#include <opm/simulators/linalg/matrixblock.hh>
 
 #if HAVE_CUDA
-#include <opm/bda/cusparseSolverBackend.hpp>
+#include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
 #endif
 
 namespace Opm
diff --git a/opm/simulators/linalg/bda/cuda_header.h b/opm/simulators/linalg/bda/cuda_header.h
index 072c90eb4..81b8ad78c 100644
--- a/opm/simulators/linalg/bda/cuda_header.h
+++ b/opm/simulators/linalg/bda/cuda_header.h
@@ -20,7 +20,7 @@
 #ifndef CUDA_HEADER_H
 #define CUDA_HEADER_H
 
-#include <stdio.h>
+#include <iostream>
 
 typedef double Block[9];
 
diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.cu b/opm/simulators/linalg/bda/cusparseSolverBackend.cu
index decc9cec3..86b904cd4 100644
--- a/opm/simulators/linalg/bda/cusparseSolverBackend.cu
+++ b/opm/simulators/linalg/bda/cusparseSolverBackend.cu
@@ -27,9 +27,9 @@
 #include <iostream>
 #include <sys/time.h>
 
-#include <opm/bda/cusparseSolverBackend.hpp>
-#include <opm/bda/BdaResult.hpp>
-#include <opm/bda/cuda_header.h>
+#include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
+#include <opm/simulators/linalg/bda/BdaResult.hpp>
+#include <opm/simulators/linalg/bda/cuda_header.h>
 
 #include "cublas_v2.h"
 #include "cusparse_v2.h"
@@ -89,11 +89,11 @@ namespace Opm
 		printf("Tolerance: %.0e, nnzb: %d\n", tolerance, nnzb);
 #endif
 
-		for (it = 0.5; it < maxit; it+=0.5){
+		for(it = 0.5; it < maxit; it+=0.5){
 			rhop = rho;
 			cublasDdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho);
 
-			if (it > 1){
+			if(it > 1){
 				beta = (rho/rhop) * (alpha/omega);
 				nomega = -omega;
 				cublasDaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1);
@@ -116,15 +116,17 @@ namespace Opm
 
 			cublasDdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1);
 			alpha = rho / tmp1;
-			nalpha = -(alpha);
+			nalpha = -alpha;
 			cublasDaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1);
 			cublasDaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1);
 			cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
 
-			if (norm < tolerance * norm_0 && it > minit){
+			if(norm < tolerance * norm_0 && it > minit){
 				break;
 			}
 
+			it += 0.5;
+
 			// apply ilu0
 			cusparseDbsrsv2_solve(cusparseHandle, order, \
 				operation, Nb, nnzb, &one, \
@@ -141,18 +143,18 @@ namespace Opm
 			cublasDdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1);
 			cublasDdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2);
 			omega = tmp1 / tmp2;
-			nomega = -(omega);
+			nomega = -omega;
 			cublasDaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1);
 			cublasDaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1);
 
 			cublasDnrm2(cublasHandle, n, d_r, 1, &norm);
 
 
-			if (norm < tolerance * norm_0 && it > minit){
+			if(norm < tolerance * norm_0 && it > minit){
 				break;
 			}
 #if VERBOSE_BACKEND
-			if(i % 1 == 0){
+			if((int)it % 10 == 0){
 				printf("it: %.1f, norm: %.5e\n", it, norm);
 			}
 #endif
@@ -161,16 +163,17 @@ namespace Opm
 		t_total2 = second();
 #if PRINT_TIMERS_BACKEND
 		printf("Total solve time: %.6f s\n", t_total2-t_total1);
-#endif
-#if VERBOSE_BACKEND
-		printf("Iterations: %.1f\n", it);
-		printf("Final norm: %.5e\n", norm);
 #endif
 		res.iterations = std::min(it, (float)maxit);
 		res.reduction = norm/norm_0;
 		res.conv_rate  = static_cast<double>(pow(res.reduction,1.0/it));
 		res.elapsed = t_total2-t_total1;
 		res.converged = (it != (maxit + 0.5));
+#if VERBOSE_BACKEND
+		printf("Iterations: %.1f\n", it);
+		printf("Final norm: %.5e\n", norm);
+		printf("GPU converged: %d\n", res.converged);
+#endif
 		return res.converged;
 	}
 
@@ -395,6 +398,7 @@ namespace Opm
 		cusparseDbsrilu02(cusparseHandle, order, \
 			Nb, nnzb, descr_M, d_mVals, d_mRows, d_mCols, \
 			BLOCK_SIZE, info_M, policy, d_buffer);
+
 		int structural_zero;
 		cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
 		if(CUSPARSE_STATUS_ZERO_PIVOT == status){
diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp
index 29bcc041a..7f7b9a9ba 100644
--- a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp
@@ -24,7 +24,7 @@
 #include "cublas_v2.h"
 #include "cusparse_v2.h"
 
-#include "opm/bda/BdaResult.hpp"
+#include "opm/simulators/linalg/bda/BdaResult.hpp"
 
 namespace Opm
 {
@@ -72,10 +72,10 @@ public:
 
     void finalize();
 
-    void copy_system_to_gpu(double *vals, int *rows, int *cols, double *f);
+    void copy_system_to_gpu(double *vals, int *rows, int *cols, double *b);
 
     // don't copy rowpointers and colindices, they stay the same
-    void update_system_on_gpu(double *vals, double *f);
+    void update_system_on_gpu(double *vals, double *b);
 
     void reset_prec_on_gpu();