diff --git a/CMakeLists.txt b/CMakeLists.txt
index f7b74f01f..98acb7b3c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,7 +26,6 @@ option(BUILD_FLOW_POLY_GRID "Build flow blackoil with polyhedral grid" OFF)
 option(OPM_ENABLE_PYTHON "Enable python bindings?" OFF)
 option(OPM_ENABLE_PYTHON_TESTS "Enable tests for the python bindings?" ON)
 option(OPM_INSTALL_PYTHON "Install python bindings?" ON)
-option(ENABLE_FPGA "Enable FPGA kernels integration?" OFF)
 option(USE_CHOW_PATEL_ILU "Use the iterative ILU by Chow and Patel?" OFF)
 option(USE_CHOW_PATEL_ILU_GPU "Run iterative ILU decomposition on GPU? Requires USE_CHOW_PATEL_ILU" OFF)
 option(USE_CHOW_PATEL_ILU_GPU_PARALLEL "Try to use more parallelism on the GPU during the iterative ILU decomposition? Requires USE_CHOW_PATEL_ILU_GPU" OFF)
@@ -168,70 +167,6 @@ endif()
 
 find_package(OpenCL)
 
-# include FPGA target only when ENABLE_FPGA is set to ON
-if(ENABLE_FPGA)
-  # must include opencl.h which is a wrapper for all the OpenCL extensions
-  find_file(OPENCL_H CL/opencl.h HINTS ${OpenCL_INCLUDE_DIRS})
-
-  # FIXME: devise a check for the presence of the "FPGA" module; currently looking for makefile in <opm-simulators>/../FPGA
-  find_file(FPGA_MODULE linearalgebra/ilu0bicgstab/xilinx/alveo_u280/vitis_20192/OPM-integration/makefile HINTS ${opm-simulators_SOURCE_DIR}/../FPGA)
-
-  # this code only runs if the user explicitly enabled the FPGA
-  # hence fatal_error instead of warning
-  if(NOT OPENCL_H OR NOT OpenCL_FOUND)
-    set(HAVE_FPGA 0)
-    message(FATAL_ERROR " OpenCL packages/headers were not found. Make sure CL/opencl.h exists or deactivate FPGA.")
-  elseif(NOT FPGA_MODULE)
-    set(HAVE_FPGA 0)
-    message(FATAL_ERROR " FPGA module was not found. Make sure FPGA repository exists or deactivate FPGA.")
-  elseif(NOT DEFINED ENV{XILINX_XRT})
-    # Xilinx XRT must be installed and properly setup
-    set(HAVE_FPGA 0)
-    message(FATAL_ERROR " Xilinx XRT not found. Make sure it is installed and setup (check its documentation) or deactivate FPGA.")
-  else()
-    set(HAVE_FPGA 1)
-    message(STATUS "FPGA library and kernel integration active.")
-
-    # FIXME: set the correct path to the FPGA module
-    set(FPGA_SOURCE_DIR ${opm-simulators_SOURCE_DIR}/../FPGA)
-
-    # configuration variables with default values: they can be overridden on the cmake command line
-    # FPGA_PORTS_CONFIG selects the kernel's memory ports configuration; must be in sync with available kernel bitstream
-    if(NOT FPGA_PORTS_CONFIG)
-      set(FPGA_PORTS_CONFIG 2r_3r3w_ddr)
-    endif()
-    # FPGA_DEBUG_LEVEL sets the debug messages level for FPGA library functions (should be set to 0 for Release build)
-    if(NOT FPGA_DEBUG_LEVEL)
-      set(FPGA_DEBUG_LEVEL 0)
-    endif()
-    message(STATUS "Using the following settings for the FPGA library compilation: "
-      "FPGA_PORTS_CONFIG=${FPGA_PORTS_CONFIG}, "
-      "FPGA_DEBUG_LEVEL=${FPGA_DEBUG_LEVEL}")
-    add_compile_options(-DPORTS_CONFIG=PORTS_${FPGA_PORTS_CONFIG})
-    add_compile_options(-DBDA_DEBUG_LEVEL=${FPGA_DEBUG_LEVEL})
-    # include directories for the FPGA library
-    include_directories(${FPGA_SOURCE_DIR})
-    include_directories(${FPGA_SOURCE_DIR}/linearalgebra/ilu0bicgstab/xilinx/src/sda_app)
-    include_directories(${FPGA_SOURCE_DIR}/linearalgebra/ilu0bicgstab/xilinx/src/sda_app/common)
-    # add external project to compile the FPGA library
-    include (ExternalProject)
-    ExternalProject_Add(FPGA_library
-      # force the build step to always be run because source dependencies cannot be made explicit
-      BUILD_ALWAYS 1
-      DOWNLOAD_COMMAND ""
-      UPDATE_COMMAND ""
-      CONFIGURE_COMMAND ""
-      BUILD_COMMAND make
-        -f ${FPGA_SOURCE_DIR}/linearalgebra/ilu0bicgstab/xilinx/alveo_u280/vitis_20192/OPM-integration/makefile
-        SRCDIR=${FPGA_SOURCE_DIR}/linearalgebra/ilu0bicgstab/xilinx/src/sda_app
-        PORTS_CONFIG=${FPGA_PORTS_CONFIG}
-        DEBUG_LEVEL=${FPGA_DEBUG_LEVEL}
-      INSTALL_COMMAND ""
-      TEST_COMMAND ""
-    )
-  endif()
-endif()
-
 if(OpenCL_FOUND)
   # the current OpenCL implementation relies on cl2.hpp, not cl.hpp
   # make sure it is available, otherwise disable OpenCL
@@ -553,11 +488,7 @@ endif()
 if(VexCL_FOUND)
   target_link_libraries( opmsimulators PUBLIC OPM::VexCL::OpenCL )
 endif()
-if(HAVE_FPGA)
-  add_dependencies(opmsimulators FPGA_library)
-  ExternalProject_Get_Property(FPGA_library binary_dir)
-  target_link_libraries(opmsimulators PUBLIC ${binary_dir}/fpga_lib_alveo_u280.a)
-endif()
+
 if(Damaris_FOUND)
   target_link_libraries(opmsimulators PUBLIC damaris)
 endif()
diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake
index b05bfe65c..d5e536d49 100644
--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@@ -149,11 +149,6 @@ endif()
 if(CUDA_FOUND OR OPENCL_FOUND OR HAVE_FPGA OR amgcl_FOUND OR ROCALUTION_FOUND)
   list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
 endif()
-if(HAVE_FPGA)
-  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/FPGABILU0.cpp)
-  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/FPGASolverBackend.cpp)
-  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/FPGAUtils.cpp)
-endif()
 if(amgcl_FOUND)
   list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/amgclSolverBackend.cpp)
   if(CUDA_FOUND)
@@ -304,9 +299,6 @@ list (APPEND PUBLIC_HEADER_FILES
   opm/simulators/linalg/bda/cuda/cusparseSolverBackend.hpp
   opm/simulators/linalg/bda/opencl/ChowPatelIlu.hpp
   opm/simulators/linalg/bda/opencl/BISAI.hpp
-  opm/simulators/linalg/bda/FPGABILU0.hpp
-  opm/simulators/linalg/bda/FPGASolverBackend.hpp
-  opm/simulators/linalg/bda/FPGAUtils.hpp
   opm/simulators/linalg/bda/Reorder.hpp
   opm/simulators/linalg/bda/opencl/opencl.hpp
   opm/simulators/linalg/bda/opencl/openclKernels.hpp
diff --git a/opm-simulators-prereqs.cmake b/opm-simulators-prereqs.cmake
index 468f7ed06..741b470ab 100644
--- a/opm-simulators-prereqs.cmake
+++ b/opm-simulators-prereqs.cmake
@@ -8,7 +8,6 @@ set (opm-simulators_CONFIG_VAR
   HAVE_CUDA
   HAVE_OPENCL
   HAVE_OPENCL_HPP
-  HAVE_FPGA
   HAVE_AMGCL
   HAVE_VEXCL
   HAVE_ROCALUTION
diff --git a/opm/simulators/linalg/FlowLinearSolverParameters.hpp b/opm/simulators/linalg/FlowLinearSolverParameters.hpp
index ec57870b5..4cfc20bec 100644
--- a/opm/simulators/linalg/FlowLinearSolverParameters.hpp
+++ b/opm/simulators/linalg/FlowLinearSolverParameters.hpp
@@ -128,10 +128,6 @@ template<class TypeTag, class MyTypeTag>
 struct OpenclIluParallel {
     using type = UndefinedProperty;
 };
-template<class TypeTag, class MyTypeTag>
-struct FpgaBitstream {
-    using type = UndefinedProperty;
-};
 template<class TypeTag>
 struct LinearSolverReduction<TypeTag, TTag::FlowIstlSolverParams> {
     using type = GetPropType<TypeTag, Scalar>;
@@ -226,10 +222,6 @@ template<class TypeTag>
 struct OpenclIluParallel<TypeTag, TTag::FlowIstlSolverParams> {
     static constexpr bool value = true; // note: false should only be used in debug
 };
-template<class TypeTag>
-struct FpgaBitstream<TypeTag, TTag::FlowIstlSolverParams> {
-    static constexpr auto value = "";
-};
 
 } // namespace Opm::Properties
 
@@ -261,7 +253,6 @@ namespace Opm
         int cpr_reuse_setup_;
         int cpr_reuse_interval_;
         bool opencl_ilu_parallel_;
-        std::string fpga_bitstream_;
 
         template <class TypeTag>
         void init()
@@ -287,7 +278,6 @@ namespace Opm
             bda_device_id_ = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId);
             opencl_platform_id_ = EWOMS_GET_PARAM(TypeTag, int, OpenclPlatformId);
             opencl_ilu_parallel_ = EWOMS_GET_PARAM(TypeTag, bool, OpenclIluParallel);
-            fpga_bitstream_ = EWOMS_GET_PARAM(TypeTag, std::string, FpgaBitstream);
         }
 
         template <class TypeTag>
@@ -309,11 +299,10 @@ namespace Opm
             EWOMS_REGISTER_PARAM(TypeTag, int, CprReuseSetup, "Reuse preconditioner setup. Valid options are 0: recreate the preconditioner for every linear solve, 1: recreate once every timestep, 2: recreate if last linear solve took more than 10 iterations, 3: never recreate, 4: recreated every CprReuseInterval");
             EWOMS_REGISTER_PARAM(TypeTag, int, CprReuseInterval, "Reuse preconditioner interval. Used when CprReuseSetup is set to 4, then the preconditioner will be fully recreated instead of reused every N linear solve, where N is this parameter.");
             EWOMS_REGISTER_PARAM(TypeTag, std::string, LinearSolver, "Configuration of solver. Valid options are: ilu0 (default), cpr (an alias for cpr_trueimpes), cpr_quasiimpes, cpr_trueimpes or amg. Alternatively, you can request a configuration to be read from a JSON file by giving the filename here, ending with '.json.'");
-            EWOMS_REGISTER_PARAM(TypeTag, std::string, AcceleratorMode, "Use GPU (cusparseSolver or openclSolver) or FPGA (fpgaSolver) as the linear solver, usage: '--accelerator-mode=[none|cusparse|opencl|fpga|amgcl]'");
+            EWOMS_REGISTER_PARAM(TypeTag, std::string, AcceleratorMode, "Choose a linear solver, usage: '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution]'");
             EWOMS_REGISTER_PARAM(TypeTag, int, BdaDeviceId, "Choose device ID for cusparseSolver or openclSolver, use 'nvidia-smi' or 'clinfo' to determine valid IDs");
             EWOMS_REGISTER_PARAM(TypeTag, int, OpenclPlatformId, "Choose platform ID for openclSolver, use 'clinfo' to determine valid platform IDs");
             EWOMS_REGISTER_PARAM(TypeTag, bool, OpenclIluParallel, "Parallelize ILU decomposition and application on GPU");
-            EWOMS_REGISTER_PARAM(TypeTag, std::string, FpgaBitstream, "Specify the bitstream file for fpgaSolver (including path), usage: '--fpga-bitstream=<filename>'");
         }
 
         FlowLinearSolverParameters() { reset(); }
@@ -337,7 +326,6 @@ namespace Opm
             bda_device_id_            = 0;
             opencl_platform_id_       = 0;
             opencl_ilu_parallel_      = true;
-            fpga_bitstream_           = "";
         }
     };
 
diff --git a/opm/simulators/linalg/ISTLSolverEbos.cpp b/opm/simulators/linalg/ISTLSolverEbos.cpp
index 1a15bed8f..beee1906f 100644
--- a/opm/simulators/linalg/ISTLSolverEbos.cpp
+++ b/opm/simulators/linalg/ISTLSolverEbos.cpp
@@ -31,7 +31,7 @@
 #include <opm/simulators/linalg/ParallelIstlInformation.hpp>
 #include <opm/simulators/utils/ParallelCommunication.hpp>
 
-#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL || HAVE_ROCALUTION
+#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
 #include <opm/simulators/linalg/bda/BdaBridge.hpp>
 #include <opm/simulators/linalg/bda/WellContributions.hpp>
 #include <iostream>
@@ -176,11 +176,10 @@ void FlexibleSolverInfo<Matrix,Vector,Comm>::create(const Matrix& matrix,
     }
 }
 
-#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL || HAVE_ROCALUTION
+#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
 template<class Matrix, class Vector>
 BdaSolverInfo<Matrix,Vector>::
 BdaSolverInfo(const std::string& accelerator_mode,
-              const std::string& fpga_bitstream,
               const int linear_solver_verbosity,
               const int maxit,
               const double tolerance,
@@ -188,7 +187,7 @@ BdaSolverInfo(const std::string& accelerator_mode,
               const int deviceID,
               const bool opencl_ilu_parallel,
               const std::string& linsolver)
-    : bridge_(std::make_unique<Bridge>(accelerator_mode, fpga_bitstream,
+    : bridge_(std::make_unique<Bridge>(accelerator_mode,
                                        linear_solver_verbosity, maxit,
                                        tolerance, platformID, deviceID,
                                        opencl_ilu_parallel, linsolver))
@@ -229,12 +228,11 @@ apply(Vector& rhs,
       Dune::InverseOperatorResult& result)
 {
     bool use_gpu = bridge_->getUseGpu();
-    bool use_fpga = bridge_->getUseFpga();
-    if (use_gpu || use_fpga) {
+    if (use_gpu) {
         auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn);
         bridge_->initWellContributions(*wellContribs, x.N() * x[0].N());
 
-        // the WellContributions can only be applied separately with CUDA or OpenCL, not with an FPGA, amgcl or rocalution
+        // the WellContributions can only be applied separately with CUDA or OpenCL, not with amgcl or rocalution
 #if HAVE_CUDA || HAVE_OPENCL
         if (!useWellConn) {
             getContribs(*wellContribs);
@@ -350,7 +348,7 @@ using CommunicationType = Dune::CollectiveCommunication<int>;
     template void makeOverlapRowsInvalid<BM<Dim>>(BM<Dim>&, const std::vector<int>&); \
     template struct FlexibleSolverInfo<BM<Dim>,BV<Dim>,CommunicationType>;
 
-#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL || HAVE_ROCALUTION
+#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
 
 #define INSTANCE_GRID(Dim, Grid) \
     template void BdaSolverInfo<BM<Dim>,BV<Dim>>:: \
diff --git a/opm/simulators/linalg/ISTLSolverEbos.hpp b/opm/simulators/linalg/ISTLSolverEbos.hpp
index a49c633af..958729fc7 100644
--- a/opm/simulators/linalg/ISTLSolverEbos.hpp
+++ b/opm/simulators/linalg/ISTLSolverEbos.hpp
@@ -84,7 +84,7 @@ public:
 namespace Opm
 {
 
-#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL || HAVE_ROCALUTION
+#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
 template<class Matrix, class Vector, int block_size> class BdaBridge;
 class WellContributions;
 #endif
@@ -113,7 +113,7 @@ struct FlexibleSolverInfo
     size_t interiorCellNum_ = 0;
 };
 
-#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL || HAVE_ROCALUTION
+#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
 template<class Matrix, class Vector>
 struct BdaSolverInfo
 {
@@ -121,7 +121,6 @@ struct BdaSolverInfo
   using Bridge = BdaBridge<Matrix,Vector,Matrix::block_type::rows>;
 
   BdaSolverInfo(const std::string& accelerator_mode,
-                const std::string& fpga_bitstream,
                 const int linear_solver_verbosity,
                 const int maxit,
                 const double tolerance,
@@ -246,12 +245,12 @@ std::unique_ptr<Matrix> blockJacobiAdjacency(const Grid& grid,
                                      EWOMS_PARAM_IS_SET(TypeTag, int, LinearSolverMaxIter),
                                      EWOMS_PARAM_IS_SET(TypeTag, int, CprMaxEllIter));
 
-#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL || HAVE_ROCALUTION
+#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
             {
                 std::string accelerator_mode = EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode);
                 if ((simulator_.vanguard().grid().comm().size() > 1) && (accelerator_mode != "none")) {
                     if (on_io_rank) {
-                        OpmLog::warning("Cannot use GPU or FPGA with MPI, GPU/FPGA are disabled");
+                        OpmLog::warning("Cannot use GPU with MPI, GPU are disabled");
                     }
                     accelerator_mode = "none";
                 }
@@ -261,10 +260,8 @@ std::unique_ptr<Matrix> blockJacobiAdjacency(const Grid& grid,
                 const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
                 const bool opencl_ilu_parallel = EWOMS_GET_PARAM(TypeTag, bool, OpenclIluParallel);
                 const int linear_solver_verbosity = parameters_.linear_solver_verbosity_;
-                std::string fpga_bitstream = EWOMS_GET_PARAM(TypeTag, std::string, FpgaBitstream);
                 std::string linsolver = EWOMS_GET_PARAM(TypeTag, std::string, LinearSolver);
                 bdaBridge = std::make_unique<detail::BdaSolverInfo<Matrix,Vector>>(accelerator_mode,
-                                                                                   fpga_bitstream,
                                                                                    linear_solver_verbosity,
                                                                                    maxit,
                                                                                    tolerance,
@@ -275,7 +272,7 @@ std::unique_ptr<Matrix> blockJacobiAdjacency(const Grid& grid,
             }
 #else
             if (EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode) != "none") {
-                OPM_THROW(std::logic_error,"Cannot use accelerated solver since CUDA, OpenCL and amgcl were not found by cmake and FPGA was not enabled");
+                OPM_THROW(std::logic_error,"Cannot use accelerated solver since CUDA, OpenCL and amgcl were not found by cmake");
             }
 #endif
             extractParallelGridInformationToISTL(simulator_.vanguard().grid(), parallelInformation_);
@@ -286,12 +283,6 @@ std::unique_ptr<Matrix> blockJacobiAdjacency(const Grid& grid,
             ElementMapper elemMapper(simulator_.vanguard().gridView(), Dune::mcmgElementLayout());
             detail::findOverlapAndInterior(simulator_.vanguard().grid(), elemMapper, overlapRows_, interiorRows_);
             useWellConn_ = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions);
-#if HAVE_FPGA
-            // check usage of MatrixAddWellContributions: for FPGA they must be included
-            if (EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode) == "fpga" && !useWellConn_) {
-                OPM_THROW(std::logic_error,"fpgaSolver needs --matrix-add-well-contributions=true");
-            }
-#endif
             const bool ownersFirst = EWOMS_GET_PARAM(TypeTag, bool, OwnerCellsFirst);
             if (!ownersFirst) {
                 const std::string msg = "The linear solver no longer supports --owner-cells-first=false.";
@@ -392,9 +383,7 @@ std::unique_ptr<Matrix> blockJacobiAdjacency(const Grid& grid,
             // Solve system.
             Dune::InverseOperatorResult result;
 
-            // Use GPU if: available, chosen by user, and successful.
-            // Use FPGA if: support compiled, chosen by user, and successful.
-#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL || HAVE_ROCALUTION
+#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
             std::function<void(WellContributions&)> getContribs =
                 [this](WellContributions& w)
                 {
@@ -566,7 +555,7 @@ std::unique_ptr<Matrix> blockJacobiAdjacency(const Grid& grid,
         Matrix* matrix_;
         Vector *rhs_;
 
-#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL || HAVE_ROCALUTION
+#if HAVE_CUDA || HAVE_OPENCL || HAVE_AMGCL || HAVE_ROCALUTION
         std::unique_ptr<detail::BdaSolverInfo<Matrix, Vector>> bdaBridge;
 #endif
 
diff --git a/opm/simulators/linalg/bda/BdaBridge.cpp b/opm/simulators/linalg/bda/BdaBridge.cpp
index 24e123b90..0d1a7e18c 100644
--- a/opm/simulators/linalg/bda/BdaBridge.cpp
+++ b/opm/simulators/linalg/bda/BdaBridge.cpp
@@ -37,10 +37,6 @@
 #include <opm/simulators/linalg/bda/opencl/openclWellContributions.hpp>
 #endif
 
-#if HAVE_FPGA
-#include <opm/simulators/linalg/bda/FPGASolverBackend.hpp>
-#endif
-
 #if HAVE_AMGCL
 #include <opm/simulators/linalg/bda/amgclSolverBackend.hpp>
 #endif
@@ -60,7 +56,6 @@ namespace Opm
 
 template <class BridgeMatrix, class BridgeVector, int block_size>
 BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string accelerator_mode_,
-                                                             [[maybe_unused]] std::string fpga_bitstream,
                                                              int linear_solver_verbosity, int maxit,
                                                              double tolerance,
                                                              [[maybe_unused]] unsigned int platformID,
@@ -82,13 +77,6 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
         backend.reset(new Opm::Accelerator::openclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_parallel, linsolver));
 #else
         OPM_THROW(std::logic_error, "Error openclSolver was chosen, but OpenCL was not found by CMake");
-#endif
-    } else if (accelerator_mode.compare("fpga") == 0) {
-#if HAVE_FPGA
-        use_fpga = true;
-        backend.reset(new Opm::Accelerator::FpgaSolverBackend<block_size>(fpga_bitstream, linear_solver_verbosity, maxit, tolerance, opencl_ilu_parallel));
-#else
-        OPM_THROW(std::logic_error, "Error fpgaSolver was chosen, but FPGA was not enabled by CMake");
 #endif
     } else if (accelerator_mode.compare("amgcl") == 0) {
 #if HAVE_AMGCL
@@ -106,9 +94,8 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
 #endif
     } else if (accelerator_mode.compare("none") == 0) {
         use_gpu = false;
-        use_fpga = false;
     } else {
-        OPM_THROW(std::logic_error, "Error unknown value for parameter 'AcceleratorMode', should be passed like '--accelerator-mode=[none|cusparse|opencl|fpga|amgcl|rocalution]");
+        OPM_THROW(std::logic_error, "Error unknown value for parameter 'AcceleratorMode', should be passed like '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution]");
     }
 }
 
@@ -208,7 +195,7 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system(BridgeMatri
                                                                      WellContributions& wellContribs,
                                                                      InverseOperatorResult& res)
 {
-    if (use_gpu || use_fpga) {
+    if (use_gpu) {
         BdaResult result;
         result.converged = false;
         const int dim = (*bridgeMat)[0][0].N();
@@ -217,7 +204,7 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system(BridgeMatri
 
         if (dim != 3) {
             OpmLog::warning("BdaSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
-            use_gpu = use_fpga = false;
+            use_gpu = false;
             return;
         }
 
@@ -289,7 +276,7 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system(BridgeMatri
 
 template <class BridgeMatrix, class BridgeVector, int block_size>
 void BdaBridge<BridgeMatrix, BridgeVector, block_size>::get_result([[maybe_unused]] BridgeVector& x) {
-    if (use_gpu || use_fpga) {
+    if (use_gpu) {
         backend->get_result(static_cast<double*>(&(x[0][0])));
     }
 }
diff --git a/opm/simulators/linalg/bda/BdaBridge.hpp b/opm/simulators/linalg/bda/BdaBridge.hpp
index 33609d3a4..8001b9552 100644
--- a/opm/simulators/linalg/bda/BdaBridge.hpp
+++ b/opm/simulators/linalg/bda/BdaBridge.hpp
@@ -38,7 +38,6 @@ class BdaBridge
 private:
     int verbosity = 0;
     bool use_gpu = false;
-    bool use_fpga = false;
     std::string accelerator_mode;
     std::unique_ptr<Opm::Accelerator::BdaSolver<block_size> > backend;
     std::shared_ptr<Opm::Accelerator::BlockedMatrix> matrix;  // 'stores' matrix, actually points to h_rows, h_cols and the received BridgeMatrix for the nonzeroes
@@ -50,8 +49,7 @@ private:
 
 public:
     /// Construct a BdaBridge
-    /// \param[in] accelerator_mode           to select if an accelerated solver is used, is passed via command-line: '--accelerator-mode=[none|cusparse|opencl|fpga]'
-    /// \param[in] fpga_bitstream             FPGA programming bitstream file name, is passed via command-line: '--fpga-bitstream=[<filename>]'
+    /// \param[in] accelerator_mode           to select if an accelerated solver is used, is passed via command-line: '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution]'
     /// \param[in] linear_solver_verbosity    verbosity of BdaSolver
     /// \param[in] maxit                      maximum number of iterations for BdaSolver
     /// \param[in] tolerance                  required relative tolerance for BdaSolver
@@ -59,7 +57,7 @@ public:
     /// \param[in] deviceID                   the device ID to be used by the cusparse- and openclSolvers, too high values could cause runtime errors
     /// \param[in] opencl_ilu_parallel        whether to parallelize the ILU decomposition and application in OpenCL with level_scheduling
     /// \param[in] linsolver                  indicating the preconditioner, equal to the --linear-solver cmdline argument
-    BdaBridge(std::string accelerator_mode, std::string fpga_bitstream, int linear_solver_verbosity, int maxit, double tolerance,
+    BdaBridge(std::string accelerator_mode, int linear_solver_verbosity, int maxit, double tolerance,
         unsigned int platformID, unsigned int deviceID, bool opencl_ilu_parallel, std::string linsolver);
 
 
@@ -95,12 +93,6 @@ public:
     /// \param[in] N              number of rows in scalar vector that wellContribs will be applied on
     void initWellContributions(WellContributions& wellContribs, unsigned N);
 
-    /// Return whether the BdaBridge will use the FPGA or not
-    /// return whether the BdaBridge will use the FPGA or not
-    bool getUseFpga(){
-        return use_fpga;
-    }
-
     /// Return the selected accelerator mode, this is input via the command-line
     std::string getAccleratorName(){
         return accelerator_mode;
diff --git a/opm/simulators/linalg/bda/BdaSolver.hpp b/opm/simulators/linalg/bda/BdaSolver.hpp
index dd5b97094..586a7b9f6 100644
--- a/opm/simulators/linalg/bda/BdaSolver.hpp
+++ b/opm/simulators/linalg/bda/BdaSolver.hpp
@@ -57,8 +57,6 @@ namespace Accelerator {
         int maxit = 200;
         double tolerance = 1e-2;
 
-        std::string bitstream;
-
         int N;           // number of rows
         int Nb;          // number of blocked rows (Nb*block_size == N)
         int nnz;         // number of nonzeroes (scalars)
@@ -70,8 +68,7 @@ namespace Accelerator {
         bool initialized = false;
 
     public:
-        /// Construct a BdaSolver, can be cusparseSolver, openclSolver, fpgaSolver
-        /// \param[in] fpga_bitstream             FPGA bitstream file name (only for fpgaSolver)
+        /// Construct a BdaSolver
         /// \param[in] linear_solver_verbosity    verbosity of solver
         /// \param[in] maxit                      maximum number of iterations for solver
         /// \param[in] tolerance                  required relative tolerance for solver
@@ -80,7 +77,6 @@ namespace Accelerator {
         BdaSolver(int linear_solver_verbosity, int max_it, double tolerance_) : verbosity(linear_solver_verbosity), maxit(max_it), tolerance(tolerance_) {};
         BdaSolver(int linear_solver_verbosity, int max_it, double tolerance_, unsigned int deviceID_) : verbosity(linear_solver_verbosity), maxit(max_it), tolerance(tolerance_), deviceID(deviceID_) {};
         BdaSolver(int linear_solver_verbosity, int max_it, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : verbosity(linear_solver_verbosity), maxit(max_it), tolerance(tolerance_), platformID(platformID_), deviceID(deviceID_) {};
-        BdaSolver(std::string fpga_bitstream, int linear_solver_verbosity, int max_it, double tolerance_) : verbosity(linear_solver_verbosity), maxit(max_it), tolerance(tolerance_), bitstream(fpga_bitstream) {};
 
         /// Define virtual destructor, so that the derivedclass destructor will be called
         virtual ~BdaSolver() {};
diff --git a/opm/simulators/linalg/bda/BlockedMatrix.cpp b/opm/simulators/linalg/bda/BlockedMatrix.cpp
index c5b2074e7..20f055cb1 100644
--- a/opm/simulators/linalg/bda/BlockedMatrix.cpp
+++ b/opm/simulators/linalg/bda/BlockedMatrix.cpp
@@ -27,7 +27,6 @@
 
 #include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
 #include <opm/simulators/linalg/bda/Matrix.hpp>
-#include <opm/simulators/linalg/bda/FPGAUtils.hpp>
 #include <opm/simulators/linalg/bda/Matrix.hpp>
 
 namespace Opm
@@ -98,368 +97,5 @@ void blockMult(double *mat1, double *mat2, double *resMat, unsigned int block_si
     }
 }
 
-#if HAVE_FPGA
-
-/*Subtract two blocks from one another element by element*/
-void blockSub(double *mat1, double *mat2, double *resMat, unsigned int block_size) {
-    for (unsigned int row = 0; row < block_size; row++) {
-        for (unsigned int col = 0; col < block_size; col++) {
-            resMat[row * block_size + col] = mat1[row * block_size + col] - mat2[row * block_size + col];
-        }
-    }
-}
-
-/*Multiply a block with a vector block, and add the result, scaled by a constant, to the result vector*/
-void blockVectMult(double *mat, double *vect, double scale, double *resVect, bool resetRes, unsigned int block_size) {
-    for (unsigned int row = 0; row < block_size; row++) {
-        if (resetRes) {
-            resVect[row] = 0.0;
-        }
-        for (unsigned int col = 0; col < block_size; col++) {
-            resVect[row] += scale * mat[row * block_size + col] * vect[col];
-        }
-    }
-}
-
-
-
-int BlockedMatrix::countUnblockedNnzs() {
-    int numNnzsOverThreshold = 0;
-    int totalNnzs = rowPointers[Nb];
-    for (unsigned int idx = 0; idx < totalNnzs * block_size * block_size; idx++) {
-        if (fabs(nnzValues[idx]) > nnzThreshold) {
-            numNnzsOverThreshold++;
-        }
-    }
-    return numNnzsOverThreshold;
-}
-
-/*
- * Unblock the blocked matrix. Input the blocked matrix and output a CSR matrix without blocks.
- * If unblocking the U matrix, the rows in all blocks need to written to the new matrix in reverse order.
-*/
-void BlockedMatrix::unblock(Matrix *mat, bool isUMatrix) {
-    const unsigned int bs = block_size;
-    int valIndex = 0, nnzsPerRow;
-
-    mat->rowPointers[0] = 0;
-    // go through the blocked matrix row-by row of blocks, and then row-by-row inside the block, and
-    // write all non-zero values and corresponding column indices that belong to the same row into the new matrix.
-    for (int row = 0; row < Nb; row++) {
-        for (unsigned int bRow = 0; bRow < bs; bRow++) {
-            nnzsPerRow = 0;
-            for (int col = rowPointers[row]; col < rowPointers[row + 1]; col++) {
-                for (unsigned int bCol = 0; bCol < bs; bCol++) {
-                    int idx = 0;
-                    // If the matrix is the U matrix, store the rows inside a block in reverse order.
-                    if (isUMatrix) {
-                        idx = col * bs * bs + (bs - bRow - 1) * bs + bCol;
-                    } else {
-                        idx = col * bs * bs + bRow * bs + bCol;
-                    }
-
-                    if (fabs(nnzValues[idx]) > nnzThreshold) {
-                        mat->nnzValues[valIndex] = nnzValues[idx];
-                        mat->colIndices[valIndex] = colIndices[col] * bs + bCol;
-                        valIndex++;
-                        nnzsPerRow++;
-                    }
-                }
-            }
-            // Update the rowpointers of the new matrix
-            mat->rowPointers[row * bs + bRow + 1] = mat->rowPointers[row * bs + bRow] + nnzsPerRow;
-        }
-    }
-}
-
-
-
-/*Optimized version*/
-// ub* prefixes indicate unblocked data
-int BlockedMatrix::toRDF(int numColors, int *nodesPerColor, bool isUMatrix,
-                                     std::vector<std::vector<int> >& colIndicesInColor, int nnzsPerRowLimit, int *nnzValsSizes,
-                                     std::vector<std::vector<double> >& ubNnzValues, short int *ubColIndices, unsigned char *NROffsets, int *colorSizes, int *valSize)
-{
-    int res;
-    int numUnblockedNnzs = countUnblockedNnzs();
-
-    // initialize the non-blocked matrix with the obtained size.
-    std::unique_ptr<Matrix> ubMat = std::make_unique<Matrix>(Nb * block_size, numUnblockedNnzs);
-
-    unblock(ubMat.get(), isUMatrix);
-
-    std::vector<int> ubNodesPerColor(numColors);
-    for (int i = 0; i < numColors; i++) {
-        ubNodesPerColor[i] = nodesPerColor[i] * block_size;
-    }
-
-    *valSize = ubMat->nnzs;
-
-    res = ubMat->toRDF(numColors, ubNodesPerColor,
-                       colIndicesInColor, nnzsPerRowLimit,
-                       ubNnzValues, ubColIndices, nnzValsSizes,
-                       NROffsets, colorSizes);
-    return res;
-}
-
-
-// coloring is already done, numColors and nodesPerColor are set
-// [rows|columns]PerColorLimit are already queried from the FPGA
-// colIndicesInColor, PIndicesAddr and colorSizes are written here
-// There are 3 matrices analysed: the full matrix for spmv, L and U for ILU
-// node == row
-// color == partition
-// colorSizes: contains meta info about a color/partition, like number of rows and number of nnzs
-// colIndicesInColor: for each color: mapping of colIdx to colValue, unblocked. Used in Matrix::toRDF().
-//                    due to partitioning, lots of columns are removed, this matrix keeps track of the mapping
-// PIndicesAddr: contiguously for each color: indices of x in global x vector, unblocked
-//               if color 0 has A unique colAccesses, PIndicesAddr[0 - A] are for color 0
-//               then PIndicesAddr[A - A+B] are for color 1. Directly copied to FPGA
-int BlockedMatrix::findPartitionColumns(int numColors, int *nodesPerColor,
-        int rowsPerColorLimit, int columnsPerColorLimit,
-        std::vector<std::vector<int> >& colIndicesInColor, int *PIndicesAddr, int *colorSizes,
-        std::vector<std::vector<int> >& LColIndicesInColor, int *LPIndicesAddr, int *LColorSizes,
-        std::vector<std::vector<int> >& UColIndicesInColor, int *UPIndicesAddr, int *UColorSizes)
-{
-    // Data related to column indices per partition
-    int doneRows = 0;
-    std::vector<bool> isColAccessed(Nb); // std::vector<bool> might have some different optimized implementation, initialize in a loop
-    std::vector<bool> isLColAccessed(Nb);
-    int totalCols = 0;    // sum of numColAccesses for each color, blocked
-    int LTotalCols = 0, UTotalCols = 0;
-    int maxCols = 0;         // max value of numColAccesses for any color
-    int maxRowsPerColor = 0; // max value of numRows for any color
-    int maxColsPerRow = 0;   // max value of colsPerRow for any color
-    // colsInColor holds all (blocked) columnIndices that are accessed by that color without duplicates
-    // colsInColor[c][i] contains the ith column that color c accesses
-    // initial size allows for each color to access all columns, with space for padding
-    std::vector<std::vector<int> > colsInColor(numColors, std::vector<int>(roundUpTo(Nb, 16)));
-    std::vector<std::vector<int> > LColsInColor(numColors, std::vector<int>(roundUpTo(Nb, 16)));
-    std::vector<std::vector<int> > UColsInColor(numColors, std::vector<int>(roundUpTo(Nb, 16)));
-
-    // find which columns are accessed in each color, as well as how many non-zeroes there are per color.
-    for (int c = 0; c < numColors; c++) {
-        int numRows = 0;
-        // initialize
-        for (int row = 0; row < Nb; row++) {
-            isColAccessed[row] = false;
-            isLColAccessed[row] = false;
-        }
-        if (c > 0) {
-            for (int i = doneRows - nodesPerColor[c - 1]; i < doneRows; i++) {
-                isLColAccessed[i] = true;
-            }
-        }
-        int numColAccesses = 0, LNumColAccesses = 0, UNumColAccesses = 0;   // number of unique accesses, blocked
-        // for every row in this color
-        for (int row = doneRows; row < doneRows + nodesPerColor[c]; row++) {
-            int colsPerRow = 0;    // number of blocks for this row
-            bool rowIsEmpty = (rowPointers[row] == rowPointers[row + 1]);
-            for (int idx = rowPointers[row]; idx < rowPointers[row + 1]; idx++) {
-                // for every column in the current row, check if that column was accessed before this color
-                int col = colIndices[idx];
-                if (isColAccessed[col] == false) {
-                    colsInColor[c][numColAccesses] = col;
-                    isColAccessed[col] = true;
-                    numColAccesses++;
-                    if (col > row) {
-                        UColsInColor[numColors - c - 1][UNumColAccesses] = col;
-                        UNumColAccesses++;
-                    }
-                }
-                if (isLColAccessed[col] == false) {
-                    if (col < row) {
-                        LColsInColor[c][LNumColAccesses] = col;
-                        LNumColAccesses++;
-                        isLColAccessed[col] = true;
-                    }
-                }
-                colsPerRow++;
-            }
-            if (rowIsEmpty != true) {
-                numRows++;
-            }
-            maxColsPerRow = std::max(maxColsPerRow, colsPerRow);
-        }
-
-        // add columns from previous color into L partition to simplify data forwarding
-        if (c > 0) {
-            for (int i = doneRows - nodesPerColor[c - 1]; i < doneRows; i++) {
-                LColsInColor[c][LNumColAccesses] = i;
-                LNumColAccesses++;
-            }
-        }
-
-        colorSizes[c * 4 + 10] = numColAccesses * block_size;
-        LColorSizes[c * 4 + 10] = LNumColAccesses * block_size;
-        UColorSizes[(numColors - c - 1) * 4 + 10] = UNumColAccesses * block_size;
-
-        // store mapping
-        for (int col = 0; col < numColAccesses; col++) {
-            for (unsigned int i = 0; i < block_size; i++) {
-                colIndicesInColor[c][colsInColor[c][col]*block_size + i] = col * block_size + i;
-            }
-        }
-        for (int col = 0; col < LNumColAccesses; col++) {
-            for (unsigned int i = 0; i < block_size; i++) {
-                LColIndicesInColor[c][LColsInColor[c][col]*block_size + i] = col * block_size + i;
-            }
-        }
-        for (int col = 0; col < UNumColAccesses; col++) {
-            for (unsigned int i = 0; i < block_size; i++) {
-                UColIndicesInColor[numColors - c - 1][UColsInColor[numColors - c - 1][col]*block_size + i] = col * block_size + i;
-            }
-        }
-
-        // zeropad the colsInColor number to the nearest multiple of 16, because there are 16 32-bit color_col_index values per cacheline
-        while (numColAccesses % 16 != 0) {
-            colsInColor[c][numColAccesses] = colsInColor[c][numColAccesses - 1];
-            numColAccesses++;
-        }
-        while (LNumColAccesses % 16 != 0) {
-            LColsInColor[c][LNumColAccesses] = LColsInColor[c][LNumColAccesses - 1];
-            LNumColAccesses++;
-        }
-        while (UNumColAccesses % 16 != 0) {
-            UColsInColor[numColors - c - 1][UNumColAccesses] = UColsInColor[numColors - c - 1][UNumColAccesses - 1];
-            UNumColAccesses++;
-        }
-        maxCols = std::max(numColAccesses, maxCols);
-        totalCols += numColAccesses;
-        LTotalCols += LNumColAccesses;
-        UTotalCols += UNumColAccesses;
-        doneRows = doneRows + nodesPerColor[c];
-        maxRowsPerColor = std::max(numRows, maxRowsPerColor);
-    }
-
-    if (maxCols * static_cast<int>(block_size) > columnsPerColorLimit) {
-        std::ostringstream errorstring;
-        errorstring << "ERROR: Current reordering exceeds maximum number of columns per color limit: " << maxCols * block_size << " > " << columnsPerColorLimit;
-        OPM_THROW(std::logic_error, errorstring.str());
-    }
-
-    doneRows = 0;
-    int diagValsSize = 0;
-    int maxRows = 0;
-
-    for (int c = 0; c < numColors; c++) {
-        // calculate sizes that include zeropadding
-        diagValsSize += roundUpTo(nodesPerColor[c] * block_size * 4, 8);
-        doneRows += nodesPerColor[c];
-        if (nodesPerColor[c] * static_cast<int>(block_size) > maxRows)
-            maxRows = nodesPerColor[c];
-        colorSizes[c * 4 + 9] = nodesPerColor[c] * block_size;
-        LColorSizes[c * 4 + 9] = nodesPerColor[c] * block_size;
-        UColorSizes[c * 4 + 9] = nodesPerColor[numColors - c - 1] * block_size;
-    }
-
-    if (maxRows * static_cast<int>(block_size) > rowsPerColorLimit) {
-        std::ostringstream errorstring;
-        errorstring << "ERROR: Current reordering exceeds maximum number of columns per color limit: " << maxRows * block_size << " > " << rowsPerColorLimit;
-        OPM_THROW(std::logic_error, errorstring.str());
-    }
-
-    // create and fill sizes array as far as already possible
-    colorSizes[0] = Nb * block_size;
-    LColorSizes[0] = Nb * block_size;
-    UColorSizes[0] = Nb * block_size;
-    // col_sizes (but the matrix is square)
-    colorSizes[1] = Nb * block_size;
-    LColorSizes[1] = Nb * block_size;
-    UColorSizes[1] = Nb * block_size;
-    colorSizes[2] = totalCols * block_size;
-    LColorSizes[2] = LTotalCols * block_size;
-    UColorSizes[2] = UTotalCols * block_size;
-    // missing val_size, written in Matrix::toRDF()
-    colorSizes[4] = numColors;
-    LColorSizes[4] = numColors;
-    UColorSizes[4] = numColors;
-    // missing NRFlagsSize, written in Matrix::toRDF()
-    colorSizes[6] = diagValsSize;
-    LColorSizes[6] = diagValsSize;
-    UColorSizes[6] = diagValsSize;
-
-    int paddingIdx = numColors;
-    while (paddingIdx % 4 != 0) {
-        for (unsigned int i = 0; i < 4; i++) {
-            colorSizes[paddingIdx * 4 + 8 + i] = 0;
-            LColorSizes[paddingIdx * 4 + 8 + i] = 0;
-            UColorSizes[paddingIdx * 4 + 8 + i] = 0;
-        }
-        paddingIdx++;
-    }
-
-    int index = 0, Lindex = 0, Uindex = 0;
-    for (int c = 0; c < numColors; c++) {
-        // for each unique col access
-        for (int col = 0; col < colorSizes[c * 4 + 10] / static_cast<int>(block_size) ; col++) {
-            for (unsigned int i = 0; i < block_size; i++) {
-                PIndicesAddr[index] = colsInColor[c][col] * block_size + i;
-                index++;
-            }
-        }
-        // add padding
-        while (index % 16 != 0) {
-            PIndicesAddr[index] = PIndicesAddr[index - 1];
-            index++;
-        }
-        for (int col = 0; col < LColorSizes[c * 4 + 10] / static_cast<int>(block_size) ; col++) {
-            for (unsigned int i = 0; i < block_size; i++) {
-                LPIndicesAddr[Lindex] = LColsInColor[c][col] * block_size + i;
-                Lindex++;
-            }
-        }
-        while (Lindex % 16 != 0) {
-            LPIndicesAddr[Lindex] = LPIndicesAddr[Lindex - 1];
-            Lindex++;
-        }
-        for (int col = 0; col < UColorSizes[c * 4 + 10] / static_cast<int>(block_size) ; col++) {
-            for (unsigned int i = 0; i < block_size; i++) {
-                UPIndicesAddr[Uindex] = UColsInColor[c][col] * block_size + i;
-                Uindex++;
-            }
-        }
-        while (Uindex % 16 != 0) {
-            UPIndicesAddr[Uindex] = UPIndicesAddr[Uindex - 1];
-            Uindex++;
-        }
-    }
-    return 0;
-}
-
-
-void blockedDiagtoRDF(double *blockedDiagVals, int rowSize, int numColors, std::vector<int>& rowsPerColor, double *RDFDiag) {
-    const unsigned int block_size = 3;
-    int doneRows = rowSize - 1;    // since the rows of U are reversed, the rows of the diag are also reversed
-    int RDFIndex = 0;
-    for (int c = 0; c < numColors; c++) {
-        for (int r = 0; r < rowsPerColor[c]; r++) {
-
-            // the rows in the block are reversed
-            for (int i = static_cast<int>(block_size) - 1; i >= 0; i--) {
-                for (unsigned int j = 0; j < block_size; j++) {
-                    RDFDiag[RDFIndex] = blockedDiagVals[(doneRows - r) * block_size * block_size + i * block_size + j];
-                    RDFIndex++;
-                }
-                // add 4th column, zeropadding
-                RDFDiag[RDFIndex] = 0.0;
-                RDFIndex++;
-            }
-        }
-        doneRows -= rowsPerColor[c];
-
-        // make sure the color completely fills a cacheline
-        // a color with 3 blocks would otherwise leave space
-        while (RDFIndex % 8 != 0) {
-            RDFDiag[RDFIndex] = 0.0;
-            RDFIndex++;
-        }
-    }
-    assert(RDFIndex % 8 == 0);
-}
-
-#endif // HAVE_FPGA
-
-
 } // namespace Accelerator
 } // namespace Opm
diff --git a/opm/simulators/linalg/bda/BlockedMatrix.hpp b/opm/simulators/linalg/bda/BlockedMatrix.hpp
index ba120e523..d146938b6 100644
--- a/opm/simulators/linalg/bda/BlockedMatrix.hpp
+++ b/opm/simulators/linalg/bda/BlockedMatrix.hpp
@@ -20,12 +20,6 @@
 #ifndef OPM_BLOCKED_MATRIX_HPP
 #define OPM_BLOCKED_MATRIX_HPP
 
-#if HAVE_FPGA
-#include <vector>
-#include <opm/simulators/linalg/bda/Matrix.hpp>
-#endif
-
-
 namespace Opm
 {
 namespace Accelerator
@@ -94,27 +88,6 @@ public:
         }
     }
 
-#if HAVE_FPGA
-    constexpr static double nnzThreshold = 1e-80;  // for unblocking, a nonzero must be bigger than this threshold to be considered a nonzero
-
-    int countUnblockedNnzs();
-
-    void unblock(Matrix *mat, bool isUMatrix);
-
-    /// Converts this matrix to the dataformat used by the FPGA
-    /// Is done every linear solve. The exact sparsity pattern can change every time since the zeros are removed during unblocking
-    int toRDF(int numColors, int *nodesPerColor, bool isUMatrix,
-        std::vector<std::vector<int> >& colIndicesInColor, int nnzsPerRowLimit, int *nnzValsSizes,
-        std::vector<std::vector<double> >& nnzValues, short int *colIndices, unsigned char *NROffsets, int *colorSizes, int *valSize);
-
-    /// Analyses the sparsity pattern and prepares for toRDF()
-    /// Is only called once
-    int findPartitionColumns(int numColors, int *nodesPerColor,
-        int rowsPerColorLimit, int columnsPerColorLimit,
-        std::vector<std::vector<int> >& colIndicesInColor, int *PIndicesAddr, int *colorSizes,
-        std::vector<std::vector<int> >& LColIndicesInColor, int *LPIndicesAddr, int *LColorSizes,
-        std::vector<std::vector<int> >& UColIndicesInColor, int *UPIndicesAddr, int *UColorSizes);
-#endif
 
     double *nnzValues;
     int *colIndices;
@@ -151,40 +124,6 @@ void blockMultSub(double *a, double *b, double *c, unsigned int block_size);
 /// \param[in] block_size        size of block
 void blockMult(double *mat1, double *mat2, double *resMat, unsigned int block_size);
 
-
-#if HAVE_FPGA
-/// Perform a matrix-matrix subtraction on two blocks, element-wise
-/// resMat = mat1 - mat2
-/// \param[in] mat1              input block 1
-/// \param[in] mat2              input block 2
-/// \param[out] resMat           output block
-/// \param[in] block_size        size of block
-void blockSub(double *mat1, double *mat2, double *resMat, unsigned int block_size);
-
-/// Perform a matrix-vector multiplication
-/// resVect = mat * vect
-/// resVect += mat * vect
-/// \param[in] mat               input matrix
-/// \param[in] vect              input vector
-/// \param[in] scale             multiply output with this factor
-/// \param[inout] resVect        output vector
-/// \param[in] resetRes          if true, overwrite resVect, otherwise add to it
-/// \param[in] block_size        size of block
-void blockVectMult(double *mat, double *vect, double scale, double *resVect, bool resetRes, unsigned int block_size);
-
-/// Convert a blocked inverse diagonal to the FPGA format.
-/// This is the only blocked structure on the FPGA, since it needs blocked matrix-vector multiplication after the backwards substitution of U.
-/// Since the rows of U are reversed, the rows of the diag are also reversed.
-/// The cachelines can hold 8 doubles, a block has 9 doubles.
-/// The format converts 3x3 blocks to 3x4 blocks, so 1 cacheline holds 2 unblocked rows.
-/// Then 2 blocks (24 doubles) fit on 3 cachelines.
-/// Example:
-/// [1 2 3]    [1 2 3 0]              [1 2 3 0 4 5 6 0]
-/// [4 5 6] -> [4 5 6 0] -> hardware: [7 8 9 0 block2 row1]
-/// [7 8 9]    [7 8 9 0]              [block2 row2 block2 row3]
-void blockedDiagtoRDF(double *blockedDiagVals, int rowSize, int numColors, std::vector<int>& rowsPerColor, double *RDFDiag);
-#endif
-
 } // namespace Accelerator
 } // namespace Opm
 
diff --git a/opm/simulators/linalg/bda/FPGABILU0.cpp b/opm/simulators/linalg/bda/FPGABILU0.cpp
deleted file mode 100644
index 013fa406d..000000000
--- a/opm/simulators/linalg/bda/FPGABILU0.cpp
+++ /dev/null
@@ -1,418 +0,0 @@
-/*
-  Copyright 2020 Equinor ASA
-
-  This file is part of the Open Porous Media project (OPM).
-
-  OPM is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  OPM is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <config.h>
-
-#include <opm/common/OpmLog/OpmLog.hpp>
-#include <opm/common/ErrorMacros.hpp>
-#include <opm/simulators/linalg/MatrixBlock.hpp>
-#include <dune/common/timer.hh>
-
-#include <opm/simulators/linalg/bda/FPGABILU0.hpp>
-#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
-#include <opm/simulators/linalg/bda/Reorder.hpp>
-#include <opm/simulators/linalg/bda/FPGAUtils.hpp>
-
-namespace Opm
-{
-namespace Accelerator
-{
-
-using Opm::OpmLog;
-using Dune::Timer;
-
-template <unsigned int block_size>
-FPGABILU0<block_size>::FPGABILU0(ILUReorder opencl_ilu_reorder_, int verbosity_, int maxRowsPerColor_, int maxColsPerColor_, int maxNNZsPerRow_, int maxNumColors_) :
-    verbosity(verbosity_), opencl_ilu_reorder(opencl_ilu_reorder_), maxRowsPerColor(maxRowsPerColor_), maxColsPerColor(maxColsPerColor_), maxNNZsPerRow(maxNNZsPerRow_), maxNumColors(maxNumColors_)
-{
-    if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) {
-        level_scheduling = true;
-    } else if (opencl_ilu_reorder == ILUReorder::GRAPH_COLORING) {
-        graph_coloring = true;
-    } else {
-        OPM_THROW(std::logic_error, "Error ilu reordering strategy not set correctly\n");
-    }
-}
-
-
-template <unsigned int block_size>
-FPGABILU0<block_size>::~FPGABILU0()
-{
-    delete[] invDiagVals;
-}
-
-
-template <unsigned int block_size>
-bool FPGABILU0<block_size>::init(BlockedMatrix *mat)
-{
-    const unsigned int bs = block_size;
-
-    resultPointers.resize(numResultPointers, nullptr);
-    resultSizes.resize(numResultSizes);
-
-    // Set nnzSplit as hardcoded constant until support for more than one nnzVals read array is added.
-    const unsigned int nnzSplit = 1;
-
-    this->N = mat->Nb * block_size;
-    this->Nb = mat->Nb;
-    this->nnz = mat->nnzbs * block_size * block_size;
-    this->nnzbs = mat->nnzbs;
-
-    toOrder.resize(Nb);
-    fromOrder.resize(Nb);
-
-    std::vector<int> CSCRowIndices(nnzbs);
-    std::vector<int> CSCColPointers(Nb + 1);
-
-    if (level_scheduling) {
-        Timer t_convert;
-        csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), mat->Nb);
-        if (verbosity >= 3) {
-            std::ostringstream out;
-            out << "FPGABILU0 convert CSR to CSC: " << t_convert.stop() << " s";
-            OpmLog::info(out.str());
-        }
-    }
-
-    Timer t_analysis;
-    rMat = std::make_shared<BlockedMatrix>(mat->Nb, mat->nnzbs, block_size);
-    LUMat = std::make_unique<BlockedMatrix>(*rMat);
-    std::ostringstream out;
-    if (level_scheduling) {
-        out << "FPGABILU0 reordering strategy: " << "level_scheduling\n";
-        findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), mat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
-    } else if (graph_coloring) {
-        out << "FPGABILU0 reordering strategy: " << "graph_coloring\n";
-        findGraphColoring<bs>(mat->colIndices, mat->rowPointers, CSCRowIndices.data(), CSCColPointers.data(), mat->Nb, maxRowsPerColor, maxColsPerColor, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
-    }
-
-    if (numColors > maxNumColors) {
-        std::ostringstream errorstring;
-        errorstring << "ERROR: the matrix was reordered into too many colors. Created " << numColors << " colors, while hardware only supports up to " << maxNumColors << "\n";
-        OPM_THROW(std::logic_error, errorstring.str());
-    }
-
-    if (verbosity >= 3) {
-        out << "FPGABILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors";
-    }
-    OpmLog::info(out.str());
-
-    int colorRoundedValSize = 0, LColorRoundedValSize = 0, UColorRoundedValSize = 0;
-    int NROffsetSize = 0, LNROffsetSize = 0, UNROffsetSize = 0;
-    int blockDiagSize = 0;
-    // This reordering is needed here only to te result can be used to calculate worst-case scenario array sizes
-    reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rMat.get());
-    int doneRows = 0;
-    for (int c = 0; c < numColors; c++) {
-        for (int i = doneRows; i < doneRows + rowsPerColor[c]; i++) {
-            for (int j = rMat->rowPointers[i]; j < rMat->rowPointers[i + 1]; j++) {
-                int columnIndex = rMat->colIndices[j];
-                if (columnIndex < i) {
-                    LColorRoundedValSize += 9;
-                    LNROffsetSize += 9;
-                }
-                if (columnIndex > i) {
-                    UColorRoundedValSize += 9;
-                    UNROffsetSize += 9;
-                }
-                colorRoundedValSize += 9;
-                NROffsetSize += 9;
-            }
-            blockDiagSize += 12;
-        }
-        // End of color: round all sizes to nearest cacheline
-        colorRoundedValSize = roundUpTo(colorRoundedValSize, 32);
-        LColorRoundedValSize = roundUpTo(LColorRoundedValSize, 32);
-        UColorRoundedValSize = roundUpTo(UColorRoundedValSize, 32);
-        NROffsetSize = roundUpTo(NROffsetSize, 64);
-        LNROffsetSize = roundUpTo(LNROffsetSize, 64);
-        UNROffsetSize = roundUpTo(UNROffsetSize, 64);
-        blockDiagSize = roundUpTo(blockDiagSize, 8);
-
-        doneRows += rowsPerColor[c];
-    }
-    int colorSizesNum = 8 + roundUpTo(4 * numColors, 16);
-    int worstCaseColumnAccessNum = numColors * maxColsPerColor;
-
-    nnzValues.resize(nnzSplit, std::vector<double>(colorRoundedValSize));
-    LnnzValues.resize(nnzSplit, std::vector<double>(LColorRoundedValSize));
-    UnnzValues.resize(nnzSplit, std::vector<double>(UColorRoundedValSize));
-    // initial number of nnz, used to allocate
-    nnzValsSizes.resize(nnzSplit, colorRoundedValSize);
-    LnnzValsSizes.resize(nnzSplit, LColorRoundedValSize);
-    UnnzValsSizes.resize(nnzSplit, UColorRoundedValSize);
-    colIndices.resize(colorRoundedValSize);
-    LColIndices.resize(LColorRoundedValSize);
-    UColIndices.resize(UColorRoundedValSize);
-    NROffsets.resize(NROffsetSize);
-    LNROffsets.resize(LNROffsetSize);
-    UNROffsets.resize(UNROffsetSize);
-    PIndicesAddr.resize(worstCaseColumnAccessNum);
-    LPIndicesAddr.resize(worstCaseColumnAccessNum);
-    UPIndicesAddr.resize(worstCaseColumnAccessNum);
-    colorSizes.resize(colorSizesNum);
-    LColorSizes.resize(colorSizesNum);
-    UColorSizes.resize(colorSizesNum);
-    blockDiag.resize(blockDiagSize);
-    colIndicesInColor.resize(numColors, std::vector<int>(rMat->Nb * block_size, 0));
-    LColIndicesInColor.resize(numColors, std::vector<int>(rMat->Nb * block_size, 0));
-    UColIndicesInColor.resize(numColors, std::vector<int>(rMat->Nb * block_size, 0));
-
-    int err = rMat->findPartitionColumns(numColors, rowsPerColor.data(),
-                                         maxRowsPerColor, maxColsPerColor,
-                                         colIndicesInColor, PIndicesAddr.data(), colorSizes.data(),
-                                         LColIndicesInColor, LPIndicesAddr.data(), LColorSizes.data(),
-                                         UColIndicesInColor, UPIndicesAddr.data(), UColorSizes.data());
-    if (err != 0) {
-        std::ostringstream errorstring;
-        errorstring << "ERROR: findPartitionColumns failed, code " << err << "\n";
-        OPM_THROW(std::logic_error, errorstring.str());
-    }
-
-    diagIndex.resize(mat->Nb, 0);
-    invDiagVals = new double[mat->Nb * bs * bs];
-    LMat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2, block_size);
-    UMat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2, block_size);
-    resultPointers[0] = (void *) colorSizes.data();
-    resultPointers[1] = (void *) PIndicesAddr.data();
-    resultPointers[2] = (void *) nnzValues.data();
-    resultPointers[3] = (void *) colIndices.data();
-    resultPointers[4] = (void *) NROffsets.data();
-    resultPointers[5] = (void *) nnzValsSizes.data();
-    resultPointers[6] = (void *) LColorSizes.data();
-    resultPointers[7] = (void *) LPIndicesAddr.data();
-    resultPointers[8] = (void *) LnnzValues.data();
-    resultPointers[9] = (void *) LColIndices.data();
-    resultPointers[10] = (void *) LNROffsets.data();
-    resultPointers[11] = (void *) LnnzValsSizes.data();
-    resultPointers[12] = (void *) UColorSizes.data();
-    resultPointers[13] = (void *) UPIndicesAddr.data();
-    resultPointers[14] = (void *) UnnzValues.data();
-    resultPointers[15] = (void *) UColIndices.data();
-    resultPointers[16] = (void *) UNROffsets.data();
-    resultPointers[17] = (void *) UnnzValsSizes.data();
-    resultPointers[18] = (void *) blockDiag.data();
-    //resultPointers[19] and [20] are set by the caller
-    resultSizes[0] = mat->Nb * block_size;
-    resultSizes[1] = colorRoundedValSize; // zeropadded valSize;
-    resultSizes[2] = numColors;
-    resultSizes[3] = worstCaseColumnAccessNum; //totalCols
-    resultSizes[4] = NROffsetSize; //NRFlagSize
-    resultSizes[5] = blockDiagSize; //diagValsSize
-    resultSizes[6] = mat->Nb * block_size;
-    resultSizes[7] = LColorRoundedValSize; // zeropadded LValSize;
-    resultSizes[8] = numColors;
-    resultSizes[9] = worstCaseColumnAccessNum; //LTotalCols
-    resultSizes[10] = LNROffsetSize; //LNRFlagSize
-    resultSizes[11] = blockDiagSize; //LDiagValsSize
-    resultSizes[12] = mat->Nb * block_size;
-    resultSizes[13] = UColorRoundedValSize; // zeropadded UValSize;
-    resultSizes[14] = numColors;
-    resultSizes[15] = worstCaseColumnAccessNum; //UTotalCols
-    resultSizes[16] = UNROffsetSize; //UNRFlagSize
-    resultSizes[17] = blockDiagSize; //UDiagValsSize
-    return true;
-} // end init()
-
-
-template <unsigned int block_size>
-bool FPGABILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
-{
-    const unsigned int bs = block_size;
-    Timer t_reorder;
-    reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rMat.get());
-
-    if (verbosity >= 3) {
-        std::ostringstream out;
-        out << "FPGABILU0 reorder matrix: " << t_reorder.stop() << " s";
-        OpmLog::info(out.str());
-    }
-
-    // TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
-    Timer t_memcpy;
-    memcpy(LUMat->nnzValues, rMat->nnzValues, sizeof(double) * bs * bs * rMat->nnzbs);
-
-    if (verbosity >= 3) {
-        std::ostringstream out;
-        out << "FPGABILU0 memcpy: " << t_memcpy.stop() << " s";
-        OpmLog::info(out.str());
-    }
-
-    int i, j, ij, ik, jk;
-    int iRowStart, iRowEnd, jRowEnd;
-    double pivot[bs * bs];
-    int LSize = 0;
-    Opm::Detail::Inverter<bs> inverter;   // reuse inverter to invert blocks
-
-    Timer t_decomposition;
-
-    // go through all rows
-    for (i = 0; i < LUMat->Nb; i++) {
-        iRowStart = LUMat->rowPointers[i];
-        iRowEnd = LUMat->rowPointers[i + 1];
-
-        // go through all elements of the row
-        for (ij = iRowStart; ij < iRowEnd; ij++) {
-            j = LUMat->colIndices[ij];
-            // if the element is the diagonal, store the index and go to next row
-            if (j == i) {
-                diagIndex[i] = ij;
-                break;
-            }
-            // if an element beyond the diagonal is reach, no diagonal was found
-            // throw an error now. TODO: perform reordering earlier to prevent this
-            if (j > i) {
-                std::ostringstream out;
-                out << "BILU0 Error could not find diagonal value in row: " << i;
-                OpmLog::error(out.str());
-                return false;
-            }
-
-            LSize++;
-            // calculate the pivot of this row
-            blockMult(LUMat->nnzValues + ij * bs * bs, invDiagVals + j * bs * bs, &pivot[0], bs);
-
-            memcpy(LUMat->nnzValues + ij * bs * bs, &pivot[0], sizeof(double) * bs * bs);
-
-            jRowEnd = LUMat->rowPointers[j + 1];
-            jk = diagIndex[j] + 1;
-            ik = ij + 1;
-            // subtract that row scaled by the pivot from this row.
-            while (ik < iRowEnd && jk < jRowEnd) {
-                if (LUMat->colIndices[ik] == LUMat->colIndices[jk]) {
-                    blockMultSub(LUMat->nnzValues + ik * bs * bs, pivot, LUMat->nnzValues + jk * bs * bs, bs);
-                    ik++;
-                    jk++;
-                } else {
-                    if (LUMat->colIndices[ik] < LUMat->colIndices[jk])
-                    { ik++; }
-                    else
-                    { jk++; }
-                }
-            }
-        }
-        // store the inverse in the diagonal!
-        inverter(LUMat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs);
-        memcpy(LUMat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs, sizeof(double) * bs * bs);
-    }
-
-    LMat->rowPointers[0] = 0;
-    UMat->rowPointers[0] = 0;
-
-    // Split the LU matrix into two by comparing column indices to diagonal indices
-    for (i = 0; i < LUMat->Nb; i++) {
-        LMat->rowPointers[i + 1] = LMat->rowPointers[i];
-        for (j = LUMat->rowPointers[i]; j < LUMat->rowPointers[i + 1]; j++) {
-            if (j < diagIndex[i]) {
-                memcpy(LMat->nnzValues + (LMat->rowPointers[i + 1]) * bs * bs, LUMat->nnzValues + j * bs * bs, sizeof(double) * bs * bs);
-                LMat->colIndices[LMat->rowPointers[i + 1]] = LUMat->colIndices[j];
-                LMat->rowPointers[i + 1] = LMat->rowPointers[i + 1] + 1;
-            }
-        }
-    }
-    // Reverse the order or the (blocked) rows for the U matrix,
-    // because the rows are accessed in reverse order when applying the ILU0
-    int URowIndex = 0;
-    for (i = LUMat->Nb - 1; i >= 0; i--) {
-        UMat->rowPointers[URowIndex + 1] = UMat->rowPointers[URowIndex];
-        for (j = LUMat->rowPointers[i]; j < LUMat->rowPointers[i + 1]; j++) {
-            if (j > diagIndex[i]) {
-                memcpy(UMat->nnzValues + (UMat->rowPointers[URowIndex + 1]) * bs * bs, LUMat->nnzValues + j * bs * bs, sizeof(double) * bs * bs);
-                UMat->colIndices[UMat->rowPointers[URowIndex + 1]] = LUMat->colIndices[j];
-                UMat->rowPointers[URowIndex + 1] = UMat->rowPointers[URowIndex + 1] + 1;
-            }
-        }
-        URowIndex++;
-    }
-
-    if (verbosity >= 3) {
-        std::ostringstream out;
-        out << "FPGABILU0 decomposition: " << t_decomposition.stop() << " s";
-        OpmLog::info(out.str());
-    }
-
-    std::vector<int> URowsPerColor(numColors);
-    rowSize = block_size * rMat->Nb;
-    LRowSize = block_size * LMat->Nb;
-    URowSize = block_size * UMat->Nb;
-    LNumColors = numColors;
-    UNumColors = numColors;
-    for (int c = 0; c < numColors; c++) {
-        URowsPerColor[numColors - c - 1] = rowsPerColor[c];
-    }
-    int err;
-    err = rMat->toRDF(numColors, rowsPerColor.data(), /*isUMatrix:*/ false,
-                      colIndicesInColor, maxNNZsPerRow, nnzValsSizes.data(),
-                      nnzValues, colIndices.data(), NROffsets.data(), colorSizes.data(), &valSize);
-    if (err != 0) {
-        return false;
-    }
-    err = LMat->toRDF(LNumColors, rowsPerColor.data(), /*isUMatrix:*/ false,
-                      LColIndicesInColor, maxNNZsPerRow, LnnzValsSizes.data(),
-                      LnnzValues, LColIndices.data(), LNROffsets.data(), LColorSizes.data(), &LValSize);
-    if (err != 0) {
-        return false;
-    }
-    err = UMat->toRDF(UNumColors, URowsPerColor.data(), /*isUMatrix:*/ true,
-                      UColIndicesInColor, maxNNZsPerRow, UnnzValsSizes.data(),
-                      UnnzValues, UColIndices.data(), UNROffsets.data(), UColorSizes.data(), &UValSize);
-    if (err != 0) {
-        return false;
-    }
-    blockedDiagtoRDF(invDiagVals, rMat->Nb, numColors, URowsPerColor, blockDiag.data());
-    // resultPointers are set in the init method
-    resultSizes[0] = rowSize;
-    resultSizes[1] = colorSizes[3]; // zeropadded valSize;
-    resultSizes[2] = numColors;
-    resultSizes[3] = colorSizes[2]; //totalCols
-    resultSizes[4] = colorSizes[5]; //NRFlagSize
-    resultSizes[5] = colorSizes[6]; //diagValsSize
-    resultSizes[6] = LRowSize;
-    resultSizes[7] = LColorSizes[3]; // zeropadded LValSize;
-    resultSizes[8] = LNumColors;
-    resultSizes[9] = LColorSizes[2]; //LTotalCols
-    resultSizes[10] = LColorSizes[5]; //LNRFlagSize
-    resultSizes[11] = LColorSizes[6]; //LDiagValsSize
-    resultSizes[12] = URowSize;
-    resultSizes[13] = UColorSizes[3]; // zeropadded UValSize;
-    resultSizes[14] = UNumColors;
-    resultSizes[15] = UColorSizes[2]; //UTotalCols
-    resultSizes[16] = UColorSizes[5]; //UNRFlagSize
-    resultSizes[17] = UColorSizes[6]; //UDiagValsSize
-    return true;
-} // end create_preconditioner()
-
-
-#define INSTANTIATE_BDA_FUNCTIONS(n)                                    \
-template FPGABILU0<n>::FPGABILU0(ILUReorder, int, int, int, int, int);  \
-template FPGABILU0<n>::~FPGABILU0();                                    \
-template bool FPGABILU0<n>::init(BlockedMatrix*);                       \
-template bool FPGABILU0<n>::create_preconditioner(BlockedMatrix *);
-
-INSTANTIATE_BDA_FUNCTIONS(1);
-INSTANTIATE_BDA_FUNCTIONS(2);
-INSTANTIATE_BDA_FUNCTIONS(3);
-INSTANTIATE_BDA_FUNCTIONS(4);
-INSTANTIATE_BDA_FUNCTIONS(5);
-INSTANTIATE_BDA_FUNCTIONS(6);
-
-#undef INSTANTIATE_BDA_FUNCTIONS
-
-} // namespace Accelerator
-} // namespace Opm
diff --git a/opm/simulators/linalg/bda/FPGABILU0.hpp b/opm/simulators/linalg/bda/FPGABILU0.hpp
deleted file mode 100644
index 423b7077e..000000000
--- a/opm/simulators/linalg/bda/FPGABILU0.hpp
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
-  Copyright 2020 Equinor ASA
-
-  This file is part of the Open Porous Media project (OPM).
-
-  OPM is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  OPM is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef FPGA_BILU0_HEADER_INCLUDED
-#define FPGA_BILU0_HEADER_INCLUDED
-
-#include <vector>
-
-#include <opm/simulators/linalg/bda/ILUReorder.hpp>
-#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
-
-namespace Opm
-{
-namespace Accelerator
-{
-
-/*
- * This class implements a Blocked ILU0 preconditioner, with output data
- * specifically formatted for the FPGA.
- * The decomposition and reorders of the rows of the matrix are done on CPU.
- */
-
-template <unsigned int block_size>
-class FPGABILU0
-{
-
-private:
-    int N;       // number of rows of the matrix
-    int Nb;      // number of blockrows of the matrix
-    int nnz;     // number of nonzeroes of the matrix (scalar)
-    int nnzbs;   // number of blocks of the matrix
-    std::unique_ptr<BlockedMatrix> LMat = nullptr, UMat = nullptr, LUMat = nullptr;
-    std::shared_ptr<BlockedMatrix> rMat = nullptr; // reordered mat
-    double *invDiagVals = nullptr;
-    std::vector<int> diagIndex;
-    std::vector<int> toOrder, fromOrder;
-    std::vector<int> rowsPerColor;
-    int numColors;
-    int verbosity;
-
-    // sizes and arrays used during RDF generation
-    std::vector<std::vector<double> > nnzValues, LnnzValues, UnnzValues;
-    std::vector<short int> colIndices, LColIndices, UColIndices;
-    std::vector<unsigned char> NROffsets, LNROffsets, UNROffsets;
-    std::vector<int> PIndicesAddr, LPIndicesAddr, UPIndicesAddr;
-    std::vector<int> colorSizes, LColorSizes, UColorSizes;
-    std::vector<int> nnzValsSizes, LnnzValsSizes, UnnzValsSizes;
-    std::vector<std::vector<int> > colIndicesInColor, LColIndicesInColor, UColIndicesInColor;
-
-    int rowSize, valSize;
-    int LRowSize, LValSize, LNumColors;
-    int URowSize, UValSize, UNumColors;
-    std::vector<double> blockDiag;
-    ILUReorder opencl_ilu_reorder;
-    bool level_scheduling = false, graph_coloring = false;
-    int numResultPointers = 21;
-    std::vector<void *> resultPointers;
-    int numResultSizes = 18;
-    std::vector<int> resultSizes;
-    int maxRowsPerColor, maxColsPerColor, maxNNZsPerRow, maxNumColors; // are set via the constructor
-
-public:
-
-    FPGABILU0(ILUReorder opencl_ilu_reorder, int verbosity, int maxRowsPerColor, int maxColsPerColor, int maxNNZsPerRow, int maxNumColors);
-
-    ~FPGABILU0();
-
-    // analysis (optional)
-    bool init(BlockedMatrix *mat);
-
-    // ilu_decomposition
-    bool create_preconditioner(BlockedMatrix *mat);
-
-    int* getToOrder()
-    {
-        return toOrder.data();
-    }
-
-    int* getFromOrder()
-    {
-        return fromOrder.data();
-    }
-
-    BlockedMatrix* getRMat()
-    {
-        return rMat.get();
-    }
-
-    void **getResultPointers()
-    {
-        return resultPointers.data();
-    }
-
-    int *getResultSizes()
-    {
-        return resultSizes.data();
-    }
-
-};
-
-} // namespace Accelerator
-} // namespace Opm
-
-#endif // FPGA_BILU0_HEADER_INCLUDED
diff --git a/opm/simulators/linalg/bda/FPGASolverBackend.cpp b/opm/simulators/linalg/bda/FPGASolverBackend.cpp
deleted file mode 100644
index 122331e9f..000000000
--- a/opm/simulators/linalg/bda/FPGASolverBackend.cpp
+++ /dev/null
@@ -1,741 +0,0 @@
-/*
-  Copyright 2020 Equinor ASA
-
-  This file is part of the Open Porous Media project (OPM).
-
-  OPM is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  OPM is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <config.h>
-
-#include <cmath>
-#include <cstring>
-
-#include <opm/common/OpmLog/OpmLog.hpp>
-#include <opm/common/ErrorMacros.hpp>
-
-#include <opm/simulators/linalg/bda/FPGASolverBackend.hpp>
-#include <opm/simulators/linalg/bda/FPGAUtils.hpp>
-#include <opm/simulators/linalg/bda/Reorder.hpp>
-
-// if defined, any FPGA kernel failure will terminate flow; otherwise, the FPGA
-// kernel will be disabled and execution will continue using DUNE
-#define FPGA_EXIT_WITH_HW_FAILURE
-//#undef FPGA_EXIT_WITH_HW_FAILURE
-
-// if defined, the function generate_statistics will create a CSV-formatted file
-// with detailed statistics about the FPGA backend performance
-//#define FPGA_STATISTICS_FILE_ENABLED
-#undef FPGA_STATISTICS_FILE_ENABLED
-
-namespace Opm
-{
-namespace Accelerator
-{
-
-using Opm::OpmLog;
-
-template <unsigned int block_size>
-FpgaSolverBackend<block_size>::FpgaSolverBackend(std::string fpga_bitstream, int verbosity_, int maxit_, double tolerance_, ILUReorder opencl_ilu_reorder) : BdaSolver<block_size>(fpga_bitstream, verbosity_, maxit_, tolerance_)
-{
-    int err;
-    std::ostringstream oss;
-    double start = second();
-
-    // currently, only block size == 3 is supported by the FPGA backend
-    assert(block_size == 3);
-
-    if (verbosity < 1) {
-        perf_call_enabled = false;
-    }
-    // setup bitstream name and other parameters
-    if (fpga_bitstream.compare("") == 0) {
-        OPM_THROW(std::logic_error, "fpgaSolver called but bitstream file has not been specified");
-    }
-    if (!fileExists(fpga_bitstream.c_str())) {
-        OPM_THROW(std::logic_error, "fpgaSolver called but bitstream file specified does not exists or is not readable");
-    }
-    // -----------------------------
-    // FPGA: setup the OpenCL platform
-    // -----------------------------
-    std::string main_kernel_name(KERNEL_NAME); // macro defined in bicgstab_solver_config.hpp
-    // auto-select the proper FPGA device and create context and other CL objects
-    err = setup_opencl(nullptr, &device_id, &context, &commands, &program, &kernel, main_kernel_name.c_str(), fpga_bitstream.c_str(), &platform_awsf1);
-    if (err != 0) {
-        oss << "Failed to setup the OpenCL device (" << err << ")";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-    oss << "Detected FPGA platform type is ";
-    if (platform_awsf1) { oss << "AWS-F1."; } else { oss << "Xilinx Alveo."; }
-    OpmLog::info(oss.str());
-    oss.str("");
-    oss.clear();
-    // -----------------------------
-    // FPGA: setup the debug buffer
-    // -----------------------------
-    // set kernel debug lines depending on an environment variable
-    const char *xem = getenv("XCL_EMULATION_MODE");
-    if ((xem != nullptr) && (strcmp(xem, "sw_emu") == 0 || strcmp(xem, "hw_emu") == 0)) {
-        debug_outbuf_words = DEBUG_OUTBUF_WORDS_MAX_EMU;
-        oss << "Detected co-simulation mode, debug_outbuf_words set to " << debug_outbuf_words << ".\n";
-        OpmLog::info(oss.str());
-        oss.str("");
-        oss.clear();
-    } else {
-        // set to 2 to reduce overhead in reading back and interpreting the debug lines;
-        // increase to get more debug info from the kernel
-        // range is 2..DEBUG_OUTBUF_WORDS_MAX-1
-        debug_outbuf_words = 2;
-    }
-
-    // host debug buffer setup
-    err = fpga_setup_host_debugbuf(debug_outbuf_words, &debugBuffer, &debugbufferSize);
-    if (err != 0) {
-        oss << "Failed to call fpga_setup_host_debug_buffer (" << err << ")";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-    // device debug buffer setup
-    err = fpga_setup_device_debugbuf(context, debugBuffer, &cldebug, debugbufferSize);
-    if (err != 0) {
-        oss << "Failed to call fpga_setup_device_debug_buffer (" << err << ").\n";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-    // copy debug buffer to device
-    err = fpga_copy_to_device_debugbuf(commands, cldebug, debugBuffer, debugbufferSize, debug_outbuf_words);
-    if (err != 0) {
-        oss << "Failed to call fpga_copy_to_device_debugbuf (" << err << ").\n";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-    // ------------------------------------------------
-    // FPGA: query the kernel for limits/configuration
-    // ------------------------------------------------
-    err = fpga_kernel_query(context, commands, kernel, cldebug,
-                            debugBuffer, debug_outbuf_words,
-                            rst_assert_cycles, rst_settle_cycles,
-                            &hw_x_vector_elem, &hw_max_row_size,
-                            &hw_max_column_size, &hw_max_colors_size,
-                            &hw_max_nnzs_per_row, &hw_max_matrix_size,
-                            &hw_use_uram, &hw_write_ilu0_results,
-                            &hw_dma_data_width, &hw_mult_num,
-                            &hw_x_vector_latency, &hw_add_latency, &hw_mult_latency,
-                            &hw_num_read_ports, &hw_num_write_ports,
-                            &hw_reset_cycles, &hw_reset_settle);
-    if (err != 0) {
-        oss << "Failed to call fpga_kernel_query (" << err << ")";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-
-    if (verbosity >= 1) {
-        oss << "FPGA kernel limits/configuration:\n";
-        oss << "  x_vector_elem=" << hw_max_colors_size << ", max_row_size=" << hw_max_nnzs_per_row << ", max_column_size=" << hw_max_matrix_size << "\n";
-        oss << "  max_colors_size=" << hw_x_vector_elem << ", max_nnzs_per_row=" << hw_max_row_size << ", max_matrix_size=" << hw_max_column_size << "\n";
-        oss << "  use_uram=" << hw_use_uram << ", write_ilu0_results=" << hw_write_ilu0_results << "\n";
-        oss << "  dma_data_width=" << hw_dma_data_width << ", mult_num=" << (unsigned int)hw_mult_num << "\n";
-        oss << "  x_vector_latency=" << (unsigned int)hw_x_vector_latency << "\n";
-        oss << "  add_latency=" << (unsigned int)hw_add_latency << ", mult_latency=" << (unsigned int)hw_mult_latency << "\n";
-        oss << "  num_read_ports=" << (unsigned int)hw_num_read_ports << ", num_write_ports=" << (unsigned int)hw_num_write_ports << "\n";
-        oss << "  reset_cycles=" << hw_reset_cycles << ", reset_settle=" << hw_reset_settle;
-        OpmLog::info(oss.str());
-        oss.str("");
-        oss.clear();
-    }
-
-    // check that LU results are generated by the kernel
-    if (use_LU_res && !hw_write_ilu0_results) {
-        OpmLog::warning("Kernel reports that LU results are not written to memory, but use_LU_res is set; disabling LU results usage");
-        oss.str("");
-        oss.clear();
-        use_LU_res = false;
-    }
-
-    // setup preconditioner
-    double start_prec = second();
-    prec = std::make_unique<Preconditioner>(opencl_ilu_reorder, verbosity_, hw_max_row_size, hw_max_column_size, hw_max_nnzs_per_row, hw_max_colors_size);
-    perf_total.s_preconditioner_setup = second() - start_prec;
-
-    if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) {
-        level_scheduling = true;
-    }
-
-    perf_total.s_initialization = second() - start;
-} // end fpgaSolverBackend
-
-
-template <unsigned int block_size>
-FpgaSolverBackend<block_size>::~FpgaSolverBackend()
-{
-    if (verbosity >= 1) {
-        generate_statistics();
-    }
-    delete[] rx;
-    delete[] rb;
-    if (nnzValArrays != nullptr) { free(nnzValArrays); }
-    if (L_nnzValArrays != nullptr) { free(L_nnzValArrays); }
-    if (U_nnzValArrays != nullptr) { free(U_nnzValArrays); }
-    // FPGA: buffers
-    free(debugBuffer);
-    for (int b = 0; b < RW_BUF; b++) {
-        free(dataBuffer[b]);
-    }
-    free(databufferSize);
-    // FPGA: OpenCL objects
-    if (cldebug != nullptr) { clReleaseMemObject(cldebug); }
-    for (int b = 0; b < RW_BUF; b++) {
-        if (cldata[b] != nullptr) {
-            clReleaseMemObject(cldata[b]);
-        }
-    }
-    clReleaseCommandQueue(commands);
-    clReleaseContext(context);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    clReleaseDevice(device_id);
-} // end ~fpgaSolverBackend()
-
-
-// copy result to host memory
-// caller must be sure that x is a valid array
-template <unsigned int block_size>
-void FpgaSolverBackend<block_size>::get_result(double *x_)
-{
-    double start = 0;
-
-    if (perf_call_enabled) {
-        start = second();
-    }
-    // apply to results the reordering (stored in toOrder)
-    reorderBlockedVectorByPattern<block_size>(mat->Nb, rx, toOrder, x_);
-    // TODO: check if it is more efficient to avoid copying resultsBuffer[0] to rx in solve_system (private)
-    if (perf_call_enabled) {
-        perf_call.back().s_postprocess = second() - start;
-    }
-} // end get_result()
-
-
-template <unsigned int block_size>
-SolverStatus FpgaSolverBackend<block_size>::solve_system(std::shared_ptr<BlockedMatrix> matrix,
-                                                         double *b,
-                                                         [[maybe_unused]] std::shared_ptr<BlockedMatrix> jacMatrix,
-                                                         WellContributions&,
-                                                         BdaResult &res)
-{
-    if (initialized == false) {
-        initialize(matrix->Nb, matrix->nnzbzs, matrix->nnzValues, matrix->rowPointers, matrix->colIndices);
-        if (!analyse_matrix()) {
-            return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
-        }
-    }
-    perf_call.emplace_back();
-    update_system(matrix->nnzValues, b);
-    if (!create_preconditioner()) {
-        return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
-    }
-    solve_system(res);
-
-    if (verbosity >= 1) {
-        std::ostringstream oss;
-        oss << "fpgaSolverBackend::" << __func__ << " - converged: " << res.converged << \
-            ", iterations: " << res.iterations << ", reduction: " << res.reduction << \
-            ", conv_rate: " << res.conv_rate << ", elapsed: " << res.elapsed;
-        OpmLog::info(oss.str());
-    }
-    return SolverStatus::BDA_SOLVER_SUCCESS;
-}
-
-
-template <unsigned int block_size>
-void FpgaSolverBackend<block_size>::initialize(int Nb_, int nnzbs, double *vals, int *rows, int *cols)
-{
-    double start = second();
-    this->Nb = Nb_;
-    this->N = Nb * block_size;
-    this->nnzb = nnzbs;
-    this->nnz = nnzbs * block_size * block_size;
-
-    // allocate host memory for matrices and vectors
-    // actual data for mat points to std::vector.data() in ISTLSolverEbos, so no alloc/free here
-    mat.reset(new BlockedMatrix(N_ / block_size, nnz_ / block_size / block_size, block_size, vals, cols, rows));
-
-    std::ostringstream oss;
-    oss << "Initializing FPGA data, matrix size: " << this->Nb << " blockrows, nnz: " << this->nnzb << " blocks, " << \
-        "block size: " << block_size << ", total nnz: " << this->nnz << "\n";
-    oss << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance;
-    OpmLog::info(oss.str());
-
-    rx = new double[roundUpTo(N_, CACHELINE_BYTES / sizeof(double))];
-    rb = new double[roundUpTo(N_, CACHELINE_BYTES / sizeof(double))];
-
-    perf_total.s_initialization += second() - start;
-    initialized = true;
-} // end initialize()
-
-
-template <unsigned int block_size>
-bool FpgaSolverBackend<block_size>::analyse_matrix()
-{
-    std::ostringstream oss;
-    int err;
-
-    double start = second();
-    bool success = prec->init(mat.get());
-
-    if (!success) {
-        OpmLog::warning("Preconditioner for FPGA solver failed to initialize");
-        return success;
-    }
-
-    toOrder = prec->getToOrder();
-    fromOrder = prec->getFromOrder();
-    rMat = prec->getRMat();
-    processedPointers = prec->getResultPointers();
-    processedSizes = prec->getResultSizes();
-    processedPointers[19] = rb;
-    processedPointers[20] = rx;
-    nnzValArrays_size = static_cast<int*>(processedPointers[5])[0];
-    L_nnzValArrays_size = static_cast<int*>(processedPointers[11])[0];
-    U_nnzValArrays_size = static_cast<int*>(processedPointers[17])[0];
-    // -------------------------------------
-    // FPGA: setup host/device data buffers
-    // -------------------------------------
-    // allocate memory and setup data layout
-    err = fpga_setup_host_datamem(level_scheduling, fpga_config_bits,
-                                  processedSizes,
-                                  &setupArray,
-                                  &nnzValArrays,   &nnzValArrays_size,   &columnIndexArray,   &newRowOffsetArray,   &PIndexArray,   &colorSizesArray,
-                                  &L_nnzValArrays, &L_nnzValArrays_size, &L_columnIndexArray, &L_newRowOffsetArray, &L_PIndexArray, &L_colorSizesArray,
-                                  &U_nnzValArrays, &U_nnzValArrays_size, &U_columnIndexArray, &U_newRowOffsetArray, &U_PIndexArray, &U_colorSizesArray,
-                                  &BLKDArray, &X1Array, &R1Array,
-                                  &X2Array, &R2Array,
-                                  &LresArray, &UresArray,
-                                  &databufferSize, dataBuffer,
-                                  result_offsets, 1 /*num_nnz_arrays*/,
-                                  true /*reset_data_buffers*/,  /* WARNING: leave reset_data_buffers always ENABLED to avoid data corruption! */
-                                  debugbufferSize);
-    if (err) {
-        oss << "Failed to call fpga_setup_host_datamem (" << err << ")";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-
-    // results buffers setup
-    if (use_LU_res) {
-        resultsBufferNum = 4;
-    } else {
-        resultsBufferNum = 2;
-    }
-    if (resultsBufferNum > RES_BUF_MAX) {
-        oss << "Number of results buffer (" << resultsBufferNum << ") is out of range (max " << RES_BUF_MAX << ")";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-
-    resultsNum = processedSizes[0]; // rowSize, invariant between system solves
-    for (int i = 0; i < resultsBufferNum; i++) {
-        resultsBufferSize[i] = roundUpTo(resultsNum, CACHELINE_BYTES / sizeof(double)) * sizeof(double);
-    }
-
-    // device data memory setup
-    err = fpga_setup_device_datamem(context, databufferSize, dataBuffer, cldata);
-    if (err != 0) {
-        oss << "Failed to call fpga_setup_device_datamem (" << err << ")";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-
-    // ------------------------------------
-    // FPGA: setup the kernel's parameters
-    // ------------------------------------
-    err = fpga_set_kernel_parameters(kernel, abort_cycles, debug_outbuf_words - 1, maxit,
-                                     debug_sample_rate, tolerance, cldata, cldebug);
-    if (err != 0) {
-        oss << "Failed to call fpga_set_kernel_parameters (" << err << ")";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-
-    perf_total.s_analysis = second() - start;
-    analysis_done = true;
-
-    return success;
-} // end analyse_matrix()
-
-
-template <unsigned int block_size>
-bool FpgaSolverBackend<block_size>::create_preconditioner()
-{
-    double start = 0;
-
-    if (perf_call_enabled) {
-        start = second();
-    }
-    memset(rx, 0, sizeof(double) * N);
-    bool result = prec->create_preconditioner(mat.get());
-    if (!result) {
-        OpmLog::warning("fpgaSolverBackend: create_preconditioner failed");
-    }
-
-    if (perf_call_enabled) {
-        perf_call.back().s_preconditioner_create = second() - start;
-    }
-    return result;
-} // end create_preconditioner()
-
-
-template <unsigned int block_size>
-void FpgaSolverBackend<block_size>::solve_system(BdaResult &res)
-{
-    std::ostringstream oss;
-    int err;
-    double start = 0, start_total = 0;
-
-    // ------------------------------------
-    // FPGA: return immediately if FPGA is disabled
-    // ------------------------------------
-    if (fpga_disabled) {
-        res.converged = false;
-        OpmLog::warning("FPGA is disabled, fallback to SW execution");
-        return;
-    }
-
-    fpga_calls++;
-
-    if (perf_call_enabled) {
-        start = second();
-        start_total = start;
-    }
-
-    // check if any buffer is larger than the size set in preconditioner->init
-    // TODO: add check for all other buffer sizes that may overflow?
-    err = 0;
-    if ( ((int *)processedPointers[5])[0]  > nnzValArrays_size ||
-            ((int *)processedPointers[11])[0] > L_nnzValArrays_size ||
-            ((int *)processedPointers[17])[0] > U_nnzValArrays_size ) {
-        err = 1;
-    }
-    if (err != 0) {
-        OPM_THROW(std::logic_error, "A buffer size is larger than the initial allocation in solve_system (check preconditioner init)");
-    }
-
-    // ------------------------------------
-    // FPGA: copy input data to host data buffers
-    // ------------------------------------
-    if (perf_call_enabled) {
-        start = second();
-    }
-    err = fpga_copy_host_datamem(
-              processedPointers, processedSizes, setupArray,
-              nnzValArrays,   &nnzValArrays_size,   columnIndexArray,   newRowOffsetArray,   PIndexArray,   colorSizesArray,
-              L_nnzValArrays, &L_nnzValArrays_size, L_columnIndexArray, L_newRowOffsetArray, L_PIndexArray, L_colorSizesArray,
-              U_nnzValArrays, &U_nnzValArrays_size, U_columnIndexArray, U_newRowOffsetArray, U_PIndexArray, U_colorSizesArray,
-              BLKDArray, X1Array, R1Array, X2Array, R2Array,
-              use_LU_res, LresArray, UresArray,
-              databufferSize, dataBuffer,
-              1 /* nnzValArrays_num */,
-              reset_data_buffers, fill_results_buffers,
-              dump_data_buffers, fpga_calls);
-    if (perf_call_enabled) {
-        perf_call.back().s_mem_setup = second() - start;
-    }
-    if (err != 0) {
-        oss << "Failed to call fpga_copy_to_device_debugbuf (" << err << ")";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-    // ------------------------------------
-    // FPGA: copy buffers to device
-    // ------------------------------------
-    // copy debug buffer to device
-    if (perf_call_enabled) {
-        start = second();
-    }
-    err = fpga_copy_to_device_debugbuf(commands,
-                                       cldebug, debugBuffer, debugbufferSize,
-                                       debug_outbuf_words);
-    if (err != 0) {
-        oss << "Failed to call fpga_copy_to_device_debugbuf (" << err << ")";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-    // copy data buffers to device
-    err = fpga_copy_to_device_datamem(commands, RW_BUF, cldata);
-    if (err != 0) {
-        oss << "Failed to call fpga_copy_to_device_datamem (" << err << ")";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-    if (perf_call_enabled) {
-        perf_call.back().s_mem_h2d = second() - start;
-    }
-    // ------------------------------------
-    // FPGA: execute the kernel
-    // ------------------------------------
-    double time_elapsed_ms;
-    if (perf_call_enabled) {
-        start = second();
-    }
-    err = fpga_kernel_run(commands, kernel, &time_elapsed_ms);
-    if (perf_call_enabled) {
-        perf_call.back().s_kernel_exec = second() - start;
-    }
-    if (err != 0) {
-        oss << "Failed to call fpga_kernel_run (" << err << ")";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-    // ----------------------------------------
-    // FPGA: read back debug buffer from device
-    // ----------------------------------------
-    if (perf_call_enabled) {
-        start = second();
-    }
-    err = fpga_copy_from_device_debugbuf((bool)(verbosity < 10),
-                                         commands,
-                                         debug_outbuf_words, debugbufferSize,
-                                         cldebug, debugBuffer,
-                                         abort_cycles,
-                                         &kernel_cycles, &kernel_iter_run,
-                                         norms, &last_norm_idx,
-                                         &kernel_aborted, &kernel_signature, &kernel_overflow, &kernel_noresults,
-                                         &kernel_wrafterend, &kernel_dbgfifofull);
-    if (err != 0) {
-        oss << "Failed to call fpga_copy_from_device_debugbuf (" << err << ")";
-        OPM_THROW(std::logic_error, oss.str());
-    }
-    if (kernel_wrafterend) {
-        OpmLog::warning("Detected recoverable FPGA error: kernel write after end");
-    }
-    if (kernel_dbgfifofull) {
-        OpmLog::warning("Detected recoverable FPGA error: debug FIFO full");
-    }
-    if (kernel_aborted || kernel_signature || kernel_overflow) {
-#if defined(FPGA_EXIT_WITH_HW_FAILURE)
-        oss << "Detected unrecoverable FPGA error (ABRT=" << kernel_aborted << \
-            ",SIG=" << kernel_signature << ",OVF=" << kernel_overflow << ")";
-        OPM_THROW(std::logic_error, oss.str());
-#else
-        oss << "Detected unrecoverable FPGA error (ABRT=" << kernel_aborted << \
-            ",SIG=" << kernel_signature << ",OVF=" << kernel_overflow << ")\n";
-        oss << "Disabling FPGA kernel: execution will continue with SW kernel";
-        OpmLog::warning(oss.str());
-        oss.str("");
-        oss.clear();
-        fpga_disabled = true;
-#endif
-    }
-    if (perf_call_enabled) {
-        perf_call.back().n_kernel_exec_cycles = kernel_cycles;
-    }
-    // copy (back) results only if FPGA is not disabled
-    if (!fpga_disabled) {
-        if (kernel_noresults) {
-            OpmLog::warning("FPGA kernel did not return results because the required precision is already reached");
-            // rx still contains zeros from initial guess
-        } else {
-            // ------------------------------------
-            // FPGA: read back results from device
-            // ------------------------------------
-            err = fpga_map_results(even(kernel_iter_run),
-                                   use_residuals, use_LU_res, commands,
-                                   resultsNum, resultsBufferNum, resultsBufferSize,
-                                   debugbufferSize,
-                                   cldata, resultsBuffer,
-                                   result_offsets,
-                                   dump_results, data_dir, basename, sequence);
-            if (err != 0) {
-                oss << "Failed to call fpga_map_results (" << err << ")";
-                OPM_THROW(std::logic_error, oss.str());
-            }
-            // TODO: copy results buffers to reordering output buffers
-            memcpy(rx, resultsBuffer[0], resultsNum * sizeof(double));
-            err = fpga_unmap_results(even(kernel_iter_run),
-                                     use_residuals, use_LU_res,
-                                     commands, cldata, resultsBuffer);
-            if (err != 0) {
-                oss << "Failed to call fpga_unmap_results (" << err << ")";
-                OPM_THROW(std::logic_error, oss.str());
-            }
-        }
-    }
-    // set results and update statistics (if enabled)
-    if (perf_call_enabled) {
-        perf_call.back().s_mem_d2h = second() - start;
-    }
-    float iter = ((float)kernel_iter_run / 2.0) + 0.5; // convert from half iteration int to actual iterationns
-    res.iterations = (int)iter;
-    res.reduction = norms[0] / norms[last_norm_idx]; // norms[0] is the initial norm
-    res.conv_rate = pow(res.reduction, 1.0 / iter);
-    res.elapsed = second() - start_total;
-    if (perf_call_enabled) {
-        perf_call.back().s_solve = res.elapsed;
-        perf_call.back().n_kernel_exec_iters = iter;
-    }
-    // convergence depends on number of iterations reached and hw execution errors
-    res.converged = true;
-    if (fpga_disabled || kernel_aborted || kernel_signature || kernel_overflow || iter >= (float)maxit) {
-        res.converged = false;
-        if (verbosity >= 1) {
-            oss << "FPGA kernel did not converge, reason: fpga_disabled=" << fpga_disabled << \
-                ", kernel_aborted=" << kernel_aborted << ", kernel_signature=" << kernel_signature << \
-                ", kernel_overflow=" << kernel_overflow << ", (iter>=" << maxit << ")=" << (iter >= (float)maxit);
-            OpmLog::warning(oss.str());
-            oss.str("");
-            oss.clear();
-        }
-    }
-    if (perf_call_enabled) {
-        perf_call.back().converged = res.converged;
-        perf_call.back().converged_flags = ((unsigned int)fpga_disabled) +
-                                           ((unsigned int)kernel_aborted << 1) + ((unsigned int)kernel_signature << 2) +
-                                           ((unsigned int)kernel_overflow << 3) + ((unsigned int)(iter >= (float)maxit) << 4);
-    }
-} // end solve_system()
-
-
-template <unsigned int block_size>
-void FpgaSolverBackend<block_size>::update_system(double *vals, double *b)
-{
-    double start = 0;
-
-    mat->nnzValues = vals;
-    // reorder inputs using previously found ordering (stored in fromOrder)
-    if (perf_call_enabled) {
-        start = second();
-    }
-    reorderBlockedVectorByPattern<block_size>(mat->Nb, b, fromOrder, rb);
-    if (perf_call_enabled) {
-        perf_call.back().s_reorder = second() - start;
-    }
-} // end update_system()
-
-
-template <unsigned int block_size>
-void FpgaSolverBackend<block_size>::generate_statistics()
-{
-    std::ostringstream oss;
-    unsigned int conv_iter = 0, conv_ovf = 0;
-
-    if (!perf_call_enabled || fpga_calls == 0) {
-        OpmLog::warning("FPGA statistics were not collected");
-        return;
-    }
-    std::printf("--- FPGA statistics ---\n");
-    std::printf("total solver calls..........: %u\n", fpga_calls);
-    std::printf("time initialization.........: %8.6f s\n", perf_total.s_initialization);
-    std::printf("time preconditioner setup...: %8.6f s\n", perf_total.s_preconditioner_setup);
-
-#if defined(FPGA_STATISTICS_FILE_ENABLED)
-    // DEBUG: this can be enabled to gather all the statistics in a CSV-formatted file
-    FILE *fout = fopen("fpga_statistics_details.csv", "w");
-    if (fout != nullptr) {
-        std::fprintf(fout, "call,preconditioner_create,analysis,reorder,mem_setup,mem_h2d,kernel_exec,kernel_cycles,kernel_iters,mem_d2h,solve,postprocess,converged\n");
-    }
-#endif
-    unsigned int num_data_points = perf_call.size();
-    for (unsigned int i = 0; i < num_data_points; i++) {
-        perf_total.s_preconditioner_create += perf_call[i].s_preconditioner_create;
-        if (perf_call[i].s_preconditioner_create > perf_total.s_preconditioner_create_max) { perf_total.s_preconditioner_create_max = perf_call[i].s_preconditioner_create; }
-        if (perf_call[i].s_preconditioner_create < perf_total.s_preconditioner_create_min) { perf_total.s_preconditioner_create_min = perf_call[i].s_preconditioner_create; }
-        perf_total.s_analysis += perf_call[i].s_analysis;
-        if (perf_call[i].s_analysis > perf_total.s_analysis_max) { perf_total.s_analysis_max = perf_call[i].s_analysis; }
-        if (perf_call[i].s_analysis < perf_total.s_analysis_min) { perf_total.s_analysis_min = perf_call[i].s_analysis; }
-        perf_total.s_reorder += perf_call[i].s_reorder;
-        if (perf_call[i].s_reorder > perf_total.s_reorder_max) { perf_total.s_reorder_max = perf_call[i].s_reorder; }
-        if (perf_call[i].s_reorder < perf_total.s_reorder_min) { perf_total.s_reorder_min = perf_call[i].s_reorder; }
-        perf_total.s_mem_setup += perf_call[i].s_mem_setup;
-        if (perf_call[i].s_mem_setup > perf_total.s_mem_setup_max) { perf_total.s_mem_setup_max = perf_call[i].s_mem_setup; }
-        if (perf_call[i].s_mem_setup < perf_total.s_mem_setup_min) { perf_total.s_mem_setup_min = perf_call[i].s_mem_setup; }
-        perf_total.s_mem_h2d += perf_call[i].s_mem_h2d;
-        if (perf_call[i].s_mem_h2d > perf_total.s_mem_h2d_max) { perf_total.s_mem_h2d_max = perf_call[i].s_mem_h2d; }
-        if (perf_call[i].s_mem_h2d < perf_total.s_mem_h2d_min) { perf_total.s_mem_h2d_min = perf_call[i].s_mem_h2d; }
-        perf_total.s_kernel_exec += perf_call[i].s_kernel_exec;
-        if (perf_call[i].s_kernel_exec > perf_total.s_kernel_exec_max) { perf_total.s_kernel_exec_max = perf_call[i].s_kernel_exec; }
-        if (perf_call[i].s_kernel_exec < perf_total.s_kernel_exec_min) { perf_total.s_kernel_exec_min = perf_call[i].s_kernel_exec; }
-        perf_total.n_kernel_exec_cycles += (unsigned long)perf_call[i].n_kernel_exec_cycles;
-        if (perf_call[i].n_kernel_exec_cycles > perf_total.n_kernel_exec_cycles_max) { perf_total.n_kernel_exec_cycles_max = perf_call[i].n_kernel_exec_cycles; }
-        if (perf_call[i].n_kernel_exec_cycles < perf_total.n_kernel_exec_cycles_min) { perf_total.n_kernel_exec_cycles_min = perf_call[i].n_kernel_exec_cycles; }
-        perf_total.n_kernel_exec_iters += perf_call[i].n_kernel_exec_iters;
-        if (perf_call[i].n_kernel_exec_iters > perf_total.n_kernel_exec_iters_max) { perf_total.n_kernel_exec_iters_max = perf_call[i].n_kernel_exec_iters; }
-        if (perf_call[i].n_kernel_exec_iters < perf_total.n_kernel_exec_iters_min) { perf_total.n_kernel_exec_iters_min = perf_call[i].n_kernel_exec_iters; }
-        perf_total.s_mem_d2h += perf_call[i].s_mem_d2h;
-        if (perf_call[i].s_mem_d2h > perf_total.s_mem_d2h_max) { perf_total.s_mem_d2h_max = perf_call[i].s_mem_d2h; }
-        if (perf_call[i].s_mem_d2h < perf_total.s_mem_d2h_min) { perf_total.s_mem_d2h_min = perf_call[i].s_mem_d2h; }
-        perf_total.s_solve += perf_call[i].s_solve;
-        if (perf_call[i].s_solve > perf_total.s_solve_max) { perf_total.s_solve_max = perf_call[i].s_solve; }
-        if (perf_call[i].s_solve < perf_total.s_solve_min) { perf_total.s_solve_min = perf_call[i].s_solve; }
-        perf_total.s_postprocess += perf_call[i].s_postprocess;
-        if (perf_call[i].s_postprocess > perf_total.s_postprocess_max) { perf_total.s_postprocess_max = perf_call[i].s_postprocess; }
-        if (perf_call[i].s_postprocess < perf_total.s_postprocess_min) { perf_total.s_postprocess_min = perf_call[i].s_postprocess; }
-        perf_total.n_converged += (unsigned int)perf_call[i].converged;
-        if (perf_call[i].converged_flags & 1 << 4) { conv_iter += 1; }
-        if (perf_call[i].converged_flags & 1 << 3) { conv_ovf += 1; }
-#if defined(FPGA_STATISTICS_FILE_ENABLED)
-        if (fout != nullptr) {
-            std::fprintf(fout, "%d,%8.6f,%8.6f,%8.6f,%8.6f,%8.6f,%8.6f,%u,%.1f,%8.6f,%8.6f,%8.6f,%u\n",
-                         i, perf_call[i].s_preconditioner_create, perf_call[i].s_analysis, perf_call[i].s_reorder,
-                         perf_call[i].s_mem_setup, perf_call[i].s_mem_h2d, perf_call[i].s_kernel_exec, perf_call[i].n_kernel_exec_cycles,
-                         perf_call[i].n_kernel_exec_iters, perf_call[i].s_mem_d2h, perf_call[i].s_solve, perf_call[i].s_postprocess,
-                         (unsigned int)perf_call[i].converged);
-        }
-#endif
-    }
-#if defined(FPGA_STATISTICS_FILE_ENABLED)
-    if (fout != nullptr) {
-        fclose(fout);
-    }
-#endif
-    perf_total.s_preconditioner_create_avg = perf_total.s_preconditioner_create / num_data_points;
-    perf_total.s_analysis_avg = perf_total.s_analysis / num_data_points;
-    perf_total.s_reorder_avg = perf_total.s_reorder / num_data_points;
-    perf_total.s_mem_setup_avg = perf_total.s_mem_setup / num_data_points;
-    perf_total.s_mem_h2d_avg = perf_total.s_mem_h2d / num_data_points;
-    perf_total.s_kernel_exec_avg = perf_total.s_kernel_exec / num_data_points;
-    perf_total.n_kernel_exec_cycles_avg = perf_total.n_kernel_exec_cycles / num_data_points;
-    perf_total.n_kernel_exec_iters_avg = perf_total.n_kernel_exec_iters / num_data_points;
-    perf_total.s_mem_d2h_avg = perf_total.s_mem_d2h / num_data_points;
-    perf_total.s_solve_avg = perf_total.s_solve / num_data_points;
-    perf_total.s_postprocess_avg = perf_total.s_postprocess / num_data_points;
-    std::printf("time preconditioner creation: total %8.6f s, avg %8.6f s, min %8.6f s, max %8.6f s\n",
-                perf_total.s_preconditioner_create, perf_total.s_preconditioner_create_avg, perf_total.s_preconditioner_create_min, perf_total.s_preconditioner_create_max);
-    std::printf("time analysis...............: total %8.6f s, avg %8.6f s, min %8.6f s, max %8.6f s\n",
-                perf_total.s_analysis, perf_total.s_analysis_avg, perf_total.s_analysis_min, perf_total.s_analysis_max);
-    std::printf("time reorder................: total %8.6f s, avg %8.6f s, min %8.6f s, max %8.6f s\n",
-                perf_total.s_reorder, perf_total.s_reorder_avg, perf_total.s_reorder_min, perf_total.s_reorder_max);
-    std::printf("time memory setup...........: total %8.6f s, avg %8.6f s, min %8.6f s, max %8.6f s\n",
-                perf_total.s_mem_setup, perf_total.s_mem_setup_avg, perf_total.s_mem_setup_min, perf_total.s_mem_setup_max);
-    std::printf("time memory host2dev........: total %8.6f s, avg %8.6f s, min %8.6f s, max %8.6f s\n",
-                perf_total.s_mem_h2d, perf_total.s_mem_h2d_avg, perf_total.s_mem_h2d_min, perf_total.s_mem_h2d_max);
-    std::printf("time kernel execution.......: total %8.6f s, avg %8.6f s, min %8.6f s, max %8.6f s\n",
-                perf_total.s_kernel_exec, perf_total.s_kernel_exec_avg, perf_total.s_kernel_exec_min, perf_total.s_kernel_exec_max);
-    std::printf("cycles kernel execution.....: total %lu, avg %lu, min %lu, max %lu\n",
-                perf_total.n_kernel_exec_cycles, perf_total.n_kernel_exec_cycles_avg, perf_total.n_kernel_exec_cycles_min, perf_total.n_kernel_exec_cycles_max);
-    std::printf("iterations kernel execution.: total %.1f, avg %.1f, min %.1f, max %.1f\n",
-                perf_total.n_kernel_exec_iters, perf_total.n_kernel_exec_iters_avg, perf_total.n_kernel_exec_iters_min, perf_total.n_kernel_exec_iters_max);
-    std::printf("time memory dev2host........: total %8.6f s, avg %8.6f s, min %8.6f s, max %8.6f s\n",
-                perf_total.s_mem_d2h, perf_total.s_mem_d2h_avg, perf_total.s_mem_d2h_min, perf_total.s_mem_d2h_max);
-    std::printf("time solve..................: total %8.6f s, avg %8.6f s, min %8.6f s, max %8.6f s\n",
-                perf_total.s_solve, perf_total.s_solve_avg, perf_total.s_solve_min, perf_total.s_solve_max);
-    std::printf("time postprocess............: total %8.6f s, avg %8.6f s, min %8.6f s, max %8.6f s\n",
-                perf_total.s_postprocess, perf_total.s_postprocess_avg, perf_total.s_postprocess_min, perf_total.s_postprocess_max);
-    std::printf("converged...................: %u/%u, with iter>%d=%u, overflow=%u\n",
-                perf_total.n_converged, num_data_points, maxit, conv_iter, conv_ovf);
-    std::printf("-----------------------\n");
-} //end generate_statistics()
-
-
-#define INSTANTIATE_BDA_FUNCTIONS(n)                                                          \
-template FpgaSolverBackend<n>::FpgaSolverBackend(std::string, int, int, double, ILUReorder);  \
-
-INSTANTIATE_BDA_FUNCTIONS(1);
-INSTANTIATE_BDA_FUNCTIONS(2);
-INSTANTIATE_BDA_FUNCTIONS(3);
-INSTANTIATE_BDA_FUNCTIONS(4);
-INSTANTIATE_BDA_FUNCTIONS(5);
-INSTANTIATE_BDA_FUNCTIONS(6);
-
-#undef INSTANTIATE_BDA_FUNCTIONS
-
-} // namespace Accelerator
-} // namespace Opm
diff --git a/opm/simulators/linalg/bda/FPGASolverBackend.hpp b/opm/simulators/linalg/bda/FPGASolverBackend.hpp
deleted file mode 100644
index 05e7c6d33..000000000
--- a/opm/simulators/linalg/bda/FPGASolverBackend.hpp
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
-  Copyright 2020 Equinor ASA
-
-  This file is part of the Open Porous Media project (OPM).
-
-  OPM is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  OPM is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef OPM_FPGASOLVER_BACKEND_HEADER_INCLUDED
-#define OPM_FPGASOLVER_BACKEND_HEADER_INCLUDED
-
-#include <opm/simulators/linalg/bda/BdaSolver.hpp>
-#include <opm/simulators/linalg/bda/FPGABILU0.hpp>
-
-#include <linearalgebra/ilu0bicgstab/xilinx/src/sda_app/bicgstab_solver_config.hpp>
-#include <linearalgebra/ilu0bicgstab/xilinx/src/sda_app/common/opencl_lib.hpp>
-#include <linearalgebra/ilu0bicgstab/xilinx/src/sda_app/common/fpga_functions_bicgstab.hpp>
-
-namespace Opm
-{
-namespace Accelerator
-{
-
-/// This class implements an ilu0-bicgstab solver on FPGA
-template <unsigned int block_size>
-class FpgaSolverBackend : public BdaSolver<block_size>
-{
-    typedef BdaSolver<block_size> Base;
-    typedef FPGABILU0<block_size> Preconditioner;
-
-    using Base::N;
-    using Base::Nb;
-    using Base::nnz;
-    using Base::nnzb;
-    using Base::verbosity;
-    using Base::maxit;
-    using Base::tolerance;
-    using Base::initialized;
-
-private:
-    double *rx = nullptr; // reordered x
-    double *rb = nullptr; // reordered b
-    int *fromOrder = nullptr, *toOrder = nullptr;
-    bool analysis_done = false;
-    bool level_scheduling = false;
-
-    // LUMat will shallow copy rowPointers and colIndices of mat/rMat
-    std::unique_ptr<BlockedMatrix> mat = nullptr;
-    BlockedMatrix *rMat = nullptr;
-    std::unique_ptr<Preconditioner> prec = nullptr;
-
-    // vectors with data processed by the preconditioner (input to the kernel)
-    void **processedPointers = nullptr;
-    int *processedSizes = nullptr;
-
-    unsigned int fpga_calls = 0;
-    bool perf_call_enabled = true;
-
-    // per call performance metrics
-    typedef struct {
-      double s_preconditioner_create = 0.0;
-      double s_analysis = 0.0;
-      double s_reorder = 0.0;
-      double s_mem_setup = 0.0;
-      double s_mem_h2d = 0.0;
-      double s_kernel_exec = 0.0;
-      unsigned int n_kernel_exec_cycles = 0;
-      float n_kernel_exec_iters = 0.0;
-      double s_mem_d2h = 0.0;
-      double s_solve = 0.0;
-      double s_postprocess = 0.0;
-      bool converged = false;
-      unsigned int converged_flags = 0;
-    } perf_call_metrics_t;
-    // cumulative performance metrics
-    typedef struct {
-      double s_initialization;
-      double s_preconditioner_setup;
-      double s_preconditioner_create;
-      double s_preconditioner_create_min,s_preconditioner_create_max,s_preconditioner_create_avg;
-      double s_analysis;
-      double s_analysis_min,s_analysis_max,s_analysis_avg;
-      double s_reorder;
-      double s_reorder_min,s_reorder_max,s_reorder_avg;
-      double s_mem_setup;
-      double s_mem_setup_min,s_mem_setup_max,s_mem_setup_avg;
-      double s_mem_h2d;
-      double s_mem_h2d_min,s_mem_h2d_max,s_mem_h2d_avg;
-      double s_kernel_exec;
-      double s_kernel_exec_min,s_kernel_exec_max,s_kernel_exec_avg;
-      unsigned long n_kernel_exec_cycles;
-      unsigned long n_kernel_exec_cycles_min,n_kernel_exec_cycles_max,n_kernel_exec_cycles_avg;
-      float n_kernel_exec_iters;
-      float n_kernel_exec_iters_min,n_kernel_exec_iters_max,n_kernel_exec_iters_avg;
-      double s_mem_d2h;
-      double s_mem_d2h_min,s_mem_d2h_max,s_mem_d2h_avg;
-      double s_solve;
-      double s_solve_min,s_solve_max,s_solve_avg;
-      double s_postprocess;
-      double s_postprocess_min,s_postprocess_max,s_postprocess_avg;
-      unsigned int n_converged;
-    } perf_total_metrics_t;
-    std::vector<perf_call_metrics_t> perf_call;
-    perf_total_metrics_t perf_total;
-    // fpga_config_bits: bit0=do_reset_debug: if 1, will reset debug flags at each state change, otherwise flags are sticky
-    // fpga_config_bits: bit1=absolute_compare: if 1, will compare norm with provided precision value, otherwise it's incremental
-    unsigned int fpga_config_bits = 0;
-    bool fpga_disabled = false;
-    bool platform_awsf1;
-    unsigned int debugbufferSize;
-    unsigned long int *debugBuffer = nullptr;
-    unsigned int *databufferSize = nullptr;
-    unsigned char *dataBuffer[RW_BUF] = {nullptr};
-    unsigned int debug_outbuf_words;
-    int resultsNum;
-    int resultsBufferNum;
-    unsigned int resultsBufferSize[RES_BUF_MAX];
-    unsigned int result_offsets[6];
-    unsigned int kernel_cycles, kernel_iter_run;
-    double norms[4];
-    unsigned char last_norm_idx;
-    bool kernel_aborted, kernel_signature, kernel_overflow;
-    bool kernel_noresults;
-    bool kernel_wrafterend, kernel_dbgfifofull;
-    bool use_residuals = false;
-    bool use_LU_res = false;
-    int sequence = 0;
-    // TODO: these values may be sent via command line parameters
-    unsigned int abort_cycles = 2000000000; // 2x10^9 @ 300MHz is around 6.6 s
-    unsigned int debug_sample_rate = 65535; // max value allowed is 65535, 0 means disabled; reduce to get a finer debug dump
-    int nnzValArrays_size = 0;
-    int L_nnzValArrays_size = 0;
-    int U_nnzValArrays_size = 0;
-    // aliases to areas of the host data buffers
-    long unsigned int *setupArray = nullptr;
-    double **nnzValArrays  = nullptr;
-    short unsigned int *columnIndexArray = nullptr;
-    unsigned char *newRowOffsetArray = nullptr;
-    unsigned int *PIndexArray = nullptr;
-    unsigned int *colorSizesArray = nullptr;
-    double **L_nnzValArrays = nullptr;
-    short unsigned int *L_columnIndexArray = nullptr;
-    unsigned char *L_newRowOffsetArray = nullptr;
-    unsigned int *L_PIndexArray = nullptr;
-    unsigned int *L_colorSizesArray = nullptr;
-    double **U_nnzValArrays = nullptr;
-    short unsigned int *U_columnIndexArray = nullptr;
-    unsigned char *U_newRowOffsetArray = nullptr;
-    unsigned int *U_PIndexArray = nullptr;
-    unsigned int *U_colorSizesArray = nullptr;
-    double *BLKDArray = nullptr;
-    double *X1Array = nullptr, *X2Array = nullptr;
-    double *R1Array = nullptr, *R2Array = nullptr;
-    double *LresArray = nullptr, *UresArray = nullptr;
-    double *resultsBuffer[RES_BUF_MAX] = {nullptr}; // alias for data output region
-    // OpenCL variables
-    cl_device_id device_id;
-    cl_context context;
-    cl_command_queue commands;
-    cl_program program;
-    cl_kernel kernel;
-    cl_mem cldata[RW_BUF] = {nullptr};
-    cl_mem cldebug = nullptr;
-    // HW limits/configuration variables
-    unsigned int hw_x_vector_elem;
-    unsigned int hw_max_row_size;
-    unsigned int hw_max_column_size;
-    unsigned int hw_max_colors_size;
-    unsigned short hw_max_nnzs_per_row;
-    unsigned int hw_max_matrix_size;
-    bool hw_use_uram;
-    bool hw_write_ilu0_results;
-    unsigned short hw_dma_data_width;
-    unsigned char hw_x_vector_latency;
-    unsigned char hw_add_latency;
-    unsigned char hw_mult_latency;
-    unsigned char hw_mult_num;
-    unsigned char hw_num_read_ports;
-    unsigned char hw_num_write_ports;
-    unsigned short hw_reset_cycles;
-    unsigned short hw_reset_settle;
-    // debug
-    bool reset_data_buffers = false;
-    bool fill_results_buffers = false;
-    int dump_data_buffers = 0; // 0=disabled, 1=binary format, 2=text format
-    bool dump_results = false;
-    char *data_dir = nullptr;
-    char *basename = nullptr;
-    unsigned short rst_assert_cycles = 0;
-    unsigned short rst_settle_cycles = 0;
-
-    /// Allocate host memory
-    /// \param[in] Nb             number of blockrows
-    /// \param[in] nnzbs          number of blocks
-    /// \param[in] vals           array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
-    /// \param[in] rows           array of rowPointers, contains N/dim+1 values
-    /// \param[in] cols           array of columnIndices, contains nnz values
-    void initialize(int Nb, int nnzbs, int dim, double *vals, int *rows, int *cols);
-
-    /// Reorder the linear system so it corresponds with the coloring
-    /// \param[in] vals           array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
-    /// \param[in] b              input vector
-    void update_system(double *vals, double *b);
-
-    /// Analyse sparsity pattern to extract parallelism
-    /// \return true iff analysis was successful
-    bool analyse_matrix();
-
-    /// Perform ilu0-decomposition
-    /// \return true iff decomposition was successful
-    bool create_preconditioner();
-
-    /// Solve linear system
-    /// \param[inout] res         summary of solver result
-    void solve_system(BdaResult &res);
-
-    /// Generate FPGA backend statistics
-    void generate_statistics(void);
-
-public:
-
-    /// Construct an fpgaSolver
-    /// \param[in] fpga_bitstream             FPGA bitstream file name
-    /// \param[in] linear_solver_verbosity    verbosity of fpgaSolver
-    /// \param[in] maxit                      maximum number of iterations for fpgaSolver
-    /// \param[in] tolerance                  required relative tolerance for fpgaSolver
-    /// \param[in] opencl_ilu_reorder         select either level_scheduling or graph_coloring, see ILUReorder.hpp for explanation
-    FpgaSolverBackend(std::string fpga_bitstream, int linear_solver_verbosity, int maxit, double tolerance, ILUReorder opencl_ilu_reorder);
-
-    /// Destroy an fpgaSolver, and free memory
-    ~FpgaSolverBackend();
-
-    /// Solve linear system, A*x = b, matrix A must be in blocked-CSR format
-    /// \param[in] matrix         matrix A
-    /// \param[in] b              input vector, contains N values
-    /// \param[in] jacMatrix      matrix for preconditioner
-    /// \param[in] wellContribs   WellContributions, not used in FPGA solver because it requires them already added to matrix A
-    /// \param[inout] res         summary of solver result
-    /// \return                   status code
-    SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
-        std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) override;
-    
-    /// Get result after linear solve, and peform postprocessing if necessary
-    /// \param[inout] x           resulting x vector, caller must guarantee that x points to a valid array
-    void get_result(double *x) override;
-
-}; // end class fpgaSolverBackend
-
-} // namespace Accelerator
-} // namespace Opm
-
-#endif
-
diff --git a/opm/simulators/linalg/bda/FPGAUtils.cpp b/opm/simulators/linalg/bda/FPGAUtils.cpp
deleted file mode 100644
index 667a87e0b..000000000
--- a/opm/simulators/linalg/bda/FPGAUtils.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
-  Copyright 2020 Equinor ASA
-
-  This file is part of the Open Porous Media project (OPM).
-
-  OPM is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  OPM is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#if HAVE_CONFIG_H
-#include "config.h"
-#endif // HAVE_CONFIG_H
-
-#include <sys/time.h>
-#include <fstream>
-
-namespace Opm
-{
-namespace Accelerator
-{
-
-double second(void)
-{
-    struct timeval tv;
-    gettimeofday(&tv, nullptr);
-    return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
-}
-
-bool even(int n)
-{
-    if (n % 2 == 0) {
-        return true;
-    } else {
-        return false;
-    }
-}
-
-int roundUpTo(int i, int n)
-{
-    if (i % n == 0) {
-        return i;
-    } else {
-        return (i / n + 1) * n;
-    }
-}
-
-bool fileExists(const char *filename)
-{
-    FILE *fin;
-    fin = fopen(filename, "r");
-    if (fin == nullptr) {
-        return false;
-    } else {
-        fclose(fin);
-        return true;
-    }
-}
-
-} // namespace Accelerator
-} // namespace Opm
diff --git a/opm/simulators/linalg/bda/FPGAUtils.hpp b/opm/simulators/linalg/bda/FPGAUtils.hpp
deleted file mode 100644
index 1214baf82..000000000
--- a/opm/simulators/linalg/bda/FPGAUtils.hpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
-  Copyright 2020 Equinor ASA
-
-  This file is part of the Open Porous Media project (OPM).
-
-  OPM is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  OPM is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef FPGA_UTILS_HEADER_INCLUDED
-#define FPGA_UTILS_HEADER_INCLUDED
-
-namespace Opm
-{
-namespace Accelerator
-{
-
-union double2int
-{
-    unsigned long int int_val;
-    double double_val;
-};
-
-double second(void);
-bool even(int n);
-int roundUpTo(int i, int n);
-bool fileExists(const char *filename);
-
-} // namespace Accelerator
-} // namespace Opm
-
-#endif // FPGA_UTILS_HEADER_INCLUDED
diff --git a/opm/simulators/linalg/bda/Matrix.cpp b/opm/simulators/linalg/bda/Matrix.cpp
index 0edd51c1f..b0f8ae3e4 100644
--- a/opm/simulators/linalg/bda/Matrix.cpp
+++ b/opm/simulators/linalg/bda/Matrix.cpp
@@ -24,7 +24,6 @@
 
 #include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
 #include <opm/simulators/linalg/bda/Matrix.hpp>
-#include <opm/simulators/linalg/bda/FPGAUtils.hpp>
 
 namespace Opm
 {
@@ -60,199 +59,5 @@ void sortRow(int *colIndices, double *data, int left, int right) {
 
 }
 
-#if HAVE_FPGA
-/*
- * Write all data used by the VHDL testbenches to raw data arrays. The arrays are as follows:
- * - The "colorSizes" array, which first contains the number of rows, columns, non-zero values
- *   and colors, and the size, in elements, of the NROffsets array, followed by:
- *   the number of rows (rounded to the nearest 32), the number of rows (not rounded),
- *   the number of columns (not rounded) and the number of non-zero values
- *   (rounded to the nearest 32) for every partition.
- *   This array is zero padded up to the nearest 64-byte cacheline.
- * - The "colIndicesInColor" array, which contains for every partition, from which elements
- *   in the global X vector the elements of that X vector partition came.
- *   For example, if a matrix partition only has non-zero values in columns 1, 3 and 6, then
- *   that X vector partition will only have three elements, and the color_col_indices array
- *   will contain 1, 3 and 6 for that partition.
- *   This array is zero padded up to the nearest 64-byte cacheline for every partition.
- * - The "nnzValues" array contains all non-zero values of each partition of the matrix.
- *   This array is zero-padded so that each color has a multiple of 32 elements (to have the
- *   same number of elements per partition as the column indices array).
- * - The "colIndices" array contains all column indices of each partition of the matrix.
- *   These column indices are the local indices for that partition, so to be used, first a
- *   local X vector partition needs to be loaded into some local memory (this is done using
- *   data from the _color_col_indices array), before these column indices can be used as
- *   addresses to that local memory to read the desired X vector values.
- *   This array is zero-padded so that data for every partition fills up a number of complete
- *   cachelines (this means every color has a multiple of 32 elements).
- * - "NROffsets" is the name of the array that contains the new row offsets for
- *   all elements of every partition of the matrix. New row offsets are 8-bit values which
- *   are 0 if that element is not the first element in a row, or which, if that element is
- *   the first element of a row) is equal to the amount of empty rows between that new row
- *   and the row before it plus 1. This array is zero-padded so that data for every partition
- *   fills up a number of complete cachelines (this means every color has a multiple of 64 elements).
- */
-int Matrix::toRDF(int numColors, std::vector<int>& nodesPerColor,
-                  std::vector<std::vector<int> >& colIndicesInColor, int nnzsThisRowLimit,
-                  std::vector<std::vector<double> >& ubNnzValues, short int *ubColIndices, int *nnzValsSizes, unsigned char *NROffsets, int *colorSizes)
-{
-    auto mat = this;
-
-    int doneRows = 0;
-    int totalRowNum = 0;  // total number of non-empty rows
-    int nnzsPerColor = 0; // total number of nnzs in current color, padded to multiple of 32 for each color
-    int maxNNZsPerColor = 0; // max of nnzsPerColor
-
-    int totalValSize = 0; // sum of nnzsPerColor, padded
-
-    std::vector<int> nnzRowsPerColor(numColors);
-
-    // find number of nnzs per color and number of non-empty rows
-    for (int c = 0; c < numColors; c++) {
-        int numRows = 0;
-        nnzRowsPerColor[c] = 0;
-        int firstNnzOfColor = mat->rowPointers[doneRows];
-        int lastNnzOfColor = mat->rowPointers[doneRows + nodesPerColor[c]];
-        nnzsPerColor = roundUpTo(lastNnzOfColor - firstNnzOfColor, 32); // round up to nearest 16 for short ints of column indices
-        totalValSize += nnzsPerColor;
-        maxNNZsPerColor = std::max(nnzsPerColor, maxNNZsPerColor);
-        int row = doneRows;
-        for (; row < doneRows + nodesPerColor[c]; row++) {
-            if ( mat->rowPointers[row] != mat->rowPointers[row + 1]) {
-                numRows++;
-                nnzRowsPerColor[c] = nnzRowsPerColor[c] + 1;
-            }
-        }
-
-        doneRows = row;
-        totalRowNum += numRows;
-    }
-
-    int conseqZeroRows = 0;       // number of consecutive empty rows
-    int maxConseqZeroRows = 0;
-    int numEmptyRows = 0;         // total number of empty rows
-    std::vector<int> rowOffsets(totalRowNum);
-    std::vector<int> nnzRowPointers(totalRowNum + 1, 0); // rowPointers, but only for non empty rows
-    std::vector<int> colorValPointers(numColors + 1);    // points to first nnz of first row of each color
-    std::vector<int> colorValZeroPointers(numColors);    // points to first padded zero for each color
-
-    int nonEmptyRowIdx = 0;   // read all rows, but only keep non empty rows, this idx keeps track of how many non empty rows where seen
-    doneRows = 0;
-
-    int totalPaddingSize = 0;  // number of padded zeros from previous colors
-    int NROffsetSize = 0;  // number of NROffsets entries, padded to multiple of 64 for each color
-    int maxRows = 0;
-    int maxNNZsPerRow = 0;
-
-    // determine the row offset of each row (amount of zero rows between it and the previous non-zero row)
-    // this is later converted to rowOffset for each nnz
-    for (int c = 0; c < numColors; c++) {
-        conseqZeroRows = 0;
-        for (int row = doneRows; row < doneRows + nodesPerColor[c]; row++) {
-            int nnzsThisRow = mat->rowPointers[row + 1] - mat->rowPointers[row];
-            if (nnzsThisRow == 0) {
-                conseqZeroRows++;
-                numEmptyRows++;
-            } else {
-                maxNNZsPerRow = std::max(nnzsThisRow, maxNNZsPerRow);
-                nnzRowPointers[nonEmptyRowIdx + 1] = mat->rowPointers[row + 1];
-                rowOffsets[nonEmptyRowIdx] = conseqZeroRows;
-                maxConseqZeroRows = std::max(conseqZeroRows, maxConseqZeroRows);
-                conseqZeroRows = 0;
-                nonEmptyRowIdx++;
-            }
-        }
-        // calculate sizes that include zeropadding
-        colorValZeroPointers[c] = nnzRowPointers[nonEmptyRowIdx] + totalPaddingSize;
-        colorValPointers[c + 1] = roundUpTo(colorValZeroPointers[c], 32);
-        totalPaddingSize += colorValPointers[c + 1] - colorValZeroPointers[c];
-        NROffsetSize += roundUpTo(colorValPointers[c + 1] - colorValPointers[c], 64);
-
-        doneRows += nodesPerColor[c];
-        maxRows = std::max(nodesPerColor[c], maxRows);
-    }
-
-    if (maxNNZsPerRow > nnzsThisRowLimit) {
-        std::ostringstream errorstring;
-        errorstring << "ERROR: Current reordering exceeds maximum number of non-zero values per row limit: " << maxNNZsPerRow << " > " << nnzsThisRowLimit;
-        OPM_THROW(std::logic_error, errorstring.str());
-    }
-
-    // create and fill RDF arrays
-    colorSizes[3] = colorValPointers[numColors];  // total number of nnzs the FPGA has to process, including zeropadding
-    colorSizes[5] = NROffsetSize;
-
-    for (int c = 0; c < numColors; c++) {
-        colorSizes[c * 4 + 8] = nnzRowsPerColor[c];
-        colorSizes[c * 4 + 11] = colorValPointers[c + 1] - colorValPointers[c];
-    }
-
-    int rowIndex = 0;  // keep track of where to read/write
-    int valIndex = 0;
-    int NRIndex = 0;
-    int halfwayPoint = colorValPointers[numColors] / 2;
-    nnzValsSizes[0] = colorValPointers[numColors];
-
-    colorSizes[7] = halfwayPoint;
-
-    for (int c = 0; c < numColors; c++) {
-        int nnzsThisRow;
-        // make sure 32 values are written in batches (pad with zeros if needed)
-        for (int v = colorValPointers[c]; v < colorValPointers[c + 1]; v += 32) {
-            for (int vb = 0; vb < 32; vb++) {
-
-                // if there are enough values for the whole cacheline
-                if (v + vb < colorValZeroPointers[c]) {
-                    ubNnzValues[0][v + vb] = mat->nnzValues[valIndex];
-                    ubColIndices[v + vb] = static_cast<short int>(colIndicesInColor[c][mat->colIndices[valIndex]]);
-
-                    // if this val is the first of a row
-                    if (nnzRowPointers[rowIndex] == valIndex) {
-
-                        if (rowOffsets[rowIndex] + 1 >= 255) {
-                            std::ostringstream errorstring;
-                            errorstring << "ERROR: row offset size exceeded in row " << rowIndex << " with an offset of " << rowOffsets[rowIndex] + 1;
-                            OPM_THROW(std::logic_error, errorstring.str());
-                        }
-
-                        NROffsets[NRIndex] =  static_cast<unsigned char>(rowOffsets[rowIndex] + 1);
-
-                        // skip all empty rows
-                        while (rowIndex < mat->N && nnzRowPointers[rowIndex] == valIndex) {
-                            rowIndex++;
-                            nnzsThisRow = 0;
-                        }
-                        nnzsThisRow++;
-                    }
-                    else
-                    {
-                        NROffsets[NRIndex] = (unsigned char) 0;
-                        nnzsThisRow++;
-                    }
-                    valIndex++;
-                }
-                else // zeropadding is needed
-                {
-                    ubNnzValues[0][v + vb] = 0.0;
-                    ubColIndices[v + vb] = static_cast<short int>(colIndicesInColor[c][mat->colIndices[valIndex - 1]]);
-                    NROffsets[NRIndex] = 0;
-                }
-                NRIndex++;
-
-            }
-        }
-
-        // zeropad the NROffsets file
-        while (NRIndex % 64 != 0) {
-            NROffsets[NRIndex] = 0;
-            NRIndex++;
-        }
-    }
-
-    return 0;
-}
-#endif
-
-
 } // namespace Accelerator
 } // namespace Opm
diff --git a/opm/simulators/linalg/bda/Matrix.hpp b/opm/simulators/linalg/bda/Matrix.hpp
index d06ee57e6..ded6331c4 100644
--- a/opm/simulators/linalg/bda/Matrix.hpp
+++ b/opm/simulators/linalg/bda/Matrix.hpp
@@ -56,28 +56,6 @@ public:
         M = M_;
     }
 
-#if HAVE_FPGA
-    /// Converts this matrix to the dataformat used by the FPGA.
-    /// The FPGA uses a new data format called CSRO (Compressed Sparse Row Offset).
-    /// The purpose of this format is to allow the data to be streamable.
-    /// The rowPointers array has an unpredictable reading pattern/timing,
-    /// it also needs a extra work if a row is shorter than a cacheline.
-    /// The array of N+1 rowPointers is replaced by an array of nnz rowOffsets.
-    /// The value of this offset is 0, unless the corresponding nnz is the first of a row,
-    /// in that case it is 'the number of empty rows preceeding it + 1'.
-    /// The FPGA can simply add the rowOffset to the current rowIdx to get the new rowIdx.
-    /// Example:
-    /// [1 0 0 3 0]    nnzValues   [1 3 2 2 1 4 3 4 1]
-    /// [0 2 2 0 1]    colIndices  [0 3 1 2 4 0 1 2 4]
-    /// [4 0 0 0 0] -> rowPointers [0 2 5 6 6 9]
-    /// [0 0 0 0 0]    rowOffsets  [1 0 1 0 0 1 2 0 0]
-    /// [0 3 4 0 1]
-    /// The rowOffset is stored in 1 byte, meaning the maximum value is 255.
-    int toRDF(int numColors, std::vector<int>& nodesPerColor,
-        std::vector<std::vector<int> >& colIndicesInColor, int nnzsPerRowLimit, 
-        std::vector<std::vector<double> >& ubNnzValues, short int *ubColIndices, int *nnzValsSizes, unsigned char *NROffsets, int *colorSizes);
-#endif
-
     std::vector<double> nnzValues;
     std::vector<int> colIndices;
     std::vector<int> rowPointers;
diff --git a/opm/simulators/linalg/bda/WellContributions.cpp b/opm/simulators/linalg/bda/WellContributions.cpp
index 58cfda8c5..3be495600 100644
--- a/opm/simulators/linalg/bda/WellContributions.cpp
+++ b/opm/simulators/linalg/bda/WellContributions.cpp
@@ -53,10 +53,6 @@ WellContributions::create(const std::string& accelerator_mode, bool useWellConn)
         OPM_THROW(std::runtime_error, "Cannot initialize well contributions: OpenCL is not enabled");
 #endif
     }
-    else if(accelerator_mode.compare("fpga") == 0){
-        // unused for FPGA, but must be defined to avoid error
-        return std::make_unique<WellContributions>();
-    }
     else if(accelerator_mode.compare("amgcl") == 0){
         if (!useWellConn) {
             OPM_THROW(std::logic_error, "Error amgcl requires --matrix-add-well-contributions=true");
diff --git a/tests/test_cusparseSolver.cpp b/tests/test_cusparseSolver.cpp
index 322132509..1d08ed008 100644
--- a/tests/test_cusparseSolver.cpp
+++ b/tests/test_cusparseSolver.cpp
@@ -98,7 +98,6 @@ testCusparseSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix, V
     const int platformID = 0;                     // unused
     const int deviceID = 0;
     const std::string accelerator_mode("cusparse");
-    const std::string fpga_bitstream("empty");    // unused
     const std::string linsolver("ilu0");
     Dune::InverseOperatorResult result;
 
@@ -108,7 +107,6 @@ testCusparseSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix, V
     std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
     try {
         bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
-                                                                               fpga_bitstream,
                                                                                linear_solver_verbosity,
                                                                                maxit,
                                                                                tolerance,
diff --git a/tests/test_openclSolver.cpp b/tests/test_openclSolver.cpp
index 16163d30c..c9abfb6c1 100644
--- a/tests/test_openclSolver.cpp
+++ b/tests/test_openclSolver.cpp
@@ -97,12 +97,10 @@ createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::BdaBri
     const int platformID = 0;
     const int deviceID = 0;
     const std::string accelerator_mode("opencl");
-    const std::string fpga_bitstream("empty");    // unused
     const std::string linsolver("ilu0");
 
     try {
         bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
-                                                                               fpga_bitstream,
                                                                                linear_solver_verbosity,
                                                                                maxit,
                                                                                tolerance,
diff --git a/tests/test_rocalutionSolver.cpp b/tests/test_rocalutionSolver.cpp
index e158c9658..ef48f9dfa 100644
--- a/tests/test_rocalutionSolver.cpp
+++ b/tests/test_rocalutionSolver.cpp
@@ -92,7 +92,6 @@ testRocalutionSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix,
     const int platformID = 0;
     const int deviceID = 0;
     const std::string accelerator_mode("rocalution");
-    const std::string fpga_bitstream("empty");    // unused
     const std::string linsolver("ilu0");
     Dune::InverseOperatorResult result;
 
@@ -101,7 +100,6 @@ testRocalutionSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix,
     std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
     try {
         bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
-                                                                               fpga_bitstream,
                                                                                linear_solver_verbosity,
                                                                                maxit,
                                                                                tolerance,