From 7f8faa018b014efec6721d9c6b5f9c1801e55b22 Mon Sep 17 00:00:00 2001
From: "T.D. (Tongdong) Qiu" <tqiu@ce-cuda01.ewi.tudelft.nl>
Date: Wed, 1 Jul 2020 14:50:34 +0200
Subject: [PATCH] Replaced timing function with Dune::Timer

---
 opm/simulators/linalg/bda/BILU0.cpp           |  60 ++-----
 opm/simulators/linalg/bda/BdaSolver.hpp       |  12 +-
 .../linalg/bda/cusparseSolverBackend.cu       |  55 ++----
 .../linalg/bda/cusparseSolverBackend.hpp      |   1 -
 .../linalg/bda/openclSolverBackend.cpp        | 163 +++++-------------
 .../linalg/bda/openclSolverBackend.hpp        |   1 -
 6 files changed, 80 insertions(+), 212 deletions(-)
diff --git a/opm/simulators/linalg/bda/BILU0.cpp b/opm/simulators/linalg/bda/BILU0.cpp
index 52ac6b6cd..a491bc21c 100644
--- a/opm/simulators/linalg/bda/BILU0.cpp
+++ b/opm/simulators/linalg/bda/BILU0.cpp
@@ -20,6 +20,7 @@
 #include <config.h>
 #include <opm/common/OpmLog/OpmLog.hpp>
 #include <opm/common/ErrorMacros.hpp>
+#include <dune/common/timer.hh>
 
 #include <opm/simulators/linalg/bda/BdaSolver.hpp>
 #include <opm/simulators/linalg/bda/BILU0.hpp>
@@ -30,10 +31,7 @@ namespace bda
 {
 
     using Opm::OpmLog;
-
-    // define 'second' as 'BdaSolver<>::second', this allows usage of the second() function for timing
-    // typedefs cannot handle templates
-    const auto second = BdaSolver<>::second;
+    using Dune::Timer;
 
     template <unsigned int block_size>
     BILU0<block_size>::BILU0(bool level_scheduling_, bool graph_coloring_, int verbosity_) :
@@ -62,7 +60,6 @@ namespace bda
     bool BILU0<block_size>::init(BlockedMatrix *mat)
     {
         const unsigned int bs = block_size;
-        double t1 = 0.0, t2 = 0.0;
         BlockedMatrix *CSCmat = nullptr;
 
         this->N = mat->Nb * block_size;
@@ -79,21 +76,16 @@ namespace bda
             CSCmat->nnzValues = new double[nnzbs * bs * bs];
             CSCmat->colIndices = new int[nnzbs];
             CSCmat->rowPointers = new int[Nb + 1];
-            if(verbosity >= 3){
-                t1 = second();
-            }
+            Timer t_convert;
             bcsr_to_bcsc<block_size>(mat->nnzValues, mat->colIndices, mat->rowPointers, CSCmat->nnzValues, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb);
             if(verbosity >= 3){
-                t2 = second();
                 std::ostringstream out;
-                out << "BILU0 convert CSR to CSC: " << t2 - t1 << " s";
+                out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s";
                 OpmLog::info(out.str());
             }
         }
 
-        if(verbosity >= 3){
-            t1 = second();
-        }
+        Timer t_analysis;
         rMat = allocateBlockedMatrix<block_size>(mat->Nb, mat->nnzbs);
         LUMat = soft_copyBlockedMatrix(rMat);
         if (level_scheduling) {
@@ -105,9 +97,8 @@ namespace bda
             return false;
         }
         if(verbosity >= 3){
-            t2 = second();
             std::ostringstream out;
-            out << "BILU0 analysis took: " << t2 - t1 << " s, " << numColors << " colors";
+            out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors";
             OpmLog::info(out.str());
         }
 
@@ -161,27 +152,21 @@ namespace bda
     bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
     {
         const unsigned int bs = block_size;
-        double t1 = 0.0, t2 = 0.0;
-        if (verbosity >= 3){
-            t1 = second();
-        }
+
+        Timer t_reorder;
         blocked_reorder_matrix_by_pattern<block_size>(mat, toOrder, fromOrder, rMat);
         if (verbosity >= 3){
-            t2 = second();
             std::ostringstream out;
-            out << "BILU0 reorder matrix: " << t2 - t1 << " s";
+            out << "BILU0 reorder matrix: " << t_reorder.stop() << " s";
             OpmLog::info(out.str());
         }
 
         // TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
-        if (verbosity >= 3){
-            t1 = second();
-        }
+        Timer t_copy;
         memcpy(LUMat->nnzValues, rMat->nnzValues, sizeof(double) * bs * bs * rMat->nnzbs);
         if (verbosity >= 3){
-            t2 = second();
             std::ostringstream out;
-            out << "BILU0 memcpy: " << t2 - t1 << " s";
+            out << "BILU0 memcpy: " << t_copy.stop() << " s";
             OpmLog::info(out.str());
         }
 
@@ -191,9 +176,8 @@ namespace bda
 
         int LSize = 0;
 
-        if (verbosity >= 3){
-            t1 = second();
-        }
+        Timer t_decomposition;
+
         // go through all rows
         for (i = 0; i < LUMat->Nb; i++) {
             iRowStart = LUMat->rowPointers[i];
@@ -272,15 +256,12 @@ namespace bda
             URowIndex++;
         }
         if (verbosity >= 3) {
-            t2 = second();
             std::ostringstream out;
-            out << "BILU0 decomposition: " << t2 - t1 << " s";
+            out << "BILU0 decomposition: " << t_decomposition.stop() << " s";
             OpmLog::info(out.str());
         }
 
-        if (verbosity >= 3) {
-            t1 = second();
-        }
+        Timer t_copyToGpu;
         if (pattern_uploaded == false) {
             queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, LMat->nnzbs * sizeof(int), LMat->colIndices);
             queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, UMat->nnzbs * sizeof(int), UMat->colIndices);
@@ -292,9 +273,8 @@ namespace bda
         queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues);
         queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(double) * bs * bs, invDiagVals);
         if (verbosity >= 3) {
-            t2 = second();
             std::ostringstream out;
-            out << "BILU0 copy to GPU: " << t2 - t1 << " s";
+            out << "BILU0 copy to GPU: " << t_copyToGpu.stop() << " s";
             OpmLog::info(out.str());
         }
 
@@ -307,11 +287,8 @@ namespace bda
     template <unsigned int block_size>
     void BILU0<block_size>::apply(cl::Buffer& x, cl::Buffer& y)
     {
-        double t1 = 0.0, t2 = 0.0;
-        if (verbosity >= 3) {
-            t1 = second();
-        }
         cl::Event event;
+        Timer t_apply;
 
         for(int color = 0; color < numColors; ++color){
             event = (*ILU_apply1)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), s.Lvals, s.Lcols, s.Lrows, (unsigned int)Nb, x, y, s.rowsPerColor, color, block_size, cl::Local(lmem_per_work_group));
@@ -324,9 +301,8 @@ namespace bda
 
         if (verbosity >= 3) {
             event.wait();
-            t2 = second();
             std::ostringstream out;
-            out << "BILU0 apply: " << t2 - t1 << " s";
+            out << "BILU0 apply: " << t_apply.stop() << " s";
             OpmLog::info(out.str());
         }
     }
diff --git a/opm/simulators/linalg/bda/BdaSolver.hpp b/opm/simulators/linalg/bda/BdaSolver.hpp
index f61269a4b..962d95dfc 100644
--- a/opm/simulators/linalg/bda/BdaSolver.hpp
+++ b/opm/simulators/linalg/bda/BdaSolver.hpp
@@ -21,8 +21,6 @@
 #define OPM_BDASOLVER_BACKEND_HEADER_INCLUDED
 
 
-#include <sys/time.h>
-
 #include <opm/simulators/linalg/bda/BdaResult.hpp>
 #include <opm/simulators/linalg/bda/BdaSolverStatus.hpp>
 #include <opm/simulators/linalg/bda/WellContributions.hpp>
@@ -35,8 +33,7 @@ namespace bda
 
     /// This class serves to simplify choosing between different backend solvers, such as cusparseSolver and openclSolver
     /// This class is abstract, no instantiations can of it can be made, only of its children
-    /// Without a default block_size value, the BILU0 class cannot use BdaSolver::second()
-    template <unsigned int block_size = 3>
+    template <unsigned int block_size>
     class BdaSolver
     {
 
@@ -74,13 +71,6 @@ namespace bda
 
         virtual void get_result(double *x) = 0;
 
-        /// Different implementations of BdaSolver can use this function for timing
-        static double second(void) {
-            struct timeval tv;
-            gettimeofday(&tv, nullptr);
-            return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
-        }
-
     }; // end class BdaSolver
 
 } // end namespace bda
diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.cu b/opm/simulators/linalg/bda/cusparseSolverBackend.cu
index 4457830e3..5a9338a71 100644
--- a/opm/simulators/linalg/bda/cusparseSolverBackend.cu
+++ b/opm/simulators/linalg/bda/cusparseSolverBackend.cu
@@ -23,6 +23,7 @@
 #include <sstream>
 
 #include <opm/common/OpmLog/OpmLog.hpp>
+#include <dune/common/timer.hh>
 
 #include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
 #include <opm/simulators/linalg/bda/BdaResult.hpp>
@@ -40,6 +41,7 @@ namespace bda
 {
 
 using Opm::OpmLog;
+using Dune::Timer;
 
 const cusparseSolvePolicy_t policy = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
 const cusparseOperation_t operation  = CUSPARSE_OPERATION_NON_TRANSPOSE;
@@ -56,7 +58,7 @@ cusparseSolverBackend<block_size>::~cusparseSolverBackend() {
 
 template <unsigned int block_size>
 void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
-    double t_total1, t_total2;
+    Timer t_total;
     int n = N;
     double rho = 1.0, rhop;
     double alpha, nalpha, beta;
@@ -67,8 +69,6 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
     double mone = -1.0;
     float it;
 
-    t_total1 = second();
-
     if (wellContribs.getNumWells() > 0) {
         wellContribs.setCudaStream(stream);
     }
@@ -169,12 +169,10 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
         }
     }
 
-    t_total2 = second();
-
     res.iterations = std::min(it, (float)maxit);
     res.reduction = norm / norm_0;
     res.conv_rate  = static_cast<double>(pow(res.reduction, 1.0 / it));
-    res.elapsed = t_total2 - t_total1;
+    res.elapsed = t_total.stop();
     res.converged = (it != (maxit + 0.5));
 
     if (verbosity > 0) {
@@ -284,11 +282,7 @@ void cusparseSolverBackend<block_size>::finalize() {
 
 template <unsigned int block_size>
 void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b) {
-
-    double t1, t2;
-    if (verbosity > 2) {
-        t1 = second();
-    }
+    Timer t;
 
 #if COPY_ROW_BY_ROW
     int sum = 0;
@@ -309,9 +303,8 @@ void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *ro
 
     if (verbosity > 2) {
         cudaStreamSynchronize(stream);
-        t2 = second();
         std::ostringstream out;
-        out << "cusparseSolver::copy_system_to_gpu(): " << t2 - t1 << " s";
+        out << "cusparseSolver::copy_system_to_gpu(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }
 } // end copy_system_to_gpu()
@@ -320,11 +313,7 @@ void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *ro
 // don't copy rowpointers and colindices, they stay the same
 template <unsigned int block_size>
 void cusparseSolverBackend<block_size>::update_system_on_gpu(double *vals, int *rows, double *b) {
-
-    double t1, t2;
-    if (verbosity > 2) {
-        t1 = second();
-    }
+    Timer t;
 
 #if COPY_ROW_BY_ROW
     int sum = 0;
@@ -343,9 +332,8 @@ void cusparseSolverBackend<block_size>::update_system_on_gpu(double *vals, int *
 
     if (verbosity > 2) {
         cudaStreamSynchronize(stream);
-        t2 = second();
         std::ostringstream out;
-        out << "cusparseSolver::update_system_on_gpu(): " << t2 - t1 << " s";
+        out << "cusparseSolver::update_system_on_gpu(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }
 } // end update_system_on_gpu()
@@ -361,11 +349,7 @@ template <unsigned int block_size>
 bool cusparseSolverBackend<block_size>::analyse_matrix() {
 
     int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
-    double t1, t2;
-
-    if (verbosity > 2) {
-        t1 = second();
-    }
+    Timer t;
 
     cusparseCreateMatDescr(&descr_B);
     cusparseCreateMatDescr(&descr_M);
@@ -428,9 +412,8 @@ bool cusparseSolverBackend<block_size>::analyse_matrix() {
 
     if (verbosity > 2) {
         cudaStreamSynchronize(stream);
-        t2 = second();
         std::ostringstream out;
-        out << "cusparseSolver::analyse_matrix(): " << t2 - t1 << " s";
+        out << "cusparseSolver::analyse_matrix(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }
 
@@ -441,11 +424,7 @@ bool cusparseSolverBackend<block_size>::analyse_matrix() {
 
 template <unsigned int block_size>
 bool cusparseSolverBackend<block_size>::create_preconditioner() {
-
-    double t1, t2;
-    if (verbosity > 2) {
-        t1 = second();
-    }
+    Timer t;
 
     d_mCols = d_bCols;
     d_mRows = d_bRows;
@@ -463,9 +442,8 @@ bool cusparseSolverBackend<block_size>::create_preconditioner() {
 
     if (verbosity > 2) {
         cudaStreamSynchronize(stream);
-        t2 = second();
         std::ostringstream out;
-        out << "cusparseSolver::create_preconditioner(): " << t2 - t1 << " s";
+        out << "cusparseSolver::create_preconditioner(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }
     return true;
@@ -485,19 +463,14 @@ void cusparseSolverBackend<block_size>::solve_system(WellContributions& wellCont
 // caller must be sure that x is a valid array
 template <unsigned int block_size>
 void cusparseSolverBackend<block_size>::get_result(double *x) {
-
-    double t1, t2;
-    if (verbosity > 2) {
-        t1 = second();
-    }
+    Timer t;
 
     cudaMemcpyAsync(x, d_x, N * sizeof(double), cudaMemcpyDeviceToHost, stream);
     cudaStreamSynchronize(stream);
 
     if (verbosity > 2) {
-        t2 = second();
         std::ostringstream out;
-        out << "cusparseSolver::get_result(): " << t2 - t1 << " s";
+        out << "cusparseSolver::get_result(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }
 } // end get_result()
diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp
index aa8df2170..cd3b84610 100644
--- a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp
@@ -44,7 +44,6 @@ class cusparseSolverBackend : public BdaSolver<block_size> {
     using Base::verbosity;
     using Base::maxit;
     using Base::tolerance;
-    using Base::second;
     using Base::initialized;
     typedef BdaSolverStatus::Status Status;
 
diff --git a/opm/simulators/linalg/bda/openclSolverBackend.cpp b/opm/simulators/linalg/bda/openclSolverBackend.cpp
index 2e6081636..9f90467d7 100644
--- a/opm/simulators/linalg/bda/openclSolverBackend.cpp
+++ b/opm/simulators/linalg/bda/openclSolverBackend.cpp
@@ -23,6 +23,7 @@
 
 #include <opm/common/OpmLog/OpmLog.hpp>
 #include <opm/common/ErrorMacros.hpp>
+#include <dune/common/timer.hh>
 
 
 #define __CL_ENABLE_EXCEPTIONS
@@ -48,6 +49,7 @@ namespace bda
 {
 
 using Opm::OpmLog;
+using Dune::Timer;
 
 template <unsigned int block_size>
 openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_) {
@@ -72,14 +74,11 @@ unsigned int openclSolverBackend<block_size>::ceilDivision(const unsigned int A,
 template <unsigned int block_size>
 double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out)
 {
-    double t1 = 0.0, t2 = 0.0;
     const unsigned int work_group_size = 1024;
     const unsigned int num_work_groups = ceilDivision(N, work_group_size);
     const unsigned int total_work_items = num_work_groups * work_group_size;
     const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
-    if (verbosity >= 4) {
-        t1 = second();
-    }
+    Timer t_dot;
 
     cl::Event event = (*dot_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in1, in2, out, N, cl::Local(lmem_per_work_group));
 
@@ -92,9 +91,8 @@ double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl
 
     if (verbosity >= 4) {
         event.wait();
-        t2 = second();
         std::ostringstream oss;
-        oss << "openclSolver dot_w time: " << t2 - t1;
+        oss << std::scientific << "openclSolver dot_w time: " << t_dot.stop() << " s";
         OpmLog::info(oss.str());
     }
 
@@ -104,14 +102,11 @@ double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl
 template <unsigned int block_size>
 double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
 {
-    double t1 = 0.0, t2 = 0.0;
     const unsigned int work_group_size = 1024;
     const unsigned int num_work_groups = ceilDivision(N, work_group_size);
     const unsigned int total_work_items = num_work_groups * work_group_size;
     const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
-    if (verbosity >= 4) {
-        t1 = second();
-    }
+    Timer t_norm;
 
     cl::Event event = (*norm_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in, out, N, cl::Local(lmem_per_work_group));
 
@@ -125,9 +120,8 @@ double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
 
     if (verbosity >= 4) {
         event.wait();
-        t2 = second();
         std::ostringstream oss;
-        oss << "openclSolver norm_w time: " << t2 - t1;
+        oss << std::scientific << "openclSolver norm_w time: " << t_norm.stop() << " s";
         OpmLog::info(oss.str());
     }
 
@@ -137,21 +131,17 @@ double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::axpy_w(cl::Buffer in, const double a, cl::Buffer out)
 {
-    double t1 = 0.0, t2 = 0.0;
     const unsigned int work_group_size = 32;
     const unsigned int num_work_groups = ceilDivision(N, work_group_size);
     const unsigned int total_work_items = num_work_groups * work_group_size;
-    if (verbosity >= 4) {
-        t1 = second();
-    }
+    Timer t_axpy;
 
     cl::Event event = (*axpy_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in, a, out, N);
 
     if (verbosity >= 4) {
         event.wait();
-        t2 = second();
         std::ostringstream oss;
-        oss << "openclSolver axpy_w time: " << t2 - t1;
+        oss << std::scientific << "openclSolver axpy_w time: " << t_axpy.stop() << " s";
         OpmLog::info(oss.str());
     }
 }
@@ -159,21 +149,17 @@ void openclSolverBackend<block_size>::axpy_w(cl::Buffer in, const double a, cl::
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta)
 {
-    double t1 = 0.0, t2 = 0.0;
     const unsigned int work_group_size = 32;
     const unsigned int num_work_groups = ceilDivision(N, work_group_size);
     const unsigned int total_work_items = num_work_groups * work_group_size;
-    if (verbosity >= 4) {
-        t1 = second();
-    }
+    Timer t_custom;
 
     cl::Event event = (*custom_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), p, v, r, omega, beta, N);
 
     if (verbosity >= 4) {
         event.wait();
-        t2 = second();
         std::ostringstream oss;
-        oss << "openclSolver custom_w time: " << t2 - t1;
+        oss << std::scientific << "openclSolver custom_w time: " << t_custom.stop() << " s";
         OpmLog::info(oss.str());
     }
 }
@@ -181,22 +167,18 @@ void openclSolverBackend<block_size>::custom_w(cl::Buffer p, cl::Buffer v, cl::B
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b)
 {
-    double t1 = 0.0, t2 = 0.0;
     const unsigned int work_group_size = 32;
     const unsigned int num_work_groups = ceilDivision(N, work_group_size);
     const unsigned int total_work_items = num_work_groups * work_group_size;
     const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
-    if (verbosity >= 4) {
-        t1 = second();
-    }
+    Timer t_spmv;
 
     cl::Event event = (*spmv_blocked_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), vals, cols, rows, Nb, x, b, block_size, cl::Local(lmem_per_work_group));
 
     if (verbosity >= 4) {
         event.wait();
-        t2 = second();
         std::ostringstream oss;
-        oss << "openclSolver spmv_blocked_w time: " << t2 - t1;
+        oss << std::scientific << "openclSolver spmv_blocked_w time: " << t_spmv.stop() << " s";
         OpmLog::info(oss.str());
     }
 }
@@ -209,9 +191,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
     double rho, rhop, beta, alpha, omega, tmp1, tmp2;
     double norm, norm_0;
 
-    double t_total1, t_total2, t1 = 0.0, t2 = 0.0;
-    double prec_time = 0.0, spmv_time = 0.0, well_time = 0.0, rest_time = 0.0;
-    t_total1 = second();
+    Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
 
     wellContribs.setOpenCLQueue(queue.get());
     wellContribs.setReordering(toOrder, true);
@@ -245,7 +225,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
         OpmLog::info(out.str());
     }
 
-    t1 = second();
+    t_rest.start();
     for (it = 0.5; it < maxit; it += 0.5) {
         rhop = rho;
         rho = dot_w(d_rw, d_r, d_tmp);
@@ -254,37 +234,32 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
             beta = (rho / rhop) * (alpha / omega);
             custom_w(d_p, d_v, d_r, omega, beta);
         }
-        t2 = second();
-        rest_time += t2 - t1;
+        t_rest.stop();
 
         // pw = prec(p)
-        t1 = second();
+        t_prec.start();
         prec->apply(d_p, d_pw);
-        t2 = second();
-        prec_time += t2 - t1;
+        t_prec.stop();
 
         // v = A * pw
-        t1 = second();
+        t_spmv.start();
         spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_pw, d_v);
-        t2 = second();
-        spmv_time += t2 - t1;
+        t_spmv.stop();
 
         // apply wellContributions
         if (wellContribs.getNumWells() > 0) {
-            t1 = second();
+            t_well.start();
             wellContribs.apply(d_pw, d_v);
-            t2 = second();
-            well_time += t2 - t1;
+            t_well.stop();
         }
 
-        t1 = second();
+        t_rest.start();
         tmp1 = dot_w(d_rw, d_v, d_tmp);
         alpha = rho / tmp1;
         axpy_w(d_v, -alpha, d_r);      // r = r - alpha * v
         axpy_w(d_pw, alpha, d_x);      // x = x + alpha * pw
         norm = norm_w(d_r, d_tmp);
-        t2 = second();
-        rest_time += t2 - t1;
+        t_rest.stop();
 
         if (norm < tolerance * norm_0) {
             break;
@@ -293,34 +268,30 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
         it += 0.5;
 
         // s = prec(r)
-        t1 = second();
+        t_prec.start();
         prec->apply(d_r, d_s);
-        t2 = second();
-        prec_time += t2 - t1;
+        t_prec.stop();
 
         // t = A * s
-        t1 = second();
+        t_spmv.start();
         spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_s, d_t);
-        t2 = second();
-        spmv_time += t2 - t1;
+        t_spmv.stop();
 
         // apply wellContributions
         if (wellContribs.getNumWells() > 0) {
-            t1 = second();
+            t_well.start();
             wellContribs.apply(d_s, d_t);
-            t2 = second();
-            well_time += t2 - t1;
+            t_well.stop();
         }
 
-        t1 = second();
+        t_rest.start();
         tmp1 = dot_w(d_t, d_r, d_tmp);
         tmp2 = dot_w(d_t, d_t, d_tmp);
         omega = tmp1 / tmp2;
         axpy_w(d_s, omega, d_x);     // x = x + omega * s
         axpy_w(d_t, -omega, d_r);    // r = r - omega * t
         norm = norm_w(d_r, d_tmp);
-        t2 = second();
-        rest_time += t2 - t1;
+        t_rest.stop();
 
         if (norm < tolerance * norm_0) {
             break;
@@ -333,14 +304,10 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
         }
     }
 
-    t2 = second();
-    t_total2 = second();
-    rest_time += t2 - t1;
-
     res.iterations = std::min(it, (float)maxit);
     res.reduction = norm / norm_0;
     res.conv_rate  = static_cast<double>(pow(res.reduction, 1.0 / it));
-    res.elapsed = t_total2 - t_total1;
+    res.elapsed = t_total.stop();
     res.converged = (it != (maxit + 0.5));
 
     if (verbosity > 0) {
@@ -556,12 +523,7 @@ void openclSolverBackend<block_size>::finalize() {
 
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::copy_system_to_gpu() {
-
-    double t1 = 0.0, t2 = 0.0;
-    if (verbosity > 2) {
-        t1 = second();
-    }
-
+    Timer t;
     cl::Event event;
 
 #if COPY_ROW_BY_ROW
@@ -583,9 +545,8 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
     event.wait();
 
     if (verbosity > 2) {
-        t2 = second();
         std::ostringstream out;
-        out << "openclSolver::copy_system_to_gpu(): " << t2 - t1 << " s";
+        out << "openclSolver::copy_system_to_gpu(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }
 } // end copy_system_to_gpu()
@@ -594,12 +555,7 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
 // don't copy rowpointers and colindices, they stay the same
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::update_system_on_gpu() {
-
-    double t1 = 0.0, t2 = 0.0;
-    if (verbosity > 2) {
-        t1 = second();
-    }
-
+    Timer t;
     cl::Event event;
 
 #if COPY_ROW_BY_ROW
@@ -619,9 +575,8 @@ void openclSolverBackend<block_size>::update_system_on_gpu() {
     event.wait();
 
     if (verbosity > 2) {
-        t2 = second();
         std::ostringstream out;
-        out << "openclSolver::update_system_on_gpu(): " << t2 - t1 << " s";
+        out << "openclSolver::update_system_on_gpu(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }
 } // end update_system_on_gpu()
@@ -629,12 +584,7 @@ void openclSolverBackend<block_size>::update_system_on_gpu() {
 
 template <unsigned int block_size>
 bool openclSolverBackend<block_size>::analyse_matrix() {
-
-    double t1 = 0.0, t2 = 0.0;
-
-    if (verbosity > 2) {
-        t1 = second();
-    }
+    Timer t;
 
     bool success = prec->init(mat);
     int work_group_size = 32;
@@ -648,9 +598,8 @@ bool openclSolverBackend<block_size>::analyse_matrix() {
     rmat = prec->getRMat();
 
     if (verbosity > 2) {
-        t2 = second();
         std::ostringstream out;
-        out << "openclSolver::analyse_matrix(): " << t2 - t1 << " s";
+        out << "openclSolver::analyse_matrix(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }
 
@@ -662,18 +611,14 @@ bool openclSolverBackend<block_size>::analyse_matrix() {
 
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
-    double t1 = 0.0, t2 = 0.0;
-    if (verbosity > 2) {
-        t1 = second();
-    }
+    Timer t;
 
     mat->nnzValues = vals;
     blocked_reorder_vector_by_pattern<block_size>(mat->Nb, b, fromOrder, rb);
 
     if (verbosity > 2) {
-        t2 = second();
         std::ostringstream out;
-        out << "openclSolver::update_system(): " << t2 - t1 << " s";
+        out << "openclSolver::update_system(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }
 } // end update_system()
@@ -681,18 +626,13 @@ void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
 
 template <unsigned int block_size>
 bool openclSolverBackend<block_size>::create_preconditioner() {
-
-    double t1 = 0.0, t2 = 0.0;
-    if (verbosity > 2) {
-        t1 = second();
-    }
+    Timer t;
 
     bool result = prec->create_preconditioner(mat);
 
     if (verbosity > 2) {
-        t2 = second();
         std::ostringstream out;
-        out << "openclSolver::create_preconditioner(): " << t2 - t1 << " s";
+        out << "openclSolver::create_preconditioner(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }
     return result;
@@ -701,18 +641,14 @@ bool openclSolverBackend<block_size>::create_preconditioner() {
 
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::solve_system(WellContributions& wellContribs, BdaResult &res) {
-    // actually solve
-    double t1 = 0.0, t2 = 0.0;
-    if (verbosity > 2) {
-        t1 = second();
-    }
+    Timer t;
 
+    // actually solve
     gpu_pbicgstab(wellContribs, res);
 
     if (verbosity > 2) {
-        t2 = second();
         std::ostringstream out;
-        out << "openclSolver::solve_system(): " << t2 - t1 << " s";
+        out << "openclSolver::solve_system(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }
 
@@ -723,19 +659,14 @@ void openclSolverBackend<block_size>::solve_system(WellContributions& wellContri
 // caller must be sure that x is a valid array
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::get_result(double *x) {
-
-    double t1 = 0.0, t2 = 0.0;
-    if (verbosity > 2) {
-        t1 = second();
-    }
+    Timer t;
 
     queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, rb);
     blocked_reorder_vector_by_pattern<block_size>(mat->Nb, rb, toOrder, x);
 
     if (verbosity > 2) {
-        t2 = second();
         std::ostringstream out;
-        out << "openclSolver::get_result(): " << t2 - t1 << " s";
+        out << "openclSolver::get_result(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }
 } // end get_result()
diff --git a/opm/simulators/linalg/bda/openclSolverBackend.hpp b/opm/simulators/linalg/bda/openclSolverBackend.hpp
index 4ea70607a..7c013ba86 100644
--- a/opm/simulators/linalg/bda/openclSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/openclSolverBackend.hpp
@@ -50,7 +50,6 @@ class openclSolverBackend : public BdaSolver<block_size>
     using Base::verbosity;
     using Base::maxit;
     using Base::tolerance;
-    using Base::second;
     using Base::initialized;
     typedef BdaSolverStatus::Status Status;