Replaced timing function with Dune::Timer

2025-02-25 18:55:30 -06:00 · 2020-07-01 14:50:34 +02:00 · 2020-07-01 14:50:34 +02:00 · 7f8faa018b
commit 7f8faa018b
parent 845563c37b
6 changed files with 80 additions and 212 deletions
--- a/opm/simulators/linalg/bda/BILU0.cpp
+++ b/opm/simulators/linalg/bda/BILU0.cpp
@ -20,6 +20,7 @@
 #include <config.h>
 #include <opm/common/OpmLog/OpmLog.hpp>
 #include <opm/common/ErrorMacros.hpp>
 #include <dune/common/timer.hh>
 #include <opm/simulators/linalg/bda/BdaSolver.hpp>
 #include <opm/simulators/linalg/bda/BILU0.hpp>
@ -30,10 +31,7 @@ namespace bda
 {
    using Opm::OpmLog;
-
+    using Dune::Timer;
    // define 'second' as 'BdaSolver<>::second', this allows usage of the second() function for timing
    // typedefs cannot handle templates
    const auto second = BdaSolver<>::second;
    template <unsigned int block_size>
    BILU0<block_size>::BILU0(bool level_scheduling_, bool graph_coloring_, int verbosity_) :
@ -62,7 +60,6 @@ namespace bda
    bool BILU0<block_size>::init(BlockedMatrix *mat)
    {
        const unsigned int bs = block_size;
        double t1 = 0.0, t2 = 0.0;
        BlockedMatrix *CSCmat = nullptr;
        this->N = mat->Nb * block_size;
@ -79,21 +76,16 @@ namespace bda
            CSCmat->nnzValues = new double[nnzbs * bs * bs];
            CSCmat->colIndices = new int[nnzbs];
            CSCmat->rowPointers = new int[Nb + 1];
-            if(verbosity >= 3){
+            Timer t_convert;
                t1 = second();
            }
            bcsr_to_bcsc<block_size>(mat->nnzValues, mat->colIndices, mat->rowPointers, CSCmat->nnzValues, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb);
            if(verbosity >= 3){
                t2 = second();
                std::ostringstream out;
-                out << "BILU0 convert CSR to CSC: " << t2 - t1 << " s";
+                out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s\n";
                OpmLog::info(out.str());
            }
        }
-        if(verbosity >= 3){
+        Timer t_analysis;
            t1 = second();
        }
        rMat = allocateBlockedMatrix<block_size>(mat->Nb, mat->nnzbs);
        LUMat = soft_copyBlockedMatrix(rMat);
        if (level_scheduling) {
@ -105,9 +97,8 @@ namespace bda
            return false;
        }
        if(verbosity >= 3){
            t2 = second();
            std::ostringstream out;
-            out << "BILU0 analysis took: " << t2 - t1 << " s, " << numColors << " colors";
+            out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors";
            OpmLog::info(out.str());
        }
@ -161,27 +152,21 @@ namespace bda
    bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
    {
        const unsigned int bs = block_size;
-        double t1 = 0.0, t2 = 0.0;
+
-        if (verbosity >= 3){
+        Timer t_reorder;
            t1 = second();
        }
        blocked_reorder_matrix_by_pattern<block_size>(mat, toOrder, fromOrder, rMat);
        if (verbosity >= 3){
            t2 = second();
            std::ostringstream out;
-            out << "BILU0 reorder matrix: " << t2 - t1 << " s";
+            out << "BILU0 reorder matrix: " << t_reorder.stop() << " s";
            OpmLog::info(out.str());
        }
        // TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
-        if (verbosity >= 3){
+        Timer t_copy;
            t1 = second();
        }
        memcpy(LUMat->nnzValues, rMat->nnzValues, sizeof(double) * bs * bs * rMat->nnzbs);
        if (verbosity >= 3){
            t2 = second();
            std::ostringstream out;
-            out << "BILU0 memcpy: " << t2 - t1 << " s";
+            out << "BILU0 memcpy: " << t_copy.stop() << " s";
            OpmLog::info(out.str());
        }
@ -191,9 +176,8 @@ namespace bda
        int LSize = 0;
-        if (verbosity >= 3){
+        Timer t_decomposition;
-            t1 = second();
+
        }
        // go through all rows
        for (i = 0; i < LUMat->Nb; i++) {
            iRowStart = LUMat->rowPointers[i];
@ -272,15 +256,12 @@ namespace bda
            URowIndex++;
        }
        if (verbosity >= 3) {
            t2 = second();
            std::ostringstream out;
-            out << "BILU0 decomposition: " << t2 - t1 << " s";
+            out << "BILU0 decomposition: " << t_decomposition.stop() << " s";
            OpmLog::info(out.str());
        }
-        if (verbosity >= 3) {
+        Timer t_copyToGpu;
            t1 = second();
        }
        if (pattern_uploaded == false) {
            queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, LMat->nnzbs * sizeof(int), LMat->colIndices);
            queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, UMat->nnzbs * sizeof(int), UMat->colIndices);
@ -292,9 +273,8 @@ namespace bda
        queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues);
        queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(double) * bs * bs, invDiagVals);
        if (verbosity >= 3) {
            t2 = second();
            std::ostringstream out;
-            out << "BILU0 copy to GPU: " << t2 - t1 << " s";
+            out << "BILU0 copy to GPU: " << t_copyToGpu.stop() << " s";
            OpmLog::info(out.str());
        }
@ -307,11 +287,8 @@ namespace bda
    template <unsigned int block_size>
    void BILU0<block_size>::apply(cl::Buffer& x, cl::Buffer& y)
    {
        double t1 = 0.0, t2 = 0.0;
        if (verbosity >= 3) {
            t1 = second();
        }
        cl::Event event;
        Timer t_apply;
        for(int color = 0; color < numColors; ++color){
            event = (*ILU_apply1)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), s.Lvals, s.Lcols, s.Lrows, (unsigned int)Nb, x, y, s.rowsPerColor, color, block_size, cl::Local(lmem_per_work_group));
@ -324,9 +301,8 @@ namespace bda
        if (verbosity >= 3) {
            event.wait();
            t2 = second();
            std::ostringstream out;
-            out << "BILU0 apply: " << t2 - t1 << " s";
+            out << "BILU0 apply: " << t_apply.stop() << " s";
            OpmLog::info(out.str());
        }
    }
--- a/opm/simulators/linalg/bda/BdaSolver.hpp
+++ b/opm/simulators/linalg/bda/BdaSolver.hpp
@ -21,8 +21,6 @@
 #define OPM_BDASOLVER_BACKEND_HEADER_INCLUDED
 #include <sys/time.h>
 #include <opm/simulators/linalg/bda/BdaResult.hpp>
 #include <opm/simulators/linalg/bda/BdaSolverStatus.hpp>
 #include <opm/simulators/linalg/bda/WellContributions.hpp>
@ -35,8 +33,7 @@ namespace bda
    /// This class serves to simplify choosing between different backend solvers, such as cusparseSolver and openclSolver
    /// This class is abstract, no instantiations of it can be made, only of its children
-    /// Without a default block_size value, the BILU0 class cannot use BdaSolver::second()
+    template <unsigned int block_size>
    template <unsigned int block_size = 3>
    class BdaSolver
    {
@ -74,13 +71,6 @@ namespace bda
        virtual void get_result(double *x) = 0;
        /// Different implementations of BdaSolver can use this function for timing
        static double second(void) {
            struct timeval tv;
            gettimeofday(&tv, nullptr);
            return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
        }
    }; // end class BdaSolver
 } // end namespace bda
--- a/opm/simulators/linalg/bda/cusparseSolverBackend.cu
+++ b/opm/simulators/linalg/bda/cusparseSolverBackend.cu
@ -23,6 +23,7 @@
 #include <sstream>
 #include <opm/common/OpmLog/OpmLog.hpp>
 #include <dune/common/timer.hh>
 #include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
 #include <opm/simulators/linalg/bda/BdaResult.hpp>
@ -40,6 +41,7 @@ namespace bda
 {
 using Opm::OpmLog;
 using Dune::Timer;
 const cusparseSolvePolicy_t policy = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
 const cusparseOperation_t operation  = CUSPARSE_OPERATION_NON_TRANSPOSE;
@ -56,7 +58,7 @@ cusparseSolverBackend<block_size>::~cusparseSolverBackend() {
 template <unsigned int block_size>
 void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
-    double t_total1, t_total2;
+    Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
    int n = N;
    double rho = 1.0, rhop;
    double alpha, nalpha, beta;
@ -67,8 +69,6 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
    double mone = -1.0;
    float it;
    t_total1 = second();
    if (wellContribs.getNumWells() > 0) {
        wellContribs.setCudaStream(stream);
    }
@ -169,12 +169,10 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
        }
    }
    t_total2 = second();
    res.iterations = std::min(it, (float)maxit);
    res.reduction = norm / norm_0;
    res.conv_rate  = static_cast<double>(pow(res.reduction, 1.0 / it));
-    res.elapsed = t_total2 - t_total1;
+    res.elapsed = t_total.stop();
    res.converged = (it != (maxit + 0.5));
    if (verbosity > 0) {
@ -284,11 +282,7 @@ void cusparseSolverBackend<block_size>::finalize() {
 template <unsigned int block_size>
 void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b) {
-
+    Timer t;
    double t1, t2;
    if (verbosity > 2) {
        t1 = second();
    }
 #if COPY_ROW_BY_ROW
    int sum = 0;
@ -309,9 +303,8 @@ void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *ro
    if (verbosity > 2) {
        cudaStreamSynchronize(stream);
        t2 = second();
        std::ostringstream out;
-        out << "cusparseSolver::copy_system_to_gpu(): " << t2 - t1 << " s";
+        out << "cusparseSolver::copy_system_to_gpu(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
 } // end copy_system_to_gpu()
@ -320,11 +313,7 @@ void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *ro
 // don't copy rowpointers and colindices, they stay the same
 template <unsigned int block_size>
 void cusparseSolverBackend<block_size>::update_system_on_gpu(double *vals, int *rows, double *b) {
-
+    Timer t;
    double t1, t2;
    if (verbosity > 2) {
        t1 = second();
    }
 #if COPY_ROW_BY_ROW
    int sum = 0;
@ -343,9 +332,8 @@ void cusparseSolverBackend<block_size>::update_system_on_gpu(double *vals, int *
    if (verbosity > 2) {
        cudaStreamSynchronize(stream);
        t2 = second();
        std::ostringstream out;
-        out << "cusparseSolver::update_system_on_gpu(): " << t2 - t1 << " s";
+        out << "cusparseSolver::update_system_on_gpu(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
 } // end update_system_on_gpu()
@ -361,11 +349,7 @@ template <unsigned int block_size>
 bool cusparseSolverBackend<block_size>::analyse_matrix() {
    int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
-    double t1, t2;
+    Timer t;
    if (verbosity > 2) {
        t1 = second();
    }
    cusparseCreateMatDescr(&descr_B);
    cusparseCreateMatDescr(&descr_M);
@ -428,9 +412,8 @@ bool cusparseSolverBackend<block_size>::analyse_matrix() {
    if (verbosity > 2) {
        cudaStreamSynchronize(stream);
        t2 = second();
        std::ostringstream out;
-        out << "cusparseSolver::analyse_matrix(): " << t2 - t1 << " s";
+        out << "cusparseSolver::analyse_matrix(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
@ -441,11 +424,7 @@ bool cusparseSolverBackend<block_size>::analyse_matrix() {
 template <unsigned int block_size>
 bool cusparseSolverBackend<block_size>::create_preconditioner() {
-
+    Timer t;
    double t1, t2;
    if (verbosity > 2) {
        t1 = second();
    }
    d_mCols = d_bCols;
    d_mRows = d_bRows;
@ -463,9 +442,8 @@ bool cusparseSolverBackend<block_size>::create_preconditioner() {
    if (verbosity > 2) {
        cudaStreamSynchronize(stream);
        t2 = second();
        std::ostringstream out;
-        out << "cusparseSolver::create_preconditioner(): " << t2 - t1 << " s";
+        out << "cusparseSolver::create_preconditioner(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
    return true;
@ -485,19 +463,14 @@ void cusparseSolverBackend<block_size>::solve_system(WellContributions& wellCont
 // caller must be sure that x is a valid array
 template <unsigned int block_size>
 void cusparseSolverBackend<block_size>::get_result(double *x) {
-
+    Timer t;
    double t1, t2;
    if (verbosity > 2) {
        t1 = second();
    }
    cudaMemcpyAsync(x, d_x, N * sizeof(double), cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);
    if (verbosity > 2) {
        t2 = second();
        std::ostringstream out;
-        out << "cusparseSolver::get_result(): " << t2 - t1 << " s";
+        out << "cusparseSolver::get_result(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
 } // end get_result()
--- a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp
@ -44,7 +44,6 @@ class cusparseSolverBackend : public BdaSolver<block_size> {
    using Base::verbosity;
    using Base::maxit;
    using Base::tolerance;
    using Base::second;
    using Base::initialized;
    typedef BdaSolverStatus::Status Status;
--- a/opm/simulators/linalg/bda/openclSolverBackend.cpp
+++ b/opm/simulators/linalg/bda/openclSolverBackend.cpp
@ -23,6 +23,7 @@
 #include <opm/common/OpmLog/OpmLog.hpp>
 #include <opm/common/ErrorMacros.hpp>
 #include <dune/common/timer.hh>
 #define __CL_ENABLE_EXCEPTIONS
@ -48,6 +49,7 @@ namespace bda
 {
 using Opm::OpmLog;
 using Dune::Timer;
 template <unsigned int block_size>
 openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_) {
@ -72,14 +74,11 @@ unsigned int openclSolverBackend<block_size>::ceilDivision(const unsigned int A,
 template <unsigned int block_size>
 double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out)
 {
    double t1 = 0.0, t2 = 0.0;
    const unsigned int work_group_size = 1024;
    const unsigned int num_work_groups = ceilDivision(N, work_group_size);
    const unsigned int total_work_items = num_work_groups * work_group_size;
    const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
-    if (verbosity >= 4) {
+    Timer t_dot;
        t1 = second();
    }
    cl::Event event = (*dot_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in1, in2, out, N, cl::Local(lmem_per_work_group));
@ -92,9 +91,8 @@ double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl
    if (verbosity >= 4) {
        event.wait();
        t2 = second();
        std::ostringstream oss;
-        oss << "openclSolver dot_w time: " << t2 - t1;
+        oss << std::scientific << "openclSolver dot_w time: " << t_dot.stop() << " s";
        OpmLog::info(oss.str());
    }
@ -104,14 +102,11 @@ double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl
 template <unsigned int block_size>
 double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
 {
    double t1 = 0.0, t2 = 0.0;
    const unsigned int work_group_size = 1024;
    const unsigned int num_work_groups = ceilDivision(N, work_group_size);
    const unsigned int total_work_items = num_work_groups * work_group_size;
    const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
-    if (verbosity >= 4) {
+    Timer t_norm;
        t1 = second();
    }
    cl::Event event = (*norm_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in, out, N, cl::Local(lmem_per_work_group));
@ -125,9 +120,8 @@ double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
    if (verbosity >= 4) {
        event.wait();
        t2 = second();
        std::ostringstream oss;
-        oss << "openclSolver norm_w time: " << t2 - t1;
+        oss << std::scientific << "openclSolver norm_w time: " << t_norm.stop() << " s";
        OpmLog::info(oss.str());
    }
@ -137,21 +131,17 @@ double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::axpy_w(cl::Buffer in, const double a, cl::Buffer out)
 {
    double t1 = 0.0, t2 = 0.0;
    const unsigned int work_group_size = 32;
    const unsigned int num_work_groups = ceilDivision(N, work_group_size);
    const unsigned int total_work_items = num_work_groups * work_group_size;
-    if (verbosity >= 4) {
+    Timer t_axpy;
        t1 = second();
    }
    cl::Event event = (*axpy_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in, a, out, N);
    if (verbosity >= 4) {
        event.wait();
        t2 = second();
        std::ostringstream oss;
-        oss << "openclSolver axpy_w time: " << t2 - t1;
+        oss << std::scientific << "openclSolver axpy_w time: " << t_axpy.stop() << " s";
        OpmLog::info(oss.str());
    }
 }
@ -159,21 +149,17 @@ void openclSolverBackend<block_size>::axpy_w(cl::Buffer in, const double a, cl::
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta)
 {
    double t1 = 0.0, t2 = 0.0;
    const unsigned int work_group_size = 32;
    const unsigned int num_work_groups = ceilDivision(N, work_group_size);
    const unsigned int total_work_items = num_work_groups * work_group_size;
-    if (verbosity >= 4) {
+    Timer t_custom;
        t1 = second();
    }
    cl::Event event = (*custom_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), p, v, r, omega, beta, N);
    if (verbosity >= 4) {
        event.wait();
        t2 = second();
        std::ostringstream oss;
-        oss << "openclSolver custom_w time: " << t2 - t1;
+        oss << std::scientific << "openclSolver custom_w time: " << t_custom.stop() << " s";
        OpmLog::info(oss.str());
    }
 }
@ -181,22 +167,18 @@ void openclSolverBackend<block_size>::custom_w(cl::Buffer p, cl::Buffer v, cl::B
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b)
 {
    double t1 = 0.0, t2 = 0.0;
    const unsigned int work_group_size = 32;
    const unsigned int num_work_groups = ceilDivision(N, work_group_size);
    const unsigned int total_work_items = num_work_groups * work_group_size;
    const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
-    if (verbosity >= 4) {
+    Timer t_spmv;
        t1 = second();
    }
    cl::Event event = (*spmv_blocked_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), vals, cols, rows, Nb, x, b, block_size, cl::Local(lmem_per_work_group));
    if (verbosity >= 4) {
        event.wait();
        t2 = second();
        std::ostringstream oss;
-        oss << "openclSolver spmv_blocked_w time: " << t2 - t1;
+        oss << std::scientific << "openclSolver spmv_blocked_w time: " << t_spmv.stop() << " s";
        OpmLog::info(oss.str());
    }
 }
@ -209,9 +191,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
    double rho, rhop, beta, alpha, omega, tmp1, tmp2;
    double norm, norm_0;
-    double t_total1, t_total2, t1 = 0.0, t2 = 0.0;
+    Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
    double prec_time = 0.0, spmv_time = 0.0, well_time = 0.0, rest_time = 0.0;
    t_total1 = second();
    wellContribs.setOpenCLQueue(queue.get());
    wellContribs.setReordering(toOrder, true);
@ -245,7 +225,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
        OpmLog::info(out.str());
    }
-    t1 = second();
+    t_rest.start();
    for (it = 0.5; it < maxit; it += 0.5) {
        rhop = rho;
        rho = dot_w(d_rw, d_r, d_tmp);
@ -254,37 +234,32 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
            beta = (rho / rhop) * (alpha / omega);
            custom_w(d_p, d_v, d_r, omega, beta);
        }
-        t2 = second();
+        t_rest.stop();
        rest_time += t2 - t1;
        // pw = prec(p)
-        t1 = second();
+        t_prec.start();
        prec->apply(d_p, d_pw);
-        t2 = second();
+        t_prec.stop();
        prec_time += t2 - t1;
        // v = A * pw
-        t1 = second();
+        t_spmv.start();
        spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_pw, d_v);
-        t2 = second();
+        t_spmv.stop();
        spmv_time += t2 - t1;
        // apply wellContributions
        if (wellContribs.getNumWells() > 0) {
-            t1 = second();
+            t_well.start();
            wellContribs.apply(d_pw, d_v);
-            t2 = second();
+            t_well.stop();
            well_time += t2 - t1;
        }
-        t1 = second();
+        t_rest.start();
        tmp1 = dot_w(d_rw, d_v, d_tmp);
        alpha = rho / tmp1;
        axpy_w(d_v, -alpha, d_r);      // r = r - alpha * v
        axpy_w(d_pw, alpha, d_x);      // x = x + alpha * pw
        norm = norm_w(d_r, d_tmp);
-        t2 = second();
+        t_rest.stop();
        rest_time += t2 - t1;
        if (norm < tolerance * norm_0) {
            break;
@ -293,34 +268,30 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
        it += 0.5;
        // s = prec(r)
-        t1 = second();
+        t_prec.start();
        prec->apply(d_r, d_s);
-        t2 = second();
+        t_prec.stop();
        prec_time += t2 - t1;
        // t = A * s
-        t1 = second();
+        t_spmv.start();
        spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_s, d_t);
-        t2 = second();
+        t_spmv.stop();
        spmv_time += t2 - t1;
        // apply wellContributions
        if (wellContribs.getNumWells() > 0) {
-            t1 = second();
+        	t_well.start();
            wellContribs.apply(d_s, d_t);
-            t2 = second();
+        	t_well.stop();
            well_time += t2 - t1;
        }
-        t1 = second();
+        t_rest.start();
        tmp1 = dot_w(d_t, d_r, d_tmp);
        tmp2 = dot_w(d_t, d_t, d_tmp);
        omega = tmp1 / tmp2;
        axpy_w(d_s, omega, d_x);     // x = x + omega * s
        axpy_w(d_t, -omega, d_r);    // r = r - omega * t
        norm = norm_w(d_r, d_tmp);
-        t2 = second();
+        t_rest.stop();
        rest_time += t2 - t1;
        if (norm < tolerance * norm_0) {
            break;
@ -333,14 +304,10 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
        }
    }
    t2 = second();
    t_total2 = second();
    rest_time += t2 - t1;
    res.iterations = std::min(it, (float)maxit);
    res.reduction = norm / norm_0;
    res.conv_rate  = static_cast<double>(pow(res.reduction, 1.0 / it));
-    res.elapsed = t_total2 - t_total1;
+    res.elapsed = t_total.stop();
    res.converged = (it != (maxit + 0.5));
    if (verbosity > 0) {
@ -556,12 +523,7 @@ void openclSolverBackend<block_size>::finalize() {
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::copy_system_to_gpu() {
-
+	Timer t;
    double t1 = 0.0, t2 = 0.0;
    if (verbosity > 2) {
        t1 = second();
    }
    cl::Event event;
 #if COPY_ROW_BY_ROW
@ -583,9 +545,8 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
    event.wait();
    if (verbosity > 2) {
        t2 = second();
        std::ostringstream out;
-        out << "openclSolver::copy_system_to_gpu(): " << t2 - t1 << " s";
+        out << "openclSolver::copy_system_to_gpu(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
 } // end copy_system_to_gpu()
@ -594,12 +555,7 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
 // don't copy rowpointers and colindices, they stay the same
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::update_system_on_gpu() {
-
+	Timer t;
    double t1 = 0.0, t2 = 0.0;
    if (verbosity > 2) {
        t1 = second();
    }
    cl::Event event;
 #if COPY_ROW_BY_ROW
@ -619,9 +575,8 @@ void openclSolverBackend<block_size>::update_system_on_gpu() {
    event.wait();
    if (verbosity > 2) {
        t2 = second();
        std::ostringstream out;
-        out << "openclSolver::update_system_on_gpu(): " << t2 - t1 << " s";
+        out << "openclSolver::update_system_on_gpu(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
 } // end update_system_on_gpu()
@ -629,12 +584,7 @@ void openclSolverBackend<block_size>::update_system_on_gpu() {
 template <unsigned int block_size>
 bool openclSolverBackend<block_size>::analyse_matrix() {
-
+	Timer t;
    double t1 = 0.0, t2 = 0.0;
    if (verbosity > 2) {
        t1 = second();
    }
    bool success = prec->init(mat);
    int work_group_size = 32;
@ -648,9 +598,8 @@ bool openclSolverBackend<block_size>::analyse_matrix() {
    rmat = prec->getRMat();
    if (verbosity > 2) {
        t2 = second();
        std::ostringstream out;
-        out << "openclSolver::analyse_matrix(): " << t2 - t1 << " s";
+        out << "openclSolver::analyse_matrix(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
@ -662,18 +611,14 @@ bool openclSolverBackend<block_size>::analyse_matrix() {
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
-    double t1 = 0.0, t2 = 0.0;
+    Timer t;
    if (verbosity > 2) {
        t1 = second();
    }
    mat->nnzValues = vals;
    blocked_reorder_vector_by_pattern<block_size>(mat->Nb, b, fromOrder, rb);
    if (verbosity > 2) {
        t2 = second();
        std::ostringstream out;
-        out << "openclSolver::update_system(): " << t2 - t1 << " s";
+        out << "openclSolver::update_system(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
 } // end update_system()
@ -681,18 +626,13 @@ void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
 template <unsigned int block_size>
 bool openclSolverBackend<block_size>::create_preconditioner() {
-
+	Timer t;
    double t1 = 0.0, t2 = 0.0;
    if (verbosity > 2) {
        t1 = second();
    }
    bool result = prec->create_preconditioner(mat);
    if (verbosity > 2) {
        t2 = second();
        std::ostringstream out;
-        out << "openclSolver::create_preconditioner(): " << t2 - t1 << " s";
+        out << "openclSolver::create_preconditioner(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
    return result;
@ -701,18 +641,14 @@ bool openclSolverBackend<block_size>::create_preconditioner() {
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::solve_system(WellContributions& wellContribs, BdaResult &res) {
-    // actually solve
+    Timer t;
    double t1 = 0.0, t2 = 0.0;
    if (verbosity > 2) {
        t1 = second();
    }
    // actually solve
    gpu_pbicgstab(wellContribs, res);
    if (verbosity > 2) {
        t2 = second();
        std::ostringstream out;
-        out << "openclSolver::solve_system(): " << t2 - t1 << " s";
+        out << "openclSolver::solve_system(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
@ -723,19 +659,14 @@ void openclSolverBackend<block_size>::solve_system(WellContributions& wellContri
 // caller must be sure that x is a valid array
 template <unsigned int block_size>
 void openclSolverBackend<block_size>::get_result(double *x) {
-
+	Timer t;
    double t1 = 0.0, t2 = 0.0;
    if (verbosity > 2) {
        t1 = second();
    }
    queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, rb);
    blocked_reorder_vector_by_pattern<block_size>(mat->Nb, rb, toOrder, x);
    if (verbosity > 2) {
        t2 = second();
        std::ostringstream out;
-        out << "openclSolver::get_result(): " << t2 - t1 << " s";
+        out << "openclSolver::get_result(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
 } // end get_result()
--- a/opm/simulators/linalg/bda/openclSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/openclSolverBackend.hpp
@ -50,7 +50,6 @@ class openclSolverBackend : public BdaSolver<block_size>
    using Base::verbosity;
    using Base::maxit;
    using Base::tolerance;
    using Base::second;
    using Base::initialized;
    typedef BdaSolverStatus::Status Status;