From 7f8faa018b014efec6721d9c6b5f9c1801e55b22 Mon Sep 17 00:00:00 2001 From: "T.D. (Tongdong) Qiu" Date: Wed, 1 Jul 2020 14:50:34 +0200 Subject: [PATCH] Replaced timing function with Dune::Timer --- opm/simulators/linalg/bda/BILU0.cpp | 60 ++----- opm/simulators/linalg/bda/BdaSolver.hpp | 12 +- .../linalg/bda/cusparseSolverBackend.cu | 55 ++---- .../linalg/bda/cusparseSolverBackend.hpp | 1 - .../linalg/bda/openclSolverBackend.cpp | 163 +++++------------- .../linalg/bda/openclSolverBackend.hpp | 1 - 6 files changed, 80 insertions(+), 212 deletions(-) diff --git a/opm/simulators/linalg/bda/BILU0.cpp b/opm/simulators/linalg/bda/BILU0.cpp index 52ac6b6cd..a491bc21c 100644 --- a/opm/simulators/linalg/bda/BILU0.cpp +++ b/opm/simulators/linalg/bda/BILU0.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -30,10 +31,7 @@ namespace bda { using Opm::OpmLog; - - // define 'second' as 'BdaSolver<>::second', this allows usage of the second() function for timing - // typedefs cannot handle templates - const auto second = BdaSolver<>::second; + using Dune::Timer; template BILU0::BILU0(bool level_scheduling_, bool graph_coloring_, int verbosity_) : @@ -62,7 +60,6 @@ namespace bda bool BILU0::init(BlockedMatrix *mat) { const unsigned int bs = block_size; - double t1 = 0.0, t2 = 0.0; BlockedMatrix *CSCmat = nullptr; this->N = mat->Nb * block_size; @@ -79,21 +76,16 @@ namespace bda CSCmat->nnzValues = new double[nnzbs * bs * bs]; CSCmat->colIndices = new int[nnzbs]; CSCmat->rowPointers = new int[Nb + 1]; - if(verbosity >= 3){ - t1 = second(); - } + Timer t_convert; bcsr_to_bcsc(mat->nnzValues, mat->colIndices, mat->rowPointers, CSCmat->nnzValues, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb); if(verbosity >= 3){ - t2 = second(); std::ostringstream out; - out << "BILU0 convert CSR to CSC: " << t2 - t1 << " s"; + out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s\n"; OpmLog::info(out.str()); } } - if(verbosity >= 3){ - t1 = second(); - } + Timer t_analysis; rMat = allocateBlockedMatrix(mat->Nb, mat->nnzbs); LUMat = soft_copyBlockedMatrix(rMat); if (level_scheduling) { @@ -105,9 +97,8 @@ namespace bda return false; } if(verbosity >= 3){ - t2 = second(); std::ostringstream out; - out << "BILU0 analysis took: " << t2 - t1 << " s, " << numColors << " colors"; + out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors"; OpmLog::info(out.str()); } @@ -161,27 +152,21 @@ namespace bda bool BILU0::create_preconditioner(BlockedMatrix *mat) { const unsigned int bs = block_size; - double t1 = 0.0, t2 = 0.0; - if (verbosity >= 3){ - t1 = second(); - } + + Timer t_reorder; blocked_reorder_matrix_by_pattern(mat, toOrder, fromOrder, rMat); if (verbosity >= 3){ - t2 = second(); std::ostringstream out; - out << "BILU0 reorder matrix: " << t2 - t1 << " s"; + out << "BILU0 reorder matrix: " << t_reorder.stop() << " s"; OpmLog::info(out.str()); } // TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp - if (verbosity >= 3){ - t1 = second(); - } + Timer t_copy; memcpy(LUMat->nnzValues, rMat->nnzValues, sizeof(double) * bs * bs * rMat->nnzbs); if (verbosity >= 3){ - t2 = second(); std::ostringstream out; - out << "BILU0 memcpy: " << t2 - t1 << " s"; + out << "BILU0 memcpy: " << t_copy.stop() << " s"; OpmLog::info(out.str()); } @@ -191,9 +176,8 @@ namespace bda int LSize = 0; - if (verbosity >= 3){ - t1 = second(); - } + Timer t_decomposition; + // go through all rows for (i = 0; i < LUMat->Nb; i++) { iRowStart = LUMat->rowPointers[i]; @@ -272,15 +256,12 @@ namespace bda URowIndex++; } if (verbosity >= 3) { - t2 = second(); std::ostringstream out; - out << "BILU0 decomposition: " << t2 - t1 << " s"; + out << "BILU0 decomposition: " << t_decomposition.stop() << " s"; OpmLog::info(out.str()); } - if (verbosity >= 3) { - t1 = second(); - } + Timer t_copyToGpu; if (pattern_uploaded == false) { queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, LMat->nnzbs * sizeof(int), LMat->colIndices); queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, UMat->nnzbs * sizeof(int), UMat->colIndices); @@ -292,9 +273,8 @@ namespace bda queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues); queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(double) * bs * bs, invDiagVals); if (verbosity >= 3) { - t2 = second(); std::ostringstream out; - out << "BILU0 copy to GPU: " << t2 - t1 << " s"; + out << "BILU0 copy to GPU: " << t_copyToGpu.stop() << " s"; OpmLog::info(out.str()); } @@ -307,11 +287,8 @@ namespace bda template void BILU0::apply(cl::Buffer& x, cl::Buffer& y) { - double t1 = 0.0, t2 = 0.0; - if (verbosity >= 3) { - t1 = second(); - } cl::Event event; + Timer t_apply; for(int color = 0; color < numColors; ++color){ event = (*ILU_apply1)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), s.Lvals, s.Lcols, s.Lrows, (unsigned int)Nb, x, y, s.rowsPerColor, color, block_size, cl::Local(lmem_per_work_group)); @@ -324,9 +301,8 @@ namespace bda if (verbosity >= 3) { event.wait(); - t2 = second(); std::ostringstream out; - out << "BILU0 apply: " << t2 - t1 << " s"; + out << "BILU0 apply: " << t_apply.stop() << " s"; OpmLog::info(out.str()); } } diff --git a/opm/simulators/linalg/bda/BdaSolver.hpp b/opm/simulators/linalg/bda/BdaSolver.hpp index f61269a4b..962d95dfc 100644 --- a/opm/simulators/linalg/bda/BdaSolver.hpp +++ b/opm/simulators/linalg/bda/BdaSolver.hpp @@ -21,8 +21,6 @@ #define OPM_BDASOLVER_BACKEND_HEADER_INCLUDED -#include - #include #include #include @@ -35,8 +33,7 @@ namespace bda /// This class serves to simplify choosing between different backend solvers, such as cusparseSolver and openclSolver /// This class is abstract, no instantiations can of it can be made, only of its children - /// Without a default block_size value, the BILU0 class cannot use BdaSolver::second() - template + template class BdaSolver { @@ -74,13 +71,6 @@ namespace bda virtual void get_result(double *x) = 0; - /// Different implementations of BdaSolver can use this function for timing - static double second(void) { - struct timeval tv; - gettimeofday(&tv, nullptr); - return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; - } - }; // end class BdaSolver } // end namespace bda diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.cu b/opm/simulators/linalg/bda/cusparseSolverBackend.cu index 4457830e3..5a9338a71 100644 --- a/opm/simulators/linalg/bda/cusparseSolverBackend.cu +++ b/opm/simulators/linalg/bda/cusparseSolverBackend.cu @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -40,6 +41,7 @@ namespace bda { using Opm::OpmLog; +using Dune::Timer; const cusparseSolvePolicy_t policy = CUSPARSE_SOLVE_POLICY_USE_LEVEL; const cusparseOperation_t operation = CUSPARSE_OPERATION_NON_TRANSPOSE; @@ -56,7 +58,7 @@ cusparseSolverBackend::~cusparseSolverBackend() { template void cusparseSolverBackend::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) { - double t_total1, t_total2; + Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false); int n = N; double rho = 1.0, rhop; double alpha, nalpha, beta; @@ -67,8 +69,6 @@ void cusparseSolverBackend::gpu_pbicgstab(WellContributions& wellCon double mone = -1.0; float it; - t_total1 = second(); - if (wellContribs.getNumWells() > 0) { wellContribs.setCudaStream(stream); } @@ -169,12 +169,10 @@ void cusparseSolverBackend::gpu_pbicgstab(WellContributions& wellCon } } - t_total2 = second(); - res.iterations = std::min(it, (float)maxit); res.reduction = norm / norm_0; res.conv_rate = static_cast(pow(res.reduction, 1.0 / it)); - res.elapsed = t_total2 - t_total1; + res.elapsed = t_total.stop(); res.converged = (it != (maxit + 0.5)); if (verbosity > 0) { @@ -284,11 +282,7 @@ void cusparseSolverBackend::finalize() { template void cusparseSolverBackend::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b) { - - double t1, t2; - if (verbosity > 2) { - t1 = second(); - } + Timer t; #if COPY_ROW_BY_ROW int sum = 0; @@ -309,9 +303,8 @@ void cusparseSolverBackend::copy_system_to_gpu(double *vals, int *ro if (verbosity > 2) { cudaStreamSynchronize(stream); - t2 = second(); std::ostringstream out; - out << "cusparseSolver::copy_system_to_gpu(): " << t2 - t1 << " s"; + out << "cusparseSolver::copy_system_to_gpu(): " << t.stop() << " s"; OpmLog::info(out.str()); } } // end copy_system_to_gpu() @@ -320,11 +313,7 @@ void cusparseSolverBackend::copy_system_to_gpu(double *vals, int *ro // don't copy rowpointers and colindices, they stay the same template void cusparseSolverBackend::update_system_on_gpu(double *vals, int *rows, double *b) { - - double t1, t2; - if (verbosity > 2) { - t1 = second(); - } + Timer t; #if COPY_ROW_BY_ROW int sum = 0; @@ -343,9 +332,8 @@ void cusparseSolverBackend::update_system_on_gpu(double *vals, int * if (verbosity > 2) { cudaStreamSynchronize(stream); - t2 = second(); std::ostringstream out; - out << "cusparseSolver::update_system_on_gpu(): " << t2 - t1 << " s"; + out << "cusparseSolver::update_system_on_gpu(): " << t.stop() << " s"; OpmLog::info(out.str()); } } // end update_system_on_gpu() @@ -361,11 +349,7 @@ template bool cusparseSolverBackend::analyse_matrix() { int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize; - double t1, t2; - - if (verbosity > 2) { - t1 = second(); - } + Timer t; cusparseCreateMatDescr(&descr_B); cusparseCreateMatDescr(&descr_M); @@ -428,9 +412,8 @@ bool cusparseSolverBackend::analyse_matrix() { if (verbosity > 2) { cudaStreamSynchronize(stream); - t2 = second(); std::ostringstream out; - out << "cusparseSolver::analyse_matrix(): " << t2 - t1 << " s"; + out << "cusparseSolver::analyse_matrix(): " << t.stop() << " s"; OpmLog::info(out.str()); } @@ -441,11 +424,7 @@ bool cusparseSolverBackend::analyse_matrix() { template bool cusparseSolverBackend::create_preconditioner() { - - double t1, t2; - if (verbosity > 2) { - t1 = second(); - } + Timer t; d_mCols = d_bCols; d_mRows = d_bRows; @@ -463,9 +442,8 @@ bool cusparseSolverBackend::create_preconditioner() { if (verbosity > 2) { cudaStreamSynchronize(stream); - t2 = second(); std::ostringstream out; - out << "cusparseSolver::create_preconditioner(): " << t2 - t1 << " s"; + out << "cusparseSolver::create_preconditioner(): " << t.stop() << " s"; OpmLog::info(out.str()); } return true; @@ -485,19 +463,14 @@ void cusparseSolverBackend::solve_system(WellContributions& wellCont // caller must be sure that x is a valid array template void cusparseSolverBackend::get_result(double *x) { - - double t1, t2; - if (verbosity > 2) { - t1 = second(); - } + Timer t; cudaMemcpyAsync(x, d_x, N * sizeof(double), cudaMemcpyDeviceToHost, stream); cudaStreamSynchronize(stream); if (verbosity > 2) { - t2 = second(); std::ostringstream out; - out << "cusparseSolver::get_result(): " << t2 - t1 << " s"; + out << "cusparseSolver::get_result(): " << t.stop() << " s"; OpmLog::info(out.str()); } } // end get_result() diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp index aa8df2170..cd3b84610 100644 --- a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp +++ b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp @@ -44,7 +44,6 @@ class cusparseSolverBackend : public BdaSolver { using Base::verbosity; using Base::maxit; using Base::tolerance; - using Base::second; using Base::initialized; typedef BdaSolverStatus::Status Status; diff --git a/opm/simulators/linalg/bda/openclSolverBackend.cpp b/opm/simulators/linalg/bda/openclSolverBackend.cpp index 2e6081636..9f90467d7 100644 --- a/opm/simulators/linalg/bda/openclSolverBackend.cpp +++ b/opm/simulators/linalg/bda/openclSolverBackend.cpp @@ -23,6 +23,7 @@ #include #include +#include #define __CL_ENABLE_EXCEPTIONS @@ -48,6 +49,7 @@ namespace bda { using Opm::OpmLog; +using Dune::Timer; template openclSolverBackend::openclSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver(verbosity_, maxit_, tolerance_) { @@ -72,14 +74,11 @@ unsigned int openclSolverBackend::ceilDivision(const unsigned int A, template double openclSolverBackend::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out) { - double t1 = 0.0, t2 = 0.0; const unsigned int work_group_size = 1024; const unsigned int num_work_groups = ceilDivision(N, work_group_size); const unsigned int total_work_items = num_work_groups * work_group_size; const unsigned int lmem_per_work_group = sizeof(double) * work_group_size; - if (verbosity >= 4) { - t1 = second(); - } + Timer t_dot; cl::Event event = (*dot_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in1, in2, out, N, cl::Local(lmem_per_work_group)); @@ -92,9 +91,8 @@ double openclSolverBackend::dot_w(cl::Buffer in1, cl::Buffer in2, cl if (verbosity >= 4) { event.wait(); - t2 = second(); std::ostringstream oss; - oss << "openclSolver dot_w time: " << t2 - t1; + oss << std::scientific << "openclSolver dot_w time: " << t_dot.stop() << " s"; OpmLog::info(oss.str()); } @@ -104,14 +102,11 @@ double openclSolverBackend::dot_w(cl::Buffer in1, cl::Buffer in2, cl template double openclSolverBackend::norm_w(cl::Buffer in, cl::Buffer out) { - double t1 = 0.0, t2 = 0.0; const unsigned int work_group_size = 1024; const unsigned int num_work_groups = ceilDivision(N, work_group_size); const unsigned int total_work_items = num_work_groups * work_group_size; const unsigned int lmem_per_work_group = sizeof(double) * work_group_size; - if (verbosity >= 4) { - t1 = second(); - } + Timer t_norm; cl::Event event = (*norm_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in, out, N, cl::Local(lmem_per_work_group)); @@ -125,9 +120,8 @@ double openclSolverBackend::norm_w(cl::Buffer in, cl::Buffer out) if (verbosity >= 4) { event.wait(); - t2 = second(); std::ostringstream oss; - oss << "openclSolver norm_w time: " << t2 - t1; + oss << std::scientific << "openclSolver norm_w time: " << t_norm.stop() << " s"; OpmLog::info(oss.str()); } @@ -137,21 +131,17 @@ double openclSolverBackend::norm_w(cl::Buffer in, cl::Buffer out) template void openclSolverBackend::axpy_w(cl::Buffer in, const double a, cl::Buffer out) { - double t1 = 0.0, t2 = 0.0; const unsigned int work_group_size = 32; const unsigned int num_work_groups = ceilDivision(N, work_group_size); const unsigned int total_work_items = num_work_groups * work_group_size; - if (verbosity >= 4) { - t1 = second(); - } + Timer t_axpy; cl::Event event = (*axpy_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in, a, out, N); if (verbosity >= 4) { event.wait(); - t2 = second(); std::ostringstream oss; - oss << "openclSolver axpy_w time: " << t2 - t1; + oss << std::scientific << "openclSolver axpy_w time: " << t_axpy.stop() << " s"; OpmLog::info(oss.str()); } } @@ -159,21 +149,17 @@ void openclSolverBackend::axpy_w(cl::Buffer in, const double a, cl:: template void openclSolverBackend::custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta) { - double t1 = 0.0, t2 = 0.0; const unsigned int work_group_size = 32; const unsigned int num_work_groups = ceilDivision(N, work_group_size); const unsigned int total_work_items = num_work_groups * work_group_size; - if (verbosity >= 4) { - t1 = second(); - } + Timer t_custom; cl::Event event = (*custom_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), p, v, r, omega, beta, N); if (verbosity >= 4) { event.wait(); - t2 = second(); std::ostringstream oss; - oss << "openclSolver custom_w time: " << t2 - t1; + oss << std::scientific << "openclSolver custom_w time: " << t_custom.stop() << " s"; OpmLog::info(oss.str()); } } @@ -181,22 +167,18 @@ void openclSolverBackend::custom_w(cl::Buffer p, cl::Buffer v, cl::B template void openclSolverBackend::spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b) { - double t1 = 0.0, t2 = 0.0; const unsigned int work_group_size = 32; const unsigned int num_work_groups = ceilDivision(N, work_group_size); const unsigned int total_work_items = num_work_groups * work_group_size; const unsigned int lmem_per_work_group = sizeof(double) * work_group_size; - if (verbosity >= 4) { - t1 = second(); - } + Timer t_spmv; cl::Event event = (*spmv_blocked_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), vals, cols, rows, Nb, x, b, block_size, cl::Local(lmem_per_work_group)); if (verbosity >= 4) { event.wait(); - t2 = second(); std::ostringstream oss; - oss << "openclSolver spmv_blocked_w time: " << t2 - t1; + oss << std::scientific << "openclSolver spmv_blocked_w time: " << t_spmv.stop() << " s"; OpmLog::info(oss.str()); } } @@ -209,9 +191,7 @@ void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContr double rho, rhop, beta, alpha, omega, tmp1, tmp2; double norm, norm_0; - double t_total1, t_total2, t1 = 0.0, t2 = 0.0; - double prec_time = 0.0, spmv_time = 0.0, well_time = 0.0, rest_time = 0.0; - t_total1 = second(); + Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false); wellContribs.setOpenCLQueue(queue.get()); wellContribs.setReordering(toOrder, true); @@ -245,7 +225,7 @@ void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContr OpmLog::info(out.str()); } - t1 = second(); + t_rest.start(); for (it = 0.5; it < maxit; it += 0.5) { rhop = rho; rho = dot_w(d_rw, d_r, d_tmp); @@ -254,37 +234,32 @@ void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContr beta = (rho / rhop) * (alpha / omega); custom_w(d_p, d_v, d_r, omega, beta); } - t2 = second(); - rest_time += t2 - t1; + t_rest.stop(); // pw = prec(p) - t1 = second(); + t_prec.start(); prec->apply(d_p, d_pw); - t2 = second(); - prec_time += t2 - t1; + t_prec.stop(); // v = A * pw - t1 = second(); + t_spmv.start(); spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_pw, d_v); - t2 = second(); - spmv_time += t2 - t1; + t_spmv.stop(); // apply wellContributions if (wellContribs.getNumWells() > 0) { - t1 = second(); + t_well.start(); wellContribs.apply(d_pw, d_v); - t2 = second(); - well_time += t2 - t1; + t_well.stop(); } - t1 = second(); + t_rest.start(); tmp1 = dot_w(d_rw, d_v, d_tmp); alpha = rho / tmp1; axpy_w(d_v, -alpha, d_r); // r = r - alpha * v axpy_w(d_pw, alpha, d_x); // x = x + alpha * pw norm = norm_w(d_r, d_tmp); - t2 = second(); - rest_time += t2 - t1; + t_rest.stop(); if (norm < tolerance * norm_0) { break; @@ -293,34 +268,30 @@ void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContr it += 0.5; // s = prec(r) - t1 = second(); + t_prec.start(); prec->apply(d_r, d_s); - t2 = second(); - prec_time += t2 - t1; + t_prec.stop(); // t = A * s - t1 = second(); + t_spmv.start(); spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_s, d_t); - t2 = second(); - spmv_time += t2 - t1; + t_spmv.stop(); // apply wellContributions if (wellContribs.getNumWells() > 0) { - t1 = second(); + t_well.start(); wellContribs.apply(d_s, d_t); - t2 = second(); - well_time += t2 - t1; + t_well.stop(); } - t1 = second(); + t_rest.start(); tmp1 = dot_w(d_t, d_r, d_tmp); tmp2 = dot_w(d_t, d_t, d_tmp); omega = tmp1 / tmp2; axpy_w(d_s, omega, d_x); // x = x + omega * s axpy_w(d_t, -omega, d_r); // r = r - omega * t norm = norm_w(d_r, d_tmp); - t2 = second(); - rest_time += t2 - t1; + t_rest.stop(); if (norm < tolerance * norm_0) { break; @@ -333,14 +304,10 @@ void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContr } } - t2 = second(); - t_total2 = second(); - rest_time += t2 - t1; - res.iterations = std::min(it, (float)maxit); res.reduction = norm / norm_0; res.conv_rate = static_cast(pow(res.reduction, 1.0 / it)); - res.elapsed = t_total2 - t_total1; + res.elapsed = t_total.stop(); res.converged = (it != (maxit + 0.5)); if (verbosity > 0) { @@ -556,12 +523,7 @@ void openclSolverBackend::finalize() { template void openclSolverBackend::copy_system_to_gpu() { - - double t1 = 0.0, t2 = 0.0; - if (verbosity > 2) { - t1 = second(); - } - + Timer t; cl::Event event; #if COPY_ROW_BY_ROW @@ -583,9 +545,8 @@ void openclSolverBackend::copy_system_to_gpu() { event.wait(); if (verbosity > 2) { - t2 = second(); std::ostringstream out; - out << "openclSolver::copy_system_to_gpu(): " << t2 - t1 << " s"; + out << "openclSolver::copy_system_to_gpu(): " << t.stop() << " s"; OpmLog::info(out.str()); } } // end copy_system_to_gpu() @@ -594,12 +555,7 @@ void openclSolverBackend::copy_system_to_gpu() { // don't copy rowpointers and colindices, they stay the same template void openclSolverBackend::update_system_on_gpu() { - - double t1 = 0.0, t2 = 0.0; - if (verbosity > 2) { - t1 = second(); - } - + Timer t; cl::Event event; #if COPY_ROW_BY_ROW @@ -619,9 +575,8 @@ void openclSolverBackend::update_system_on_gpu() { event.wait(); if (verbosity > 2) { - t2 = second(); std::ostringstream out; - out << "openclSolver::update_system_on_gpu(): " << t2 - t1 << " s"; + out << "openclSolver::update_system_on_gpu(): " << t.stop() << " s"; OpmLog::info(out.str()); } } // end update_system_on_gpu() @@ -629,12 +584,7 @@ void openclSolverBackend::update_system_on_gpu() { template bool openclSolverBackend::analyse_matrix() { - - double t1 = 0.0, t2 = 0.0; - - if (verbosity > 2) { - t1 = second(); - } + Timer t; bool success = prec->init(mat); int work_group_size = 32; @@ -648,9 +598,8 @@ bool openclSolverBackend::analyse_matrix() { rmat = prec->getRMat(); if (verbosity > 2) { - t2 = second(); std::ostringstream out; - out << "openclSolver::analyse_matrix(): " << t2 - t1 << " s"; + out << "openclSolver::analyse_matrix(): " << t.stop() << " s"; OpmLog::info(out.str()); } @@ -662,18 +611,14 @@ bool openclSolverBackend::analyse_matrix() { template void openclSolverBackend::update_system(double *vals, double *b) { - double t1 = 0.0, t2 = 0.0; - if (verbosity > 2) { - t1 = second(); - } + Timer t; mat->nnzValues = vals; blocked_reorder_vector_by_pattern(mat->Nb, b, fromOrder, rb); if (verbosity > 2) { - t2 = second(); std::ostringstream out; - out << "openclSolver::update_system(): " << t2 - t1 << " s"; + out << "openclSolver::update_system(): " << t.stop() << " s"; OpmLog::info(out.str()); } } // end update_system() @@ -681,18 +626,13 @@ void openclSolverBackend::update_system(double *vals, double *b) { template bool openclSolverBackend::create_preconditioner() { - - double t1 = 0.0, t2 = 0.0; - if (verbosity > 2) { - t1 = second(); - } + Timer t; bool result = prec->create_preconditioner(mat); if (verbosity > 2) { - t2 = second(); std::ostringstream out; - out << "openclSolver::create_preconditioner(): " << t2 - t1 << " s"; + out << "openclSolver::create_preconditioner(): " << t.stop() << " s"; OpmLog::info(out.str()); } return result; @@ -701,18 +641,14 @@ bool openclSolverBackend::create_preconditioner() { template void openclSolverBackend::solve_system(WellContributions& wellContribs, BdaResult &res) { - // actually solve - double t1 = 0.0, t2 = 0.0; - if (verbosity > 2) { - t1 = second(); - } + Timer t; + // actually solve gpu_pbicgstab(wellContribs, res); if (verbosity > 2) { - t2 = second(); std::ostringstream out; - out << "openclSolver::solve_system(): " << t2 - t1 << " s"; + out << "openclSolver::solve_system(): " << t.stop() << " s"; OpmLog::info(out.str()); } @@ -723,19 +659,14 @@ void openclSolverBackend::solve_system(WellContributions& wellContri // caller must be sure that x is a valid array template void openclSolverBackend::get_result(double *x) { - - double t1 = 0.0, t2 = 0.0; - if (verbosity > 2) { - t1 = second(); - } + Timer t; queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, rb); blocked_reorder_vector_by_pattern(mat->Nb, rb, toOrder, x); if (verbosity > 2) { - t2 = second(); std::ostringstream out; - out << "openclSolver::get_result(): " << t2 - t1 << " s"; + out << "openclSolver::get_result(): " << t.stop() << " s"; OpmLog::info(out.str()); } } // end get_result() diff --git a/opm/simulators/linalg/bda/openclSolverBackend.hpp b/opm/simulators/linalg/bda/openclSolverBackend.hpp index 4ea70607a..7c013ba86 100644 --- a/opm/simulators/linalg/bda/openclSolverBackend.hpp +++ b/opm/simulators/linalg/bda/openclSolverBackend.hpp @@ -50,7 +50,6 @@ class openclSolverBackend : public BdaSolver using Base::verbosity; using Base::maxit; using Base::tolerance; - using Base::second; using Base::initialized; typedef BdaSolverStatus::Status Status;