diff --git a/opm/simulators/linalg/bda/BILU0.cpp b/opm/simulators/linalg/bda/BILU0.cpp index 6dadf7625..6dc037468 100644 --- a/opm/simulators/linalg/bda/BILU0.cpp +++ b/opm/simulators/linalg/bda/BILU0.cpp @@ -202,6 +202,8 @@ namespace bda } #endif + Timer t_total, t_preprocessing; + // Ut is actually BSC format std::unique_ptr > Ut = std::make_unique >(Nb, (nnzbs + Nb) / 2); @@ -280,7 +282,14 @@ namespace bda // Ltmp is only needed for CPU decomposition, GPU creates GPU buffer for Ltmp double *Utmp = new double[Ut->nnzbs * block_size * block_size]; + if (verbosity >= 3) { + std::ostringstream out; + out << "BILU0 ChowPatel preprocessing: " << t_preprocessing.stop() << " s"; + OpmLog::info(out.str()); + } + // actual ILU decomposition + Timer t_decomposition; #if CHOW_PATEL_GPU chowPatelIlu.decomposition(queue, context, Ut->rowPointers, Ut->colIndices, Ut->nnzValues, Ut->nnzbs, @@ -402,6 +411,14 @@ namespace bda delete[] Ltmp; #endif + if (verbosity >= 3){ + std::ostringstream out; + out << "BILU0 ChowPatel decomposition: " << t_decomposition.stop() << " s"; + OpmLog::info(out.str()); + } + + Timer t_postprocessing; + // convert Ut to BSR // diagonal stored separately std::vector ptr(Nb+1, 0); @@ -439,6 +456,15 @@ namespace bda std::rotate(ptr.begin(), ptr.end() - 1, ptr.end()); ptr.front() = 0; + + if (verbosity >= 3){ + std::ostringstream out; + out << "BILU0 ChowPatel postprocessing: " << t_postprocessing.stop() << " s"; + OpmLog::info(out.str()); + } + + Timer t_copyToGpu; + events.resize(3); queue->enqueueWriteBuffer(s.Lvals, CL_FALSE, 0, Lmat->nnzbs * bs * bs * sizeof(double), Lmat->nnzValues, nullptr, &events[0]); queue->enqueueWriteBuffer(s.Uvals, CL_FALSE, 0, Umat->nnzbs * bs * bs * sizeof(double), Utmp, nullptr, &events[1]); @@ -470,6 +496,13 @@ namespace bda OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error"); } + if (verbosity >= 3){ + std::ostringstream out; + out << "BILU0 ChowPatel copy to GPU: " << t_copyToGpu.stop() << " s\n"; + out << "BILU0 ChowPatel total: " << t_total.stop() << " s"; + OpmLog::info(out.str()); + } + delete[] Utmp; #endif // CHOW_PATEL } diff --git a/opm/simulators/linalg/bda/ChowPatelIlu.cpp b/opm/simulators/linalg/bda/ChowPatelIlu.cpp index 894b3f05c..b1ac6448b 100644 --- a/opm/simulators/linalg/bda/ChowPatelIlu.cpp +++ b/opm/simulators/linalg/bda/ChowPatelIlu.cpp @@ -529,7 +529,7 @@ void ChowPatelIlu::decomposition( err |= queue->enqueueWriteBuffer(d_LU_cols, CL_FALSE, 0, sizeof(int) * LU_nnzbs, LU_cols, nullptr, &events[5]); cl::WaitForEvents(events); events.clear(); - if (verbosity >= 3){ + if (verbosity >= 4){ std::ostringstream out; out << "ChowPatelIlu copy sparsity pattern time: " << t_copy_pattern.stop() << " s"; OpmLog::info(out.str()); @@ -550,7 +550,7 @@ void ChowPatelIlu::decomposition( err |= queue->enqueueWriteBuffer(d_LU_vals, CL_FALSE, 0, sizeof(double) * LU_nnzbs * block_size * block_size, LU_vals, nullptr, &events[2]); cl::WaitForEvents(events); events.clear(); - if (verbosity >= 3){ + if (verbosity >= 4){ std::ostringstream out; out << "ChowPatelIlu copy1 time: " << t_copy1.stop() << " s"; OpmLog::info(out.str()); @@ -585,7 +585,7 @@ void ChowPatelIlu::decomposition( d_Ut_idxs, d_L_cols, d_LU_cols, *Larg2, *Uarg2, Nb, cl::Local(lmem_per_work_group), cl::Local(lmem_per_work_group)); event.wait(); - if (verbosity >= 3){ + if (verbosity >= 4){ std::ostringstream out; out << "ChowPatelIlu sweep kernel time: " << t_kernel.stop() << " s"; OpmLog::info(out.str()); @@ -604,7 +604,7 @@ void ChowPatelIlu::decomposition( } cl::WaitForEvents(events); events.clear(); - if (verbosity >= 3){ + if (verbosity >= 4){ std::ostringstream out; out << "ChowPatelIlu copy2 time: " << t_copy2.stop() << " s"; OpmLog::info(out.str());