Added timers to ChowPatelIlu

This commit is contained in:
Tong Dong Qiu 2021-03-03 17:10:55 +01:00
parent b87e9dad9a
commit 87e3f1d72d
2 changed files with 37 additions and 4 deletions

View File

@ -202,6 +202,8 @@ namespace bda
} }
#endif #endif
Timer t_total, t_preprocessing;
// Ut is actually BSC format // Ut is actually BSC format
std::unique_ptr<BlockedMatrix<bs> > Ut = std::make_unique<BlockedMatrix<bs> >(Nb, (nnzbs + Nb) / 2); std::unique_ptr<BlockedMatrix<bs> > Ut = std::make_unique<BlockedMatrix<bs> >(Nb, (nnzbs + Nb) / 2);
@ -280,7 +282,14 @@ namespace bda
// Ltmp is only needed for CPU decomposition, GPU creates GPU buffer for Ltmp // Ltmp is only needed for CPU decomposition, GPU creates GPU buffer for Ltmp
double *Utmp = new double[Ut->nnzbs * block_size * block_size]; double *Utmp = new double[Ut->nnzbs * block_size * block_size];
if (verbosity >= 3) {
std::ostringstream out;
out << "BILU0 ChowPatel preprocessing: " << t_preprocessing.stop() << " s";
OpmLog::info(out.str());
}
// actual ILU decomposition // actual ILU decomposition
Timer t_decomposition;
#if CHOW_PATEL_GPU #if CHOW_PATEL_GPU
chowPatelIlu.decomposition(queue, context, chowPatelIlu.decomposition(queue, context,
Ut->rowPointers, Ut->colIndices, Ut->nnzValues, Ut->nnzbs, Ut->rowPointers, Ut->colIndices, Ut->nnzValues, Ut->nnzbs,
@ -402,6 +411,14 @@ namespace bda
delete[] Ltmp; delete[] Ltmp;
#endif #endif
if (verbosity >= 3){
std::ostringstream out;
out << "BILU0 ChowPatel decomposition: " << t_decomposition.stop() << " s";
OpmLog::info(out.str());
}
Timer t_postprocessing;
// convert Ut to BSR // convert Ut to BSR
// diagonal stored separately // diagonal stored separately
std::vector<int> ptr(Nb+1, 0); std::vector<int> ptr(Nb+1, 0);
@ -439,6 +456,15 @@ namespace bda
std::rotate(ptr.begin(), ptr.end() - 1, ptr.end()); std::rotate(ptr.begin(), ptr.end() - 1, ptr.end());
ptr.front() = 0; ptr.front() = 0;
if (verbosity >= 3){
std::ostringstream out;
out << "BILU0 ChowPatel postprocessing: " << t_postprocessing.stop() << " s";
OpmLog::info(out.str());
}
Timer t_copyToGpu;
events.resize(3); events.resize(3);
queue->enqueueWriteBuffer(s.Lvals, CL_FALSE, 0, Lmat->nnzbs * bs * bs * sizeof(double), Lmat->nnzValues, nullptr, &events[0]); queue->enqueueWriteBuffer(s.Lvals, CL_FALSE, 0, Lmat->nnzbs * bs * bs * sizeof(double), Lmat->nnzValues, nullptr, &events[0]);
queue->enqueueWriteBuffer(s.Uvals, CL_FALSE, 0, Umat->nnzbs * bs * bs * sizeof(double), Utmp, nullptr, &events[1]); queue->enqueueWriteBuffer(s.Uvals, CL_FALSE, 0, Umat->nnzbs * bs * bs * sizeof(double), Utmp, nullptr, &events[1]);
@ -470,6 +496,13 @@ namespace bda
OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error"); OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error");
} }
if (verbosity >= 3){
std::ostringstream out;
out << "BILU0 ChowPatel copy to GPU: " << t_copyToGpu.stop() << " s\n";
out << "BILU0 ChowPatel total: " << t_total.stop() << " s";
OpmLog::info(out.str());
}
delete[] Utmp; delete[] Utmp;
#endif // CHOW_PATEL #endif // CHOW_PATEL
} }

View File

@ -529,7 +529,7 @@ void ChowPatelIlu::decomposition(
err |= queue->enqueueWriteBuffer(d_LU_cols, CL_FALSE, 0, sizeof(int) * LU_nnzbs, LU_cols, nullptr, &events[5]); err |= queue->enqueueWriteBuffer(d_LU_cols, CL_FALSE, 0, sizeof(int) * LU_nnzbs, LU_cols, nullptr, &events[5]);
cl::WaitForEvents(events); cl::WaitForEvents(events);
events.clear(); events.clear();
if (verbosity >= 3){ if (verbosity >= 4){
std::ostringstream out; std::ostringstream out;
out << "ChowPatelIlu copy sparsity pattern time: " << t_copy_pattern.stop() << " s"; out << "ChowPatelIlu copy sparsity pattern time: " << t_copy_pattern.stop() << " s";
OpmLog::info(out.str()); OpmLog::info(out.str());
@ -550,7 +550,7 @@ void ChowPatelIlu::decomposition(
err |= queue->enqueueWriteBuffer(d_LU_vals, CL_FALSE, 0, sizeof(double) * LU_nnzbs * block_size * block_size, LU_vals, nullptr, &events[2]); err |= queue->enqueueWriteBuffer(d_LU_vals, CL_FALSE, 0, sizeof(double) * LU_nnzbs * block_size * block_size, LU_vals, nullptr, &events[2]);
cl::WaitForEvents(events); cl::WaitForEvents(events);
events.clear(); events.clear();
if (verbosity >= 3){ if (verbosity >= 4){
std::ostringstream out; std::ostringstream out;
out << "ChowPatelIlu copy1 time: " << t_copy1.stop() << " s"; out << "ChowPatelIlu copy1 time: " << t_copy1.stop() << " s";
OpmLog::info(out.str()); OpmLog::info(out.str());
@ -585,7 +585,7 @@ void ChowPatelIlu::decomposition(
d_Ut_idxs, d_L_cols, d_LU_cols, d_Ut_idxs, d_L_cols, d_LU_cols,
*Larg2, *Uarg2, Nb, cl::Local(lmem_per_work_group), cl::Local(lmem_per_work_group)); *Larg2, *Uarg2, Nb, cl::Local(lmem_per_work_group), cl::Local(lmem_per_work_group));
event.wait(); event.wait();
if (verbosity >= 3){ if (verbosity >= 4){
std::ostringstream out; std::ostringstream out;
out << "ChowPatelIlu sweep kernel time: " << t_kernel.stop() << " s"; out << "ChowPatelIlu sweep kernel time: " << t_kernel.stop() << " s";
OpmLog::info(out.str()); OpmLog::info(out.str());
@ -604,7 +604,7 @@ void ChowPatelIlu::decomposition(
} }
cl::WaitForEvents(events); cl::WaitForEvents(events);
events.clear(); events.clear();
if (verbosity >= 3){ if (verbosity >= 4){
std::ostringstream out; std::ostringstream out;
out << "ChowPatelIlu copy2 time: " << t_copy2.stop() << " s"; out << "ChowPatelIlu copy2 time: " << t_copy2.stop() << " s";
OpmLog::info(out.str()); OpmLog::info(out.str());