Replaced timing function with Dune::Timer

This commit is contained in:
T.D. (Tongdong) Qiu 2020-07-01 14:50:34 +02:00
parent 845563c37b
commit 7f8faa018b
6 changed files with 80 additions and 212 deletions

View File

@ -20,6 +20,7 @@
#include <config.h>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <dune/common/timer.hh>
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
#include <opm/simulators/linalg/bda/BILU0.hpp>
@ -30,10 +31,7 @@ namespace bda
{
using Opm::OpmLog;
// define 'second' as 'BdaSolver<>::second', this allows usage of the second() function for timing
// typedefs cannot handle templates
const auto second = BdaSolver<>::second;
using Dune::Timer;
template <unsigned int block_size>
BILU0<block_size>::BILU0(bool level_scheduling_, bool graph_coloring_, int verbosity_) :
@ -62,7 +60,6 @@ namespace bda
bool BILU0<block_size>::init(BlockedMatrix *mat)
{
const unsigned int bs = block_size;
double t1 = 0.0, t2 = 0.0;
BlockedMatrix *CSCmat = nullptr;
this->N = mat->Nb * block_size;
@ -79,21 +76,16 @@ namespace bda
CSCmat->nnzValues = new double[nnzbs * bs * bs];
CSCmat->colIndices = new int[nnzbs];
CSCmat->rowPointers = new int[Nb + 1];
if(verbosity >= 3){
t1 = second();
}
Timer t_convert;
bcsr_to_bcsc<block_size>(mat->nnzValues, mat->colIndices, mat->rowPointers, CSCmat->nnzValues, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb);
if(verbosity >= 3){
t2 = second();
std::ostringstream out;
out << "BILU0 convert CSR to CSC: " << t2 - t1 << " s";
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s\n";
OpmLog::info(out.str());
}
}
if(verbosity >= 3){
t1 = second();
}
Timer t_analysis;
rMat = allocateBlockedMatrix<block_size>(mat->Nb, mat->nnzbs);
LUMat = soft_copyBlockedMatrix(rMat);
if (level_scheduling) {
@ -105,9 +97,8 @@ namespace bda
return false;
}
if(verbosity >= 3){
t2 = second();
std::ostringstream out;
out << "BILU0 analysis took: " << t2 - t1 << " s, " << numColors << " colors";
out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors";
OpmLog::info(out.str());
}
@ -161,27 +152,21 @@ namespace bda
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
{
const unsigned int bs = block_size;
double t1 = 0.0, t2 = 0.0;
if (verbosity >= 3){
t1 = second();
}
Timer t_reorder;
blocked_reorder_matrix_by_pattern<block_size>(mat, toOrder, fromOrder, rMat);
if (verbosity >= 3){
t2 = second();
std::ostringstream out;
out << "BILU0 reorder matrix: " << t2 - t1 << " s";
out << "BILU0 reorder matrix: " << t_reorder.stop() << " s";
OpmLog::info(out.str());
}
// TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
if (verbosity >= 3){
t1 = second();
}
Timer t_copy;
memcpy(LUMat->nnzValues, rMat->nnzValues, sizeof(double) * bs * bs * rMat->nnzbs);
if (verbosity >= 3){
t2 = second();
std::ostringstream out;
out << "BILU0 memcpy: " << t2 - t1 << " s";
out << "BILU0 memcpy: " << t_copy.stop() << " s";
OpmLog::info(out.str());
}
@ -191,9 +176,8 @@ namespace bda
int LSize = 0;
if (verbosity >= 3){
t1 = second();
}
Timer t_decomposition;
// go through all rows
for (i = 0; i < LUMat->Nb; i++) {
iRowStart = LUMat->rowPointers[i];
@ -272,15 +256,12 @@ namespace bda
URowIndex++;
}
if (verbosity >= 3) {
t2 = second();
std::ostringstream out;
out << "BILU0 decomposition: " << t2 - t1 << " s";
out << "BILU0 decomposition: " << t_decomposition.stop() << " s";
OpmLog::info(out.str());
}
if (verbosity >= 3) {
t1 = second();
}
Timer t_copyToGpu;
if (pattern_uploaded == false) {
queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, LMat->nnzbs * sizeof(int), LMat->colIndices);
queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, UMat->nnzbs * sizeof(int), UMat->colIndices);
@ -292,9 +273,8 @@ namespace bda
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues);
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(double) * bs * bs, invDiagVals);
if (verbosity >= 3) {
t2 = second();
std::ostringstream out;
out << "BILU0 copy to GPU: " << t2 - t1 << " s";
out << "BILU0 copy to GPU: " << t_copyToGpu.stop() << " s";
OpmLog::info(out.str());
}
@ -307,11 +287,8 @@ namespace bda
template <unsigned int block_size>
void BILU0<block_size>::apply(cl::Buffer& x, cl::Buffer& y)
{
double t1 = 0.0, t2 = 0.0;
if (verbosity >= 3) {
t1 = second();
}
cl::Event event;
Timer t_apply;
for(int color = 0; color < numColors; ++color){
event = (*ILU_apply1)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), s.Lvals, s.Lcols, s.Lrows, (unsigned int)Nb, x, y, s.rowsPerColor, color, block_size, cl::Local(lmem_per_work_group));
@ -324,9 +301,8 @@ namespace bda
if (verbosity >= 3) {
event.wait();
t2 = second();
std::ostringstream out;
out << "BILU0 apply: " << t2 - t1 << " s";
out << "BILU0 apply: " << t_apply.stop() << " s";
OpmLog::info(out.str());
}
}

View File

@ -21,8 +21,6 @@
#define OPM_BDASOLVER_BACKEND_HEADER_INCLUDED
#include <sys/time.h>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/BdaSolverStatus.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
@ -35,8 +33,7 @@ namespace bda
/// This class serves to simplify choosing between different backend solvers, such as cusparseSolver and openclSolver
/// This class is abstract: no instances of it can be created, only of its children
/// Without a default block_size value, the BILU0 class cannot use BdaSolver::second()
template <unsigned int block_size = 3>
template <unsigned int block_size>
class BdaSolver
{
@ -74,13 +71,6 @@ namespace bda
virtual void get_result(double *x) = 0;
/// Timing helper that the different BdaSolver implementations can share.
/// \return the current gettimeofday() reading in seconds, with the microsecond part as the fractional digits
static double second(void) {
    struct timeval now;
    gettimeofday(&now, nullptr);
    // Combine whole seconds and microseconds into one floating-point second count.
    const double whole_seconds = static_cast<double>(now.tv_sec);
    const double micro_fraction = static_cast<double>(now.tv_usec) / 1000000.0;
    return whole_seconds + micro_fraction;
}
}; // end class BdaSolver
} // end namespace bda

View File

@ -23,6 +23,7 @@
#include <sstream>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <dune/common/timer.hh>
#include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
@ -40,6 +41,7 @@ namespace bda
{
using Opm::OpmLog;
using Dune::Timer;
const cusparseSolvePolicy_t policy = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
const cusparseOperation_t operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
@ -56,7 +58,7 @@ cusparseSolverBackend<block_size>::~cusparseSolverBackend() {
template <unsigned int block_size>
void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
double t_total1, t_total2;
Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
int n = N;
double rho = 1.0, rhop;
double alpha, nalpha, beta;
@ -67,8 +69,6 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
double mone = -1.0;
float it;
t_total1 = second();
if (wellContribs.getNumWells() > 0) {
wellContribs.setCudaStream(stream);
}
@ -169,12 +169,10 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
}
}
t_total2 = second();
res.iterations = std::min(it, (float)maxit);
res.reduction = norm / norm_0;
res.conv_rate = static_cast<double>(pow(res.reduction, 1.0 / it));
res.elapsed = t_total2 - t_total1;
res.elapsed = t_total.stop();
res.converged = (it != (maxit + 0.5));
if (verbosity > 0) {
@ -284,11 +282,7 @@ void cusparseSolverBackend<block_size>::finalize() {
template <unsigned int block_size>
void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b) {
double t1, t2;
if (verbosity > 2) {
t1 = second();
}
Timer t;
#if COPY_ROW_BY_ROW
int sum = 0;
@ -309,9 +303,8 @@ void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *ro
if (verbosity > 2) {
cudaStreamSynchronize(stream);
t2 = second();
std::ostringstream out;
out << "cusparseSolver::copy_system_to_gpu(): " << t2 - t1 << " s";
out << "cusparseSolver::copy_system_to_gpu(): " << t.stop() << " s";
OpmLog::info(out.str());
}
} // end copy_system_to_gpu()
@ -320,11 +313,7 @@ void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *ro
// don't copy rowpointers and colindices, they stay the same
template <unsigned int block_size>
void cusparseSolverBackend<block_size>::update_system_on_gpu(double *vals, int *rows, double *b) {
double t1, t2;
if (verbosity > 2) {
t1 = second();
}
Timer t;
#if COPY_ROW_BY_ROW
int sum = 0;
@ -343,9 +332,8 @@ void cusparseSolverBackend<block_size>::update_system_on_gpu(double *vals, int *
if (verbosity > 2) {
cudaStreamSynchronize(stream);
t2 = second();
std::ostringstream out;
out << "cusparseSolver::update_system_on_gpu(): " << t2 - t1 << " s";
out << "cusparseSolver::update_system_on_gpu(): " << t.stop() << " s";
OpmLog::info(out.str());
}
} // end update_system_on_gpu()
@ -361,11 +349,7 @@ template <unsigned int block_size>
bool cusparseSolverBackend<block_size>::analyse_matrix() {
int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
double t1, t2;
if (verbosity > 2) {
t1 = second();
}
Timer t;
cusparseCreateMatDescr(&descr_B);
cusparseCreateMatDescr(&descr_M);
@ -428,9 +412,8 @@ bool cusparseSolverBackend<block_size>::analyse_matrix() {
if (verbosity > 2) {
cudaStreamSynchronize(stream);
t2 = second();
std::ostringstream out;
out << "cusparseSolver::analyse_matrix(): " << t2 - t1 << " s";
out << "cusparseSolver::analyse_matrix(): " << t.stop() << " s";
OpmLog::info(out.str());
}
@ -441,11 +424,7 @@ bool cusparseSolverBackend<block_size>::analyse_matrix() {
template <unsigned int block_size>
bool cusparseSolverBackend<block_size>::create_preconditioner() {
double t1, t2;
if (verbosity > 2) {
t1 = second();
}
Timer t;
d_mCols = d_bCols;
d_mRows = d_bRows;
@ -463,9 +442,8 @@ bool cusparseSolverBackend<block_size>::create_preconditioner() {
if (verbosity > 2) {
cudaStreamSynchronize(stream);
t2 = second();
std::ostringstream out;
out << "cusparseSolver::create_preconditioner(): " << t2 - t1 << " s";
out << "cusparseSolver::create_preconditioner(): " << t.stop() << " s";
OpmLog::info(out.str());
}
return true;
@ -485,19 +463,14 @@ void cusparseSolverBackend<block_size>::solve_system(WellContributions& wellCont
/// Copy the solution vector from the device buffer d_x into the host array x.
/// The copy is enqueued on `stream` and the stream is synchronized before returning.
/// At verbosity > 2 the elapsed time is logged via OpmLog.
/// \param[out] x  host array receiving N doubles; the caller must be sure that x is a valid array
template <unsigned int block_size>
void cusparseSolverBackend<block_size>::get_result(double *x) {
    // A single Dune::Timer replaces the former second()-based t1/t2 pair, so the
    // timing message is emitted exactly once.
    Timer t;

    cudaMemcpyAsync(x, d_x, N * sizeof(double), cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);

    if (verbosity > 2) {
        std::ostringstream out;
        out << "cusparseSolver::get_result(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
} // end get_result()

View File

@ -44,7 +44,6 @@ class cusparseSolverBackend : public BdaSolver<block_size> {
using Base::verbosity;
using Base::maxit;
using Base::tolerance;
using Base::second;
using Base::initialized;
typedef BdaSolverStatus::Status Status;

View File

@ -23,6 +23,7 @@
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <dune/common/timer.hh>
#define __CL_ENABLE_EXCEPTIONS
@ -48,6 +49,7 @@ namespace bda
{
using Opm::OpmLog;
using Dune::Timer;
template <unsigned int block_size>
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_) {
@ -72,14 +74,11 @@ unsigned int openclSolverBackend<block_size>::ceilDivision(const unsigned int A,
template <unsigned int block_size>
double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out)
{
double t1 = 0.0, t2 = 0.0;
const unsigned int work_group_size = 1024;
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
const unsigned int total_work_items = num_work_groups * work_group_size;
const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
if (verbosity >= 4) {
t1 = second();
}
Timer t_dot;
cl::Event event = (*dot_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in1, in2, out, N, cl::Local(lmem_per_work_group));
@ -92,9 +91,8 @@ double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl
if (verbosity >= 4) {
event.wait();
t2 = second();
std::ostringstream oss;
oss << "openclSolver dot_w time: " << t2 - t1;
oss << std::scientific << "openclSolver dot_w time: " << t_dot.stop() << " s";
OpmLog::info(oss.str());
}
@ -104,14 +102,11 @@ double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl
template <unsigned int block_size>
double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
{
double t1 = 0.0, t2 = 0.0;
const unsigned int work_group_size = 1024;
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
const unsigned int total_work_items = num_work_groups * work_group_size;
const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
if (verbosity >= 4) {
t1 = second();
}
Timer t_norm;
cl::Event event = (*norm_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in, out, N, cl::Local(lmem_per_work_group));
@ -125,9 +120,8 @@ double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
if (verbosity >= 4) {
event.wait();
t2 = second();
std::ostringstream oss;
oss << "openclSolver norm_w time: " << t2 - t1;
oss << std::scientific << "openclSolver norm_w time: " << t_norm.stop() << " s";
OpmLog::info(oss.str());
}
@ -137,21 +131,17 @@ double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
/// Enqueue the axpy kernel on the OpenCL queue.
/// Going by the call-site comments in this file, the effect is out = out + a * in.
/// At verbosity >= 4 the kernel event is waited on and the elapsed time is logged.
/// \param[in]    in   buffer passed to the kernel as its first argument
/// \param[in]    a    scalar factor passed to the kernel
/// \param[inout] out  buffer passed to the kernel as its third argument
template <unsigned int block_size>
void openclSolverBackend<block_size>::axpy_w(cl::Buffer in, const double a, cl::Buffer out)
{
    const unsigned int work_group_size = 32;
    const unsigned int num_work_groups = ceilDivision(N, work_group_size);
    const unsigned int total_work_items = num_work_groups * work_group_size;
    // Only Dune::Timer is used for timing; the former second()-based t1/t2 pair
    // caused the message to be streamed twice into the same log line.
    Timer t_axpy;

    cl::Event event = (*axpy_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in, a, out, N);

    if (verbosity >= 4) {
        // Wait on the kernel's event before reading the timer.
        event.wait();
        std::ostringstream oss;
        oss << std::scientific << "openclSolver axpy_w time: " << t_axpy.stop() << " s";
        OpmLog::info(oss.str());
    }
}
@ -159,21 +149,17 @@ void openclSolverBackend<block_size>::axpy_w(cl::Buffer in, const double a, cl::
/// Enqueue the `custom` kernel on the OpenCL queue with arguments (p, v, r, omega, beta, N).
/// At verbosity >= 4 the kernel event is waited on and the elapsed time is logged.
/// \param p      buffer forwarded to the kernel as its first argument
/// \param v      buffer forwarded to the kernel as its second argument
/// \param r      buffer forwarded to the kernel as its third argument
/// \param omega  scalar forwarded to the kernel
/// \param beta   scalar forwarded to the kernel
template <unsigned int block_size>
void openclSolverBackend<block_size>::custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta)
{
    const unsigned int work_group_size = 32;
    const unsigned int num_work_groups = ceilDivision(N, work_group_size);
    const unsigned int total_work_items = num_work_groups * work_group_size;
    // Only Dune::Timer is used for timing; the former second()-based t1/t2 pair
    // caused the message to be streamed twice into the same log line.
    Timer t_custom;

    cl::Event event = (*custom_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), p, v, r, omega, beta, N);

    if (verbosity >= 4) {
        // Wait on the kernel's event before reading the timer.
        event.wait();
        std::ostringstream oss;
        oss << std::scientific << "openclSolver custom_w time: " << t_custom.stop() << " s";
        OpmLog::info(oss.str());
    }
}
@ -181,22 +167,18 @@ void openclSolverBackend<block_size>::custom_w(cl::Buffer p, cl::Buffer v, cl::B
/// Enqueue the blocked sparse matrix-vector kernel on the OpenCL queue.
/// Going by the call-site comments in this file, the effect is b = A * x,
/// where A is described by (vals, cols, rows).
/// At verbosity >= 4 the kernel event is waited on and the elapsed time is logged.
/// \param[in]  vals  buffer with the matrix values
/// \param[in]  cols  buffer with the column indices
/// \param[in]  rows  buffer with the row pointers
/// \param[in]  x     input vector buffer
/// \param[out] b     output vector buffer
template <unsigned int block_size>
void openclSolverBackend<block_size>::spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b)
{
    const unsigned int work_group_size = 32;
    const unsigned int num_work_groups = ceilDivision(N, work_group_size);
    const unsigned int total_work_items = num_work_groups * work_group_size;
    // One double of local memory per work item in a work group.
    const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
    // Only Dune::Timer is used for timing; the former second()-based t1/t2 pair
    // caused the message to be streamed twice into the same log line.
    Timer t_spmv;

    cl::Event event = (*spmv_blocked_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), vals, cols, rows, Nb, x, b, block_size, cl::Local(lmem_per_work_group));

    if (verbosity >= 4) {
        // Wait on the kernel's event before reading the timer.
        event.wait();
        std::ostringstream oss;
        oss << std::scientific << "openclSolver spmv_blocked_w time: " << t_spmv.stop() << " s";
        OpmLog::info(oss.str());
    }
}
@ -209,9 +191,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
double rho, rhop, beta, alpha, omega, tmp1, tmp2;
double norm, norm_0;
double t_total1, t_total2, t1 = 0.0, t2 = 0.0;
double prec_time = 0.0, spmv_time = 0.0, well_time = 0.0, rest_time = 0.0;
t_total1 = second();
Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
wellContribs.setOpenCLQueue(queue.get());
wellContribs.setReordering(toOrder, true);
@ -245,7 +225,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
OpmLog::info(out.str());
}
t1 = second();
t_rest.start();
for (it = 0.5; it < maxit; it += 0.5) {
rhop = rho;
rho = dot_w(d_rw, d_r, d_tmp);
@ -254,37 +234,32 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
beta = (rho / rhop) * (alpha / omega);
custom_w(d_p, d_v, d_r, omega, beta);
}
t2 = second();
rest_time += t2 - t1;
t_rest.stop();
// pw = prec(p)
t1 = second();
t_prec.start();
prec->apply(d_p, d_pw);
t2 = second();
prec_time += t2 - t1;
t_prec.stop();
// v = A * pw
t1 = second();
t_spmv.start();
spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_pw, d_v);
t2 = second();
spmv_time += t2 - t1;
t_spmv.stop();
// apply wellContributions
if (wellContribs.getNumWells() > 0) {
t1 = second();
t_well.start();
wellContribs.apply(d_pw, d_v);
t2 = second();
well_time += t2 - t1;
t_well.stop();
}
t1 = second();
t_rest.start();
tmp1 = dot_w(d_rw, d_v, d_tmp);
alpha = rho / tmp1;
axpy_w(d_v, -alpha, d_r); // r = r - alpha * v
axpy_w(d_pw, alpha, d_x); // x = x + alpha * pw
norm = norm_w(d_r, d_tmp);
t2 = second();
rest_time += t2 - t1;
t_rest.stop();
if (norm < tolerance * norm_0) {
break;
@ -293,34 +268,30 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
it += 0.5;
// s = prec(r)
t1 = second();
t_prec.start();
prec->apply(d_r, d_s);
t2 = second();
prec_time += t2 - t1;
t_prec.stop();
// t = A * s
t1 = second();
t_spmv.start();
spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_s, d_t);
t2 = second();
spmv_time += t2 - t1;
t_spmv.stop();
// apply wellContributions
if (wellContribs.getNumWells() > 0) {
t1 = second();
t_well.start();
wellContribs.apply(d_s, d_t);
t2 = second();
well_time += t2 - t1;
t_well.stop();
}
t1 = second();
t_rest.start();
tmp1 = dot_w(d_t, d_r, d_tmp);
tmp2 = dot_w(d_t, d_t, d_tmp);
omega = tmp1 / tmp2;
axpy_w(d_s, omega, d_x); // x = x + omega * s
axpy_w(d_t, -omega, d_r); // r = r - omega * t
norm = norm_w(d_r, d_tmp);
t2 = second();
rest_time += t2 - t1;
t_rest.stop();
if (norm < tolerance * norm_0) {
break;
@ -333,14 +304,10 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
}
}
t2 = second();
t_total2 = second();
rest_time += t2 - t1;
res.iterations = std::min(it, (float)maxit);
res.reduction = norm / norm_0;
res.conv_rate = static_cast<double>(pow(res.reduction, 1.0 / it));
res.elapsed = t_total2 - t_total1;
res.elapsed = t_total.stop();
res.converged = (it != (maxit + 0.5));
if (verbosity > 0) {
@ -556,12 +523,7 @@ void openclSolverBackend<block_size>::finalize() {
template <unsigned int block_size>
void openclSolverBackend<block_size>::copy_system_to_gpu() {
double t1 = 0.0, t2 = 0.0;
if (verbosity > 2) {
t1 = second();
}
Timer t;
cl::Event event;
#if COPY_ROW_BY_ROW
@ -583,9 +545,8 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
event.wait();
if (verbosity > 2) {
t2 = second();
std::ostringstream out;
out << "openclSolver::copy_system_to_gpu(): " << t2 - t1 << " s";
out << "openclSolver::copy_system_to_gpu(): " << t.stop() << " s";
OpmLog::info(out.str());
}
} // end copy_system_to_gpu()
@ -594,12 +555,7 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
// don't copy rowpointers and colindices, they stay the same
template <unsigned int block_size>
void openclSolverBackend<block_size>::update_system_on_gpu() {
double t1 = 0.0, t2 = 0.0;
if (verbosity > 2) {
t1 = second();
}
Timer t;
cl::Event event;
#if COPY_ROW_BY_ROW
@ -619,9 +575,8 @@ void openclSolverBackend<block_size>::update_system_on_gpu() {
event.wait();
if (verbosity > 2) {
t2 = second();
std::ostringstream out;
out << "openclSolver::update_system_on_gpu(): " << t2 - t1 << " s";
out << "openclSolver::update_system_on_gpu(): " << t.stop() << " s";
OpmLog::info(out.str());
}
} // end update_system_on_gpu()
@ -629,12 +584,7 @@ void openclSolverBackend<block_size>::update_system_on_gpu() {
template <unsigned int block_size>
bool openclSolverBackend<block_size>::analyse_matrix() {
double t1 = 0.0, t2 = 0.0;
if (verbosity > 2) {
t1 = second();
}
Timer t;
bool success = prec->init(mat);
int work_group_size = 32;
@ -648,9 +598,8 @@ bool openclSolverBackend<block_size>::analyse_matrix() {
rmat = prec->getRMat();
if (verbosity > 2) {
t2 = second();
std::ostringstream out;
out << "openclSolver::analyse_matrix(): " << t2 - t1 << " s";
out << "openclSolver::analyse_matrix(): " << t.stop() << " s";
OpmLog::info(out.str());
}
@ -662,18 +611,14 @@ bool openclSolverBackend<block_size>::analyse_matrix() {
/// Point the host-side matrix at a new value array and reorder the right-hand side.
/// `mat->nnzValues` is set to `vals` (the pointer is stored, the values are not copied),
/// and `b` is reordered through `fromOrder` into `rb`.
/// At verbosity > 2 the elapsed time is logged via OpmLog.
/// \param[in] vals  new nonzero values of the matrix
/// \param[in] b     right-hand side vector to reorder
template <unsigned int block_size>
void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
    // A single Dune::Timer replaces the former second()-based t1/t2 pair, so the
    // timing message is emitted exactly once.
    Timer t;

    mat->nnzValues = vals;
    blocked_reorder_vector_by_pattern<block_size>(mat->Nb, b, fromOrder, rb);

    if (verbosity > 2) {
        std::ostringstream out;
        out << "openclSolver::update_system(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
} // end update_system()
@ -681,18 +626,13 @@ void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
template <unsigned int block_size>
bool openclSolverBackend<block_size>::create_preconditioner() {
double t1 = 0.0, t2 = 0.0;
if (verbosity > 2) {
t1 = second();
}
Timer t;
bool result = prec->create_preconditioner(mat);
if (verbosity > 2) {
t2 = second();
std::ostringstream out;
out << "openclSolver::create_preconditioner(): " << t2 - t1 << " s";
out << "openclSolver::create_preconditioner(): " << t.stop() << " s";
OpmLog::info(out.str());
}
return result;
@ -701,18 +641,14 @@ bool openclSolverBackend<block_size>::create_preconditioner() {
template <unsigned int block_size>
void openclSolverBackend<block_size>::solve_system(WellContributions& wellContribs, BdaResult &res) {
// actually solve
double t1 = 0.0, t2 = 0.0;
if (verbosity > 2) {
t1 = second();
}
Timer t;
// actually solve
gpu_pbicgstab(wellContribs, res);
if (verbosity > 2) {
t2 = second();
std::ostringstream out;
out << "openclSolver::solve_system(): " << t2 - t1 << " s";
out << "openclSolver::solve_system(): " << t.stop() << " s";
OpmLog::info(out.str());
}
@ -723,19 +659,14 @@ void openclSolverBackend<block_size>::solve_system(WellContributions& wellContri
/// Read the solution back from the device and undo the reordering.
/// The device buffer d_x is read into `rb` with a blocking read (CL_TRUE),
/// then `rb` is reordered through `toOrder` into `x`.
/// At verbosity > 2 the elapsed time is logged via OpmLog.
/// \param[out] x  host array receiving the result; the caller must be sure that x is a valid array
template <unsigned int block_size>
void openclSolverBackend<block_size>::get_result(double *x) {
    // A single Dune::Timer replaces the former second()-based t1/t2 pair, so the
    // timing message is emitted exactly once.
    Timer t;

    queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, rb);
    blocked_reorder_vector_by_pattern<block_size>(mat->Nb, rb, toOrder, x);

    if (verbosity > 2) {
        std::ostringstream out;
        out << "openclSolver::get_result(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
} // end get_result()

View File

@ -50,7 +50,6 @@ class openclSolverBackend : public BdaSolver<block_size>
using Base::verbosity;
using Base::maxit;
using Base::tolerance;
using Base::second;
using Base::initialized;
typedef BdaSolverStatus::Status Status;