mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Replaced timing function with Dune::Timer
This commit is contained in:
parent
845563c37b
commit
7f8faa018b
@ -20,6 +20,7 @@
|
|||||||
#include <config.h>
|
#include <config.h>
|
||||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||||
#include <opm/common/ErrorMacros.hpp>
|
#include <opm/common/ErrorMacros.hpp>
|
||||||
|
#include <dune/common/timer.hh>
|
||||||
|
|
||||||
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
|
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
|
||||||
#include <opm/simulators/linalg/bda/BILU0.hpp>
|
#include <opm/simulators/linalg/bda/BILU0.hpp>
|
||||||
@ -30,10 +31,7 @@ namespace bda
|
|||||||
{
|
{
|
||||||
|
|
||||||
using Opm::OpmLog;
|
using Opm::OpmLog;
|
||||||
|
using Dune::Timer;
|
||||||
// define 'second' as 'BdaSolver<>::second', this allows usage of the second() function for timing
|
|
||||||
// typedefs cannot handle templates
|
|
||||||
const auto second = BdaSolver<>::second;
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
BILU0<block_size>::BILU0(bool level_scheduling_, bool graph_coloring_, int verbosity_) :
|
BILU0<block_size>::BILU0(bool level_scheduling_, bool graph_coloring_, int verbosity_) :
|
||||||
@ -62,7 +60,6 @@ namespace bda
|
|||||||
bool BILU0<block_size>::init(BlockedMatrix *mat)
|
bool BILU0<block_size>::init(BlockedMatrix *mat)
|
||||||
{
|
{
|
||||||
const unsigned int bs = block_size;
|
const unsigned int bs = block_size;
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
BlockedMatrix *CSCmat = nullptr;
|
BlockedMatrix *CSCmat = nullptr;
|
||||||
|
|
||||||
this->N = mat->Nb * block_size;
|
this->N = mat->Nb * block_size;
|
||||||
@ -79,21 +76,16 @@ namespace bda
|
|||||||
CSCmat->nnzValues = new double[nnzbs * bs * bs];
|
CSCmat->nnzValues = new double[nnzbs * bs * bs];
|
||||||
CSCmat->colIndices = new int[nnzbs];
|
CSCmat->colIndices = new int[nnzbs];
|
||||||
CSCmat->rowPointers = new int[Nb + 1];
|
CSCmat->rowPointers = new int[Nb + 1];
|
||||||
if(verbosity >= 3){
|
Timer t_convert;
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
bcsr_to_bcsc<block_size>(mat->nnzValues, mat->colIndices, mat->rowPointers, CSCmat->nnzValues, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb);
|
bcsr_to_bcsc<block_size>(mat->nnzValues, mat->colIndices, mat->rowPointers, CSCmat->nnzValues, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb);
|
||||||
if(verbosity >= 3){
|
if(verbosity >= 3){
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "BILU0 convert CSR to CSC: " << t2 - t1 << " s";
|
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s\n";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(verbosity >= 3){
|
Timer t_analysis;
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
rMat = allocateBlockedMatrix<block_size>(mat->Nb, mat->nnzbs);
|
rMat = allocateBlockedMatrix<block_size>(mat->Nb, mat->nnzbs);
|
||||||
LUMat = soft_copyBlockedMatrix(rMat);
|
LUMat = soft_copyBlockedMatrix(rMat);
|
||||||
if (level_scheduling) {
|
if (level_scheduling) {
|
||||||
@ -105,9 +97,8 @@ namespace bda
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if(verbosity >= 3){
|
if(verbosity >= 3){
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "BILU0 analysis took: " << t2 - t1 << " s, " << numColors << " colors";
|
out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -161,27 +152,21 @@ namespace bda
|
|||||||
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
|
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
|
||||||
{
|
{
|
||||||
const unsigned int bs = block_size;
|
const unsigned int bs = block_size;
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
if (verbosity >= 3){
|
Timer t_reorder;
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
blocked_reorder_matrix_by_pattern<block_size>(mat, toOrder, fromOrder, rMat);
|
blocked_reorder_matrix_by_pattern<block_size>(mat, toOrder, fromOrder, rMat);
|
||||||
if (verbosity >= 3){
|
if (verbosity >= 3){
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "BILU0 reorder matrix: " << t2 - t1 << " s";
|
out << "BILU0 reorder matrix: " << t_reorder.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
|
// TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
|
||||||
if (verbosity >= 3){
|
Timer t_copy;
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
memcpy(LUMat->nnzValues, rMat->nnzValues, sizeof(double) * bs * bs * rMat->nnzbs);
|
memcpy(LUMat->nnzValues, rMat->nnzValues, sizeof(double) * bs * bs * rMat->nnzbs);
|
||||||
if (verbosity >= 3){
|
if (verbosity >= 3){
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "BILU0 memcpy: " << t2 - t1 << " s";
|
out << "BILU0 memcpy: " << t_copy.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -191,9 +176,8 @@ namespace bda
|
|||||||
|
|
||||||
int LSize = 0;
|
int LSize = 0;
|
||||||
|
|
||||||
if (verbosity >= 3){
|
Timer t_decomposition;
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
// go through all rows
|
// go through all rows
|
||||||
for (i = 0; i < LUMat->Nb; i++) {
|
for (i = 0; i < LUMat->Nb; i++) {
|
||||||
iRowStart = LUMat->rowPointers[i];
|
iRowStart = LUMat->rowPointers[i];
|
||||||
@ -272,15 +256,12 @@ namespace bda
|
|||||||
URowIndex++;
|
URowIndex++;
|
||||||
}
|
}
|
||||||
if (verbosity >= 3) {
|
if (verbosity >= 3) {
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "BILU0 decomposition: " << t2 - t1 << " s";
|
out << "BILU0 decomposition: " << t_decomposition.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (verbosity >= 3) {
|
Timer t_copyToGpu;
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
if (pattern_uploaded == false) {
|
if (pattern_uploaded == false) {
|
||||||
queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, LMat->nnzbs * sizeof(int), LMat->colIndices);
|
queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, LMat->nnzbs * sizeof(int), LMat->colIndices);
|
||||||
queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, UMat->nnzbs * sizeof(int), UMat->colIndices);
|
queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, UMat->nnzbs * sizeof(int), UMat->colIndices);
|
||||||
@ -292,9 +273,8 @@ namespace bda
|
|||||||
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues);
|
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues);
|
||||||
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(double) * bs * bs, invDiagVals);
|
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(double) * bs * bs, invDiagVals);
|
||||||
if (verbosity >= 3) {
|
if (verbosity >= 3) {
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "BILU0 copy to GPU: " << t2 - t1 << " s";
|
out << "BILU0 copy to GPU: " << t_copyToGpu.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -307,11 +287,8 @@ namespace bda
|
|||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void BILU0<block_size>::apply(cl::Buffer& x, cl::Buffer& y)
|
void BILU0<block_size>::apply(cl::Buffer& x, cl::Buffer& y)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
if (verbosity >= 3) {
|
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
cl::Event event;
|
cl::Event event;
|
||||||
|
Timer t_apply;
|
||||||
|
|
||||||
for(int color = 0; color < numColors; ++color){
|
for(int color = 0; color < numColors; ++color){
|
||||||
event = (*ILU_apply1)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), s.Lvals, s.Lcols, s.Lrows, (unsigned int)Nb, x, y, s.rowsPerColor, color, block_size, cl::Local(lmem_per_work_group));
|
event = (*ILU_apply1)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), s.Lvals, s.Lcols, s.Lrows, (unsigned int)Nb, x, y, s.rowsPerColor, color, block_size, cl::Local(lmem_per_work_group));
|
||||||
@ -324,9 +301,8 @@ namespace bda
|
|||||||
|
|
||||||
if (verbosity >= 3) {
|
if (verbosity >= 3) {
|
||||||
event.wait();
|
event.wait();
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "BILU0 apply: " << t2 - t1 << " s";
|
out << "BILU0 apply: " << t_apply.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -21,8 +21,6 @@
|
|||||||
#define OPM_BDASOLVER_BACKEND_HEADER_INCLUDED
|
#define OPM_BDASOLVER_BACKEND_HEADER_INCLUDED
|
||||||
|
|
||||||
|
|
||||||
#include <sys/time.h>
|
|
||||||
|
|
||||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||||
#include <opm/simulators/linalg/bda/BdaSolverStatus.hpp>
|
#include <opm/simulators/linalg/bda/BdaSolverStatus.hpp>
|
||||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||||
@ -35,8 +33,7 @@ namespace bda
|
|||||||
|
|
||||||
/// This class serves to simplify choosing between different backend solvers, such as cusparseSolver and openclSolver
|
/// This class serves to simplify choosing between different backend solvers, such as cusparseSolver and openclSolver
|
||||||
/// This class is abstract, no instantiations of it can be made, only of its children
|
/// This class is abstract, no instantiations of it can be made, only of its children
|
||||||
/// Without a default block_size value, the BILU0 class cannot use BdaSolver::second()
|
template <unsigned int block_size>
|
||||||
template <unsigned int block_size = 3>
|
|
||||||
class BdaSolver
|
class BdaSolver
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -74,13 +71,6 @@ namespace bda
|
|||||||
|
|
||||||
virtual void get_result(double *x) = 0;
|
virtual void get_result(double *x) = 0;
|
||||||
|
|
||||||
/// Different implementations of BdaSolver can use this function for timing
|
|
||||||
static double second(void) {
|
|
||||||
struct timeval tv;
|
|
||||||
gettimeofday(&tv, nullptr);
|
|
||||||
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
}; // end class BdaSolver
|
}; // end class BdaSolver
|
||||||
|
|
||||||
} // end namespace bda
|
} // end namespace bda
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||||
|
#include <dune/common/timer.hh>
|
||||||
|
|
||||||
#include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
|
#include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
|
||||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||||
@ -40,6 +41,7 @@ namespace bda
|
|||||||
{
|
{
|
||||||
|
|
||||||
using Opm::OpmLog;
|
using Opm::OpmLog;
|
||||||
|
using Dune::Timer;
|
||||||
|
|
||||||
const cusparseSolvePolicy_t policy = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
|
const cusparseSolvePolicy_t policy = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
|
||||||
const cusparseOperation_t operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
|
const cusparseOperation_t operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
|
||||||
@ -56,7 +58,7 @@ cusparseSolverBackend<block_size>::~cusparseSolverBackend() {
|
|||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
|
void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
|
||||||
double t_total1, t_total2;
|
Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
|
||||||
int n = N;
|
int n = N;
|
||||||
double rho = 1.0, rhop;
|
double rho = 1.0, rhop;
|
||||||
double alpha, nalpha, beta;
|
double alpha, nalpha, beta;
|
||||||
@ -67,8 +69,6 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
|
|||||||
double mone = -1.0;
|
double mone = -1.0;
|
||||||
float it;
|
float it;
|
||||||
|
|
||||||
t_total1 = second();
|
|
||||||
|
|
||||||
if (wellContribs.getNumWells() > 0) {
|
if (wellContribs.getNumWells() > 0) {
|
||||||
wellContribs.setCudaStream(stream);
|
wellContribs.setCudaStream(stream);
|
||||||
}
|
}
|
||||||
@ -169,12 +169,10 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
t_total2 = second();
|
|
||||||
|
|
||||||
res.iterations = std::min(it, (float)maxit);
|
res.iterations = std::min(it, (float)maxit);
|
||||||
res.reduction = norm / norm_0;
|
res.reduction = norm / norm_0;
|
||||||
res.conv_rate = static_cast<double>(pow(res.reduction, 1.0 / it));
|
res.conv_rate = static_cast<double>(pow(res.reduction, 1.0 / it));
|
||||||
res.elapsed = t_total2 - t_total1;
|
res.elapsed = t_total.stop();
|
||||||
res.converged = (it != (maxit + 0.5));
|
res.converged = (it != (maxit + 0.5));
|
||||||
|
|
||||||
if (verbosity > 0) {
|
if (verbosity > 0) {
|
||||||
@ -284,11 +282,7 @@ void cusparseSolverBackend<block_size>::finalize() {
|
|||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b) {
|
void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b) {
|
||||||
|
Timer t;
|
||||||
double t1, t2;
|
|
||||||
if (verbosity > 2) {
|
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
#if COPY_ROW_BY_ROW
|
#if COPY_ROW_BY_ROW
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
@ -309,9 +303,8 @@ void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *ro
|
|||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
cudaStreamSynchronize(stream);
|
cudaStreamSynchronize(stream);
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "cusparseSolver::copy_system_to_gpu(): " << t2 - t1 << " s";
|
out << "cusparseSolver::copy_system_to_gpu(): " << t.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
} // end copy_system_to_gpu()
|
} // end copy_system_to_gpu()
|
||||||
@ -320,11 +313,7 @@ void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *ro
|
|||||||
// don't copy rowpointers and colindices, they stay the same
|
// don't copy rowpointers and colindices, they stay the same
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void cusparseSolverBackend<block_size>::update_system_on_gpu(double *vals, int *rows, double *b) {
|
void cusparseSolverBackend<block_size>::update_system_on_gpu(double *vals, int *rows, double *b) {
|
||||||
|
Timer t;
|
||||||
double t1, t2;
|
|
||||||
if (verbosity > 2) {
|
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
#if COPY_ROW_BY_ROW
|
#if COPY_ROW_BY_ROW
|
||||||
int sum = 0;
|
int sum = 0;
|
||||||
@ -343,9 +332,8 @@ void cusparseSolverBackend<block_size>::update_system_on_gpu(double *vals, int *
|
|||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
cudaStreamSynchronize(stream);
|
cudaStreamSynchronize(stream);
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "cusparseSolver::update_system_on_gpu(): " << t2 - t1 << " s";
|
out << "cusparseSolver::update_system_on_gpu(): " << t.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
} // end update_system_on_gpu()
|
} // end update_system_on_gpu()
|
||||||
@ -361,11 +349,7 @@ template <unsigned int block_size>
|
|||||||
bool cusparseSolverBackend<block_size>::analyse_matrix() {
|
bool cusparseSolverBackend<block_size>::analyse_matrix() {
|
||||||
|
|
||||||
int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
|
int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
|
||||||
double t1, t2;
|
Timer t;
|
||||||
|
|
||||||
if (verbosity > 2) {
|
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
cusparseCreateMatDescr(&descr_B);
|
cusparseCreateMatDescr(&descr_B);
|
||||||
cusparseCreateMatDescr(&descr_M);
|
cusparseCreateMatDescr(&descr_M);
|
||||||
@ -428,9 +412,8 @@ bool cusparseSolverBackend<block_size>::analyse_matrix() {
|
|||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
cudaStreamSynchronize(stream);
|
cudaStreamSynchronize(stream);
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "cusparseSolver::analyse_matrix(): " << t2 - t1 << " s";
|
out << "cusparseSolver::analyse_matrix(): " << t.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -441,11 +424,7 @@ bool cusparseSolverBackend<block_size>::analyse_matrix() {
|
|||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
bool cusparseSolverBackend<block_size>::create_preconditioner() {
|
bool cusparseSolverBackend<block_size>::create_preconditioner() {
|
||||||
|
Timer t;
|
||||||
double t1, t2;
|
|
||||||
if (verbosity > 2) {
|
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
d_mCols = d_bCols;
|
d_mCols = d_bCols;
|
||||||
d_mRows = d_bRows;
|
d_mRows = d_bRows;
|
||||||
@ -463,9 +442,8 @@ bool cusparseSolverBackend<block_size>::create_preconditioner() {
|
|||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
cudaStreamSynchronize(stream);
|
cudaStreamSynchronize(stream);
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "cusparseSolver::create_preconditioner(): " << t2 - t1 << " s";
|
out << "cusparseSolver::create_preconditioner(): " << t.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -485,19 +463,14 @@ void cusparseSolverBackend<block_size>::solve_system(WellContributions& wellCont
|
|||||||
// caller must be sure that x is a valid array
|
// caller must be sure that x is a valid array
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void cusparseSolverBackend<block_size>::get_result(double *x) {
|
void cusparseSolverBackend<block_size>::get_result(double *x) {
|
||||||
|
Timer t;
|
||||||
double t1, t2;
|
|
||||||
if (verbosity > 2) {
|
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
cudaMemcpyAsync(x, d_x, N * sizeof(double), cudaMemcpyDeviceToHost, stream);
|
cudaMemcpyAsync(x, d_x, N * sizeof(double), cudaMemcpyDeviceToHost, stream);
|
||||||
cudaStreamSynchronize(stream);
|
cudaStreamSynchronize(stream);
|
||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "cusparseSolver::get_result(): " << t2 - t1 << " s";
|
out << "cusparseSolver::get_result(): " << t.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
} // end get_result()
|
} // end get_result()
|
||||||
|
@ -44,7 +44,6 @@ class cusparseSolverBackend : public BdaSolver<block_size> {
|
|||||||
using Base::verbosity;
|
using Base::verbosity;
|
||||||
using Base::maxit;
|
using Base::maxit;
|
||||||
using Base::tolerance;
|
using Base::tolerance;
|
||||||
using Base::second;
|
|
||||||
using Base::initialized;
|
using Base::initialized;
|
||||||
typedef BdaSolverStatus::Status Status;
|
typedef BdaSolverStatus::Status Status;
|
||||||
|
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
|
|
||||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||||
#include <opm/common/ErrorMacros.hpp>
|
#include <opm/common/ErrorMacros.hpp>
|
||||||
|
#include <dune/common/timer.hh>
|
||||||
|
|
||||||
|
|
||||||
#define __CL_ENABLE_EXCEPTIONS
|
#define __CL_ENABLE_EXCEPTIONS
|
||||||
@ -48,6 +49,7 @@ namespace bda
|
|||||||
{
|
{
|
||||||
|
|
||||||
using Opm::OpmLog;
|
using Opm::OpmLog;
|
||||||
|
using Dune::Timer;
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_) {
|
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_) {
|
||||||
@ -72,14 +74,11 @@ unsigned int openclSolverBackend<block_size>::ceilDivision(const unsigned int A,
|
|||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out)
|
double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
const unsigned int work_group_size = 1024;
|
const unsigned int work_group_size = 1024;
|
||||||
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
|
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
|
||||||
const unsigned int total_work_items = num_work_groups * work_group_size;
|
const unsigned int total_work_items = num_work_groups * work_group_size;
|
||||||
const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
|
const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
|
||||||
if (verbosity >= 4) {
|
Timer t_dot;
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
cl::Event event = (*dot_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in1, in2, out, N, cl::Local(lmem_per_work_group));
|
cl::Event event = (*dot_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in1, in2, out, N, cl::Local(lmem_per_work_group));
|
||||||
|
|
||||||
@ -92,9 +91,8 @@ double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl
|
|||||||
|
|
||||||
if (verbosity >= 4) {
|
if (verbosity >= 4) {
|
||||||
event.wait();
|
event.wait();
|
||||||
t2 = second();
|
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "openclSolver dot_w time: " << t2 - t1;
|
oss << std::scientific << "openclSolver dot_w time: " << t_dot.stop() << " s";
|
||||||
OpmLog::info(oss.str());
|
OpmLog::info(oss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -104,14 +102,11 @@ double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl
|
|||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
|
double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
const unsigned int work_group_size = 1024;
|
const unsigned int work_group_size = 1024;
|
||||||
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
|
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
|
||||||
const unsigned int total_work_items = num_work_groups * work_group_size;
|
const unsigned int total_work_items = num_work_groups * work_group_size;
|
||||||
const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
|
const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
|
||||||
if (verbosity >= 4) {
|
Timer t_norm;
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
cl::Event event = (*norm_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in, out, N, cl::Local(lmem_per_work_group));
|
cl::Event event = (*norm_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in, out, N, cl::Local(lmem_per_work_group));
|
||||||
|
|
||||||
@ -125,9 +120,8 @@ double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
|
|||||||
|
|
||||||
if (verbosity >= 4) {
|
if (verbosity >= 4) {
|
||||||
event.wait();
|
event.wait();
|
||||||
t2 = second();
|
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "openclSolver norm_w time: " << t2 - t1;
|
oss << std::scientific << "openclSolver norm_w time: " << t_norm.stop() << " s";
|
||||||
OpmLog::info(oss.str());
|
OpmLog::info(oss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,21 +131,17 @@ double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
|
|||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::axpy_w(cl::Buffer in, const double a, cl::Buffer out)
|
void openclSolverBackend<block_size>::axpy_w(cl::Buffer in, const double a, cl::Buffer out)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
const unsigned int work_group_size = 32;
|
const unsigned int work_group_size = 32;
|
||||||
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
|
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
|
||||||
const unsigned int total_work_items = num_work_groups * work_group_size;
|
const unsigned int total_work_items = num_work_groups * work_group_size;
|
||||||
if (verbosity >= 4) {
|
Timer t_axpy;
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
cl::Event event = (*axpy_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in, a, out, N);
|
cl::Event event = (*axpy_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), in, a, out, N);
|
||||||
|
|
||||||
if (verbosity >= 4) {
|
if (verbosity >= 4) {
|
||||||
event.wait();
|
event.wait();
|
||||||
t2 = second();
|
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "openclSolver axpy_w time: " << t2 - t1;
|
oss << std::scientific << "openclSolver axpy_w time: " << t_axpy.stop() << " s";
|
||||||
OpmLog::info(oss.str());
|
OpmLog::info(oss.str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -159,21 +149,17 @@ void openclSolverBackend<block_size>::axpy_w(cl::Buffer in, const double a, cl::
|
|||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta)
|
void openclSolverBackend<block_size>::custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
const unsigned int work_group_size = 32;
|
const unsigned int work_group_size = 32;
|
||||||
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
|
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
|
||||||
const unsigned int total_work_items = num_work_groups * work_group_size;
|
const unsigned int total_work_items = num_work_groups * work_group_size;
|
||||||
if (verbosity >= 4) {
|
Timer t_custom;
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
cl::Event event = (*custom_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), p, v, r, omega, beta, N);
|
cl::Event event = (*custom_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), p, v, r, omega, beta, N);
|
||||||
|
|
||||||
if (verbosity >= 4) {
|
if (verbosity >= 4) {
|
||||||
event.wait();
|
event.wait();
|
||||||
t2 = second();
|
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "openclSolver custom_w time: " << t2 - t1;
|
oss << std::scientific << "openclSolver custom_w time: " << t_custom.stop() << " s";
|
||||||
OpmLog::info(oss.str());
|
OpmLog::info(oss.str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -181,22 +167,18 @@ void openclSolverBackend<block_size>::custom_w(cl::Buffer p, cl::Buffer v, cl::B
|
|||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b)
|
void openclSolverBackend<block_size>::spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
const unsigned int work_group_size = 32;
|
const unsigned int work_group_size = 32;
|
||||||
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
|
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
|
||||||
const unsigned int total_work_items = num_work_groups * work_group_size;
|
const unsigned int total_work_items = num_work_groups * work_group_size;
|
||||||
const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
|
const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
|
||||||
if (verbosity >= 4) {
|
Timer t_spmv;
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
cl::Event event = (*spmv_blocked_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), vals, cols, rows, Nb, x, b, block_size, cl::Local(lmem_per_work_group));
|
cl::Event event = (*spmv_blocked_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), vals, cols, rows, Nb, x, b, block_size, cl::Local(lmem_per_work_group));
|
||||||
|
|
||||||
if (verbosity >= 4) {
|
if (verbosity >= 4) {
|
||||||
event.wait();
|
event.wait();
|
||||||
t2 = second();
|
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "openclSolver spmv_blocked_w time: " << t2 - t1;
|
oss << std::scientific << "openclSolver spmv_blocked_w time: " << t_spmv.stop() << " s";
|
||||||
OpmLog::info(oss.str());
|
OpmLog::info(oss.str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -209,9 +191,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
|||||||
double rho, rhop, beta, alpha, omega, tmp1, tmp2;
|
double rho, rhop, beta, alpha, omega, tmp1, tmp2;
|
||||||
double norm, norm_0;
|
double norm, norm_0;
|
||||||
|
|
||||||
double t_total1, t_total2, t1 = 0.0, t2 = 0.0;
|
Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
|
||||||
double prec_time = 0.0, spmv_time = 0.0, well_time = 0.0, rest_time = 0.0;
|
|
||||||
t_total1 = second();
|
|
||||||
|
|
||||||
wellContribs.setOpenCLQueue(queue.get());
|
wellContribs.setOpenCLQueue(queue.get());
|
||||||
wellContribs.setReordering(toOrder, true);
|
wellContribs.setReordering(toOrder, true);
|
||||||
@ -245,7 +225,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
|||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
t1 = second();
|
t_rest.start();
|
||||||
for (it = 0.5; it < maxit; it += 0.5) {
|
for (it = 0.5; it < maxit; it += 0.5) {
|
||||||
rhop = rho;
|
rhop = rho;
|
||||||
rho = dot_w(d_rw, d_r, d_tmp);
|
rho = dot_w(d_rw, d_r, d_tmp);
|
||||||
@ -254,37 +234,32 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
|||||||
beta = (rho / rhop) * (alpha / omega);
|
beta = (rho / rhop) * (alpha / omega);
|
||||||
custom_w(d_p, d_v, d_r, omega, beta);
|
custom_w(d_p, d_v, d_r, omega, beta);
|
||||||
}
|
}
|
||||||
t2 = second();
|
t_rest.stop();
|
||||||
rest_time += t2 - t1;
|
|
||||||
|
|
||||||
// pw = prec(p)
|
// pw = prec(p)
|
||||||
t1 = second();
|
t_prec.start();
|
||||||
prec->apply(d_p, d_pw);
|
prec->apply(d_p, d_pw);
|
||||||
t2 = second();
|
t_prec.stop();
|
||||||
prec_time += t2 - t1;
|
|
||||||
|
|
||||||
// v = A * pw
|
// v = A * pw
|
||||||
t1 = second();
|
t_spmv.start();
|
||||||
spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_pw, d_v);
|
spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_pw, d_v);
|
||||||
t2 = second();
|
t_spmv.stop();
|
||||||
spmv_time += t2 - t1;
|
|
||||||
|
|
||||||
// apply wellContributions
|
// apply wellContributions
|
||||||
if (wellContribs.getNumWells() > 0) {
|
if (wellContribs.getNumWells() > 0) {
|
||||||
t1 = second();
|
t_well.start();
|
||||||
wellContribs.apply(d_pw, d_v);
|
wellContribs.apply(d_pw, d_v);
|
||||||
t2 = second();
|
t_well.stop();
|
||||||
well_time += t2 - t1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
t1 = second();
|
t_rest.start();
|
||||||
tmp1 = dot_w(d_rw, d_v, d_tmp);
|
tmp1 = dot_w(d_rw, d_v, d_tmp);
|
||||||
alpha = rho / tmp1;
|
alpha = rho / tmp1;
|
||||||
axpy_w(d_v, -alpha, d_r); // r = r - alpha * v
|
axpy_w(d_v, -alpha, d_r); // r = r - alpha * v
|
||||||
axpy_w(d_pw, alpha, d_x); // x = x + alpha * pw
|
axpy_w(d_pw, alpha, d_x); // x = x + alpha * pw
|
||||||
norm = norm_w(d_r, d_tmp);
|
norm = norm_w(d_r, d_tmp);
|
||||||
t2 = second();
|
t_rest.stop();
|
||||||
rest_time += t2 - t1;
|
|
||||||
|
|
||||||
if (norm < tolerance * norm_0) {
|
if (norm < tolerance * norm_0) {
|
||||||
break;
|
break;
|
||||||
@ -293,34 +268,30 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
|||||||
it += 0.5;
|
it += 0.5;
|
||||||
|
|
||||||
// s = prec(r)
|
// s = prec(r)
|
||||||
t1 = second();
|
t_prec.start();
|
||||||
prec->apply(d_r, d_s);
|
prec->apply(d_r, d_s);
|
||||||
t2 = second();
|
t_prec.stop();
|
||||||
prec_time += t2 - t1;
|
|
||||||
|
|
||||||
// t = A * s
|
// t = A * s
|
||||||
t1 = second();
|
t_spmv.start();
|
||||||
spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_s, d_t);
|
spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_s, d_t);
|
||||||
t2 = second();
|
t_spmv.stop();
|
||||||
spmv_time += t2 - t1;
|
|
||||||
|
|
||||||
// apply wellContributions
|
// apply wellContributions
|
||||||
if (wellContribs.getNumWells() > 0) {
|
if (wellContribs.getNumWells() > 0) {
|
||||||
t1 = second();
|
t_well.start();
|
||||||
wellContribs.apply(d_s, d_t);
|
wellContribs.apply(d_s, d_t);
|
||||||
t2 = second();
|
t_well.stop();
|
||||||
well_time += t2 - t1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
t1 = second();
|
t_rest.start();
|
||||||
tmp1 = dot_w(d_t, d_r, d_tmp);
|
tmp1 = dot_w(d_t, d_r, d_tmp);
|
||||||
tmp2 = dot_w(d_t, d_t, d_tmp);
|
tmp2 = dot_w(d_t, d_t, d_tmp);
|
||||||
omega = tmp1 / tmp2;
|
omega = tmp1 / tmp2;
|
||||||
axpy_w(d_s, omega, d_x); // x = x + omega * s
|
axpy_w(d_s, omega, d_x); // x = x + omega * s
|
||||||
axpy_w(d_t, -omega, d_r); // r = r - omega * t
|
axpy_w(d_t, -omega, d_r); // r = r - omega * t
|
||||||
norm = norm_w(d_r, d_tmp);
|
norm = norm_w(d_r, d_tmp);
|
||||||
t2 = second();
|
t_rest.stop();
|
||||||
rest_time += t2 - t1;
|
|
||||||
|
|
||||||
if (norm < tolerance * norm_0) {
|
if (norm < tolerance * norm_0) {
|
||||||
break;
|
break;
|
||||||
@ -333,14 +304,10 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
t2 = second();
|
|
||||||
t_total2 = second();
|
|
||||||
rest_time += t2 - t1;
|
|
||||||
|
|
||||||
res.iterations = std::min(it, (float)maxit);
|
res.iterations = std::min(it, (float)maxit);
|
||||||
res.reduction = norm / norm_0;
|
res.reduction = norm / norm_0;
|
||||||
res.conv_rate = static_cast<double>(pow(res.reduction, 1.0 / it));
|
res.conv_rate = static_cast<double>(pow(res.reduction, 1.0 / it));
|
||||||
res.elapsed = t_total2 - t_total1;
|
res.elapsed = t_total.stop();
|
||||||
res.converged = (it != (maxit + 0.5));
|
res.converged = (it != (maxit + 0.5));
|
||||||
|
|
||||||
if (verbosity > 0) {
|
if (verbosity > 0) {
|
||||||
@ -556,12 +523,7 @@ void openclSolverBackend<block_size>::finalize() {
|
|||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
||||||
|
Timer t;
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
if (verbosity > 2) {
|
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
cl::Event event;
|
cl::Event event;
|
||||||
|
|
||||||
#if COPY_ROW_BY_ROW
|
#if COPY_ROW_BY_ROW
|
||||||
@ -583,9 +545,8 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
|||||||
event.wait();
|
event.wait();
|
||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "openclSolver::copy_system_to_gpu(): " << t2 - t1 << " s";
|
out << "openclSolver::copy_system_to_gpu(): " << t.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
} // end copy_system_to_gpu()
|
} // end copy_system_to_gpu()
|
||||||
@ -594,12 +555,7 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
|||||||
// don't copy rowpointers and colindices, they stay the same
|
// don't copy rowpointers and colindices, they stay the same
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::update_system_on_gpu() {
|
void openclSolverBackend<block_size>::update_system_on_gpu() {
|
||||||
|
Timer t;
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
if (verbosity > 2) {
|
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
cl::Event event;
|
cl::Event event;
|
||||||
|
|
||||||
#if COPY_ROW_BY_ROW
|
#if COPY_ROW_BY_ROW
|
||||||
@ -619,9 +575,8 @@ void openclSolverBackend<block_size>::update_system_on_gpu() {
|
|||||||
event.wait();
|
event.wait();
|
||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "openclSolver::update_system_on_gpu(): " << t2 - t1 << " s";
|
out << "openclSolver::update_system_on_gpu(): " << t.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
} // end update_system_on_gpu()
|
} // end update_system_on_gpu()
|
||||||
@ -629,12 +584,7 @@ void openclSolverBackend<block_size>::update_system_on_gpu() {
|
|||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
bool openclSolverBackend<block_size>::analyse_matrix() {
|
bool openclSolverBackend<block_size>::analyse_matrix() {
|
||||||
|
Timer t;
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
|
|
||||||
if (verbosity > 2) {
|
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool success = prec->init(mat);
|
bool success = prec->init(mat);
|
||||||
int work_group_size = 32;
|
int work_group_size = 32;
|
||||||
@ -648,9 +598,8 @@ bool openclSolverBackend<block_size>::analyse_matrix() {
|
|||||||
rmat = prec->getRMat();
|
rmat = prec->getRMat();
|
||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "openclSolver::analyse_matrix(): " << t2 - t1 << " s";
|
out << "openclSolver::analyse_matrix(): " << t.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -662,18 +611,14 @@ bool openclSolverBackend<block_size>::analyse_matrix() {
|
|||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
|
void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
|
||||||
double t1 = 0.0, t2 = 0.0;
|
Timer t;
|
||||||
if (verbosity > 2) {
|
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
mat->nnzValues = vals;
|
mat->nnzValues = vals;
|
||||||
blocked_reorder_vector_by_pattern<block_size>(mat->Nb, b, fromOrder, rb);
|
blocked_reorder_vector_by_pattern<block_size>(mat->Nb, b, fromOrder, rb);
|
||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "openclSolver::update_system(): " << t2 - t1 << " s";
|
out << "openclSolver::update_system(): " << t.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
} // end update_system()
|
} // end update_system()
|
||||||
@ -681,18 +626,13 @@ void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
|
|||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
bool openclSolverBackend<block_size>::create_preconditioner() {
|
bool openclSolverBackend<block_size>::create_preconditioner() {
|
||||||
|
Timer t;
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
if (verbosity > 2) {
|
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool result = prec->create_preconditioner(mat);
|
bool result = prec->create_preconditioner(mat);
|
||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "openclSolver::create_preconditioner(): " << t2 - t1 << " s";
|
out << "openclSolver::create_preconditioner(): " << t.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
@ -701,18 +641,14 @@ bool openclSolverBackend<block_size>::create_preconditioner() {
|
|||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::solve_system(WellContributions& wellContribs, BdaResult &res) {
|
void openclSolverBackend<block_size>::solve_system(WellContributions& wellContribs, BdaResult &res) {
|
||||||
// actually solve
|
Timer t;
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
if (verbosity > 2) {
|
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// actually solve
|
||||||
gpu_pbicgstab(wellContribs, res);
|
gpu_pbicgstab(wellContribs, res);
|
||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "openclSolver::solve_system(): " << t2 - t1 << " s";
|
out << "openclSolver::solve_system(): " << t.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -723,19 +659,14 @@ void openclSolverBackend<block_size>::solve_system(WellContributions& wellContri
|
|||||||
// caller must be sure that x is a valid array
|
// caller must be sure that x is a valid array
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::get_result(double *x) {
|
void openclSolverBackend<block_size>::get_result(double *x) {
|
||||||
|
Timer t;
|
||||||
double t1 = 0.0, t2 = 0.0;
|
|
||||||
if (verbosity > 2) {
|
|
||||||
t1 = second();
|
|
||||||
}
|
|
||||||
|
|
||||||
queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, rb);
|
queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, rb);
|
||||||
blocked_reorder_vector_by_pattern<block_size>(mat->Nb, rb, toOrder, x);
|
blocked_reorder_vector_by_pattern<block_size>(mat->Nb, rb, toOrder, x);
|
||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
t2 = second();
|
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "openclSolver::get_result(): " << t2 - t1 << " s";
|
out << "openclSolver::get_result(): " << t.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
} // end get_result()
|
} // end get_result()
|
||||||
|
@ -50,7 +50,6 @@ class openclSolverBackend : public BdaSolver<block_size>
|
|||||||
using Base::verbosity;
|
using Base::verbosity;
|
||||||
using Base::maxit;
|
using Base::maxit;
|
||||||
using Base::tolerance;
|
using Base::tolerance;
|
||||||
using Base::second;
|
|
||||||
using Base::initialized;
|
using Base::initialized;
|
||||||
typedef BdaSolverStatus::Status Status;
|
typedef BdaSolverStatus::Status Status;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user