mirror of https://github.com/OPM/opm-simulators.git, synced 2025-02-25 18:55:30 -06:00
Split into API-specific classes for CUDA/OpenCL. This is done because 1) it is cleaner and 2) it avoids pulling OpenCL code into the CUDA classes, which leads to clashes between the NVIDIA headers and opencl.hpp. There is still too much API-specific detail in the interface between the bda components to work through a virtual interface, so we still have to cast to the relevant implementation in various places.
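One instance of that cast-to-implementation pattern appears further down in this file, where the generic WellContributions argument is cast to the CUDA implementation before CUDA-specific members are used:

    if (wellContribs.getNumWells() > 0) {
        static_cast<WellContributionsCuda&>(wellContribs).setCudaStream(stream);
    }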
519 lines
19 KiB
Plaintext
/*
  Copyright 2019 Equinor ASA

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>

#include <cuda_runtime.h>
#include <sstream>

#include <opm/common/OpmLog/OpmLog.hpp>
#include <dune/common/timer.hh>

#include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
#include <opm/simulators/linalg/bda/cuWellContributions.hpp>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/cuda_header.hpp>

#include "cublas_v2.h"
#include "cusparse_v2.h"
// For more information about cusparse, check https://docs.nvidia.com/cuda/cusparse/index.html

// iff true, the nonzeroes of the matrix are copied row-by-row into a contiguous, pinned memory array, then a single GPU memcpy is done
// otherwise, the nonzeroes of the matrix are assumed to be in a contiguous array, and a single GPU memcpy is enough
#define COPY_ROW_BY_ROW 0
namespace Opm
{
namespace Accelerator
{

using Opm::OpmLog;
using Dune::Timer;

const cusparseSolvePolicy_t policy = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
const cusparseOperation_t operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
const cusparseDirection_t order = CUSPARSE_DIRECTION_ROW;

template <unsigned int block_size>
cusparseSolverBackend<block_size>::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int deviceID_)
    : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, deviceID_)
{}

template <unsigned int block_size>
cusparseSolverBackend<block_size>::~cusparseSolverBackend() {
    finalize();
}

template <unsigned int block_size>
void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
    Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
    int n = N;
    double rho = 1.0, rhop;
    double alpha, nalpha, beta;
    double omega, nomega, tmp1, tmp2;
    double norm, norm_0;
    double zero = 0.0;
    double one = 1.0;
    double mone = -1.0;
    float it;

    if (wellContribs.getNumWells() > 0) {
        static_cast<WellContributionsCuda&>(wellContribs).setCudaStream(stream);
    }

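    // Compute the initial residual: the bsrmv below sets d_r = A*x, and the following
    // scal/axpy calls turn it into d_r = b - A*x before d_rw, d_p and norm_0 are set up.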
    cusparseDbsrmv(cusparseHandle, order, operation, Nb, Nb, nnzb, &one, descr_M, d_bVals, d_bRows, d_bCols, block_size, d_x, &zero, d_r);

    cublasDscal(cublasHandle, n, &mone, d_r, 1);
    cublasDaxpy(cublasHandle, n, &one, d_b, 1, d_r, 1);
    cublasDcopy(cublasHandle, n, d_r, 1, d_rw, 1);
    cublasDcopy(cublasHandle, n, d_r, 1, d_p, 1);
    cublasDnrm2(cublasHandle, n, d_r, 1, &norm_0);

    if (verbosity > 1) {
        std::ostringstream out;
        out << std::scientific << "cusparseSolver initial norm: " << norm_0;
        OpmLog::info(out.str());
    }

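    // Main BiCGStab loop. The counter advances in half-steps: each full iteration
    // performs two preconditioner-apply + spmv stages, and an early break after the
    // first stage counts as half an iteration.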
    for (it = 0.5; it < maxit; it += 0.5) {
        rhop = rho;
        cublasDdot(cublasHandle, n, d_rw, 1, d_r, 1, &rho);

        if (it > 1) {
            beta = (rho / rhop) * (alpha / omega);
            nomega = -omega;
            cublasDaxpy(cublasHandle, n, &nomega, d_v, 1, d_p, 1);
            cublasDscal(cublasHandle, n, &beta, d_p, 1);
            cublasDaxpy(cublasHandle, n, &one, d_r, 1, d_p, 1);
        }

        // apply ilu0
        cusparseDbsrsv2_solve(cusparseHandle, order, \
            operation, Nb, nnzb, &one, \
            descr_L, d_mVals, d_mRows, d_mCols, block_size, info_L, d_p, d_t, policy, d_buffer);
        cusparseDbsrsv2_solve(cusparseHandle, order, \
            operation, Nb, nnzb, &one, \
            descr_U, d_mVals, d_mRows, d_mCols, block_size, info_U, d_t, d_pw, policy, d_buffer);

        // spmv
        cusparseDbsrmv(cusparseHandle, order, \
            operation, Nb, Nb, nnzb, \
            &one, descr_M, d_bVals, d_bRows, d_bCols, block_size, d_pw, &zero, d_v);

        // apply wellContributions
        if (wellContribs.getNumWells() > 0) {
            static_cast<WellContributionsCuda&>(wellContribs).apply(d_pw, d_v);
        }

        cublasDdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1);
        alpha = rho / tmp1;
        nalpha = -alpha;
        cublasDaxpy(cublasHandle, n, &nalpha, d_v, 1, d_r, 1);
        cublasDaxpy(cublasHandle, n, &alpha, d_pw, 1, d_x, 1);
        cublasDnrm2(cublasHandle, n, d_r, 1, &norm);

        if (norm < tolerance * norm_0) {
            break;
        }

        it += 0.5;

        // apply ilu0
        cusparseDbsrsv2_solve(cusparseHandle, order, \
            operation, Nb, nnzb, &one, \
            descr_L, d_mVals, d_mRows, d_mCols, block_size, info_L, d_r, d_t, policy, d_buffer);
        cusparseDbsrsv2_solve(cusparseHandle, order, \
            operation, Nb, nnzb, &one, \
            descr_U, d_mVals, d_mRows, d_mCols, block_size, info_U, d_t, d_s, policy, d_buffer);

        // spmv
        cusparseDbsrmv(cusparseHandle, order, \
            operation, Nb, Nb, nnzb, &one, descr_M, \
            d_bVals, d_bRows, d_bCols, block_size, d_s, &zero, d_t);

        // apply wellContributions
        if (wellContribs.getNumWells() > 0) {
            static_cast<WellContributionsCuda&>(wellContribs).apply(d_s, d_t);
        }

        cublasDdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1);
        cublasDdot(cublasHandle, n, d_t, 1, d_t, 1, &tmp2);
        omega = tmp1 / tmp2;
        nomega = -omega;
        cublasDaxpy(cublasHandle, n, &omega, d_s, 1, d_x, 1);
        cublasDaxpy(cublasHandle, n, &nomega, d_t, 1, d_r, 1);

        cublasDnrm2(cublasHandle, n, d_r, 1, &norm);

        if (norm < tolerance * norm_0) {
            break;
        }

        if (verbosity > 1) {
            std::ostringstream out;
            out << "it: " << it << std::scientific << ", norm: " << norm;
            OpmLog::info(out.str());
        }
    }

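    // Record solver statistics. res.converged is derived from the loop counter:
    // `it` equals maxit + 0.5 only if the loop finished without a convergence break.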
    res.iterations = std::min(it, (float)maxit);
    res.reduction = norm / norm_0;
    res.conv_rate = static_cast<double>(pow(res.reduction, 1.0 / it));
    res.elapsed = t_total.stop();
    res.converged = (it != (maxit + 0.5));

    if (verbosity > 0) {
        std::ostringstream out;
        out << "=== converged: " << res.converged << ", conv_rate: " << res.conv_rate << ", time: " << res.elapsed << \
            ", time per iteration: " << res.elapsed / it << ", iterations: " << it;
        OpmLog::info(out.str());
    }
}

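// Allocate GPU memory and create the CUDA stream and cuBLAS/cuSPARSE handles.
// N and nnz are scalar counts; Nb and nnzb are the corresponding block-row and
// nonzero-block counts for the BSR layout.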
template <unsigned int block_size>
void cusparseSolverBackend<block_size>::initialize(int N, int nnz, int dim) {
    this->N = N;
    this->nnz = nnz;
    this->nnzb = nnz / block_size / block_size;
    Nb = (N + dim - 1) / dim;
    std::ostringstream out;
    out << "Initializing GPU, matrix size: " << N << " blocks, nnz: " << nnzb << " blocks";
    OpmLog::info(out.str());
    out.str("");
    out.clear();
    out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance;
    OpmLog::info(out.str());

    cudaSetDevice(deviceID);
    cudaCheckLastError("Could not get device");
    struct cudaDeviceProp props;
    cudaGetDeviceProperties(&props, deviceID);
    cudaCheckLastError("Could not get device properties");
    out.str("");
    out.clear();
    out << "Name GPU: " << props.name << ", Compute Capability: " << props.major << "." << props.minor;
    OpmLog::info(out.str());

    cudaStreamCreate(&stream);
    cudaCheckLastError("Could not create stream");

    cublasCreate(&cublasHandle);
    cudaCheckLastError("Could not create cublasHandle");

    cusparseCreate(&cusparseHandle);
    cudaCheckLastError("Could not create cusparseHandle");

    cudaMalloc((void**)&d_x, sizeof(double) * N);
    cudaMalloc((void**)&d_b, sizeof(double) * N);
    cudaMalloc((void**)&d_r, sizeof(double) * N);
    cudaMalloc((void**)&d_rw, sizeof(double) * N);
    cudaMalloc((void**)&d_p, sizeof(double) * N);
    cudaMalloc((void**)&d_pw, sizeof(double) * N);
    cudaMalloc((void**)&d_s, sizeof(double) * N);
    cudaMalloc((void**)&d_t, sizeof(double) * N);
    cudaMalloc((void**)&d_v, sizeof(double) * N);
    cudaMalloc((void**)&d_bVals, sizeof(double) * nnz);
    cudaMalloc((void**)&d_bCols, sizeof(int) * nnzb);
    cudaMalloc((void**)&d_bRows, sizeof(int) * (Nb + 1));
    cudaMalloc((void**)&d_mVals, sizeof(double) * nnz);
    cudaCheckLastError("Could not allocate enough memory on GPU");

    cublasSetStream(cublasHandle, stream);
    cudaCheckLastError("Could not set stream to cublas");
    cusparseSetStream(cusparseHandle, stream);
    cudaCheckLastError("Could not set stream to cusparse");

#if COPY_ROW_BY_ROW
    cudaMallocHost((void**)&vals_contiguous, sizeof(double) * nnz);
    cudaCheckLastError("Could not allocate pinned memory");
#endif

    initialized = true;
} // end initialize()

template <unsigned int block_size>
void cusparseSolverBackend<block_size>::finalize() {
    if (initialized) {
        cudaFree(d_x);
        cudaFree(d_b);
        cudaFree(d_r);
        cudaFree(d_rw);
        cudaFree(d_p);
        cudaFree(d_pw);
        cudaFree(d_s);
        cudaFree(d_t);
        cudaFree(d_v);
        cudaFree(d_mVals);
        cudaFree(d_bVals);
        cudaFree(d_bCols);
        cudaFree(d_bRows);
        cudaFree(d_buffer);
        cusparseDestroyBsrilu02Info(info_M);
        cusparseDestroyBsrsv2Info(info_L);
        cusparseDestroyBsrsv2Info(info_U);
        cusparseDestroyMatDescr(descr_B);
        cusparseDestroyMatDescr(descr_M);
        cusparseDestroyMatDescr(descr_L);
        cusparseDestroyMatDescr(descr_U);
        cusparseDestroy(cusparseHandle);
        cublasDestroy(cublasHandle);
#if COPY_ROW_BY_ROW
        cudaFreeHost(vals_contiguous);
#endif
        cudaStreamDestroy(stream);
    }
} // end finalize()

template <unsigned int block_size>
void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b) {
    Timer t;

#if COPY_ROW_BY_ROW
    int sum = 0;
    for (int i = 0; i < Nb; ++i) {
        int size_row = rows[i + 1] - rows[i];
        memcpy(vals_contiguous + sum, vals + sum, size_row * sizeof(double) * block_size * block_size);
        sum += size_row * block_size * block_size;
    }
    cudaMemcpyAsync(d_bVals, vals_contiguous, nnz * sizeof(double), cudaMemcpyHostToDevice, stream);
#else
    cudaMemcpyAsync(d_bVals, vals, nnz * sizeof(double), cudaMemcpyHostToDevice, stream);
#endif

    cudaMemcpyAsync(d_bCols, cols, nnzb * sizeof(int), cudaMemcpyHostToDevice, stream);
    cudaMemcpyAsync(d_bRows, rows, (Nb + 1) * sizeof(int), cudaMemcpyHostToDevice, stream);
    cudaMemcpyAsync(d_b, b, N * sizeof(double), cudaMemcpyHostToDevice, stream);
    cudaMemsetAsync(d_x, 0, sizeof(double) * N, stream);

    if (verbosity > 2) {
        cudaStreamSynchronize(stream);
        std::ostringstream out;
        out << "cusparseSolver::copy_system_to_gpu(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
} // end copy_system_to_gpu()

// don't copy rowpointers and colindices, they stay the same
template <unsigned int block_size>
void cusparseSolverBackend<block_size>::update_system_on_gpu(double *vals, int *rows, double *b) {
    Timer t;

#if COPY_ROW_BY_ROW
    int sum = 0;
    for (int i = 0; i < Nb; ++i) {
        int size_row = rows[i + 1] - rows[i];
        memcpy(vals_contiguous + sum, vals + sum, size_row * sizeof(double) * block_size * block_size);
        sum += size_row * block_size * block_size;
    }
    cudaMemcpyAsync(d_bVals, vals_contiguous, nnz * sizeof(double), cudaMemcpyHostToDevice, stream);
#else
    cudaMemcpyAsync(d_bVals, vals, nnz * sizeof(double), cudaMemcpyHostToDevice, stream);
#endif

    cudaMemcpyAsync(d_b, b, N * sizeof(double), cudaMemcpyHostToDevice, stream);
    cudaMemsetAsync(d_x, 0, sizeof(double) * N, stream);

    if (verbosity > 2) {
        cudaStreamSynchronize(stream);
        std::ostringstream out;
        out << "cusparseSolver::update_system_on_gpu(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
} // end update_system_on_gpu()

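// Refresh the preconditioner input: copy the current matrix values (d_bVals) into
// d_mVals on the device, so the ILU0 factorization can overwrite them in place.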
template <unsigned int block_size>
void cusparseSolverBackend<block_size>::reset_prec_on_gpu() {
    cudaMemcpyAsync(d_mVals, d_bVals, nnz * sizeof(double), cudaMemcpyDeviceToDevice, stream);
}

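// Create the matrix descriptors and run the cuSPARSE analysis phases for the
// block-ILU0 factorization (bsrilu02) and the two triangular solves (bsrsv2).
// Returns false if a structural zero pivot is detected.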
template <unsigned int block_size>
bool cusparseSolverBackend<block_size>::analyse_matrix() {

    int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
    Timer t;

    cusparseCreateMatDescr(&descr_B);
    cusparseCreateMatDescr(&descr_M);
    cusparseSetMatType(descr_B, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatType(descr_M, CUSPARSE_MATRIX_TYPE_GENERAL);
    const cusparseIndexBase_t base_type = CUSPARSE_INDEX_BASE_ZERO; // matrices from Flow are base0

    cusparseSetMatIndexBase(descr_B, base_type);
    cusparseSetMatIndexBase(descr_M, base_type);

    cusparseCreateMatDescr(&descr_L);
    cusparseSetMatIndexBase(descr_L, base_type);
    cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER);
    cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_UNIT);

    cusparseCreateMatDescr(&descr_U);
    cusparseSetMatIndexBase(descr_U, base_type);
    cusparseSetMatType(descr_U, CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatFillMode(descr_U, CUSPARSE_FILL_MODE_UPPER);
    cusparseSetMatDiagType(descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT);
    cudaCheckLastError("Could not initialize matrix descriptions");

    cusparseCreateBsrilu02Info(&info_M);
    cusparseCreateBsrsv2Info(&info_L);
    cusparseCreateBsrsv2Info(&info_U);
    cudaCheckLastError("Could not create analysis info");

    cusparseDbsrilu02_bufferSize(cusparseHandle, order, Nb, nnzb,
        descr_M, d_bVals, d_bRows, d_bCols, block_size, info_M, &d_bufferSize_M);
    cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzb,
        descr_L, d_bVals, d_bRows, d_bCols, block_size, info_L, &d_bufferSize_L);
    cusparseDbsrsv2_bufferSize(cusparseHandle, order, operation, Nb, nnzb,
        descr_U, d_bVals, d_bRows, d_bCols, block_size, info_U, &d_bufferSize_U);
    cudaCheckLastError();
    d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U));

    cudaMalloc((void**)&d_buffer, d_bufferSize);

    // analysis of ilu LU decomposition
    cusparseDbsrilu02_analysis(cusparseHandle, order, \
        Nb, nnzb, descr_B, d_bVals, d_bRows, d_bCols, \
        block_size, info_M, policy, d_buffer);

    int structural_zero;
    cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
    if (CUSPARSE_STATUS_ZERO_PIVOT == status) {
        return false;
    }

    // analysis of ilu apply
    cusparseDbsrsv2_analysis(cusparseHandle, order, operation, \
        Nb, nnzb, descr_L, d_bVals, d_bRows, d_bCols, \
        block_size, info_L, policy, d_buffer);

    cusparseDbsrsv2_analysis(cusparseHandle, order, operation, \
        Nb, nnzb, descr_U, d_bVals, d_bRows, d_bCols, \
        block_size, info_U, policy, d_buffer);
    cudaCheckLastError("Could not analyse level information");

    if (verbosity > 2) {
        cudaStreamSynchronize(stream);
        std::ostringstream out;
        out << "cusparseSolver::analyse_matrix(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }

    analysis_done = true;

    return true;
} // end analyse_matrix()

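// Compute the block-ILU0 factorization of the matrix currently stored in d_mVals
// (the row pointers and column indices are shared with the system matrix).
// Returns false on a zero pivot.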
template <unsigned int block_size>
bool cusparseSolverBackend<block_size>::create_preconditioner() {
    Timer t;

    d_mCols = d_bCols;
    d_mRows = d_bRows;
    cusparseDbsrilu02(cusparseHandle, order, \
        Nb, nnzb, descr_M, d_mVals, d_mRows, d_mCols, \
        block_size, info_M, policy, d_buffer);
    cudaCheckLastError("Could not perform ilu decomposition");

    int structural_zero;
    // cusparseXbsrilu02_zeroPivot() calls cudaDeviceSynchronize()
    cusparseStatus_t status = cusparseXbsrilu02_zeroPivot(cusparseHandle, info_M, &structural_zero);
    if (CUSPARSE_STATUS_ZERO_PIVOT == status) {
        return false;
    }

    if (verbosity > 2) {
        cudaStreamSynchronize(stream);
        std::ostringstream out;
        out << "cusparseSolver::create_preconditioner(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
    return true;
} // end create_preconditioner()

template <unsigned int block_size>
void cusparseSolverBackend<block_size>::solve_system(WellContributions& wellContribs, BdaResult &res) {
    // actually solve
    gpu_pbicgstab(wellContribs, res);
    cudaStreamSynchronize(stream);
    cudaCheckLastError("Something went wrong during the GPU solve");
} // end solve_system()

// copy result to host memory
// caller must be sure that x is a valid array
template <unsigned int block_size>
void cusparseSolverBackend<block_size>::get_result(double *x) {
    Timer t;

    cudaMemcpyAsync(x, d_x, N * sizeof(double), cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);

    if (verbosity > 2) {
        std::ostringstream out;
        out << "cusparseSolver::get_result(): " << t.stop() << " s";
        OpmLog::info(out.str());
    }
} // end get_result()

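// Main entry point: on the first call, allocates GPU resources and copies the full
// system; on later calls, only the values and right-hand side are updated. Runs the
// cuSPARSE analysis once, rebuilds the ILU0 preconditioner, and then solves.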
template <unsigned int block_size>
SolverStatus cusparseSolverBackend<block_size>::solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
    if (initialized == false) {
        initialize(N, nnz, dim);
        copy_system_to_gpu(vals, rows, cols, b);
    } else {
        update_system_on_gpu(vals, rows, b);
    }
    if (analysis_done == false) {
        if (!analyse_matrix()) {
            return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
        }
    }
    reset_prec_on_gpu();
    if (create_preconditioner()) {
        solve_system(wellContribs, res);
    } else {
        return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
    }
    return SolverStatus::BDA_SOLVER_SUCCESS;
}


#define INSTANTIATE_BDA_FUNCTIONS(n) \
template cusparseSolverBackend<n>::cusparseSolverBackend(int, int, double, unsigned int); \

INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);
INSTANTIATE_BDA_FUNCTIONS(3);
INSTANTIATE_BDA_FUNCTIONS(4);
INSTANTIATE_BDA_FUNCTIONS(5);
INSTANTIATE_BDA_FUNCTIONS(6);

#undef INSTANTIATE_BDA_FUNCTIONS

} // namespace Accelerator
} // namespace Opm