mirror of https://github.com/OPM/opm-simulators.git (synced 2025-02-25 18:55:30 -06:00)
cusparseSolver can now handle MultisegmentWells; they are actually applied on the CPU via SuiteSparse/UMFPACK
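The operation moved to the CPU is the well's Schur-complement update y -= C^T * (D^-1 * (B * x)): B restricts the reservoir vector x to the well system, the factorized well matrix D is solved, and C^T scatters the result back into y. As a reference for the blocked-sparse implementation in the file below, here is a minimal dense sketch of the same three stages; all sizes and values are made up for illustration, and D is 1x1 so the solve reduces to a plain division.

    #include <cstdio>
    #include <vector>

    // Dense reference for y -= C^T * (D^-1 * (B * x)).
    // Sizes and values are illustrative only.
    int main()
    {
        const int N = 3;                         // reservoir unknowns
        std::vector<double> B = {1.0, 2.0, 0.0}; // 1 x N
        std::vector<double> C = {0.5, 0.0, 1.0}; // 1 x N, applied transposed
        double D = 4.0;                          // 1 x 1 well matrix
        std::vector<double> x = {1.0, 1.0, 1.0};
        std::vector<double> y = {0.0, 0.0, 0.0};

        // z1 = B * x
        double z1 = 0.0;
        for (int k = 0; k < N; ++k)
            z1 += B[k] * x[k];

        // z2 = D^-1 * z1 (UMFPACK performs this step in the real code)
        double z2 = z1 / D;

        // y -= C^T * z2
        for (int j = 0; j < N; ++j)
            y[j] -= C[j] * z2;

        for (int j = 0; j < N; ++j)
            std::printf("y[%d] = %g\n", j, y[j]); // -0.375, 0, -0.75
    }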
opm/simulators/linalg/bda/MultisegmentWellContribution.cpp | 138 (new file)

@@ -0,0 +1,138 @@
/*
  Copyright 2020 Equinor ASA

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/

#include <config.h> // CMake; defines HAVE_UMFPACK
#include <cstdlib>
#include <cstring>
#include <fstream>

#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>

#if HAVE_UMFPACK
#include <dune/istl/umfpack.hh>
#endif // HAVE_UMFPACK

// the CUDA runtime is used unconditionally below, so this include
// must not sit inside the HAVE_UMFPACK guard
#include <opm/simulators/linalg/bda/cuda_header.hpp>

#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>

namespace Opm
{

MultisegmentWellContribution::MultisegmentWellContribution(unsigned int dim_, unsigned int dim_wells_,
    unsigned int Nb_, unsigned int Mb_,
    unsigned int BnumBlocks_, double *Bvalues, unsigned int *BcolIndices, unsigned int *BrowPointers,
    unsigned int DnumBlocks_, double *Dvalues, int *DcolPointers, int *DrowIndices,
    double *Cvalues)
    : dim(dim_), dim_wells(dim_wells_), N(Nb_*dim), Nb(Nb_), M(Mb_*dim_wells), Mb(Mb_),
      DnumBlocks(DnumBlocks_), BnumBlocks(BnumBlocks_)
{
    // B and C are stored in block CSR and share the same sparsity pattern,
    // so the column indices and row pointers of B are copied for C as well
    Cvals.insert(Cvals.end(), Cvalues, Cvalues + BnumBlocks*dim*dim_wells);
    Dvals.insert(Dvals.end(), Dvalues, Dvalues + DnumBlocks*dim_wells*dim_wells);
    Bvals.insert(Bvals.end(), Bvalues, Bvalues + BnumBlocks*dim*dim_wells);
    Ccols.insert(Ccols.end(), BcolIndices, BcolIndices + BnumBlocks);
    Bcols.insert(Bcols.end(), BcolIndices, BcolIndices + BnumBlocks);
    Crows.insert(Crows.end(), BrowPointers, BrowPointers + Mb + 1); // Mb block rows -> Mb+1 row pointers
    Brows.insert(Brows.end(), BrowPointers, BrowPointers + Mb + 1);

    // D is stored in scalar CSC, as UMFPACK expects:
    // M+1 column pointers and one row index per nonzero
    Dcols.insert(Dcols.end(), DcolPointers, DcolPointers + M + 1);
    Drows.insert(Drows.end(), DrowIndices, DrowIndices + DnumBlocks*dim_wells*dim_wells);

    z1.resize(Mb * dim_wells);
    z2.resize(Mb * dim_wells);

    // allocate pinned memory on host for fast async transfers
    cudaMallocHost(&h_x, sizeof(double) * N);
    cudaMallocHost(&h_y, sizeof(double) * N);

    // factorize D once; apply() reuses the factorization for every solve
    umfpack_di_symbolic(M, M, Dcols.data(), Drows.data(), Dvals.data(), &UMFPACK_Symbolic, nullptr, nullptr);
    umfpack_di_numeric(Dcols.data(), Drows.data(), Dvals.data(), UMFPACK_Symbolic, &UMFPACK_Numeric, nullptr, nullptr);
}

MultisegmentWellContribution::~MultisegmentWellContribution()
{
    cudaFreeHost(h_x);
    cudaFreeHost(h_y);

    umfpack_di_free_symbolic(&UMFPACK_Symbolic);
    umfpack_di_free_numeric(&UMFPACK_Numeric);
}


// Apply the MultisegmentWellContribution, similar to MultisegmentWell::apply()
// y -= (C^T * (D^-1 * (B * x)))
void MultisegmentWellContribution::apply(double *d_x, double *d_y)
{
    // copy vectors x and y from GPU to CPU
    cudaMemcpyAsync(h_x, d_x, sizeof(double) * N, cudaMemcpyDeviceToHost, stream);
    cudaMemcpyAsync(h_y, d_y, sizeof(double) * N, cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);

    // reset z1 and z2
    std::fill(z1.begin(), z1.end(), 0.0);
    std::fill(z2.begin(), z2.end(), 0.0);

    // z1 = B * x
    for (unsigned int row = 0; row < Mb; ++row) {
        // for every block in the row
        for (unsigned int blockID = Brows[row]; blockID < Brows[row+1]; ++blockID) {
            unsigned int colIdx = Bcols[blockID];
            for (unsigned int j = 0; j < dim_wells; ++j) {
                double temp = 0.0;
                for (unsigned int k = 0; k < dim; ++k) {
                    temp += Bvals[blockID * dim * dim_wells + j * dim + k] * h_x[colIdx * dim + k];
                }
                z1[row * dim_wells + j] += temp;
            }
        }
    }

    // z2 = D^-1 * (B * x), using the UMFPACK factorization from the constructor
    umfpack_di_solve(UMFPACK_A, Dcols.data(), Drows.data(), Dvals.data(), z2.data(), z1.data(), UMFPACK_Numeric, nullptr, nullptr);

    // y -= (C^T * z2)
    // y -= (C^T * (D^-1 * (B * x)))
    // C shares the sparsity pattern of B, so Crows/Ccols hold the same indices
    for (unsigned int row = 0; row < Mb; ++row) {
        // for every block in the row
        for (unsigned int blockID = Crows[row]; blockID < Crows[row+1]; ++blockID) {
            unsigned int colIdx = Ccols[blockID];
            for (unsigned int j = 0; j < dim; ++j) {
                double temp = 0.0;
                for (unsigned int k = 0; k < dim_wells; ++k) {
                    // transposed access: block element (k, j) of C
                    temp += Cvals[blockID * dim * dim_wells + j + k * dim] * z2[row * dim_wells + k];
                }
                h_y[colIdx * dim + j] -= temp;
            }
        }
    }

    // copy vector y from CPU back to GPU
    cudaMemcpyAsync(d_y, h_y, sizeof(double) * N, cudaMemcpyHostToDevice, stream);
    cudaStreamSynchronize(stream);
}

void MultisegmentWellContribution::setCudaStream(cudaStream_t stream_)
{
    stream = stream_;
}

} //namespace Opm
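
The UMFPACK lifecycle used above (umfpack_di_symbolic and umfpack_di_numeric once in the constructor, umfpack_di_solve per apply, the two free calls in the destructor) can be tried standalone. Here is a minimal round trip on a hand-built 2x2 CSC matrix; the matrix and right-hand side are made up for illustration, and SuiteSparse is assumed to be installed (e.g. g++ demo.cpp -lumfpack).

    #include <umfpack.h>
    #include <cstdio>

    int main()
    {
        // 2x2 matrix [[2, 0], [1, 3]] in compressed sparse column (CSC) form
        int    Ap[] = {0, 2, 3};       // column pointers (n+1 entries)
        int    Ai[] = {0, 1, 1};       // row index of each nonzero
        double Ax[] = {2.0, 1.0, 3.0}; // nonzero values, column by column
        double b[]  = {4.0, 11.0};
        double x[2];

        void *Symbolic, *Numeric;
        // factorize once...
        umfpack_di_symbolic(2, 2, Ap, Ai, Ax, &Symbolic, nullptr, nullptr);
        umfpack_di_numeric(Ap, Ai, Ax, Symbolic, &Numeric, nullptr, nullptr);
        // ...then solve A x = b, potentially many times, reusing Numeric
        umfpack_di_solve(UMFPACK_A, Ap, Ai, Ax, x, b, Numeric, nullptr, nullptr);
        umfpack_di_free_symbolic(&Symbolic);
        umfpack_di_free_numeric(&Numeric);

        std::printf("x = (%g, %g)\n", x[0], x[1]); // expect (2, 3)
    }

Factorizing once and reusing the Numeric object is the same design choice the class makes: D is factorized in the constructor, while apply() may be called many times with different vectors.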