mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
add block-jacobi partitioner option. Add block-jacobi matrix for use in OpenCL preconditioner
Rebased
This commit is contained in:
parent
aba4c5f487
commit
e360c00b73
@ -85,6 +85,10 @@ struct EdgeWeightsMethod {
|
|||||||
using type = UndefinedProperty;
|
using type = UndefinedProperty;
|
||||||
};
|
};
|
||||||
template<class TypeTag, class MyTypeTag>
|
template<class TypeTag, class MyTypeTag>
|
||||||
|
struct NumJacobiBlocks {
|
||||||
|
using type = UndefinedProperty;
|
||||||
|
};
|
||||||
|
template<class TypeTag, class MyTypeTag>
|
||||||
struct OwnerCellsFirst {
|
struct OwnerCellsFirst {
|
||||||
using type = UndefinedProperty;
|
using type = UndefinedProperty;
|
||||||
};
|
};
|
||||||
@ -133,6 +137,10 @@ struct EdgeWeightsMethod<TypeTag, TTag::EclBaseVanguard> {
|
|||||||
static constexpr int value = 1;
|
static constexpr int value = 1;
|
||||||
};
|
};
|
||||||
template<class TypeTag>
|
template<class TypeTag>
|
||||||
|
struct NumJacobiBlocks<TypeTag, TTag::EclBaseVanguard> {
|
||||||
|
static constexpr int value = 0;
|
||||||
|
};
|
||||||
|
template<class TypeTag>
|
||||||
struct OwnerCellsFirst<TypeTag, TTag::EclBaseVanguard> {
|
struct OwnerCellsFirst<TypeTag, TTag::EclBaseVanguard> {
|
||||||
static constexpr bool value = true;
|
static constexpr bool value = true;
|
||||||
};
|
};
|
||||||
@ -211,6 +219,8 @@ public:
|
|||||||
"When restarting: should we try to initialize wells and groups from historical SCHEDULE section.");
|
"When restarting: should we try to initialize wells and groups from historical SCHEDULE section.");
|
||||||
EWOMS_REGISTER_PARAM(TypeTag, int, EdgeWeightsMethod,
|
EWOMS_REGISTER_PARAM(TypeTag, int, EdgeWeightsMethod,
|
||||||
"Choose edge-weighing strategy: 0=uniform, 1=trans, 2=log(trans).");
|
"Choose edge-weighing strategy: 0=uniform, 1=trans, 2=log(trans).");
|
||||||
|
EWOMS_REGISTER_PARAM(TypeTag, int, NumJacobiBlocks,
|
||||||
|
"Number of blocks to be created for the Block-Jacobi preconditioner.");
|
||||||
EWOMS_REGISTER_PARAM(TypeTag, bool, OwnerCellsFirst,
|
EWOMS_REGISTER_PARAM(TypeTag, bool, OwnerCellsFirst,
|
||||||
"Order cells owned by rank before ghost/overlap cells.");
|
"Order cells owned by rank before ghost/overlap cells.");
|
||||||
EWOMS_REGISTER_PARAM(TypeTag, bool, SerialPartitioning,
|
EWOMS_REGISTER_PARAM(TypeTag, bool, SerialPartitioning,
|
||||||
@ -235,6 +245,7 @@ public:
|
|||||||
{
|
{
|
||||||
fileName_ = EWOMS_GET_PARAM(TypeTag, std::string, EclDeckFileName);
|
fileName_ = EWOMS_GET_PARAM(TypeTag, std::string, EclDeckFileName);
|
||||||
edgeWeightsMethod_ = Dune::EdgeWeightMethod(EWOMS_GET_PARAM(TypeTag, int, EdgeWeightsMethod));
|
edgeWeightsMethod_ = Dune::EdgeWeightMethod(EWOMS_GET_PARAM(TypeTag, int, EdgeWeightsMethod));
|
||||||
|
numJacobiBlocks_ = EWOMS_GET_PARAM(TypeTag, int, NumJacobiBlocks);
|
||||||
ownersFirst_ = EWOMS_GET_PARAM(TypeTag, bool, OwnerCellsFirst);
|
ownersFirst_ = EWOMS_GET_PARAM(TypeTag, bool, OwnerCellsFirst);
|
||||||
serialPartitioning_ = EWOMS_GET_PARAM(TypeTag, bool, SerialPartitioning);
|
serialPartitioning_ = EWOMS_GET_PARAM(TypeTag, bool, SerialPartitioning);
|
||||||
zoltanImbalanceTol_ = EWOMS_GET_PARAM(TypeTag, double, ZoltanImbalanceTol);
|
zoltanImbalanceTol_ = EWOMS_GET_PARAM(TypeTag, double, ZoltanImbalanceTol);
|
||||||
|
@ -136,7 +136,7 @@ public:
|
|||||||
this->serialPartitioning(), this->enableDistributedWells(),
|
this->serialPartitioning(), this->enableDistributedWells(),
|
||||||
this->zoltanImbalanceTol(), this->gridView(),
|
this->zoltanImbalanceTol(), this->gridView(),
|
||||||
this->schedule(), this->centroids_,
|
this->schedule(), this->centroids_,
|
||||||
this->eclState(), this->parallelWells_);
|
this->eclState(), this->parallelWells_, this->numJacobiBlocks());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
this->updateGridView_();
|
this->updateGridView_();
|
||||||
@ -192,6 +192,7 @@ protected:
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<TransmissibilityType> globalTrans_;
|
std::unique_ptr<TransmissibilityType> globalTrans_;
|
||||||
|
//std::vector<int> cell_part_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Opm
|
} // namespace Opm
|
||||||
|
@ -82,12 +82,13 @@ void EclGenericCpGridVanguard<ElementMapper,GridView,Scalar>::doLoadBalance_(Dun
|
|||||||
const Schedule& schedule,
|
const Schedule& schedule,
|
||||||
std::vector<double>& centroids,
|
std::vector<double>& centroids,
|
||||||
EclipseState& eclState1,
|
EclipseState& eclState1,
|
||||||
EclGenericVanguard::ParallelWellStruct& parallelWells)
|
EclGenericVanguard::ParallelWellStruct& parallelWells,
|
||||||
|
int numJacobiBlocks)
|
||||||
{
|
{
|
||||||
int mpiSize = 1;
|
int mpiSize = 1;
|
||||||
MPI_Comm_size(grid_->comm(), &mpiSize);
|
MPI_Comm_size(grid_->comm(), &mpiSize);
|
||||||
|
|
||||||
if (mpiSize > 1) {
|
if (mpiSize > 1 || numJacobiBlocks > 0) {
|
||||||
// the CpGrid's loadBalance() method likes to have the transmissibilities as
|
// the CpGrid's loadBalance() method likes to have the transmissibilities as
|
||||||
// its edge weights. since this is (kind of) a layering violation and
|
// its edge weights. since this is (kind of) a layering violation and
|
||||||
// transmissibilities are relatively expensive to compute, we only do it if
|
// transmissibilities are relatively expensive to compute, we only do it if
|
||||||
@ -131,53 +132,60 @@ void EclGenericCpGridVanguard<ElementMapper,GridView,Scalar>::doLoadBalance_(Dun
|
|||||||
}
|
}
|
||||||
|
|
||||||
//distribute the grid and switch to the distributed view.
|
//distribute the grid and switch to the distributed view.
|
||||||
{
|
if (mpiSize > 1) {
|
||||||
const auto wells = schedule.getWellsatEnd();
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
{
|
||||||
auto& eclState = dynamic_cast<ParallelEclipseState&>(eclState1);
|
const auto wells = schedule.getWellsatEnd();
|
||||||
const EclipseGrid* eclGrid = nullptr;
|
|
||||||
|
|
||||||
if (grid_->comm().rank() == 0)
|
try
|
||||||
{
|
{
|
||||||
eclGrid = &eclState.getInputGrid();
|
auto& eclState = dynamic_cast<ParallelEclipseState&>(eclState1);
|
||||||
}
|
const EclipseGrid* eclGrid = nullptr;
|
||||||
|
|
||||||
PropsCentroidsDataHandle<Dune::CpGrid> handle(*grid_, eclState, eclGrid, centroids,
|
|
||||||
cartesianIndexMapper());
|
|
||||||
if (loadBalancerSet)
|
|
||||||
{
|
|
||||||
std::vector<int> parts;
|
|
||||||
if (grid_->comm().rank() == 0)
|
if (grid_->comm().rank() == 0)
|
||||||
{
|
{
|
||||||
parts = (*externalLoadBalancer)(*grid_);
|
eclGrid = &eclState.getInputGrid();
|
||||||
}
|
}
|
||||||
parallelWells = std::get<1>(grid_->loadBalance(handle, parts, &wells, ownersFirst, false, 1));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
parallelWells =
|
|
||||||
std::get<1>(grid_->loadBalance(handle, edgeWeightsMethod, &wells, serialPartitioning,
|
|
||||||
faceTrans.data(), ownersFirst, false, 1, true, zoltanImbalanceTol,
|
|
||||||
enableDistributedWells));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch(const std::bad_cast& e)
|
|
||||||
{
|
|
||||||
std::ostringstream message;
|
|
||||||
message << "Parallel simulator setup is incorrect as it does not use ParallelEclipseState ("
|
|
||||||
<< e.what() <<")"<<std::flush;
|
|
||||||
OpmLog::error(message.str());
|
|
||||||
std::rethrow_exception(std::current_exception());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
grid_->switchToDistributedView();
|
|
||||||
|
|
||||||
|
PropsCentroidsDataHandle<Dune::CpGrid> handle(*grid_, eclState, eclGrid, centroids,
|
||||||
|
cartesianIndexMapper());
|
||||||
|
if (loadBalancerSet)
|
||||||
|
{
|
||||||
|
std::vector<int> parts;
|
||||||
|
if (grid_->comm().rank() == 0)
|
||||||
|
{
|
||||||
|
parts = (*externalLoadBalancer)(*grid_);
|
||||||
|
}
|
||||||
|
parallelWells = std::get<1>(grid_->loadBalance(handle, parts, &wells, ownersFirst, false, 1));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
parallelWells =
|
||||||
|
std::get<1>(grid_->loadBalance(handle, edgeWeightsMethod, &wells, serialPartitioning,
|
||||||
|
faceTrans.data(), ownersFirst, false, 1, true, zoltanImbalanceTol,
|
||||||
|
enableDistributedWells));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch(const std::bad_cast& e)
|
||||||
|
{
|
||||||
|
std::ostringstream message;
|
||||||
|
message << "Parallel simulator setup is incorrect as it does not use ParallelEclipseState ("
|
||||||
|
<< e.what() <<")"<<std::flush;
|
||||||
|
OpmLog::error(message.str());
|
||||||
|
std::rethrow_exception(std::current_exception());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
grid_->switchToDistributedView();
|
||||||
|
}
|
||||||
|
|
||||||
// Calling Schedule::filterConnections would remove any perforated
|
// Calling Schedule::filterConnections would remove any perforated
|
||||||
// cells that exist only on other ranks even in the case of distributed wells
|
// cells that exist only on other ranks even in the case of distributed wells
|
||||||
// But we need all connections to figure out the first cell of a well (e.g. for
|
// But we need all connections to figure out the first cell of a well (e.g. for
|
||||||
// pressure). Hence this is now skipped. Rank 0 had everything even before.
|
// pressure). Hence this is now skipped. Rank 0 had everything even before.
|
||||||
|
if (numJacobiBlocks > 0 && mpiSize == 1) {
|
||||||
|
const auto wells = schedule.getWellsatEnd();
|
||||||
|
cell_part_.resize(grid_->numCells());
|
||||||
|
cell_part_ = grid_->zoltanPartitionWithoutScatter(&wells, faceTrans.data(), numJacobiBlocks, zoltanImbalanceTol);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -101,6 +101,10 @@ public:
|
|||||||
*/
|
*/
|
||||||
const CartesianIndexMapper& equilCartesianIndexMapper() const;
|
const CartesianIndexMapper& equilCartesianIndexMapper() const;
|
||||||
|
|
||||||
|
std::vector<int> cellPartition() const
|
||||||
|
{
|
||||||
|
return cell_part_;
|
||||||
|
}
|
||||||
protected:
|
protected:
|
||||||
/*!
|
/*!
|
||||||
* \brief Distribute the simulation grid over multiple processes
|
* \brief Distribute the simulation grid over multiple processes
|
||||||
@ -114,7 +118,8 @@ protected:
|
|||||||
const GridView& gridv, const Schedule& schedule,
|
const GridView& gridv, const Schedule& schedule,
|
||||||
std::vector<double>& centroids,
|
std::vector<double>& centroids,
|
||||||
EclipseState& eclState,
|
EclipseState& eclState,
|
||||||
EclGenericVanguard::ParallelWellStruct& parallelWells);
|
EclGenericVanguard::ParallelWellStruct& parallelWells,
|
||||||
|
int numJacobiBlocks);
|
||||||
|
|
||||||
void distributeFieldProps_(EclipseState& eclState);
|
void distributeFieldProps_(EclipseState& eclState);
|
||||||
#endif
|
#endif
|
||||||
@ -137,6 +142,7 @@ protected:
|
|||||||
std::unique_ptr<CartesianIndexMapper> equilCartesianIndexMapper_;
|
std::unique_ptr<CartesianIndexMapper> equilCartesianIndexMapper_;
|
||||||
|
|
||||||
int mpiRank;
|
int mpiRank;
|
||||||
|
std::vector<int> cell_part_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Opm
|
} // namespace Opm
|
||||||
|
@ -249,6 +249,12 @@ public:
|
|||||||
Dune::EdgeWeightMethod edgeWeightsMethod() const
|
Dune::EdgeWeightMethod edgeWeightsMethod() const
|
||||||
{ return edgeWeightsMethod_; }
|
{ return edgeWeightsMethod_; }
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Number of blocks in the Block-Jacobi preconditioner.
|
||||||
|
*/
|
||||||
|
int numJacobiBlocks() const
|
||||||
|
{ return numJacobiBlocks_; }
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* \brief Parameter that decide if cells owned by rank are ordered before ghost cells.
|
* \brief Parameter that decide if cells owned by rank are ordered before ghost cells.
|
||||||
*/
|
*/
|
||||||
@ -323,6 +329,7 @@ protected:
|
|||||||
std::string caseName_;
|
std::string caseName_;
|
||||||
std::string fileName_;
|
std::string fileName_;
|
||||||
Dune::EdgeWeightMethod edgeWeightsMethod_;
|
Dune::EdgeWeightMethod edgeWeightsMethod_;
|
||||||
|
int numJacobiBlocks_;
|
||||||
bool ownersFirst_;
|
bool ownersFirst_;
|
||||||
bool serialPartitioning_;
|
bool serialPartitioning_;
|
||||||
double zoltanImbalanceTol_;
|
double zoltanImbalanceTol_;
|
||||||
|
@ -161,7 +161,15 @@ namespace Opm
|
|||||||
// Set it up manually
|
// Set it up manually
|
||||||
ElementMapper elemMapper(simulator_.vanguard().gridView(), Dune::mcmgElementLayout());
|
ElementMapper elemMapper(simulator_.vanguard().gridView(), Dune::mcmgElementLayout());
|
||||||
detail::findOverlapAndInterior(simulator_.vanguard().grid(), elemMapper, overlapRows_, interiorRows_);
|
detail::findOverlapAndInterior(simulator_.vanguard().grid(), elemMapper, overlapRows_, interiorRows_);
|
||||||
|
numJacobiBlocks_ = EWOMS_GET_PARAM(TypeTag, int, NumJacobiBlocks);
|
||||||
|
useWellConn_ = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions);
|
||||||
|
if (numJacobiBlocks_ > 1) {
|
||||||
|
const auto wellsForConn = simulator_.vanguard().schedule().getWellsatEnd();
|
||||||
|
detail::setWellConnections(simulator_.vanguard().grid(), wellsForConn, useWellConn_,
|
||||||
|
wellConnectionsGraph_, numJacobiBlocks_);
|
||||||
|
std::cout << "Create block-Jacobi pattern" << std::endl;
|
||||||
|
blockJacobiAdjacency();
|
||||||
|
}
|
||||||
useWellConn_ = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions);
|
useWellConn_ = EWOMS_GET_PARAM(TypeTag, bool, MatrixAddWellContributions);
|
||||||
#if HAVE_FPGA
|
#if HAVE_FPGA
|
||||||
// check usage of MatrixAddWellContributions: for FPGA they must be included
|
// check usage of MatrixAddWellContributions: for FPGA they must be included
|
||||||
@ -273,8 +281,15 @@ namespace Opm
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (numJacobiBlocks_ > 1) {
|
||||||
|
copyMatToBlockJac(getMatrix(), *blockJacobiForGPUILU0_);
|
||||||
// Const_cast needed since the CUDA stuff overwrites values for better matrix condition..
|
// Const_cast needed since the CUDA stuff overwrites values for better matrix condition..
|
||||||
bdaBridge->solve_system(const_cast<Matrix*>(&getMatrix()), *rhs_, *wellContribs, result);
|
bdaBridge->solve_system(const_cast<Matrix*>(&getMatrix()), &*blockJacobiForGPUILU0_,
|
||||||
|
numJacobiBlocks_, *rhs_, *wellContribs, result);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
bdaBridge->solve_system(const_cast<Matrix*>(&getMatrix()), const_cast<Matrix*>(&getMatrix()),
|
||||||
|
numJacobiBlocks_, *rhs_, *wellContribs, result);
|
||||||
if (result.converged) {
|
if (result.converged) {
|
||||||
// get result vector x from non-Dune backend, iff solve was successful
|
// get result vector x from non-Dune backend, iff solve was successful
|
||||||
bdaBridge->get_result(x);
|
bdaBridge->get_result(x);
|
||||||
@ -497,6 +512,78 @@ namespace Opm
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create sparsity pattern for block-Jacobi matrix based on partitioning of grid.
|
||||||
|
void blockJacobiAdjacency()
|
||||||
|
{
|
||||||
|
const auto& grid = simulator_.vanguard().grid();
|
||||||
|
std::vector<int> cell_part = simulator_.vanguard().cellPartition();
|
||||||
|
|
||||||
|
typedef typename Matrix::size_type size_type;
|
||||||
|
size_type numCells = grid.size( 0 );
|
||||||
|
blockJacobiForGPUILU0_.reset(new Matrix(numCells, numCells, Matrix::random));
|
||||||
|
|
||||||
|
std::vector<std::set<size_type>> pattern;
|
||||||
|
pattern.resize(numCells);
|
||||||
|
|
||||||
|
const auto& lid = grid.localIdSet();
|
||||||
|
const auto& gridView = grid.leafGridView();
|
||||||
|
auto elemIt = gridView.template begin<0>();
|
||||||
|
const auto& elemEndIt = gridView.template end<0>();
|
||||||
|
|
||||||
|
//Loop over cells
|
||||||
|
for (; elemIt != elemEndIt; ++elemIt)
|
||||||
|
{
|
||||||
|
|
||||||
|
const auto& elem = *elemIt;
|
||||||
|
size_type idx = lid.id(elem);
|
||||||
|
pattern[idx].insert(idx);
|
||||||
|
|
||||||
|
// Add well non-zero connections
|
||||||
|
for (auto wc = wellConnectionsGraph_[idx].begin(); wc!=wellConnectionsGraph_[idx].end(); ++wc)
|
||||||
|
pattern[idx].insert(*wc);
|
||||||
|
|
||||||
|
int locPart = cell_part[idx];
|
||||||
|
|
||||||
|
//Add neighbor if it is on the same part
|
||||||
|
auto isend = gridView.iend(elem);
|
||||||
|
for (auto is = gridView.ibegin(elem); is!=isend; ++is)
|
||||||
|
{
|
||||||
|
//check if face has neighbor
|
||||||
|
if (is->neighbor())
|
||||||
|
{
|
||||||
|
size_type nid = lid.id(is->outside());
|
||||||
|
int nabPart = cell_part[nid];
|
||||||
|
if (locPart == nabPart)
|
||||||
|
pattern[idx].insert(nid);
|
||||||
|
}
|
||||||
|
|
||||||
|
blockJacobiForGPUILU0_->setrowsize(idx, pattern[idx].size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
blockJacobiForGPUILU0_->endrowsizes();
|
||||||
|
for (size_type dofId = 0; dofId < numCells; ++dofId)
|
||||||
|
{
|
||||||
|
auto nabIdx = pattern[dofId].begin();
|
||||||
|
auto endNab = pattern[dofId].end();
|
||||||
|
for (; nabIdx != endNab; ++nabIdx)
|
||||||
|
{
|
||||||
|
blockJacobiForGPUILU0_->addindex(dofId, *nabIdx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
blockJacobiForGPUILU0_->endindices();
|
||||||
|
}
|
||||||
|
|
||||||
|
void copyMatToBlockJac(Matrix& mat, Matrix& blockJac)
|
||||||
|
{
|
||||||
|
auto rbegin = blockJac.begin();
|
||||||
|
auto rend = blockJac.end();
|
||||||
|
for (auto row = rbegin; row != rend; ++row) {
|
||||||
|
for (auto col = (*row).begin(); col != (*row).end(); ++col) {
|
||||||
|
blockJac[row.index()][col.index()] = mat[row.index()][col.index()];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Matrix& getMatrix()
|
Matrix& getMatrix()
|
||||||
{
|
{
|
||||||
@ -517,6 +604,8 @@ namespace Opm
|
|||||||
Matrix* matrix_;
|
Matrix* matrix_;
|
||||||
Vector *rhs_;
|
Vector *rhs_;
|
||||||
|
|
||||||
|
std::unique_ptr<Matrix> blockJacobiForGPUILU0_;
|
||||||
|
|
||||||
std::unique_ptr<FlexibleSolverType> flexibleSolver_;
|
std::unique_ptr<FlexibleSolverType> flexibleSolver_;
|
||||||
std::unique_ptr<AbstractOperatorType> linearOperatorForFlexibleSolver_;
|
std::unique_ptr<AbstractOperatorType> linearOperatorForFlexibleSolver_;
|
||||||
std::unique_ptr<WellModelAsLinearOperator<WellModel, Vector, Vector>> wellOperator_;
|
std::unique_ptr<WellModelAsLinearOperator<WellModel, Vector, Vector>> wellOperator_;
|
||||||
@ -530,6 +619,7 @@ namespace Opm
|
|||||||
FlowLinearSolverParameters parameters_;
|
FlowLinearSolverParameters parameters_;
|
||||||
PropertyTree prm_;
|
PropertyTree prm_;
|
||||||
bool scale_variables_;
|
bool scale_variables_;
|
||||||
|
int numJacobiBlocks_;
|
||||||
|
|
||||||
std::shared_ptr< CommunicationType > comm_;
|
std::shared_ptr< CommunicationType > comm_;
|
||||||
}; // end ISTLSolver
|
}; // end ISTLSolver
|
||||||
|
@ -193,6 +193,8 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::copySparsityPatternFromI
|
|||||||
|
|
||||||
template <class BridgeMatrix, class BridgeVector, int block_size>
|
template <class BridgeMatrix, class BridgeVector, int block_size>
|
||||||
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system([[maybe_unused]] BridgeMatrix* mat,
|
void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system([[maybe_unused]] BridgeMatrix* mat,
|
||||||
|
[[maybe_unused]] BridgeMatrix* blockMat,
|
||||||
|
[[maybe_unused]] int numJacobiBlocks,
|
||||||
[[maybe_unused]] BridgeVector& b,
|
[[maybe_unused]] BridgeVector& b,
|
||||||
[[maybe_unused]] WellContributions& wellContribs,
|
[[maybe_unused]] WellContributions& wellContribs,
|
||||||
[[maybe_unused]] InverseOperatorResult& res)
|
[[maybe_unused]] InverseOperatorResult& res)
|
||||||
@ -209,6 +211,11 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system([[maybe_unu
|
|||||||
const int nnzb = (h_rows.empty()) ? mat->nonzeroes() : h_rows.back();
|
const int nnzb = (h_rows.empty()) ? mat->nonzeroes() : h_rows.back();
|
||||||
const int nnz = nnzb * dim * dim;
|
const int nnz = nnzb * dim * dim;
|
||||||
|
|
||||||
|
static std::vector<int> bm_h_rows;
|
||||||
|
static std::vector<int> bm_h_cols;
|
||||||
|
const int bm_nnzb = (bm_h_rows.empty()) ? blockMat->nonzeroes() : bm_h_rows.back();
|
||||||
|
const int bm_nnz = bm_nnzb * dim * dim;
|
||||||
|
|
||||||
if (dim != 3) {
|
if (dim != 3) {
|
||||||
OpmLog::warning("BdaSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
|
OpmLog::warning("BdaSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
|
||||||
use_gpu = use_fpga = false;
|
use_gpu = use_fpga = false;
|
||||||
@ -221,8 +228,15 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system([[maybe_unu
|
|||||||
copySparsityPatternFromISTL(*mat, h_rows, h_cols);
|
copySparsityPatternFromISTL(*mat, h_rows, h_cols);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (bm_h_rows.capacity() == 0) {
|
||||||
|
bm_h_rows.reserve(Nb+1);
|
||||||
|
bm_h_cols.reserve(bm_nnzb);
|
||||||
|
copySparsityPatternFromISTL(*blockMat, bm_h_rows, bm_h_cols);
|
||||||
|
}
|
||||||
|
|
||||||
Dune::Timer t_zeros;
|
Dune::Timer t_zeros;
|
||||||
int numZeros = checkZeroDiagonal(*mat);
|
int numZeros = checkZeroDiagonal(*mat);
|
||||||
|
int bm_numZeros = checkZeroDiagonal(*blockMat);
|
||||||
if (verbosity >= 2) {
|
if (verbosity >= 2) {
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "Checking zeros took: " << t_zeros.stop() << " s, found " << numZeros << " zeros";
|
out << "Checking zeros took: " << t_zeros.stop() << " s, found " << numZeros << " zeros";
|
||||||
@ -232,9 +246,17 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system([[maybe_unu
|
|||||||
|
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
// actually solve
|
// actually solve
|
||||||
|
SolverStatus status;
|
||||||
|
if (numJacobiBlocks > 1)
|
||||||
|
status = backend->solve_system2(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])),
|
||||||
|
bm_nnz, static_cast<double*>(&(((*blockMat)[0][0][0][0]))), bm_h_rows.data(), bm_h_cols.data(),
|
||||||
|
wellContribs, result);
|
||||||
|
else
|
||||||
|
status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), wellContribs, result);
|
||||||
|
|
||||||
|
|
||||||
// assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, the chosen BdaSolver is expected to perform undefined behaviour
|
// assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, the chosen BdaSolver is expected to perform undefined behaviour
|
||||||
SolverStatus status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), wellContribs, result);
|
//SolverStatus status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), wellContribs, result);
|
||||||
switch(status) {
|
switch(status) {
|
||||||
case SolverStatus::BDA_SOLVER_SUCCESS:
|
case SolverStatus::BDA_SOLVER_SUCCESS:
|
||||||
//OpmLog::info("BdaSolver converged");
|
//OpmLog::info("BdaSolver converged");
|
||||||
|
@ -65,7 +65,7 @@ public:
|
|||||||
/// \param[in] b vector b, should be of type Dune::BlockVector
|
/// \param[in] b vector b, should be of type Dune::BlockVector
|
||||||
/// \param[in] wellContribs contains all WellContributions, to apply them separately, instead of adding them to matrix A
|
/// \param[in] wellContribs contains all WellContributions, to apply them separately, instead of adding them to matrix A
|
||||||
/// \param[inout] result summary of solver result
|
/// \param[inout] result summary of solver result
|
||||||
void solve_system(BridgeMatrix *mat, BridgeVector &b, WellContributions& wellContribs, InverseOperatorResult &result);
|
void solve_system(BridgeMatrix *mat, BridgeMatrix *blockMat, int numJacobiBlocks, BridgeVector &b, WellContributions& wellContribs, InverseOperatorResult &result);
|
||||||
|
|
||||||
/// Get the resulting x vector
|
/// Get the resulting x vector
|
||||||
/// \param[inout] x vector x, should be of type Dune::BlockVector
|
/// \param[inout] x vector x, should be of type Dune::BlockVector
|
||||||
|
@ -89,6 +89,11 @@ namespace Accelerator {
|
|||||||
double *vals, int *rows, int *cols,
|
double *vals, int *rows, int *cols,
|
||||||
double *b, WellContributions& wellContribs, BdaResult &res) = 0;
|
double *b, WellContributions& wellContribs, BdaResult &res) = 0;
|
||||||
|
|
||||||
|
virtual SolverStatus solve_system2(int N_, int nnz_, int dim,
|
||||||
|
double *vals, int *rows, int *cols, double *b,
|
||||||
|
int nnz2, double *vals2, int *rows2, int *cols2,
|
||||||
|
WellContributions& wellContribs, BdaResult &res) = 0;
|
||||||
|
|
||||||
virtual void get_result(double *x) = 0;
|
virtual void get_result(double *x) = 0;
|
||||||
|
|
||||||
}; // end class BdaSolver
|
}; // end class BdaSolver
|
||||||
|
@ -223,7 +223,6 @@ void FpgaSolverBackend<block_size>::get_result(double *x_)
|
|||||||
}
|
}
|
||||||
} // end get_result()
|
} // end get_result()
|
||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
SolverStatus FpgaSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions&, BdaResult &res)
|
SolverStatus FpgaSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions&, BdaResult &res)
|
||||||
{
|
{
|
||||||
@ -250,6 +249,14 @@ SolverStatus FpgaSolverBackend<block_size>::solve_system(int N_, int nnz_, int d
|
|||||||
return SolverStatus::BDA_SOLVER_SUCCESS;
|
return SolverStatus::BDA_SOLVER_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <unsigned int block_size>
|
||||||
|
SolverStatus FpgaSolverBackend<block_size>::solve_system2(int N_, int nnz_, int dim,
|
||||||
|
double *vals, int *rows, int *cols, double *b,
|
||||||
|
int nnz2, double *vals2, int *rows2, int *cols2,
|
||||||
|
WellContributions& wellContribs, BdaResult &res)
|
||||||
|
{
|
||||||
|
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void FpgaSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, double *vals, int *rows, int *cols)
|
void FpgaSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, double *vals, int *rows, int *cols)
|
||||||
|
@ -255,6 +255,11 @@ public:
|
|||||||
/// \return status code
|
/// \return status code
|
||||||
SolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
SolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
||||||
|
|
||||||
|
SolverStatus solve_system2(int N_, int nnz_, int dim,
|
||||||
|
double *vals, int *rows, int *cols, double *b,
|
||||||
|
int nnz2, double *vals2, int *rows2, int *cols2,
|
||||||
|
WellContributions& wellContribs, BdaResult &res) override;
|
||||||
|
|
||||||
/// Get result after linear solve, and peform postprocessing if necessary
|
/// Get result after linear solve, and peform postprocessing if necessary
|
||||||
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
||||||
void get_result(double *x) override;
|
void get_result(double *x) override;
|
||||||
|
@ -359,6 +359,15 @@ void amgclSolverBackend<block_size>::get_result(double *x_) {
|
|||||||
} // end get_result()
|
} // end get_result()
|
||||||
|
|
||||||
|
|
||||||
|
template <unsigned int block_size>
|
||||||
|
SolverStatus amgclSolverBackend<block_size>::solve_system2(int N_, int nnz_, int dim,
|
||||||
|
double *vals, int *rows, int *cols, double *b,
|
||||||
|
int nnz2, double *vals2, int *rows2, int *cols2,
|
||||||
|
WellContributions& wellContribs, BdaResult &res)
|
||||||
|
{
|
||||||
|
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
SolverStatus amgclSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions&, BdaResult &res) {
|
SolverStatus amgclSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions&, BdaResult &res) {
|
||||||
if (initialized == false) {
|
if (initialized == false) {
|
||||||
@ -371,6 +380,7 @@ SolverStatus amgclSolverBackend<block_size>::solve_system(int N_, int nnz_, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template amgclSolverBackend<n>::amgclSolverBackend(int, int, double, unsigned int, unsigned int); \
|
template amgclSolverBackend<n>::amgclSolverBackend(int, int, double, unsigned int, unsigned int); \
|
||||||
|
|
||||||
|
@ -145,6 +145,11 @@ public:
|
|||||||
/// \return status code
|
/// \return status code
|
||||||
SolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
SolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
||||||
|
|
||||||
|
SolverStatus solve_system2(int N_, int nnz_, int dim,
|
||||||
|
double *vals, int *rows, int *cols, double *b,
|
||||||
|
int nnz2, double *vals2, int *rows2, int *cols2,
|
||||||
|
WellContributions& wellContribs, BdaResult &res) override;
|
||||||
|
|
||||||
/// Get result after linear solve, and peform postprocessing if necessary
|
/// Get result after linear solve, and peform postprocessing if necessary
|
||||||
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
||||||
void get_result(double *x) override;
|
void get_result(double *x) override;
|
||||||
|
@ -500,7 +500,14 @@ SolverStatus cusparseSolverBackend<block_size>::solve_system(int N, int nnz, int
|
|||||||
}
|
}
|
||||||
return SolverStatus::BDA_SOLVER_SUCCESS;
|
return SolverStatus::BDA_SOLVER_SUCCESS;
|
||||||
}
|
}
|
||||||
|
template <unsigned int block_size>
|
||||||
|
SolverStatus cusparseSolverBackend<block_size>::solve_system2(int N_, int nnz_, int dim,
|
||||||
|
double *vals, int *rows, int *cols, double *b,
|
||||||
|
int nnz2, double *vals2, int *rows2, int *cols2,
|
||||||
|
WellContributions& wellContribs, BdaResult &res)
|
||||||
|
{
|
||||||
|
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template cusparseSolverBackend<n>::cusparseSolverBackend(int, int, double, unsigned int); \
|
template cusparseSolverBackend<n>::cusparseSolverBackend(int, int, double, unsigned int); \
|
||||||
|
@ -138,6 +138,11 @@ public:
|
|||||||
/// \return status code
|
/// \return status code
|
||||||
SolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
SolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
||||||
|
|
||||||
|
SolverStatus solve_system2(int N_, int nnz_, int dim,
|
||||||
|
double *vals, int *rows, int *cols, double *b,
|
||||||
|
int nnz2, double *vals2, int *rows2, int *cols2,
|
||||||
|
WellContributions& wellContribs, BdaResult &res) override;
|
||||||
|
|
||||||
/// Get resulting vector x after linear solve, also includes post processing if necessary
|
/// Get resulting vector x after linear solve, also includes post processing if necessary
|
||||||
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
||||||
void get_result(double *x) override;
|
void get_result(double *x) override;
|
||||||
|
@ -149,7 +149,127 @@ bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
|
|||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
} // end init()
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <unsigned int block_size>
|
||||||
|
bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat)
|
||||||
|
{
|
||||||
|
const unsigned int bs = block_size;
|
||||||
|
|
||||||
|
this->N = mat->Nb * block_size;
|
||||||
|
this->Nb = mat->Nb;
|
||||||
|
this->nnz = mat->nnzbs * block_size * block_size;
|
||||||
|
this->nnzb = mat->nnzbs;
|
||||||
|
|
||||||
|
this->nnz_jm = jacMat->nnzbs * block_size * block_size;
|
||||||
|
this->nnzbs_jm = jacMat->nnzbs;
|
||||||
|
|
||||||
|
int *CSCRowIndices = nullptr;
|
||||||
|
int *CSCColPointers = nullptr;
|
||||||
|
|
||||||
|
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
||||||
|
LUmat = std::make_unique<BlockedMatrix>(*mat);
|
||||||
|
} else {
|
||||||
|
toOrder.resize(Nb);
|
||||||
|
fromOrder.resize(Nb);
|
||||||
|
CSCRowIndices = new int[nnzbs_jm];
|
||||||
|
CSCColPointers = new int[Nb + 1];
|
||||||
|
rmat = std::make_shared<BlockedMatrix>(mat->Nb, mat->nnzbs, block_size);
|
||||||
|
rJacMat = std::make_shared<BlockedMatrix>(jacMat->Nb, jacMat->nnzbs, block_size);
|
||||||
|
LUmat = std::make_unique<BlockedMatrix>(*rJacMat);
|
||||||
|
|
||||||
|
Timer t_convert;
|
||||||
|
csrPatternToCsc(jacMat->colIndices, jacMat->rowPointers, CSCRowIndices, CSCColPointers, jacMat->Nb);
|
||||||
|
if(verbosity >= 3){
|
||||||
|
std::ostringstream out;
|
||||||
|
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s";
|
||||||
|
OpmLog::info(out.str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Timer t_analysis;
|
||||||
|
std::ostringstream out;
|
||||||
|
if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) {
|
||||||
|
out << "BILU0 reordering strategy: " << "level_scheduling\n";
|
||||||
|
findLevelScheduling(jacMat->colIndices, jacMat->rowPointers, CSCRowIndices, CSCColPointers, jacMat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
|
||||||
|
for (int iii = 0; iii < numColors; ++iii) { out << "rpc: "<< rowsPerColor[iii] << "\n";}
|
||||||
|
out << "numColors: "<< numColors << "\n";
|
||||||
|
} else if (opencl_ilu_reorder == ILUReorder::GRAPH_COLORING) {
|
||||||
|
out << "BILU0 reordering strategy: " << "graph_coloring\n";
|
||||||
|
findGraphColoring<block_size>(jacMat->colIndices, jacMat->rowPointers, CSCRowIndices, CSCColPointers, jacMat->Nb, jacMat->Nb, jacMat->Nb, &numColors, toOrder.data(), fromOrder.data(), rowsPerColor);
|
||||||
|
for (int iii = 0; iii < numColors; ++iii) { out << "rpc: "<< rowsPerColor[iii] << "\n";}
|
||||||
|
out << "numColors: "<< numColors << "\n";
|
||||||
|
} else if (opencl_ilu_reorder == ILUReorder::NONE) {
|
||||||
|
out << "BILU0 reordering strategy: none\n";
|
||||||
|
// numColors = 1;
|
||||||
|
// rowsPerColor.emplace_back(Nb);
|
||||||
|
numColors = Nb;
|
||||||
|
for(int i = 0; i < Nb; ++i){
|
||||||
|
rowsPerColor.emplace_back(1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
OPM_THROW(std::logic_error, "Error ilu reordering strategy not set correctly\n");
|
||||||
|
}
|
||||||
|
if(verbosity >= 1){
|
||||||
|
out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors\n";
|
||||||
|
}
|
||||||
|
#if CHOW_PATEL
|
||||||
|
out << "BILU0 CHOW_PATEL: " << CHOW_PATEL << ", CHOW_PATEL_GPU: " << CHOW_PATEL_GPU;
|
||||||
|
#endif
|
||||||
|
OpmLog::info(out.str());
|
||||||
|
|
||||||
|
|
||||||
|
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||||
|
delete[] CSCRowIndices;
|
||||||
|
delete[] CSCColPointers;
|
||||||
|
}
|
||||||
|
|
||||||
|
diagIndex.resize(mat->Nb);
|
||||||
|
invDiagVals.resize(mat->Nb * bs * bs);
|
||||||
|
|
||||||
|
#if CHOW_PATEL
|
||||||
|
Lmat = std::make_unique<BlockedMatrix<block_size> >(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||||
|
Umat = std::make_unique<BlockedMatrix<block_size> >(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
LUmat->nnzValues = new double[jacMat->nnzbs * bs * bs];
|
||||||
|
|
||||||
|
s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb);
|
||||||
|
s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1));
|
||||||
|
s.diagIndex = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LUmat->Nb);
|
||||||
|
#if CHOW_PATEL
|
||||||
|
s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs);
|
||||||
|
s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs);
|
||||||
|
s.Lrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Lmat->Nb + 1));
|
||||||
|
s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs);
|
||||||
|
s.Ucols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs);
|
||||||
|
s.Urows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Lmat->Nb + 1));
|
||||||
|
#else
|
||||||
|
s.LUvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * LUmat->nnzbs);
|
||||||
|
s.LUcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LUmat->nnzbs);
|
||||||
|
s.LUrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (LUmat->Nb + 1));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
events.resize(2);
|
||||||
|
err = queue->enqueueWriteBuffer(s.invDiagVals, CL_FALSE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals.data(), nullptr, &events[0]);
|
||||||
|
|
||||||
|
rowsPerColorPrefix.resize(numColors + 1); // resize initializes value 0.0
|
||||||
|
for (int i = 0; i < numColors; ++i) {
|
||||||
|
rowsPerColorPrefix[i+1] = rowsPerColorPrefix[i] + rowsPerColor[i];
|
||||||
|
}
|
||||||
|
err |= queue->enqueueWriteBuffer(s.rowsPerColor, CL_FALSE, 0, (numColors + 1) * sizeof(int), rowsPerColorPrefix.data(), nullptr, &events[1]);
|
||||||
|
|
||||||
|
cl::WaitForEvents(events);
|
||||||
|
events.clear();
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
// enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
|
||||||
|
OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error");
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
@ -245,6 +365,104 @@ bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
|
|||||||
return true;
|
return true;
|
||||||
} // end create_preconditioner()
|
} // end create_preconditioner()
|
||||||
|
|
||||||
|
|
||||||
|
template <unsigned int block_size>
|
||||||
|
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat)
|
||||||
|
{
|
||||||
|
const unsigned int bs = block_size;
|
||||||
|
auto *m = mat;
|
||||||
|
auto *jm = jacMat;
|
||||||
|
|
||||||
|
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||||
|
m = rmat.get();
|
||||||
|
jm = rJacMat.get();
|
||||||
|
Timer t_reorder;
|
||||||
|
reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rmat.get());
|
||||||
|
reorderBlockedMatrixByPattern(jacMat, toOrder.data(), fromOrder.data(), rJacMat.get());
|
||||||
|
|
||||||
|
if (verbosity >= 3){
|
||||||
|
std::ostringstream out;
|
||||||
|
out << "BILU0 reorder matrix: " << t_reorder.stop() << " s";
|
||||||
|
OpmLog::info(out.str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
|
||||||
|
// this copy can have mat or rmat ->nnzValues as origin, depending on the reorder strategy
|
||||||
|
Timer t_copy;
|
||||||
|
memcpy(LUmat->nnzValues, jm->nnzValues, sizeof(double) * bs * bs * jm->nnzbs);
|
||||||
|
|
||||||
|
if (verbosity >= 3){
|
||||||
|
std::ostringstream out;
|
||||||
|
out << "BILU0 memcpy: " << t_copy.stop() << " s";
|
||||||
|
OpmLog::info(out.str());
|
||||||
|
}
|
||||||
|
|
||||||
|
#if CHOW_PATEL
|
||||||
|
chowPatelIlu.decomposition(queue, context,
|
||||||
|
LUmat.get(), Lmat.get(), Umat.get(),
|
||||||
|
invDiagVals, diagIndex,
|
||||||
|
s.diagIndex, s.invDiagVals,
|
||||||
|
s.Lvals, s.Lcols, s.Lrows,
|
||||||
|
s.Uvals, s.Ucols, s.Urows);
|
||||||
|
#else
|
||||||
|
Timer t_copyToGpu;
|
||||||
|
|
||||||
|
events.resize(1);
|
||||||
|
queue->enqueueWriteBuffer(s.LUvals, CL_FALSE, 0, LUmat->nnzbs * bs * bs * sizeof(double), LUmat->nnzValues, nullptr, &events[0]);
|
||||||
|
|
||||||
|
std::call_once(pattern_uploaded, [&](){
|
||||||
|
// find the positions of each diagonal block
|
||||||
|
// must be done after reordering
|
||||||
|
for (int row = 0; row < Nb; ++row) {
|
||||||
|
int rowStart = LUmat->rowPointers[row];
|
||||||
|
int rowEnd = LUmat->rowPointers[row+1];
|
||||||
|
|
||||||
|
auto candidate = std::find(LUmat->colIndices + rowStart, LUmat->colIndices + rowEnd, row);
|
||||||
|
assert(candidate != LUmat->colIndices + rowEnd);
|
||||||
|
diagIndex[row] = candidate - LUmat->colIndices;
|
||||||
|
}
|
||||||
|
events.resize(4);
|
||||||
|
queue->enqueueWriteBuffer(s.diagIndex, CL_FALSE, 0, Nb * sizeof(int), diagIndex.data(), nullptr, &events[1]);
|
||||||
|
queue->enqueueWriteBuffer(s.LUcols, CL_FALSE, 0, LUmat->nnzbs * sizeof(int), LUmat->colIndices, nullptr, &events[2]);
|
||||||
|
queue->enqueueWriteBuffer(s.LUrows, CL_FALSE, 0, (LUmat->Nb + 1) * sizeof(int), LUmat->rowPointers, nullptr, &events[3]);
|
||||||
|
});
|
||||||
|
|
||||||
|
cl::WaitForEvents(events);
|
||||||
|
events.clear();
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
// enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
|
||||||
|
OPM_THROW(std::logic_error, "BILU0 OpenCL enqueueWriteBuffer error");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (verbosity >= 3) {
|
||||||
|
std::ostringstream out;
|
||||||
|
out << "BILU0 copy to GPU: " << t_copyToGpu.stop() << " s";
|
||||||
|
OpmLog::info(out.str());
|
||||||
|
}
|
||||||
|
|
||||||
|
Timer t_decomposition;
|
||||||
|
std::ostringstream out;
|
||||||
|
cl::Event event;
|
||||||
|
for (int color = 0; color < numColors; ++color) {
|
||||||
|
const unsigned int firstRow = rowsPerColorPrefix[color];
|
||||||
|
const unsigned int lastRow = rowsPerColorPrefix[color+1];
|
||||||
|
if (verbosity >= 4) {
|
||||||
|
out << "color " << color << ": " << firstRow << " - " << lastRow << " = " << lastRow - firstRow << "\n";
|
||||||
|
}
|
||||||
|
OpenclKernels::ILU_decomp(firstRow, lastRow, s.LUvals, s.LUcols, s.LUrows, s.diagIndex, s.invDiagVals, Nb, block_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (verbosity >= 3) {
|
||||||
|
out << "BILU0 decomposition: " << t_decomposition.stop() << " s";
|
||||||
|
OpmLog::info(out.str());
|
||||||
|
}
|
||||||
|
#endif // CHOW_PATEL
|
||||||
|
|
||||||
|
return true;
|
||||||
|
} // end create_preconditioner()
|
||||||
|
|
||||||
|
|
||||||
// kernels are blocking on an NVIDIA GPU, so waiting for events is not needed
|
// kernels are blocking on an NVIDIA GPU, so waiting for events is not needed
|
||||||
// however, if individual kernel calls are timed, waiting for events is needed
|
// however, if individual kernel calls are timed, waiting for events is needed
|
||||||
// behavior on other GPUs is untested
|
// behavior on other GPUs is untested
|
||||||
|
@ -53,8 +53,11 @@ class BILU0 : public Preconditioner<block_size>
|
|||||||
using Base::err;
|
using Base::err;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
int nnz_jm; // number of nonzeroes of the matrix (scalar)
|
||||||
|
int nnzbs_jm; // number of blocks of the matrix
|
||||||
std::unique_ptr<BlockedMatrix> LUmat = nullptr;
|
std::unique_ptr<BlockedMatrix> LUmat = nullptr;
|
||||||
std::shared_ptr<BlockedMatrix> rmat = nullptr; // only used with PAR_SIM
|
std::shared_ptr<BlockedMatrix> rmat = nullptr; // only used with PAR_SIM
|
||||||
|
std::shared_ptr<BlockedMatrix> rJacMat = nullptr;
|
||||||
#if CHOW_PATEL
|
#if CHOW_PATEL
|
||||||
std::unique_ptr<BlockedMatrix> Lmat = nullptr, Umat = nullptr;
|
std::unique_ptr<BlockedMatrix> Lmat = nullptr, Umat = nullptr;
|
||||||
#endif
|
#endif
|
||||||
@ -92,9 +95,11 @@ public:
|
|||||||
|
|
||||||
// analysis, find reordering if specified
|
// analysis, find reordering if specified
|
||||||
bool analyze_matrix(BlockedMatrix *mat) override;
|
bool analyze_matrix(BlockedMatrix *mat) override;
|
||||||
|
bool analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat);
|
||||||
|
|
||||||
// ilu_decomposition
|
// ilu_decomposition
|
||||||
bool create_preconditioner(BlockedMatrix *mat) override;
|
bool create_preconditioner(BlockedMatrix *mat) override;
|
||||||
|
bool create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat);
|
||||||
|
|
||||||
// apply preconditioner, x = prec(y)
|
// apply preconditioner, x = prec(y)
|
||||||
void apply(const cl::Buffer& y, cl::Buffer& x) override;
|
void apply(const cl::Buffer& y, cl::Buffer& x) override;
|
||||||
|
@ -52,10 +52,21 @@ std::unique_ptr<Preconditioner<block_size> > Preconditioner<block_size>::create(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <unsigned int block_size>
|
||||||
|
bool Preconditioner<block_size>::analyze_matrix(BlockedMatrix *mat, [[maybe_unused]] BlockedMatrix *jacMat) {
|
||||||
|
return analyze_matrix(mat);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <unsigned int block_size>
|
||||||
|
bool Preconditioner<block_size>::create_preconditioner(BlockedMatrix *mat, [[maybe_unused]] BlockedMatrix *jacMat) {
|
||||||
|
return create_preconditioner(mat);
|
||||||
|
}
|
||||||
|
|
||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template std::unique_ptr<Preconditioner<n> > Preconditioner<n>::create(PreconditionerType, int, ILUReorder); \
|
template std::unique_ptr<Preconditioner<n> > Preconditioner<n>::create(PreconditionerType, int, ILUReorder); \
|
||||||
template void Preconditioner<n>::setOpencl(std::shared_ptr<cl::Context>&, std::shared_ptr<cl::CommandQueue>&);
|
template void Preconditioner<n>::setOpencl(std::shared_ptr<cl::Context>&, std::shared_ptr<cl::CommandQueue>&); \
|
||||||
|
template bool Preconditioner<n>::analyze_matrix(BlockedMatrix *, BlockedMatrix *); \
|
||||||
|
template bool Preconditioner<n>::create_preconditioner(BlockedMatrix *, BlockedMatrix *);
|
||||||
|
|
||||||
INSTANTIATE_BDA_FUNCTIONS(1);
|
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||||
INSTANTIATE_BDA_FUNCTIONS(2);
|
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||||
|
@ -68,10 +68,14 @@ public:
|
|||||||
|
|
||||||
// analyze matrix, e.g. the sparsity pattern
|
// analyze matrix, e.g. the sparsity pattern
|
||||||
// probably only called once
|
// probably only called once
|
||||||
|
// the version with two params can be overloaded, if not, it will default to using the one param version
|
||||||
virtual bool analyze_matrix(BlockedMatrix *mat) = 0;
|
virtual bool analyze_matrix(BlockedMatrix *mat) = 0;
|
||||||
|
virtual bool analyze_matrix(BlockedMatrix *mat, BlockedMatrix *jacMat);
|
||||||
|
|
||||||
// create/update preconditioner, probably used every linear solve
|
// create/update preconditioner, probably used every linear solve
|
||||||
|
// the version with two params can be overloaded, if not, it will default to using the one param version
|
||||||
virtual bool create_preconditioner(BlockedMatrix *mat) = 0;
|
virtual bool create_preconditioner(BlockedMatrix *mat) = 0;
|
||||||
|
virtual bool create_preconditioner(BlockedMatrix *mat, BlockedMatrix *jacMat);
|
||||||
|
|
||||||
// get reordering mappings
|
// get reordering mappings
|
||||||
virtual int* getToOrder() = 0;
|
virtual int* getToOrder() = 0;
|
||||||
|
@ -441,6 +441,69 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
|||||||
initialized = true;
|
initialized = true;
|
||||||
} // end initialize()
|
} // end initialize()
|
||||||
|
|
||||||
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::initialize2(int N_, int nnz_, int dim, double *vals, int *rows, int *cols,
|
||||||
|
int nnz2, double *vals2, int *rows2, int *cols2) {
|
||||||
|
this->N = N_;
|
||||||
|
this->nnz = nnz_;
|
||||||
|
this->nnzb = nnz_ / block_size / block_size;
|
||||||
|
|
||||||
|
this->jac_nnz = nnz2;
|
||||||
|
this->jac_nnzb = nnz2 / block_size / block_size;
|
||||||
|
|
||||||
|
Nb = (N + dim - 1) / dim;
|
||||||
|
std::ostringstream out;
|
||||||
|
out << "Initializing GPU, matrix size: " << N << " blocks, nnzb: " << nnzb << "\n";
|
||||||
|
out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n";
|
||||||
|
out << "PlatformID: " << platformID << ", deviceID: " << deviceID << "\n";
|
||||||
|
OpmLog::info(out.str());
|
||||||
|
out.str("");
|
||||||
|
out.clear();
|
||||||
|
|
||||||
|
try {
|
||||||
|
prec->setOpencl(context, queue);
|
||||||
|
|
||||||
|
#if COPY_ROW_BY_ROW
|
||||||
|
vals_contiguous = new double[N];
|
||||||
|
#endif
|
||||||
|
mat.reset(new BlockedMatrix(Nb, nnzb, block_size, vals, cols, rows));
|
||||||
|
jacMat.reset(new BlockedMatrix(Nb, jac_nnzb, block_size, vals2, cols2, rows2));
|
||||||
|
|
||||||
|
d_x = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
|
d_b = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
|
d_rb = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
|
d_r = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
|
d_rw = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
|
d_p = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
|
d_pw = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
|
d_s = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
|
d_t = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
|
d_v = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
|
d_tmp = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
|
|
||||||
|
d_Avals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * nnz);
|
||||||
|
d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
|
||||||
|
d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
|
||||||
|
|
||||||
|
bool reorder = (opencl_ilu_reorder != ILUReorder::NONE);
|
||||||
|
if (reorder) {
|
||||||
|
rb = new double[N];
|
||||||
|
d_toOrder = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Nb);
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (const cl::Error& error) {
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << "OpenCL Error: " << error.what() << "(" << error.err() << ")\n";
|
||||||
|
oss << getErrorString(error.err());
|
||||||
|
// rethrow exception
|
||||||
|
OPM_THROW(std::logic_error, oss.str());
|
||||||
|
} catch (const std::logic_error& error) {
|
||||||
|
// rethrow exception by OPM_THROW in the try{}, without this, a segfault occurs
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
|
||||||
|
initialized = true;
|
||||||
|
} // end initialize()
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::finalize() {
|
void openclSolverBackend<block_size>::finalize() {
|
||||||
@ -527,8 +590,7 @@ template <unsigned int block_size>
|
|||||||
bool openclSolverBackend<block_size>::analyze_matrix() {
|
bool openclSolverBackend<block_size>::analyze_matrix() {
|
||||||
Timer t;
|
Timer t;
|
||||||
|
|
||||||
// bool success = bilu0->init(mat.get());
|
bool success = prec->analyze_matrix(mat.get(), jacMat.get());
|
||||||
bool success = prec->analyze_matrix(mat.get());
|
|
||||||
|
|
||||||
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
||||||
rmat = mat.get();
|
rmat = mat.get();
|
||||||
@ -579,7 +641,7 @@ template <unsigned int block_size>
|
|||||||
bool openclSolverBackend<block_size>::create_preconditioner() {
|
bool openclSolverBackend<block_size>::create_preconditioner() {
|
||||||
Timer t;
|
Timer t;
|
||||||
|
|
||||||
bool result = prec->create_preconditioner(mat.get());
|
bool result = prec->create_preconditioner(mat.get(), jacMat.get());
|
||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
@ -662,7 +724,33 @@ SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int
|
|||||||
solve_system(wellContribs, res);
|
solve_system(wellContribs, res);
|
||||||
return SolverStatus::BDA_SOLVER_SUCCESS;
|
return SolverStatus::BDA_SOLVER_SUCCESS;
|
||||||
}
|
}
|
||||||
|
template <unsigned int block_size>
|
||||||
|
SolverStatus openclSolverBackend<block_size>::solve_system2(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b,
|
||||||
|
int nnz2, double *vals2, int *rows2, int *cols2,
|
||||||
|
WellContributions& wellContribs, BdaResult &res)
|
||||||
|
{
|
||||||
|
if (initialized == false) {
|
||||||
|
initialize2(N_, nnz_, dim, vals, rows, cols, nnz2, vals2, rows2, cols2);
|
||||||
|
if (analysis_done == false) {
|
||||||
|
if (!analyze_matrix()) {
|
||||||
|
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
update_system(vals, b, wellContribs);
|
||||||
|
if (!create_preconditioner()) {
|
||||||
|
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||||
|
}
|
||||||
|
copy_system_to_gpu();
|
||||||
|
} else {
|
||||||
|
update_system(vals, b, wellContribs);
|
||||||
|
if (!create_preconditioner()) {
|
||||||
|
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||||
|
}
|
||||||
|
update_system_on_gpu();
|
||||||
|
}
|
||||||
|
solve_system(wellContribs, res);
|
||||||
|
return SolverStatus::BDA_SOLVER_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template openclSolverBackend<n>::openclSolverBackend( \
|
template openclSolverBackend<n>::openclSolverBackend( \
|
||||||
|
@ -63,12 +63,15 @@ private:
|
|||||||
|
|
||||||
std::vector<cl::Device> devices;
|
std::vector<cl::Device> devices;
|
||||||
|
|
||||||
|
int jac_nnz;
|
||||||
|
int jac_nnzb;
|
||||||
std::unique_ptr<Preconditioner<block_size> > prec;
|
std::unique_ptr<Preconditioner<block_size> > prec;
|
||||||
// can perform blocked ILU0 and AMG on pressure component
|
// can perform blocked ILU0 and AMG on pressure component
|
||||||
bool is_root; // allow for nested solvers, the root solver is called by BdaBridge
|
bool is_root; // allow for nested solvers, the root solver is called by BdaBridge
|
||||||
int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings
|
int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings
|
||||||
bool analysis_done = false;
|
bool analysis_done = false;
|
||||||
std::unique_ptr<BlockedMatrix> mat = nullptr; // original matrix
|
std::unique_ptr<BlockedMatrix> mat = nullptr; // original matrix
|
||||||
|
std::unique_ptr<BlockedMatrix> jacMat = nullptr; // matrix for preconditioner
|
||||||
BlockedMatrix *rmat = nullptr; // reordered matrix (or original if no reordering), used for spmv
|
BlockedMatrix *rmat = nullptr; // reordered matrix (or original if no reordering), used for spmv
|
||||||
ILUReorder opencl_ilu_reorder; // reordering strategy
|
ILUReorder opencl_ilu_reorder; // reordering strategy
|
||||||
std::vector<cl::Event> events;
|
std::vector<cl::Event> events;
|
||||||
@ -138,6 +141,9 @@ private:
|
|||||||
/// \param[in] cols array of columnIndices, contains nnz values
|
/// \param[in] cols array of columnIndices, contains nnz values
|
||||||
void initialize(int N, int nnz, int dim, double *vals, int *rows, int *cols);
|
void initialize(int N, int nnz, int dim, double *vals, int *rows, int *cols);
|
||||||
|
|
||||||
|
void initialize2(int N, int nnz, int dim, double *vals, int *rows, int *cols,
|
||||||
|
int nnz2, double *vals2, int *rows2, int *cols2);
|
||||||
|
|
||||||
/// Clean memory
|
/// Clean memory
|
||||||
void finalize();
|
void finalize();
|
||||||
|
|
||||||
@ -205,6 +211,10 @@ public:
|
|||||||
/// \return status code
|
/// \return status code
|
||||||
SolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
SolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
||||||
|
|
||||||
|
SolverStatus solve_system2(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b,
|
||||||
|
int nnz2, double *vals2, int *rows2, int *cols2,
|
||||||
|
WellContributions& wellContribs, BdaResult &res) override;
|
||||||
|
|
||||||
/// Solve scalar linear system, for example a coarse system of an AMG preconditioner
|
/// Solve scalar linear system, for example a coarse system of an AMG preconditioner
|
||||||
/// Data is already on the GPU
|
/// Data is already on the GPU
|
||||||
// SolverStatus solve_system(BdaResult &res);
|
// SolverStatus solve_system(BdaResult &res);
|
||||||
@ -218,7 +228,7 @@ public:
|
|||||||
/// \param[in] context the opencl context to be used
|
/// \param[in] context the opencl context to be used
|
||||||
/// \param[in] queue the opencl queue to be used
|
/// \param[in] queue the opencl queue to be used
|
||||||
void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue);
|
void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue);
|
||||||
|
|
||||||
}; // end class openclSolverBackend
|
}; // end class openclSolverBackend
|
||||||
|
|
||||||
} // namespace Accelerator
|
} // namespace Accelerator
|
||||||
|
@ -39,9 +39,9 @@ namespace detail
|
|||||||
/// \param useWellConn Boolean that is true when UseWellContribusion is true
|
/// \param useWellConn Boolean that is true when UseWellContribusion is true
|
||||||
/// \param wellGraph Cell IDs of well cells stored in a graph.
|
/// \param wellGraph Cell IDs of well cells stored in a graph.
|
||||||
template<class Grid, class W>
|
template<class Grid, class W>
|
||||||
void setWellConnections(const Grid& grid, const W& wells, bool useWellConn, std::vector<std::set<int>>& wellGraph)
|
void setWellConnections(const Grid& grid, const W& wells, bool useWellConn, std::vector<std::set<int>>& wellGraph, int numJacobiBlocks)
|
||||||
{
|
{
|
||||||
if ( grid.comm().size() > 1)
|
if ( grid.comm().size() > 1 || numJacobiBlocks > 1)
|
||||||
{
|
{
|
||||||
Dune::CartesianIndexMapper< Grid > cartMapper( grid );
|
Dune::CartesianIndexMapper< Grid > cartMapper( grid );
|
||||||
const int numCells = cartMapper.compressedSize(); // grid.numCells()
|
const int numCells = cartMapper.compressedSize(); // grid.numCells()
|
||||||
|
@ -118,7 +118,7 @@ testOpenclSolver(const boost::property_tree::ptree& prm, Matrix<bz>& matrix, Vec
|
|||||||
}
|
}
|
||||||
auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory
|
auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory
|
||||||
// matrix created by readMatrixMarket() did not have contiguous memory
|
// matrix created by readMatrixMarket() did not have contiguous memory
|
||||||
bridge->solve_system(&mat2, rhs, *wellContribs, result);
|
bridge->solve_system(&mat2, &mat2, 0, rhs, *wellContribs, result);
|
||||||
bridge->get_result(x);
|
bridge->get_result(x);
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
|
Loading…
Reference in New Issue
Block a user