mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-01-28 17:04:23 -06:00
Merge pull request #250 from dr-robertk/PR/performance-avoid-zero-matrix-entries
Further performance improvement by avoiding zeros in matrix-matrix product.
This commit is contained in:
commit
28af900f0b
@ -98,6 +98,7 @@ list (APPEND PUBLIC_HEADER_FILES
|
||||
opm/autodiff/BlackoilPropsAdFromDeck.hpp
|
||||
opm/autodiff/BlackoilPropsAdInterface.hpp
|
||||
opm/autodiff/CPRPreconditioner.hpp
|
||||
opm/autodiff/ConservativeSparseSparseProduct.h
|
||||
opm/autodiff/DuneMatrix.hpp
|
||||
opm/autodiff/GeoProps.hpp
|
||||
opm/autodiff/GridHelpers.hpp
|
||||
|
@ -24,11 +24,13 @@
|
||||
|
||||
#include <Eigen/Eigen>
|
||||
#include <Eigen/Sparse>
|
||||
#include <opm/autodiff/fastSparseProduct.hpp>
|
||||
|
||||
#include <opm/core/utility/platform_dependent/reenable_warnings.h>
|
||||
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
@ -102,7 +104,7 @@ namespace Opm
|
||||
}
|
||||
|
||||
/// Create an AutoDiffBlock representing a constant.
|
||||
/// \param[in] val values
|
||||
/// \param[in] val values
|
||||
static AutoDiffBlock constant(const V& val)
|
||||
{
|
||||
return AutoDiffBlock(val);
|
||||
@ -112,7 +114,7 @@ namespace Opm
|
||||
/// This variant requires specifying the block sizes used
|
||||
/// for the Jacobians even though the Jacobian matrices
|
||||
/// themselves will be zero.
|
||||
/// \param[in] val values
|
||||
/// \param[in] val values
|
||||
/// \param[in] blocksizes block pattern
|
||||
static AutoDiffBlock constant(const V& val, const std::vector<int>& blocksizes)
|
||||
{
|
||||
@ -129,7 +131,7 @@ namespace Opm
|
||||
|
||||
/// Create an AutoDiffBlock representing a single variable block.
|
||||
/// \param[in] index index of the variable you are constructing
|
||||
/// \param[in] val values
|
||||
/// \param[in] val values
|
||||
/// \param[in] blocksizes block pattern
|
||||
/// The resulting object will have size() equal to block_pattern[index].
|
||||
/// Its jacobians will all be zero, except for derivative()[index], which
|
||||
@ -154,7 +156,7 @@ namespace Opm
|
||||
}
|
||||
|
||||
/// Create an AutoDiffBlock by directly specifying values and jacobians.
|
||||
/// \param[in] val values
|
||||
/// \param[in] val values
|
||||
/// \param[in] jac vector of jacobians
|
||||
static AutoDiffBlock function(const V& val, const std::vector<M>& jac)
|
||||
{
|
||||
@ -292,7 +294,17 @@ namespace Opm
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
assert(jac_[block].rows() == rhs.jac_[block].rows());
|
||||
assert(jac_[block].cols() == rhs.jac_[block].cols());
|
||||
jac[block] = D2*jac_[block] + D1*rhs.jac_[block];
|
||||
if( jac_[block].nonZeros() == 0 && rhs.jac_[block].nonZeros() == 0 ) {
|
||||
jac[block] = M( D2.rows(), jac_[block].cols() );
|
||||
}
|
||||
else if( jac_[block].nonZeros() == 0 )
|
||||
jac[block] = D1*rhs.jac_[block];
|
||||
else if ( rhs.jac_[block].nonZeros() == 0 ) {
|
||||
jac[block] = D2*jac_[block];
|
||||
}
|
||||
else {
|
||||
jac[block] = D2*jac_[block] + D1*rhs.jac_[block];
|
||||
}
|
||||
}
|
||||
return function(val_ * rhs.val_, jac);
|
||||
}
|
||||
@ -319,7 +331,20 @@ namespace Opm
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
assert(jac_[block].rows() == rhs.jac_[block].rows());
|
||||
assert(jac_[block].cols() == rhs.jac_[block].cols());
|
||||
jac[block] = D3 * (D2*jac_[block] - D1*rhs.jac_[block]);
|
||||
if( jac_[block].nonZeros() == 0 && rhs.jac_[block].nonZeros() == 0 ) {
|
||||
jac[block] = M( D3.rows(), jac_[block].cols() );
|
||||
}
|
||||
else if( jac_[block].nonZeros() == 0 ) {
|
||||
jac[block] = D3 * ( D1*rhs.jac_[block]);
|
||||
jac[block] *= -1.0;
|
||||
}
|
||||
else if ( rhs.jac_[block].nonZeros() == 0 )
|
||||
{
|
||||
jac[block] = D3 * (D2*jac_[block]);
|
||||
}
|
||||
else {
|
||||
jac[block] = D3 * (D2*jac_[block] - D1*rhs.jac_[block]);
|
||||
}
|
||||
}
|
||||
return function(val_ / rhs.val_, jac);
|
||||
}
|
||||
@ -416,7 +441,8 @@ namespace Opm
|
||||
std::vector<typename AutoDiffBlock<Scalar>::M> jac(num_blocks);
|
||||
assert(lhs.cols() == rhs.value().rows());
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jac[block] = lhs*rhs.derivative()[block];
|
||||
// jac[block] = lhs*rhs.derivative()[block];
|
||||
fastSparseProduct(lhs, rhs.derivative()[block], jac[block]);
|
||||
}
|
||||
typename AutoDiffBlock<Scalar>::V val = lhs*rhs.value().matrix();
|
||||
return AutoDiffBlock<Scalar>::function(val, jac);
|
||||
|
@ -70,7 +70,7 @@ struct HelperOps
|
||||
TwoColInt nbi;
|
||||
extractInternalFaces(grid, internal_faces, nbi);
|
||||
int num_internal=internal_faces.size();
|
||||
|
||||
|
||||
// std::cout << "nbi = \n" << nbi << std::endl;
|
||||
// Create matrices.
|
||||
ngrad.resize(num_internal, nc);
|
||||
@ -189,11 +189,11 @@ namespace {
|
||||
|
||||
|
||||
template <typename Scalar, class IntVec>
|
||||
Eigen::SparseMatrix<Scalar>
|
||||
typename AutoDiffBlock<Scalar>::M
|
||||
constructSupersetSparseMatrix(const int full_size, const IntVec& indices)
|
||||
{
|
||||
const int subset_size = indices.size();
|
||||
Eigen::SparseMatrix<Scalar> mat(full_size, subset_size);
|
||||
typename AutoDiffBlock<Scalar>::M mat(full_size, subset_size);
|
||||
mat.reserve(Eigen::VectorXi::Constant(subset_size, 1));
|
||||
for (int i = 0; i < subset_size; ++i) {
|
||||
mat.insert(indices[i], i) = 1;
|
||||
@ -204,18 +204,6 @@ namespace {
|
||||
} // anon namespace
|
||||
|
||||
|
||||
/// Returns x(indices).
|
||||
template <typename Scalar, class IntVec>
|
||||
AutoDiffBlock<Scalar>
|
||||
subset(const AutoDiffBlock<Scalar>& x,
|
||||
const IntVec& indices)
|
||||
{
|
||||
Eigen::SparseMatrix<Scalar> sub
|
||||
= constructSupersetSparseMatrix<Scalar>(x.value().size(), indices).transpose();
|
||||
return sub * x;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Returns x(indices).
|
||||
template <typename Scalar, class IntVec>
|
||||
@ -223,9 +211,25 @@ Eigen::Array<Scalar, Eigen::Dynamic, 1>
|
||||
subset(const Eigen::Array<Scalar, Eigen::Dynamic, 1>& x,
|
||||
const IntVec& indices)
|
||||
{
|
||||
return (constructSupersetSparseMatrix<Scalar>(x.size(), indices).transpose() * x.matrix()).array();
|
||||
typedef typename Eigen::Array<Scalar, Eigen::Dynamic, 1>::Index Index;
|
||||
const Index size = indices.size();
|
||||
Eigen::Array<Scalar, Eigen::Dynamic, 1> ret( size );
|
||||
for( Index i=0; i<size; ++i )
|
||||
ret[ i ] = x[ indices[ i ] ];
|
||||
|
||||
return std::move(ret);
|
||||
}
|
||||
|
||||
/// Returns x(indices).
|
||||
template <typename Scalar, class IntVec>
|
||||
AutoDiffBlock<Scalar>
|
||||
subset(const AutoDiffBlock<Scalar>& x,
|
||||
const IntVec& indices)
|
||||
{
|
||||
const typename AutoDiffBlock<Scalar>::M sub
|
||||
= constructSupersetSparseMatrix<Scalar>(x.value().size(), indices).transpose();
|
||||
return sub * x;
|
||||
}
|
||||
|
||||
|
||||
/// Returns v where v(indices) == x, v(!indices) == 0 and v.size() == n.
|
||||
@ -357,9 +361,10 @@ spdiag(const AutoDiffBlock<double>::V& d)
|
||||
|
||||
|
||||
/// Returns the input expression, but with all Jacobians collapsed to one.
|
||||
template <class Matrix>
|
||||
inline
|
||||
AutoDiffBlock<double>
|
||||
collapseJacs(const AutoDiffBlock<double>& x)
|
||||
void
|
||||
collapseJacs(const AutoDiffBlock<double>& x, Matrix& jacobian)
|
||||
{
|
||||
typedef AutoDiffBlock<double> ADB;
|
||||
const int nb = x.numBlocks();
|
||||
@ -383,9 +388,21 @@ collapseJacs(const AutoDiffBlock<double>& x)
|
||||
block_col_start += jac.cols();
|
||||
}
|
||||
// Build final jacobian.
|
||||
jacobian = Matrix(x.size(), block_col_start);
|
||||
jacobian.setFromTriplets(t.begin(), t.end());
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Returns the input expression, but with all Jacobians collapsed to one.
|
||||
inline
|
||||
AutoDiffBlock<double>
|
||||
collapseJacs(const AutoDiffBlock<double>& x)
|
||||
{
|
||||
typedef AutoDiffBlock<double> ADB;
|
||||
// Build final jacobian.
|
||||
std::vector<ADB::M> jacs(1);
|
||||
jacs[0].resize(x.size(), block_col_start);
|
||||
jacs[0].setFromTriplets(t.begin(), t.end());
|
||||
collapseJacs( x, jacs[ 0 ] );
|
||||
return ADB::function(x.value(), jacs);
|
||||
}
|
||||
|
||||
|
@ -392,7 +392,7 @@ namespace Opm
|
||||
const int num_blocks = pw.numBlocks();
|
||||
std::vector<ADB::M> jacs(num_blocks);
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jacs[block] = dmudp_diag * pw.derivative()[block];
|
||||
fastSparseProduct(dmudp_diag, pw.derivative()[block], jacs[block]);
|
||||
}
|
||||
return ADB::function(mu, jacs);
|
||||
}
|
||||
@ -427,7 +427,10 @@ namespace Opm
|
||||
const int num_blocks = po.numBlocks();
|
||||
std::vector<ADB::M> jacs(num_blocks);
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jacs[block] = dmudp_diag * po.derivative()[block] + dmudr_diag * rs.derivative()[block];
|
||||
fastSparseProduct(dmudp_diag, po.derivative()[block], jacs[block]);
|
||||
ADB::M temp;
|
||||
fastSparseProduct(dmudr_diag, rs.derivative()[block], temp);
|
||||
jacs[block] += temp;
|
||||
}
|
||||
return ADB::function(mu, jacs);
|
||||
}
|
||||
@ -458,7 +461,7 @@ namespace Opm
|
||||
const int num_blocks = pg.numBlocks();
|
||||
std::vector<ADB::M> jacs(num_blocks);
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jacs[block] = dmudp_diag * pg.derivative()[block];
|
||||
fastSparseProduct(dmudp_diag, pg.derivative()[block], jacs[block]);
|
||||
}
|
||||
return ADB::function(mu, jacs);
|
||||
}
|
||||
@ -493,7 +496,10 @@ namespace Opm
|
||||
const int num_blocks = pg.numBlocks();
|
||||
std::vector<ADB::M> jacs(num_blocks);
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jacs[block] = dmudp_diag * pg.derivative()[block] + dmudr_diag * rv.derivative()[block];
|
||||
fastSparseProduct(dmudp_diag, pg.derivative()[block], jacs[block]);
|
||||
ADB::M temp;
|
||||
fastSparseProduct(dmudr_diag, rv.derivative()[block], temp);
|
||||
jacs[block] += temp;
|
||||
}
|
||||
return ADB::function(mu, jacs);
|
||||
}
|
||||
@ -653,7 +659,7 @@ namespace Opm
|
||||
const int num_blocks = pw.numBlocks();
|
||||
std::vector<ADB::M> jacs(num_blocks);
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jacs[block] = dbdp_diag * pw.derivative()[block];
|
||||
fastSparseProduct(dbdp_diag, pw.derivative()[block], jacs[block]);
|
||||
}
|
||||
return ADB::function(b, jacs);
|
||||
}
|
||||
@ -689,7 +695,10 @@ namespace Opm
|
||||
const int num_blocks = po.numBlocks();
|
||||
std::vector<ADB::M> jacs(num_blocks);
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jacs[block] = dbdp_diag * po.derivative()[block] + dbdr_diag * rs.derivative()[block];
|
||||
fastSparseProduct(dbdp_diag, po.derivative()[block], jacs[block]);
|
||||
ADB::M temp;
|
||||
fastSparseProduct(dbdr_diag, rs.derivative()[block], temp);
|
||||
jacs[block] += temp;
|
||||
}
|
||||
return ADB::function(b, jacs);
|
||||
}
|
||||
@ -721,7 +730,7 @@ namespace Opm
|
||||
const int num_blocks = pg.numBlocks();
|
||||
std::vector<ADB::M> jacs(num_blocks);
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jacs[block] = dbdp_diag * pg.derivative()[block];
|
||||
fastSparseProduct(dbdp_diag, pg.derivative()[block], jacs[block]);
|
||||
}
|
||||
return ADB::function(b, jacs);
|
||||
}
|
||||
@ -753,11 +762,14 @@ namespace Opm
|
||||
b.data(), dbdp.data(), dbdr.data());
|
||||
|
||||
ADB::M dbdp_diag = spdiag(dbdp);
|
||||
ADB::M dmudr_diag = spdiag(dbdr);
|
||||
ADB::M dbdr_diag = spdiag(dbdr);
|
||||
const int num_blocks = pg.numBlocks();
|
||||
std::vector<ADB::M> jacs(num_blocks);
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jacs[block] = dbdp_diag * pg.derivative()[block] + dmudr_diag * rv.derivative()[block];;
|
||||
fastSparseProduct(dbdp_diag, pg.derivative()[block], jacs[block]);
|
||||
ADB::M temp;
|
||||
fastSparseProduct(dbdr_diag, rv.derivative()[block], temp);
|
||||
jacs[block] += temp;
|
||||
}
|
||||
return ADB::function(b, jacs);
|
||||
}
|
||||
@ -817,7 +829,7 @@ namespace Opm
|
||||
const int num_blocks = po.numBlocks();
|
||||
std::vector<ADB::M> jacs(num_blocks);
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jacs[block] = drbubdp_diag * po.derivative()[block];
|
||||
fastSparseProduct(drbubdp_diag, po.derivative()[block], jacs[block]);
|
||||
}
|
||||
return ADB::function(rbub, jacs);
|
||||
}
|
||||
@ -889,7 +901,7 @@ namespace Opm
|
||||
const int num_blocks = po.numBlocks();
|
||||
std::vector<ADB::M> jacs(num_blocks);
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jacs[block] = drvdp_diag * po.derivative()[block];
|
||||
fastSparseProduct(drvdp_diag, po.derivative()[block], jacs[block]);
|
||||
}
|
||||
return ADB::function(rv, jacs);
|
||||
}
|
||||
@ -1004,7 +1016,9 @@ namespace Opm
|
||||
const int column = phase1_pos + np*phase2_pos; // Recall: Fortran ordering from props_.relperm()
|
||||
ADB::M dkr1_ds2_diag = spdiag(dkr.col(column));
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jacs[block] += dkr1_ds2_diag * s[phase2]->derivative()[block];
|
||||
ADB::M temp;
|
||||
fastSparseProduct(dkr1_ds2_diag, s[phase2]->derivative()[block], temp);
|
||||
jacs[block] += temp;
|
||||
}
|
||||
}
|
||||
relperms.emplace_back(ADB::function(kr.col(phase1_pos), jacs));
|
||||
@ -1062,7 +1076,9 @@ namespace Opm
|
||||
const int column = phase1_pos + numActivePhases*phase2_pos; // Recall: Fortran ordering from props_.relperm()
|
||||
ADB::M dpc1_ds2_diag = spdiag(dpc.col(column));
|
||||
for (int block = 0; block < numBlocks; ++block) {
|
||||
jacs[block] += dpc1_ds2_diag * s[phase2]->derivative()[block];
|
||||
ADB::M temp;
|
||||
fastSparseProduct(dpc1_ds2_diag, s[phase2]->derivative()[block], temp);
|
||||
jacs[block] += temp;
|
||||
}
|
||||
}
|
||||
adbCapPressures.emplace_back(ADB::function(pc.col(phase1_pos), jacs));
|
||||
|
@ -2059,7 +2059,7 @@ namespace {
|
||||
const int num_blocks = p.numBlocks();
|
||||
std::vector<ADB::M> jacs(num_blocks);
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jacs[block] = dpm_diag * p.derivative()[block];
|
||||
fastSparseProduct(dpm_diag, p.derivative()[block], jacs[block]);
|
||||
}
|
||||
return ADB::function(pm, jacs);
|
||||
} else {
|
||||
@ -2087,7 +2087,7 @@ namespace {
|
||||
const int num_blocks = p.numBlocks();
|
||||
std::vector<ADB::M> jacs(num_blocks);
|
||||
for (int block = 0; block < num_blocks; ++block) {
|
||||
jacs[block] = dtm_diag * p.derivative()[block];
|
||||
fastSparseProduct(dtm_diag, p.derivative()[block], jacs[block]);
|
||||
}
|
||||
return ADB::function(tm, jacs);
|
||||
} else {
|
||||
|
@ -261,7 +261,7 @@ namespace Opm
|
||||
#endif
|
||||
M id(Jn[n].rows(), Jn[n].cols());
|
||||
id.setIdentity();
|
||||
const M Di = solver.solve(id);
|
||||
const Eigen::SparseMatrix<M::Scalar, Eigen::ColMajor> Di = solver.solve(id);
|
||||
|
||||
// compute inv(D)*bn for the update of the right hand side
|
||||
const Eigen::VectorXd& Dibn = solver.solve(eqs[n].value().matrix());
|
||||
@ -280,7 +280,9 @@ namespace Opm
|
||||
continue;
|
||||
}
|
||||
// solve Du = C
|
||||
const M u = Di * Jn[var]; // solver.solve(Jn[var]);
|
||||
// const M u = Di * Jn[var]; // solver.solve(Jn[var]);
|
||||
M u;
|
||||
fastSparseProduct(Di, Jn[var], u); // solver.solve(Jn[var]);
|
||||
for (int eq = 0; eq < num_eq; ++eq) {
|
||||
if (eq == n) {
|
||||
continue;
|
||||
@ -293,7 +295,9 @@ namespace Opm
|
||||
jacs[eq].push_back(Je[var]);
|
||||
M& J = jacs[eq].back();
|
||||
// Subtract Bu (B*inv(D)*C)
|
||||
J -= B * u;
|
||||
M Bu;
|
||||
fastSparseProduct(B, u, Bu);
|
||||
J -= Bu;
|
||||
}
|
||||
}
|
||||
|
||||
@ -398,6 +402,7 @@ namespace Opm
|
||||
void formEllipticSystem(const int num_phases,
|
||||
const std::vector<ADB>& eqs_in,
|
||||
Eigen::SparseMatrix<double, Eigen::RowMajor>& A,
|
||||
// M& A,
|
||||
V& b)
|
||||
{
|
||||
if (num_phases != 3) {
|
||||
|
146
opm/autodiff/fastSparseProduct.hpp
Normal file
146
opm/autodiff/fastSparseProduct.hpp
Normal file
@ -0,0 +1,146 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
// This file has been modified for use in the OPM project codebase.
|
||||
|
||||
#ifndef OPM_FASTSPARSEPRODUCT_HEADER_INCLUDED
|
||||
#define OPM_FASTSPARSEPRODUCT_HEADER_INCLUDED
|
||||
|
||||
#include <Eigen/Sparse>
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include <Eigen/Core>
|
||||
|
||||
namespace Opm {
|
||||
|
||||
/// Depth-limited quicksort.
///
/// Performs at most \c depth levels of pivot partitioning and then hands
/// every remaining sub-range to std::sort (see the QuickSort< 0 >
/// specialization).
///
/// \tparam depth number of partitioning levels before delegating to std::sort
template < unsigned int depth >
struct QuickSort
{
    /// Sort the range [begin, end) into ascending order.
    /// \param[in] begin iterator to the first element of the range
    /// \param[in] end   iterator one past the last element of the range
    template <typename T>
    static inline void sort(T begin, T end)
    {
        if (begin != end)
        {
            typedef typename std::iterator_traits<T>::value_type value_type;

            // Take a copy of the pivot: std::partition moves elements
            // around, so *begin does not keep its value during the call.
            const value_type pivot = *begin;
            const std::less<value_type> less_than = std::less<value_type>();

            // Afterwards every element of [begin, middle) is < pivot and
            // every element of [middle, end) is >= pivot.
            T middle = std::partition(begin, end,
                                      [&pivot, &less_than](const value_type& x)
                                      { return less_than(x, pivot); });

            // The two halves are disjoint and already ordered relative to
            // each other, so sorting each one on its own sorts the whole
            // range. This does not rely on where std::partition leaves
            // the pivot element.
            QuickSort< depth-1 >::sort(begin, middle);
            QuickSort< depth-1 >::sort(middle, end);
        }
    }
};

/// Recursion end: no partitioning levels left, delegate to std::sort.
template <>
struct QuickSort< 0 >
{
    /// Sort the range [begin, end) into ascending order.
    /// \param[in] begin iterator to the first element of the range
    /// \param[in] end   iterator one past the last element of the range
    template <typename T>
    static inline void sort(T begin, T end)
    {
        // fall back to the standard library sort
        std::sort( begin, end );
    }
};
|
||||
|
||||
|
||||
template<typename Lhs, typename Rhs, typename ResultType>
|
||||
void fastSparseProduct(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
||||
{
|
||||
// initialize result
|
||||
res = ResultType(lhs.rows(), rhs.cols());
|
||||
|
||||
// if one of the matrices does not contain non zero elements
|
||||
// the result will only contain an empty matrix
|
||||
if( lhs.nonZeros() == 0 || rhs.nonZeros() == 0 )
|
||||
return;
|
||||
|
||||
typedef typename Eigen::internal::remove_all<Lhs>::type::Scalar Scalar;
|
||||
typedef typename Eigen::internal::remove_all<Lhs>::type::Index Index;
|
||||
|
||||
// make sure to call innerSize/outerSize since we fake the storage order.
|
||||
Index rows = lhs.innerSize();
|
||||
Index cols = rhs.outerSize();
|
||||
eigen_assert(lhs.outerSize() == rhs.innerSize());
|
||||
|
||||
std::vector<bool> mask(rows,false);
|
||||
Eigen::Matrix<Scalar,Eigen::Dynamic,1> values(rows);
|
||||
Eigen::Matrix<Index, Eigen::Dynamic,1> indices(rows);
|
||||
|
||||
// estimate the number of non zero entries
|
||||
// given a rhs column containing Y non zeros, we assume that the respective Y columns
|
||||
// of the lhs differs in average of one non zeros, thus the number of non zeros for
|
||||
// the product of a rhs column with the lhs is X+Y where X is the average number of non zero
|
||||
// per column of the lhs.
|
||||
// Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
|
||||
Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros();
|
||||
|
||||
res.setZero();
|
||||
res.reserve(Index(estimated_nnz_prod));
|
||||
|
||||
//const Scalar epsilon = std::numeric_limits< Scalar >::epsilon();
|
||||
const Scalar epsilon = 0.0;
|
||||
|
||||
// we compute each column of the result, one after the other
|
||||
for (Index j=0; j<cols; ++j)
|
||||
{
|
||||
Index nnz = 0;
|
||||
for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
|
||||
{
|
||||
const Scalar y = rhsIt.value();
|
||||
for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
|
||||
{
|
||||
const Scalar val = lhsIt.value() * y;
|
||||
if( std::abs( val ) > epsilon )
|
||||
{
|
||||
const Index i = lhsIt.index();
|
||||
if(!mask[i])
|
||||
{
|
||||
mask[i] = true;
|
||||
values[i] = val;
|
||||
indices[nnz] = i;
|
||||
++nnz;
|
||||
}
|
||||
else
|
||||
values[i] += val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( nnz > 1 )
|
||||
{
|
||||
// sort indices for sorted insertion to avoid later copying
|
||||
QuickSort< 1 >::sort( indices.data(), indices.data()+nnz );
|
||||
}
|
||||
|
||||
res.startVec(j);
|
||||
// ordered insertion
|
||||
// still using insertBackByOuterInnerUnordered since we know what we are doing
|
||||
for(Index k=0; k<nnz; ++k)
|
||||
{
|
||||
const Index i = indices[k];
|
||||
res.insertBackByOuterInnerUnordered(j,i) = values[i];
|
||||
mask[i] = false;
|
||||
}
|
||||
|
||||
}
|
||||
res.finalize();
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // end namespace Opm
|
||||
|
||||
#endif // OPM_FASTSPARSEPRODUCT_HEADER_INCLUDED
|
Loading…
Reference in New Issue
Block a user