mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Merge pull request #1 from atgeirr/performance-mods
Performance mods from atgeirr, great job.
This commit is contained in:
commit
1cd3dcadc6
@ -22,9 +22,9 @@
|
|||||||
|
|
||||||
#include <opm/core/utility/platform_dependent/disable_warnings.h>
|
#include <opm/core/utility/platform_dependent/disable_warnings.h>
|
||||||
|
|
||||||
#include <opm/autodiff/ConservativeSparseSparseProduct.h>
|
|
||||||
#include <Eigen/Eigen>
|
#include <Eigen/Eigen>
|
||||||
#include <Eigen/Sparse>
|
#include <Eigen/Sparse>
|
||||||
|
#include <opm/autodiff/fastSparseProduct.hpp>
|
||||||
|
|
||||||
#include <opm/core/utility/platform_dependent/reenable_warnings.h>
|
#include <opm/core/utility/platform_dependent/reenable_warnings.h>
|
||||||
|
|
||||||
@ -441,7 +441,8 @@ namespace Opm
|
|||||||
std::vector<typename AutoDiffBlock<Scalar>::M> jac(num_blocks);
|
std::vector<typename AutoDiffBlock<Scalar>::M> jac(num_blocks);
|
||||||
assert(lhs.cols() == rhs.value().rows());
|
assert(lhs.cols() == rhs.value().rows());
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jac[block] = lhs*rhs.derivative()[block];
|
// jac[block] = lhs*rhs.derivative()[block];
|
||||||
|
fastSparseProduct(lhs, rhs.derivative()[block], jac[block]);
|
||||||
}
|
}
|
||||||
typename AutoDiffBlock<Scalar>::V val = lhs*rhs.value().matrix();
|
typename AutoDiffBlock<Scalar>::V val = lhs*rhs.value().matrix();
|
||||||
return AutoDiffBlock<Scalar>::function(val, jac);
|
return AutoDiffBlock<Scalar>::function(val, jac);
|
||||||
|
@ -392,7 +392,7 @@ namespace Opm
|
|||||||
const int num_blocks = pw.numBlocks();
|
const int num_blocks = pw.numBlocks();
|
||||||
std::vector<ADB::M> jacs(num_blocks);
|
std::vector<ADB::M> jacs(num_blocks);
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jacs[block] = dmudp_diag * pw.derivative()[block];
|
fastSparseProduct(dmudp_diag, pw.derivative()[block], jacs[block]);
|
||||||
}
|
}
|
||||||
return ADB::function(mu, jacs);
|
return ADB::function(mu, jacs);
|
||||||
}
|
}
|
||||||
@ -427,7 +427,10 @@ namespace Opm
|
|||||||
const int num_blocks = po.numBlocks();
|
const int num_blocks = po.numBlocks();
|
||||||
std::vector<ADB::M> jacs(num_blocks);
|
std::vector<ADB::M> jacs(num_blocks);
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jacs[block] = dmudp_diag * po.derivative()[block] + dmudr_diag * rs.derivative()[block];
|
fastSparseProduct(dmudp_diag, po.derivative()[block], jacs[block]);
|
||||||
|
ADB::M temp;
|
||||||
|
fastSparseProduct(dmudr_diag, rs.derivative()[block], temp);
|
||||||
|
jacs[block] += temp;
|
||||||
}
|
}
|
||||||
return ADB::function(mu, jacs);
|
return ADB::function(mu, jacs);
|
||||||
}
|
}
|
||||||
@ -458,7 +461,7 @@ namespace Opm
|
|||||||
const int num_blocks = pg.numBlocks();
|
const int num_blocks = pg.numBlocks();
|
||||||
std::vector<ADB::M> jacs(num_blocks);
|
std::vector<ADB::M> jacs(num_blocks);
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jacs[block] = dmudp_diag * pg.derivative()[block];
|
fastSparseProduct(dmudp_diag, pg.derivative()[block], jacs[block]);
|
||||||
}
|
}
|
||||||
return ADB::function(mu, jacs);
|
return ADB::function(mu, jacs);
|
||||||
}
|
}
|
||||||
@ -493,7 +496,10 @@ namespace Opm
|
|||||||
const int num_blocks = pg.numBlocks();
|
const int num_blocks = pg.numBlocks();
|
||||||
std::vector<ADB::M> jacs(num_blocks);
|
std::vector<ADB::M> jacs(num_blocks);
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jacs[block] = dmudp_diag * pg.derivative()[block] + dmudr_diag * rv.derivative()[block];
|
fastSparseProduct(dmudp_diag, pg.derivative()[block], jacs[block]);
|
||||||
|
ADB::M temp;
|
||||||
|
fastSparseProduct(dmudr_diag, rv.derivative()[block], temp);
|
||||||
|
jacs[block] += temp;
|
||||||
}
|
}
|
||||||
return ADB::function(mu, jacs);
|
return ADB::function(mu, jacs);
|
||||||
}
|
}
|
||||||
@ -653,7 +659,7 @@ namespace Opm
|
|||||||
const int num_blocks = pw.numBlocks();
|
const int num_blocks = pw.numBlocks();
|
||||||
std::vector<ADB::M> jacs(num_blocks);
|
std::vector<ADB::M> jacs(num_blocks);
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jacs[block] = dbdp_diag * pw.derivative()[block];
|
fastSparseProduct(dbdp_diag, pw.derivative()[block], jacs[block]);
|
||||||
}
|
}
|
||||||
return ADB::function(b, jacs);
|
return ADB::function(b, jacs);
|
||||||
}
|
}
|
||||||
@ -689,7 +695,10 @@ namespace Opm
|
|||||||
const int num_blocks = po.numBlocks();
|
const int num_blocks = po.numBlocks();
|
||||||
std::vector<ADB::M> jacs(num_blocks);
|
std::vector<ADB::M> jacs(num_blocks);
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jacs[block] = dbdp_diag * po.derivative()[block] + dbdr_diag * rs.derivative()[block];
|
fastSparseProduct(dbdp_diag, po.derivative()[block], jacs[block]);
|
||||||
|
ADB::M temp;
|
||||||
|
fastSparseProduct(dbdr_diag, rs.derivative()[block], temp);
|
||||||
|
jacs[block] += temp;
|
||||||
}
|
}
|
||||||
return ADB::function(b, jacs);
|
return ADB::function(b, jacs);
|
||||||
}
|
}
|
||||||
@ -721,7 +730,7 @@ namespace Opm
|
|||||||
const int num_blocks = pg.numBlocks();
|
const int num_blocks = pg.numBlocks();
|
||||||
std::vector<ADB::M> jacs(num_blocks);
|
std::vector<ADB::M> jacs(num_blocks);
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jacs[block] = dbdp_diag * pg.derivative()[block];
|
fastSparseProduct(dbdp_diag, pg.derivative()[block], jacs[block]);
|
||||||
}
|
}
|
||||||
return ADB::function(b, jacs);
|
return ADB::function(b, jacs);
|
||||||
}
|
}
|
||||||
@ -753,11 +762,14 @@ namespace Opm
|
|||||||
b.data(), dbdp.data(), dbdr.data());
|
b.data(), dbdp.data(), dbdr.data());
|
||||||
|
|
||||||
ADB::M dbdp_diag = spdiag(dbdp);
|
ADB::M dbdp_diag = spdiag(dbdp);
|
||||||
ADB::M dmudr_diag = spdiag(dbdr);
|
ADB::M dbdr_diag = spdiag(dbdr);
|
||||||
const int num_blocks = pg.numBlocks();
|
const int num_blocks = pg.numBlocks();
|
||||||
std::vector<ADB::M> jacs(num_blocks);
|
std::vector<ADB::M> jacs(num_blocks);
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jacs[block] = dbdp_diag * pg.derivative()[block] + dmudr_diag * rv.derivative()[block];;
|
fastSparseProduct(dbdp_diag, pg.derivative()[block], jacs[block]);
|
||||||
|
ADB::M temp;
|
||||||
|
fastSparseProduct(dbdr_diag, rv.derivative()[block], temp);
|
||||||
|
jacs[block] += temp;
|
||||||
}
|
}
|
||||||
return ADB::function(b, jacs);
|
return ADB::function(b, jacs);
|
||||||
}
|
}
|
||||||
@ -817,7 +829,7 @@ namespace Opm
|
|||||||
const int num_blocks = po.numBlocks();
|
const int num_blocks = po.numBlocks();
|
||||||
std::vector<ADB::M> jacs(num_blocks);
|
std::vector<ADB::M> jacs(num_blocks);
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jacs[block] = drbubdp_diag * po.derivative()[block];
|
fastSparseProduct(drbubdp_diag, po.derivative()[block], jacs[block]);
|
||||||
}
|
}
|
||||||
return ADB::function(rbub, jacs);
|
return ADB::function(rbub, jacs);
|
||||||
}
|
}
|
||||||
@ -889,7 +901,7 @@ namespace Opm
|
|||||||
const int num_blocks = po.numBlocks();
|
const int num_blocks = po.numBlocks();
|
||||||
std::vector<ADB::M> jacs(num_blocks);
|
std::vector<ADB::M> jacs(num_blocks);
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jacs[block] = drvdp_diag * po.derivative()[block];
|
fastSparseProduct(drvdp_diag, po.derivative()[block], jacs[block]);
|
||||||
}
|
}
|
||||||
return ADB::function(rv, jacs);
|
return ADB::function(rv, jacs);
|
||||||
}
|
}
|
||||||
@ -1004,7 +1016,9 @@ namespace Opm
|
|||||||
const int column = phase1_pos + np*phase2_pos; // Recall: Fortran ordering from props_.relperm()
|
const int column = phase1_pos + np*phase2_pos; // Recall: Fortran ordering from props_.relperm()
|
||||||
ADB::M dkr1_ds2_diag = spdiag(dkr.col(column));
|
ADB::M dkr1_ds2_diag = spdiag(dkr.col(column));
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jacs[block] += dkr1_ds2_diag * s[phase2]->derivative()[block];
|
ADB::M temp;
|
||||||
|
fastSparseProduct(dkr1_ds2_diag, s[phase2]->derivative()[block], temp);
|
||||||
|
jacs[block] += temp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
relperms.emplace_back(ADB::function(kr.col(phase1_pos), jacs));
|
relperms.emplace_back(ADB::function(kr.col(phase1_pos), jacs));
|
||||||
@ -1062,7 +1076,9 @@ namespace Opm
|
|||||||
const int column = phase1_pos + numActivePhases*phase2_pos; // Recall: Fortran ordering from props_.relperm()
|
const int column = phase1_pos + numActivePhases*phase2_pos; // Recall: Fortran ordering from props_.relperm()
|
||||||
ADB::M dpc1_ds2_diag = spdiag(dpc.col(column));
|
ADB::M dpc1_ds2_diag = spdiag(dpc.col(column));
|
||||||
for (int block = 0; block < numBlocks; ++block) {
|
for (int block = 0; block < numBlocks; ++block) {
|
||||||
jacs[block] += dpc1_ds2_diag * s[phase2]->derivative()[block];
|
ADB::M temp;
|
||||||
|
fastSparseProduct(dpc1_ds2_diag, s[phase2]->derivative()[block], temp);
|
||||||
|
jacs[block] += temp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
adbCapPressures.emplace_back(ADB::function(pc.col(phase1_pos), jacs));
|
adbCapPressures.emplace_back(ADB::function(pc.col(phase1_pos), jacs));
|
||||||
|
@ -1,332 +0,0 @@
|
|||||||
// This file is part of Eigen, a lightweight C++ template library
|
|
||||||
// for linear algebra.
|
|
||||||
//
|
|
||||||
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
||||||
//
|
|
||||||
// This Source Code Form is subject to the terms of the Mozilla
|
|
||||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
||||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
||||||
|
|
||||||
#ifndef EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
|
|
||||||
#define EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
|
|
||||||
|
|
||||||
#warning "Using overloaded Eigen::ConservativeSparseSparseProduct.h"
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <iterator>
|
|
||||||
#include <functional>
|
|
||||||
#include <limits>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include <Eigen/Core>
|
|
||||||
|
|
||||||
namespace Eigen {
|
|
||||||
|
|
||||||
// forward declaration of SparseMatrix
|
|
||||||
template<typename _Scalar, int _Options, typename _Index>
|
|
||||||
class SparseMatrix;
|
|
||||||
|
|
||||||
|
|
||||||
namespace internal {
|
|
||||||
|
|
||||||
/// Depth-limited quicksort.
///
/// Applies up to `depth` levels of pivot partitioning and then hands the
/// remaining sub-ranges to std::sort (see the QuickSort<0> specialization).
///
/// \tparam depth  number of partitioning levels performed before falling
///                back to std::sort.
template < unsigned int depth >
struct QuickSort
{
  /// Sort the range [begin, end) into ascending order.
  ///
  /// \tparam T  iterator type usable with std::partition and std::sort
  ///            (a raw pointer at the call site in this file).
  template <typename T>
  static inline void sort(T begin, T end)
  {
    if (begin == end)
      return;

    typedef typename std::iterator_traits<T>::value_type Value;

    // Copy the pivot: std::partition moves elements, so *begin may hold a
    // different value while the predicate is still being evaluated.
    // A lambda replaces std::bind2nd, which was removed in C++17.
    const Value pivot = *begin;
    const T middle = std::partition(begin, end,
        [&pivot](const Value& x) { return std::less<Value>()(x, pivot); });

    // [begin, middle) holds the elements smaller than the pivot and
    // [middle, end) all the others, so the two halves can be sorted
    // independently. This no longer depends on where std::partition left
    // the pivot and avoids re-sorting the already sorted lower half.
    QuickSort< depth-1 >::sort(begin, middle);
    QuickSort< depth-1 >::sort(middle, end);
  }
};

/// Recursion terminator: no partitioning levels left, defer to std::sort.
template <>
struct QuickSort< 0 >
{
  /// Sort the range [begin, end) into ascending order with std::sort.
  template <typename T>
  static inline void sort(T begin, T end)
  {
    std::sort( begin, end );
  }
};
|
|
||||||
|
|
||||||
|
|
||||||
template<typename Lhs, typename Rhs, typename ResultType>
|
|
||||||
static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
|
||||||
{
|
|
||||||
// if one of the matrices does not contain non zero elements
|
|
||||||
// the result will only contain an empty matrix
|
|
||||||
if( lhs.nonZeros() == 0 || rhs.nonZeros() == 0 )
|
|
||||||
return ;
|
|
||||||
|
|
||||||
typedef typename remove_all<Lhs>::type::Scalar Scalar;
|
|
||||||
typedef typename remove_all<Lhs>::type::Index Index;
|
|
||||||
|
|
||||||
// make sure to call innerSize/outerSize since we fake the storage order.
|
|
||||||
Index rows = lhs.innerSize();
|
|
||||||
Index cols = rhs.outerSize();
|
|
||||||
eigen_assert(lhs.outerSize() == rhs.innerSize());
|
|
||||||
|
|
||||||
std::vector<bool> mask(rows,false);
|
|
||||||
Matrix<Scalar,Dynamic,1> values(rows);
|
|
||||||
Matrix<Index,Dynamic,1> indices(rows);
|
|
||||||
|
|
||||||
// estimate the number of non zero entries
|
|
||||||
// given a rhs column containing Y non zeros, we assume that the respective Y columns
|
|
||||||
// of the lhs differs in average of one non zeros, thus the number of non zeros for
|
|
||||||
// the product of a rhs column with the lhs is X+Y where X is the average number of non zero
|
|
||||||
// per column of the lhs.
|
|
||||||
// Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
|
|
||||||
Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros();
|
|
||||||
|
|
||||||
res.setZero();
|
|
||||||
res.reserve(Index(estimated_nnz_prod));
|
|
||||||
|
|
||||||
//const Scalar epsilon = std::numeric_limits< Scalar >::epsilon();
|
|
||||||
const Scalar epsilon = 1e-15 ;
|
|
||||||
|
|
||||||
// we compute each column of the result, one after the other
|
|
||||||
for (Index j=0; j<cols; ++j)
|
|
||||||
{
|
|
||||||
Index nnz = 0;
|
|
||||||
for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
|
|
||||||
{
|
|
||||||
const Scalar y = rhsIt.value();
|
|
||||||
for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
|
|
||||||
{
|
|
||||||
const Scalar val = lhsIt.value() * y;
|
|
||||||
if( std::abs( val ) > epsilon )
|
|
||||||
{
|
|
||||||
const Index i = lhsIt.index();
|
|
||||||
if(!mask[i])
|
|
||||||
{
|
|
||||||
mask[i] = true;
|
|
||||||
values[i] = val;
|
|
||||||
indices[nnz] = i;
|
|
||||||
++nnz;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
values[i] += val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if( nnz > 1 )
|
|
||||||
{
|
|
||||||
// sort indices for sorted insertion to avoid later copying
|
|
||||||
QuickSort< 1 >::sort( indices.data(), indices.data()+nnz );
|
|
||||||
}
|
|
||||||
|
|
||||||
res.startVec(j);
|
|
||||||
// ordered insertion
|
|
||||||
// still using insertBackByOuterInnerUnordered since we know what we are doing
|
|
||||||
for(Index k=0; k<nnz; ++k)
|
|
||||||
{
|
|
||||||
const Index i = indices[k];
|
|
||||||
res.insertBackByOuterInnerUnordered(j,i) = values[i];
|
|
||||||
mask[i] = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
// alternative ordered insertion code:
|
|
||||||
|
|
||||||
Index t200 = rows/(log2(200)*1.39);
|
|
||||||
Index t = (rows*100)/139;
|
|
||||||
|
|
||||||
// FIXME reserve nnz non zeros
|
|
||||||
// FIXME implement fast sort algorithms for very small nnz
|
|
||||||
// if the result is sparse enough => use a quick sort
|
|
||||||
// otherwise => loop through the entire vector
|
|
||||||
// In order to avoid to perform an expensive log2 when the
|
|
||||||
// result is clearly very sparse we use a linear bound up to 200.
|
|
||||||
//if((nnz<200 && nnz<t200) || nnz * log2(nnz) < t)
|
|
||||||
//res.startVec(j);
|
|
||||||
if(true)
|
|
||||||
{
|
|
||||||
if(nnz>1) std::sort(indices.data(),indices.data()+nnz);
|
|
||||||
for(Index k=0; k<nnz; ++k)
|
|
||||||
{
|
|
||||||
Index i = indices[k];
|
|
||||||
res.insertBackByOuterInner(j,i) = values[i];
|
|
||||||
mask[i] = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// dense path
|
|
||||||
for(Index i=0; i<rows; ++i)
|
|
||||||
{
|
|
||||||
if(mask[i])
|
|
||||||
{
|
|
||||||
mask[i] = false;
|
|
||||||
res.insertBackByOuterInner(j,i) = values[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
|
||||||
res.finalize();
|
|
||||||
}
|
|
||||||
|
|
||||||
} // end namespace internal
|
|
||||||
|
|
||||||
namespace internal {
|
|
||||||
|
|
||||||
template<typename Lhs, typename Rhs, typename ResultType,
|
|
||||||
int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
|
|
||||||
int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
|
|
||||||
int ResStorageOrder = (traits<ResultType>::Flags&RowMajorBit) ? RowMajor : ColMajor>
|
|
||||||
struct conservative_sparse_sparse_product_selector;
|
|
||||||
|
|
||||||
template<typename Lhs, typename Rhs, typename ResultType>
|
|
||||||
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
|
|
||||||
{
|
|
||||||
typedef typename remove_all<Lhs>::type LhsCleaned;
|
|
||||||
typedef typename LhsCleaned::Scalar Scalar;
|
|
||||||
|
|
||||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
|
||||||
{
|
|
||||||
//typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
|
|
||||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
|
|
||||||
//ColMajorMatrix resCol(lhs.rows(),rhs.cols());
|
|
||||||
res = ColMajorMatrix(lhs.rows(),rhs.cols());
|
|
||||||
internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, res);
|
|
||||||
//internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
|
|
||||||
// sort the non zeros:
|
|
||||||
//RowMajorMatrix resRow(resCol);
|
|
||||||
//res = resRow;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename Lhs, typename Rhs, typename ResultType>
|
|
||||||
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,ColMajor>
|
|
||||||
{
|
|
||||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
|
||||||
{
|
|
||||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
|
|
||||||
//RowMajorMatrix rhsRow = rhs;
|
|
||||||
//RowMajorMatrix resRow(lhs.rows(), rhs.cols());
|
|
||||||
ColMajorMatrix lhsCol = lhs;
|
|
||||||
res = ResultType( lhs.rows(), rhs.cols() );
|
|
||||||
internal::conservative_sparse_sparse_product_impl<ColMajorMatrix, Rhs, ResultType>( lhsCol, rhs, res );
|
|
||||||
//internal::conservative_sparse_sparse_product_impl<RowMajorMatrix,Lhs,RowMajorMatrix>(rhsRow, lhs, resRow);
|
|
||||||
//res = resRow;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename Lhs, typename Rhs, typename ResultType>
|
|
||||||
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,ColMajor>
|
|
||||||
{
|
|
||||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
|
||||||
{
|
|
||||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
|
|
||||||
ColMajorMatrix rhsCol = rhs;
|
|
||||||
res = ResultType( lhs.rows(), rhs.cols() );
|
|
||||||
internal::conservative_sparse_sparse_product_impl<Lhs, ColMajorMatrix, ResultType>( lhs, rhsCol, res);
|
|
||||||
/*
|
|
||||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
|
|
||||||
RowMajorMatrix lhsRow = lhs;
|
|
||||||
RowMajorMatrix resRow(lhs.rows(), rhs.cols());
|
|
||||||
internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorMatrix,RowMajorMatrix>(rhs, lhsRow, resRow);
|
|
||||||
res = resRow;
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename Lhs, typename Rhs, typename ResultType>
|
|
||||||
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,ColMajor>
|
|
||||||
{
|
|
||||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
|
||||||
{
|
|
||||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
|
|
||||||
RowMajorMatrix resRow(lhs.rows(), rhs.cols());
|
|
||||||
internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
|
|
||||||
res = resRow;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
template<typename Lhs, typename Rhs, typename ResultType>
|
|
||||||
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,RowMajor>
|
|
||||||
{
|
|
||||||
typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar;
|
|
||||||
|
|
||||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
|
||||||
{
|
|
||||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
|
|
||||||
ColMajorMatrix resCol(lhs.rows(), rhs.cols());
|
|
||||||
internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
|
|
||||||
res = resCol;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename Lhs, typename Rhs, typename ResultType>
|
|
||||||
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,RowMajor>
|
|
||||||
{
|
|
||||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
|
||||||
{
|
|
||||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
|
|
||||||
RowMajorMatrix rhsRow = rhs;
|
|
||||||
res = ResultType( lhs.rows(), rhs.cols() );
|
|
||||||
internal::conservative_sparse_sparse_product_impl<Lhs, RowMajorMatrix, ResultType>(rhsRow, lhs, res);
|
|
||||||
/*
|
|
||||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
|
|
||||||
ColMajorMatrix lhsCol = lhs;
|
|
||||||
ColMajorMatrix resCol(lhs.rows(), rhs.cols());
|
|
||||||
internal::conservative_sparse_sparse_product_impl<ColMajorMatrix,Rhs,ColMajorMatrix>(lhsCol, rhs, resCol);
|
|
||||||
res = resCol;
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename Lhs, typename Rhs, typename ResultType>
|
|
||||||
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMajor,RowMajor>
|
|
||||||
{
|
|
||||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
|
||||||
{
|
|
||||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
|
|
||||||
RowMajorMatrix lhsRow = lhs;
|
|
||||||
res = RowMajorMatrix( lhs.rows(), rhs.cols() );
|
|
||||||
internal::conservative_sparse_sparse_product_impl<Rhs, RowMajorMatrix, ResultType>(rhs, lhsRow, res);
|
|
||||||
/*
|
|
||||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
|
|
||||||
ColMajorMatrix rhsCol = rhs;
|
|
||||||
ColMajorMatrix resCol(lhs.rows(), rhs.cols());
|
|
||||||
internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorMatrix,ColMajorMatrix>(lhs, rhsCol, resCol);
|
|
||||||
res = resCol;
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename Lhs, typename Rhs, typename ResultType>
|
|
||||||
struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,RowMajor>
|
|
||||||
{
|
|
||||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
|
||||||
{
|
|
||||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
|
|
||||||
//typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
|
|
||||||
res = RowMajorMatrix( lhs.rows(),rhs.cols() );
|
|
||||||
//RowMajorMatrix resRow(lhs.rows(),rhs.cols());
|
|
||||||
internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, res);
|
|
||||||
// sort the non zeros:
|
|
||||||
//ColMajorMatrix resCol(resRow);
|
|
||||||
//res = resCol;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // end namespace internal
|
|
||||||
|
|
||||||
} // end namespace Eigen
|
|
||||||
|
|
||||||
#endif // EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H
|
|
@ -2059,7 +2059,7 @@ namespace {
|
|||||||
const int num_blocks = p.numBlocks();
|
const int num_blocks = p.numBlocks();
|
||||||
std::vector<ADB::M> jacs(num_blocks);
|
std::vector<ADB::M> jacs(num_blocks);
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jacs[block] = dpm_diag * p.derivative()[block];
|
fastSparseProduct(dpm_diag, p.derivative()[block], jacs[block]);
|
||||||
}
|
}
|
||||||
return ADB::function(pm, jacs);
|
return ADB::function(pm, jacs);
|
||||||
} else {
|
} else {
|
||||||
@ -2087,7 +2087,7 @@ namespace {
|
|||||||
const int num_blocks = p.numBlocks();
|
const int num_blocks = p.numBlocks();
|
||||||
std::vector<ADB::M> jacs(num_blocks);
|
std::vector<ADB::M> jacs(num_blocks);
|
||||||
for (int block = 0; block < num_blocks; ++block) {
|
for (int block = 0; block < num_blocks; ++block) {
|
||||||
jacs[block] = dtm_diag * p.derivative()[block];
|
fastSparseProduct(dtm_diag, p.derivative()[block], jacs[block]);
|
||||||
}
|
}
|
||||||
return ADB::function(tm, jacs);
|
return ADB::function(tm, jacs);
|
||||||
} else {
|
} else {
|
||||||
|
@ -279,7 +279,9 @@ namespace Opm
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// solve Du = C
|
// solve Du = C
|
||||||
const M u = Di * Jn[var]; // solver.solve(Jn[var]);
|
// const M u = Di * Jn[var]; // solver.solve(Jn[var]);
|
||||||
|
M u;
|
||||||
|
fastSparseProduct(Di, Jn[var], u); // solver.solve(Jn[var]);
|
||||||
for (int eq = 0; eq < num_eq; ++eq) {
|
for (int eq = 0; eq < num_eq; ++eq) {
|
||||||
if (eq == n) {
|
if (eq == n) {
|
||||||
continue;
|
continue;
|
||||||
@ -292,7 +294,9 @@ namespace Opm
|
|||||||
jacs[eq].push_back(Je[var]);
|
jacs[eq].push_back(Je[var]);
|
||||||
M& J = jacs[eq].back();
|
M& J = jacs[eq].back();
|
||||||
// Subtract Bu (B*inv(D)*C)
|
// Subtract Bu (B*inv(D)*C)
|
||||||
J -= B * u;
|
M Bu;
|
||||||
|
fastSparseProduct(B, u, Bu);
|
||||||
|
J -= Bu;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -397,6 +401,7 @@ namespace Opm
|
|||||||
void formEllipticSystem(const int num_phases,
|
void formEllipticSystem(const int num_phases,
|
||||||
const std::vector<ADB>& eqs_in,
|
const std::vector<ADB>& eqs_in,
|
||||||
Eigen::SparseMatrix<double, Eigen::RowMajor>& A,
|
Eigen::SparseMatrix<double, Eigen::RowMajor>& A,
|
||||||
|
// M& A,
|
||||||
V& b)
|
V& b)
|
||||||
{
|
{
|
||||||
if (num_phases != 3) {
|
if (num_phases != 3) {
|
||||||
|
185
opm/autodiff/fastSparseProduct.hpp
Normal file
185
opm/autodiff/fastSparseProduct.hpp
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||||
|
//
|
||||||
|
// This Source Code Form is subject to the terms of the Mozilla
|
||||||
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
// This file has been modified for use in the OPM project codebase.
|
||||||
|
|
||||||
|
#ifndef OPM_FASTSPARSEPRODUCT_HEADER_INCLUDED
|
||||||
|
#define OPM_FASTSPARSEPRODUCT_HEADER_INCLUDED
|
||||||
|
|
||||||
|
#include <Eigen/Sparse>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
|
#include <functional>
|
||||||
|
#include <limits>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <Eigen/Core>
|
||||||
|
|
||||||
|
namespace Opm {
|
||||||
|
|
||||||
|
/// Depth-limited quicksort.
///
/// Applies up to `depth` levels of pivot partitioning and then hands the
/// remaining sub-ranges to std::sort (see the QuickSort<0> specialization).
///
/// \tparam depth  number of partitioning levels performed before falling
///                back to std::sort.
template < unsigned int depth >
struct QuickSort
{
    /// Sort the range [begin, end) into ascending order.
    ///
    /// \tparam T  iterator type usable with std::partition and std::sort.
    template <typename T>
    static inline void sort(T begin, T end)
    {
        if (begin == end) {
            return;
        }

        typedef typename std::iterator_traits<T>::value_type Value;

        // Copy the pivot: std::partition moves elements, so *begin may hold
        // a different value while the predicate is still being evaluated.
        // A lambda replaces std::bind2nd, which was removed in C++17.
        const Value pivot = *begin;
        const T middle = std::partition(begin, end,
            [&pivot](const Value& x) { return std::less<Value>()(x, pivot); });

        // [begin, middle) holds the elements smaller than the pivot and
        // [middle, end) all the others, so the two halves can be sorted
        // independently. This no longer depends on where std::partition
        // left the pivot and avoids re-sorting the sorted lower half.
        QuickSort< depth-1 >::sort(begin, middle);
        QuickSort< depth-1 >::sort(middle, end);
    }
};

/// Recursion terminator: no partitioning levels left, defer to std::sort.
template <>
struct QuickSort< 0 >
{
    /// Sort the range [begin, end) into ascending order with std::sort.
    template <typename T>
    static inline void sort(T begin, T end)
    {
        std::sort( begin, end );
    }
};
|
||||||
|
|
||||||
|
|
||||||
|
template<typename Lhs, typename Rhs, typename ResultType>
|
||||||
|
void fastSparseProduct(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
||||||
|
{
|
||||||
|
using namespace Eigen;
|
||||||
|
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
|
||||||
|
res = ColMajorMatrix(lhs.rows(), rhs.cols());
|
||||||
|
// if one of the matrices does not contain non zero elements
|
||||||
|
// the result will only contain an empty matrix
|
||||||
|
if( lhs.nonZeros() == 0 || rhs.nonZeros() == 0 )
|
||||||
|
return;
|
||||||
|
|
||||||
|
typedef typename Eigen::internal::remove_all<Lhs>::type::Scalar Scalar;
|
||||||
|
typedef typename Eigen::internal::remove_all<Lhs>::type::Index Index;
|
||||||
|
|
||||||
|
// make sure to call innerSize/outerSize since we fake the storage order.
|
||||||
|
Index rows = lhs.innerSize();
|
||||||
|
Index cols = rhs.outerSize();
|
||||||
|
eigen_assert(lhs.outerSize() == rhs.innerSize());
|
||||||
|
|
||||||
|
std::vector<bool> mask(rows,false);
|
||||||
|
Matrix<Scalar,Dynamic,1> values(rows);
|
||||||
|
Matrix<Index,Dynamic,1> indices(rows);
|
||||||
|
|
||||||
|
// estimate the number of non zero entries
|
||||||
|
// given a rhs column containing Y non zeros, we assume that the respective Y columns
|
||||||
|
// of the lhs differs in average of one non zeros, thus the number of non zeros for
|
||||||
|
// the product of a rhs column with the lhs is X+Y where X is the average number of non zero
|
||||||
|
// per column of the lhs.
|
||||||
|
// Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs)
|
||||||
|
Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros();
|
||||||
|
|
||||||
|
res.setZero();
|
||||||
|
res.reserve(Index(estimated_nnz_prod));
|
||||||
|
|
||||||
|
//const Scalar epsilon = std::numeric_limits< Scalar >::epsilon();
|
||||||
|
const Scalar epsilon = 0.0;
|
||||||
|
|
||||||
|
// we compute each column of the result, one after the other
|
||||||
|
for (Index j=0; j<cols; ++j)
|
||||||
|
{
|
||||||
|
Index nnz = 0;
|
||||||
|
for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
|
||||||
|
{
|
||||||
|
const Scalar y = rhsIt.value();
|
||||||
|
for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
|
||||||
|
{
|
||||||
|
const Scalar val = lhsIt.value() * y;
|
||||||
|
if( std::abs( val ) > epsilon )
|
||||||
|
{
|
||||||
|
const Index i = lhsIt.index();
|
||||||
|
if(!mask[i])
|
||||||
|
{
|
||||||
|
mask[i] = true;
|
||||||
|
values[i] = val;
|
||||||
|
indices[nnz] = i;
|
||||||
|
++nnz;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
values[i] += val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( nnz > 1 )
|
||||||
|
{
|
||||||
|
// sort indices for sorted insertion to avoid later copying
|
||||||
|
// QuickSort< 1 >::sort( indices.data(), indices.data()+nnz );
|
||||||
|
std::sort( indices.data(), indices.data()+nnz );
|
||||||
|
}
|
||||||
|
|
||||||
|
res.startVec(j);
|
||||||
|
// ordered insertion
|
||||||
|
// still using insertBackByOuterInnerUnordered since we know what we are doing
|
||||||
|
for(Index k=0; k<nnz; ++k)
|
||||||
|
{
|
||||||
|
const Index i = indices[k];
|
||||||
|
res.insertBackByOuterInnerUnordered(j,i) = values[i];
|
||||||
|
mask[i] = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
// alternative ordered insertion code:
|
||||||
|
|
||||||
|
Index t200 = rows/(log2(200)*1.39);
|
||||||
|
Index t = (rows*100)/139;
|
||||||
|
|
||||||
|
// FIXME reserve nnz non zeros
|
||||||
|
// FIXME implement fast sort algorithms for very small nnz
|
||||||
|
// if the result is sparse enough => use a quick sort
|
||||||
|
// otherwise => loop through the entire vector
|
||||||
|
// In order to avoid to perform an expensive log2 when the
|
||||||
|
// result is clearly very sparse we use a linear bound up to 200.
|
||||||
|
//if((nnz<200 && nnz<t200) || nnz * log2(nnz) < t)
|
||||||
|
//res.startVec(j);
|
||||||
|
if(true)
|
||||||
|
{
|
||||||
|
if(nnz>1) std::sort(indices.data(),indices.data()+nnz);
|
||||||
|
for(Index k=0; k<nnz; ++k)
|
||||||
|
{
|
||||||
|
Index i = indices[k];
|
||||||
|
res.insertBackByOuterInner(j,i) = values[i];
|
||||||
|
mask[i] = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// dense path
|
||||||
|
for(Index i=0; i<rows; ++i)
|
||||||
|
{
|
||||||
|
if(mask[i])
|
||||||
|
{
|
||||||
|
mask[i] = false;
|
||||||
|
res.insertBackByOuterInner(j,i) = values[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
res.finalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} // end namespace Opm
|
||||||
|
|
||||||
|
#endif // OPM_FASTSPARSEPRODUCT_HEADER_INCLUDED
|
Loading…
Reference in New Issue
Block a user