From c51a794cac3028222b8bb44df1afc5a247cc3c46 Mon Sep 17 00:00:00 2001 From: Robert K Date: Tue, 2 Dec 2014 10:38:21 +0100 Subject: [PATCH] overloaded ConservativeSparseSparseProduct to speed up matrix-matrix multiplication. --- CMakeLists_files.cmake | 1 + opm/autodiff/AutoDiffBlock.hpp | 9 +- .../ConservativeSparseSparseProduct.h | 318 ++++++++++++++++++ 3 files changed, 324 insertions(+), 4 deletions(-) create mode 100644 opm/autodiff/ConservativeSparseSparseProduct.h diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index cb5456350..3f8c45a06 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -98,6 +98,7 @@ list (APPEND PUBLIC_HEADER_FILES opm/autodiff/BlackoilPropsAdFromDeck.hpp opm/autodiff/BlackoilPropsAdInterface.hpp opm/autodiff/CPRPreconditioner.hpp + opm/autodiff/ConservativeSparseSparseProduct.h opm/autodiff/DuneMatrix.hpp opm/autodiff/GeoProps.hpp opm/autodiff/GridHelpers.hpp diff --git a/opm/autodiff/AutoDiffBlock.hpp b/opm/autodiff/AutoDiffBlock.hpp index c053c6536..19dfd5061 100644 --- a/opm/autodiff/AutoDiffBlock.hpp +++ b/opm/autodiff/AutoDiffBlock.hpp @@ -22,6 +22,7 @@ #include +#include #include #include @@ -102,7 +103,7 @@ namespace Opm } /// Create an AutoDiffBlock representing a constant. - /// \param[in] val values + /// \param[in] val values static AutoDiffBlock constant(const V& val) { return AutoDiffBlock(val); @@ -112,7 +113,7 @@ namespace Opm /// This variant requires specifying the block sizes used /// for the Jacobians even though the Jacobian matrices /// themselves will be zero. - /// \param[in] val values + /// \param[in] val values /// \param[in] blocksizes block pattern static AutoDiffBlock constant(const V& val, const std::vector& blocksizes) { @@ -129,7 +130,7 @@ namespace Opm /// Create an AutoDiffBlock representing a single variable block. /// \param[in] index index of the variable you are constructing - /// \param[in] val values + /// \param[in] val values /// \param[in] blocksizes block pattern /// The resulting object will have size() equal to block_pattern[index]. /// Its jacobians will all be zero, except for derivative()[index], which @@ -154,7 +155,7 @@ namespace Opm } /// Create an AutoDiffBlock by directly specifying values and jacobians. - /// \param[in] val values + /// \param[in] val values /// \param[in] jac vector of jacobians static AutoDiffBlock function(const V& val, const std::vector& jac) { diff --git a/opm/autodiff/ConservativeSparseSparseProduct.h b/opm/autodiff/ConservativeSparseSparseProduct.h new file mode 100644 index 000000000..b1b036913 --- /dev/null +++ b/opm/autodiff/ConservativeSparseSparseProduct.h @@ -0,0 +1,318 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H +#define EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H + +#warning "Using overloaded Eigen::ConservativeSparseSparseProduct.h" + +#include +#include +#include + +#include + +namespace Eigen { + +// forward declaration of SparseMatrix +template +class SparseMatrix; + + +namespace internal { + +template < unsigned int depth > +struct QuickSort +{ + template + static inline void sort(T begin, T end) + { + if (begin != end) + { + T middle = std::partition (begin, end, + std::bind2nd(std::less::value_type>(), *begin) + ); + QuickSort< depth-1 >::sort(begin, middle); + + // std::sort (max(begin + 1, middle), end); + T new_middle = begin; + QuickSort< depth-1 >::sort(++new_middle, end); + } + } +}; + +template <> +struct QuickSort< 0 > +{ + template + static inline void sort(T begin, T end) + { + std::sort( begin, end ); + } +}; + + +template +static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res) +{ + typedef typename remove_all::type::Scalar Scalar; + typedef typename remove_all::type::Index Index; + + // make sure to call innerSize/outerSize since we fake the storage order. + Index rows = lhs.innerSize(); + Index cols = rhs.outerSize(); + eigen_assert(lhs.outerSize() == rhs.innerSize()); + + std::vector mask(rows,false); + Matrix values(rows); + Matrix indices(rows); + + // estimate the number of non zero entries + // given a rhs column containing Y non zeros, we assume that the respective Y columns + // of the lhs differs in average of one non zeros, thus the number of non zeros for + // the product of a rhs column with the lhs is X+Y where X is the average number of non zero + // per column of the lhs. + // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs) + Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros(); + + res.setZero(); + res.reserve(Index(estimated_nnz_prod)); + // we compute each column of the result, one after the other + for (Index j=0; j 1 ) + { + // sort indices for sorted insertion to avoid later copying + QuickSort< 1 >::sort( indices.data(), indices.data()+nnz ); + } + + // ordered insertion + // still using insertBackByOuterInnerUnordered since we know what we are doing + for(Index k=0; k use a quick sort + // otherwise => loop through the entire vector + // In order to avoid to perform an expensive log2 when the + // result is clearly very sparse we use a linear bound up to 200. + //if((nnz<200 && nnz1) std::sort(indices.data(),indices.data()+nnz); + for(Index k=0; k::Flags&RowMajorBit) ? RowMajor : ColMajor, + int RhsStorageOrder = (traits::Flags&RowMajorBit) ? RowMajor : ColMajor, + int ResStorageOrder = (traits::Flags&RowMajorBit) ? RowMajor : ColMajor> +struct conservative_sparse_sparse_product_selector; + +template +struct conservative_sparse_sparse_product_selector +{ + typedef typename remove_all::type LhsCleaned; + typedef typename LhsCleaned::Scalar Scalar; + + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + //typedef SparseMatrix RowMajorMatrix; + typedef SparseMatrix ColMajorMatrix; + //ColMajorMatrix resCol(lhs.rows(),rhs.cols()); + res = ColMajorMatrix(lhs.rows(),rhs.cols()); + internal::conservative_sparse_sparse_product_impl(lhs, rhs, res); + //internal::conservative_sparse_sparse_product_impl(lhs, rhs, resCol); + // sort the non zeros: + //RowMajorMatrix resRow(resCol); + //res = resRow; + } +}; + +template +struct conservative_sparse_sparse_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + typedef SparseMatrix ColMajorMatrix; + //RowMajorMatrix rhsRow = rhs; + //RowMajorMatrix resRow(lhs.rows(), rhs.cols()); + ColMajorMatrix lhsCol = lhs; + res = ResultType( lhs.rows(), rhs.cols() ); + internal::conservative_sparse_sparse_product_impl( lhsCol, rhs, res ); + //internal::conservative_sparse_sparse_product_impl(rhsRow, lhs, resRow); + //res = resRow; + } +}; + +template +struct conservative_sparse_sparse_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + typedef SparseMatrix ColMajorMatrix; + ColMajorMatrix rhsCol = rhs; + res = ResultType( lhs.rows(), rhs.cols() ); + internal::conservative_sparse_sparse_product_impl( lhs, rhsCol, res); + /* + typedef SparseMatrix RowMajorMatrix; + RowMajorMatrix lhsRow = lhs; + RowMajorMatrix resRow(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl(rhs, lhsRow, resRow); + res = resRow; + */ + } +}; + +template +struct conservative_sparse_sparse_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + typedef SparseMatrix RowMajorMatrix; + RowMajorMatrix resRow(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl(rhs, lhs, resRow); + res = resRow; + } +}; + + +template +struct conservative_sparse_sparse_product_selector +{ + typedef typename traits::type>::Scalar Scalar; + + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + typedef SparseMatrix ColMajorMatrix; + ColMajorMatrix resCol(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl(lhs, rhs, resCol); + res = resCol; + } +}; + +template +struct conservative_sparse_sparse_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + typedef SparseMatrix RowMajorMatrix; + RowMajorMatrix rhsRow = rhs; + res = ResultType( lhs.rows(), rhs.cols() ); + internal::conservative_sparse_sparse_product_impl(rhsRow, lhs, res); + /* + typedef SparseMatrix ColMajorMatrix; + ColMajorMatrix lhsCol = lhs; + ColMajorMatrix resCol(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl(lhsCol, rhs, resCol); + res = resCol; + */ + } +}; + +template +struct conservative_sparse_sparse_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + typedef SparseMatrix RowMajorMatrix; + RowMajorMatrix lhsRow = lhs; + res = RowMajorMatrix( lhs.rows(), rhs.cols() ); + internal::conservative_sparse_sparse_product_impl(rhs, lhsRow, res); + /* + typedef SparseMatrix ColMajorMatrix; + ColMajorMatrix rhsCol = rhs; + ColMajorMatrix resCol(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl(lhs, rhsCol, resCol); + res = resCol; + */ + } +}; + +template +struct conservative_sparse_sparse_product_selector +{ + static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) + { + typedef SparseMatrix RowMajorMatrix; + //typedef SparseMatrix ColMajorMatrix; + res = RowMajorMatrix( lhs.rows(),rhs.cols() ); + //RowMajorMatrix resRow(lhs.rows(),rhs.cols()); + internal::conservative_sparse_sparse_product_impl(rhs, lhs, res); + // sort the non zeros: + //ColMajorMatrix resCol(resRow); + //res = resCol; + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H