From e8b3524ffa83dd89f86a9dec1e00bdc441012458 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Atgeirr=20Fl=C3=B8=20Rasmussen?= Date: Fri, 5 Dec 2014 12:56:15 +0100 Subject: [PATCH] Add Robert Kloefkorn's fast sparse product implementation. --- opm/autodiff/fastSparseProduct.hpp | 185 +++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 opm/autodiff/fastSparseProduct.hpp diff --git a/opm/autodiff/fastSparseProduct.hpp b/opm/autodiff/fastSparseProduct.hpp new file mode 100644 index 000000000..0ec72146c --- /dev/null +++ b/opm/autodiff/fastSparseProduct.hpp @@ -0,0 +1,185 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// This file has been modified for use in the OPM project codebase. + +#ifndef OPM_FASTSPARSEPRODUCT_HEADER_INCLUDED +#define OPM_FASTSPARSEPRODUCT_HEADER_INCLUDED + +#include + +#include +#include +#include +#include +#include + +#include + +namespace Opm { + +template < unsigned int depth > +struct QuickSort +{ + template + static inline void sort(T begin, T end) + { + if (begin != end) + { + T middle = std::partition (begin, end, + std::bind2nd(std::less::value_type>(), *begin) + ); + QuickSort< depth-1 >::sort(begin, middle); + + // std::sort (max(begin + 1, middle), end); + T new_middle = begin; + QuickSort< depth-1 >::sort(++new_middle, end); + } + } +}; + +template <> +struct QuickSort< 0 > +{ + template + static inline void sort(T begin, T end) + { + // fall back to standard insertion sort + std::sort( begin, end ); + } +}; + + +template +void fastSparseProduct(const Lhs& lhs, const Rhs& rhs, ResultType& res) +{ + using namespace Eigen; + typedef SparseMatrix ColMajorMatrix; + res = ColMajorMatrix(lhs.rows(), rhs.cols()); + // if one of the matrices does not contain non zero elements + // the result will only contain an empty matrix + if( lhs.nonZeros() == 0 || rhs.nonZeros() == 0 ) + return; + + typedef typename Eigen::internal::remove_all::type::Scalar Scalar; + typedef typename Eigen::internal::remove_all::type::Index Index; + + // make sure to call innerSize/outerSize since we fake the storage order. + Index rows = lhs.innerSize(); + Index cols = rhs.outerSize(); + eigen_assert(lhs.outerSize() == rhs.innerSize()); + + std::vector mask(rows,false); + Matrix values(rows); + Matrix indices(rows); + + // estimate the number of non zero entries + // given a rhs column containing Y non zeros, we assume that the respective Y columns + // of the lhs differs in average of one non zeros, thus the number of non zeros for + // the product of a rhs column with the lhs is X+Y where X is the average number of non zero + // per column of the lhs. + // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs) + Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros(); + + res.setZero(); + res.reserve(Index(estimated_nnz_prod)); + + //const Scalar epsilon = std::numeric_limits< Scalar >::epsilon(); + const Scalar epsilon = 0.0; + + // we compute each column of the result, one after the other + for (Index j=0; j epsilon ) + { + if(!mask[i]) + { + mask[i] = true; + values[i] = val; + indices[nnz] = i; + ++nnz; + } + else + values[i] += val; + } + } + } + + if( nnz > 1 ) + { + // sort indices for sorted insertion to avoid later copying + // QuickSort< 1 >::sort( indices.data(), indices.data()+nnz ); + std::sort( indices.data(), indices.data()+nnz ); + } + + res.startVec(j); + // ordered insertion + // still using insertBackByOuterInnerUnordered since we know what we are doing + for(Index k=0; k use a quick sort + // otherwise => loop through the entire vector + // In order to avoid to perform an expensive log2 when the + // result is clearly very sparse we use a linear bound up to 200. + //if((nnz<200 && nnz1) std::sort(indices.data(),indices.data()+nnz); + for(Index k=0; k