From d1adf474ec5a7814843f24bb0330a92e0eddbca2 Mon Sep 17 00:00:00 2001 From: timovanopstal Date: Wed, 9 Dec 2015 10:36:55 +0100 Subject: [PATCH] BLAS/accelerate related fixes --- cmake/Modules/FindCBLAS.cmake | 22 +++-- cmake/Modules/FindIFEMDeps.cmake | 8 +- src/LinAlg/DenseMatrix.C | 154 +++++++++++++++++++------------ src/LinAlg/MatVec.C | 21 +++-- src/LinAlg/matrix.h | 6 +- src/Utility/Tensor.C | 17 +++- 6 files changed, 143 insertions(+), 85 deletions(-) diff --git a/cmake/Modules/FindCBLAS.cmake b/cmake/Modules/FindCBLAS.cmake index 750bea5d..3cd776d2 100644 --- a/cmake/Modules/FindCBLAS.cmake +++ b/cmake/Modules/FindCBLAS.cmake @@ -1,11 +1,17 @@ -IF (CBLAS_LIBRARIES) - SET(CBLAS_FIND_QUIETLY TRUE) -ENDIF (CBLAS_LIBRARIES) +if(APPLE) + set(CBLAS_LIBRARIES "-framework Accelerate") + set(CBLAS_DEFINITIONS -DUSE_ACCELERATE=1) +else() + IF (CBLAS_LIBRARIES) + SET(CBLAS_FIND_QUIETLY TRUE) + ENDIF (CBLAS_LIBRARIES) -FIND_LIBRARY(CBLAS_LIBRARIES - NAMES cblas - PATHS $ENV{HOME}/lib - /usr/lib/atlas/) + FIND_LIBRARY(CBLAS_LIBRARIES + NAMES cblas + PATHS $ENV{HOME}/lib + /usr/lib/atlas/) + set(CBLAS_DEFINITIONS -DUSE_CBLAS=1) +endif() IF(NOT CBLAS_LIBRARIES) FIND_PACKAGE(BLAS) @@ -20,5 +26,5 @@ ENDIF(NOT CBLAS_LIBRARIES) INCLUDE(FindPackageHandleStandardArgs) FIND_PACKAGE_HANDLE_STANDARD_ARGS(CBLAS DEFAULT_MSG - CBLAS_LIBRARIES) + CBLAS_LIBRARIES CBLAS_DEFINITIONS) MARK_AS_ADVANCED(CBLAS_LIBRARIES) diff --git a/cmake/Modules/FindIFEMDeps.cmake b/cmake/Modules/FindIFEMDeps.cmake index cdb81d1e..6f04a2a2 100644 --- a/cmake/Modules/FindIFEMDeps.cmake +++ b/cmake/Modules/FindIFEMDeps.cmake @@ -22,11 +22,13 @@ ENABLE_LANGUAGE(CXX) IF(CMAKE_CXX_COMPILER_ID MATCHES Intel) SET(IFEM_CXX_FLAGS "${IFEM_CXX_FLAGS} -DUSE_MKL -mkl=sequential") SET(IFEM_BUILD_CXX_FLAGS "${IFEM_BUILD_CXX_FLAGS} -DUSE_MKL -mkl=sequential") -ELSE(CMAKE_CXX_COMPILER_ID MATCHES Intel) - SET(IFEM_CXX_FLAGS "${IFEM_CXX_FLAGS} -DUSE_CBLAS") - SET(IFEM_BUILD_CXX_FLAGS "${IFEM_BUILD_CXX_FLAGS} -DUSE_CBLAS") FIND_PACKAGE(CBLAS REQUIRED) FIND_PACKAGE(LAPACK REQUIRED) +ELSE(CMAKE_CXX_COMPILER_ID MATCHES Intel) + FIND_PACKAGE(CBLAS REQUIRED) + FIND_PACKAGE(LAPACK REQUIRED) + SET(IFEM_CXX_FLAGS "${IFEM_CXX_FLAGS} ${CBLAS_DEFINITIONS}") + SET(IFEM_BUILD_CXX_FLAGS "${IFEM_BUILD_CXX_FLAGS} ${CBLAS_DEFINITIONS}") ENDIF(CMAKE_CXX_COMPILER_ID MATCHES Intel) if(MINGW) diff --git a/src/LinAlg/DenseMatrix.C b/src/LinAlg/DenseMatrix.C index 099c6b06..a755d2b7 100644 --- a/src/LinAlg/DenseMatrix.C +++ b/src/LinAlg/DenseMatrix.C @@ -39,6 +39,7 @@ #define dgeev_ dgeev #endif +#if defined(USE_CBLAS) || defined(USE_MKL) extern "C" { //! \brief Adds an element matrix \a eK into the system matrix \a sysK. //! \details This is a FORTRAN-77 subroutine in the SAM library. @@ -53,69 +54,70 @@ void addem2_(const Real* eK, const Real* ttcc, const int* mpar, //! \brief Estimates the reciprocal of the condition number of the matrix \b A. //! \details This is a FORTRAN-77 subroutine in the LAPack library. //! \sa LAPack library documentation. -void dgecon_(const char& norm, const int& n, const Real* A, const int& lda, - const Real& anorm, Real& rcond, Real* work, int* iwork, int& info); +void dgecon_(const char* norm, const int* n, const Real* A, const int* lda, + const Real* anorm, Real* rcond, Real* work, int* iwork, int* info); //! \brief Solves the linear equation system \a A*x=b. //! \details This is a FORTRAN-77 subroutine in the LAPack library. //! \sa LAPack library documentation. -void dgesv_(const int& n, const int& nrhs, - Real* A, const int& lda, int* ipiv, - Real* B, const int& ldb, int& info); +void dgesv_(const int* n, const int* nrhs, + Real* A, const int* lda, int* ipiv, + Real* B, const int* ldb, int* info); //! \brief Solves the equation system \a A*x=b when \a A is already factorized. //! \details This is a FORTRAN-77 subroutine in the LAPack library. //! \sa LAPack library documentation. -void dgetrs_(const char& trans, const int& n, const int& nrhs, - Real* A, const int& lda, int* ipiv, - Real* B, const int& ldb, int& info); +void dgetrs_(const char* trans, const int* n, const int* nrhs, + Real* A, const int* lda, int* ipiv, + Real* B, const int* ldb, int* info); //! \brief Returns a norm of the matrix \b A. //! \details This is a FORTRAN-77 subroutine in the LAPack library. //! \sa LAPack library documentation. -double dlange_(const char& norm, const int& m, const int& n, const Real* A, - const int& lda, Real* work); +double dlange_(const char* norm, const int* m, const int* n, const Real* A, + const int* lda, Real* work); //! \brief Solves the symmetric linear equation system \a A*x=b. //! \details This is a FORTRAN-77 subroutine in the LAPack library. //! \sa LAPack library documentation. -void dposv_(const char& uplo, const int& n, const int& nrhs, - Real* A, const int& lda, Real* B, const int& ldb, int& info); +void dposv_(const char* uplo, const int* n, const int* nrhs, + Real* A, const int* lda, Real* B, const int* ldb, int* info); //! \brief Solves the symmetric equation system \a A*x=b for prefactored \b A. //! \details This is a FORTRAN-77 subroutine in the LAPack library. //! \sa LAPack library documentation. -void dpotrs_(const char& uplo, const int& n, const int& nrhs, - Real* A, const int& lda, Real* B, const int& ldb, int& info); +void dpotrs_(const char* uplo, const int* n, const int* nrhs, + Real* A, const int* lda, Real* B, const int* ldb, int* info); //! \brief Solves the standard eigenproblem \a A*x=(lambda)*x. //! \details This is a FORTRAN-77 subroutine in the LAPack library. //! \sa LAPack library documentation. -void dsyevx_(const char& jobz, const char& range, const char& uplo, - const int& n, Real* a, const int& lda, - const Real& vl, const Real& vu, const int& il, const int& iu, - const Real& abstol, const int& m, Real* w, Real* z, const int& ldz, - Real* work, const int& lwork, int* iwork, int* ifail, int& info); +void dsyevx_(const char* jobz, const char* range, const char* uplo, + const int* n, Real* a, const int* lda, + const Real* vl, const Real* vu, const int* il, const int* iu, + const Real* abstol, const int* m, Real* w, Real* z, const int* ldz, + Real* work, const int* lwork, int* iwork, int* ifail, int* info); //! \brief Solves the generalized eigenproblem \a A*x=(lambda)*B*x. //! \details This is a FORTRAN-77 subroutine in the LAPack library. //! \sa LAPack library documentation. -void dsygvx_(const int& itype, const char& jobz, const char& range, - const char& uplo, const int& n, Real* a, const int& lda, - Real* b, const int& ldb, const Real& vl, const Real& vu, - const int& il, const int& iu, const Real& abstol, - const int& m, Real* w, Real* z, const int& ldz, - Real* work, const int& lwork, int* iwork, int* ifail, int& info); +void dsygvx_(const int* itype, const char* jobz, const char* range, + const char* uplo, const int* n, Real* a, const int* lda, + Real* b, const int* ldb, const Real* vl, const Real* vu, + const int* il, const int* iu, const Real* abstol, + const int* m, Real* w, Real* z, const int* ldz, + Real* work, const int* lwork, int* iwork, int* ifail, int* info); //! \brief Solves the non-symmetric eigenproblem \a A*x=(lambda)*x. //! \details This is a FORTRAN-77 subroutine in the LAPack library. //! \sa LAPack library documentation. -void dgeev_(const char& jobvl, const char& jobvr, - const int& n, Real* a, const int& lda, - Real* wr, Real* wi, Real* vl, const int& ldvl, - Real* vr, const int& ldvr, - Real* work, const int& lwork, int& info); +void dgeev_(const char* jobvl, const char* jobvr, + const int* n, Real* a, const int* lda, + Real* wr, Real* wi, Real* vl, const int* ldvl, + Real* vr, const int* ldvr, + Real* work, const int* lwork, int* info); } +#endif DenseMatrix::DenseMatrix (size_t m, size_t n, bool s) : myMat(m,n), ipiv(nullptr) @@ -469,34 +471,45 @@ bool DenseMatrix::solve (Real* B, size_t nrhs, Real* rcond) if (n > myMat.cols()) return false; // More equations than unknowns const char* dsolv = symm ? "DGESV" : "DPOSV"; -#ifdef USE_CBLAS +#if USE_BLAS int info = 0; + int noncons = n; if (symm) { // Matrix is marked as symmetric - use Cholesky instead of full LU - if (ipiv) - dpotrs_('U',n,nrhs,myMat.ptr(),n,B,n,info); - else + if (ipiv) { + char uplo = 'U'; + int nr = nrhs; + dpotrs_(&uplo,&noncons,&nr,myMat.ptr(),&noncons,B,&noncons,&info); + } else { ipiv = new int[1]; // dummy allocation to flag factorization reuse - dposv_('U',n,nrhs,myMat.ptr(),n,B,n,info); + char uplo = 'U'; + int nr = nrhs; + dposv_(&uplo,&noncons,&nr,myMat.ptr(),&noncons,B,&noncons,&info); } } - else if (ipiv) - dgetrs_('N',n,nrhs,myMat.ptr(),n,ipiv,B,n,info); - else + else if (ipiv) { + char trans = 'N'; + int nr = nrhs; + dgetrs_(&trans,&noncons,&nr,myMat.ptr(),&noncons,ipiv,B,&noncons,&info); + } else { Real anorm; - if (rcond) // Evaluate the 1-norm of the original LHS-matrix - anorm = dlange_('1',n,n,myMat.ptr(),n,rcond); + if (rcond) { // Evaluate the 1-norm of the original LHS-matrix + char norm = '1'; + anorm = dlange_(&norm,&noncons,&noncons,myMat.ptr(),&noncons,rcond); + } ipiv = new int[n]; - dgesv_(n,nrhs,myMat.ptr(),n,ipiv,B,n,info); + int nr = nrhs; + dgesv_(&noncons,&nr,myMat.ptr(),&noncons,ipiv,B,&noncons,&info); if (rcond && info == 0) { // Estimate the condition number Real* work = new Real[4*n]; int* iwork = new int[n]; - dgecon_('1',n,myMat.ptr(),n,anorm,*rcond,work,iwork,info); + char norm='1'; + dgecon_(&norm,&noncons,myMat.ptr(),&noncons,&anorm,rcond,work,iwork,&info); delete[] work; delete[] iwork; } @@ -522,14 +535,20 @@ bool DenseMatrix::solveEig (RealArray& val, Matrix& vec, int nv) if (n < 1 || nv < 1) return true; // No equations to solve if (n > myMat.cols()) return false; -#ifdef USE_CBLAS +#if USE_BLAS std::cout <<" Solving dense eigenproblem using LAPACK::DSYEVX"<< std::endl; int m, info = 0; Real dummy = 0.0; Real abstol = 0.0; // Invoke with Lwork = -1 to estimate work space size - dsyevx_('V','I','U',n,myMat.ptr(),n,dummy,dummy,1,nv, - abstol,m,&val.front(),vec.ptr(),n,&dummy,-1,0,0,info); + char jobz='V'; + char range='I'; + char uplo='U'; + int il = 1; + int lwork = -1; + int noncons = n; + dsyevx_(&jobz,&range,&uplo,&noncons,myMat.ptr(),&noncons,&dummy,&dummy,&il,&nv, + &abstol,&m,&val.front(),vec.ptr(),&noncons,&dummy,&lwork,nullptr,nullptr,&info); if (info == 0) { @@ -540,8 +559,12 @@ bool DenseMatrix::solveEig (RealArray& val, Matrix& vec, int nv) val.resize(n); vec.resize(n,nv); // Solve the eigenproblem - dsyevx_('V','I','U',n,myMat.ptr(),n,dummy,dummy,1,nv, - abstol,m,&val.front(),vec.ptr(),n,work,Lwork,Iwork+n,Iwork,info); + char jobz='V'; + char range='I'; + char uplo='U'; + int il = 1; + dsyevx_(&jobz,&range,&uplo,&noncons,myMat.ptr(),&noncons,&dummy,&dummy,&il,&nv, + &abstol,&m,&val.front(),vec.ptr(),&noncons,work,&Lwork,Iwork+n,Iwork,&info); delete[] work; delete[] Iwork; val.resize(nv); @@ -567,15 +590,22 @@ bool DenseMatrix::solveEig (DenseMatrix& B, RealArray& val, Matrix& vec, int nv, if (n < 1 || nv < 1) return true; // No equations to solve if (n > myMat.cols()) return false; -#ifdef USE_CBLAS +#if USE_BLAS std::cout <<" Solving dense eigenproblem using LAPACK::DSYGVX"<< std::endl; int m, info = 0; Real dummy = 0.0; Real abstol = 0.0; + int itype = 1; + char jobz = 'V'; + char range = 'I'; + char uplo = 'U'; + int il = 1; + int lwork = -1; + int noncons = n; // Invoke with Lwork = -1 to estimate work space size - dsygvx_(1,'V','I','U',n,myMat.ptr(),n,B.myMat.ptr(),n, - dummy,dummy,1,nv,abstol,m,&val.front(),vec.ptr(),n, - &dummy,-1,0,0,info); + dsygvx_(&itype,&jobz,&range,&uplo,&noncons,myMat.ptr(),&noncons,B.myMat.ptr(),&noncons, + &dummy,&dummy,&il,&nv,&abstol,&m,&val.front(),vec.ptr(),&noncons, + &dummy,&lwork,nullptr,nullptr,&info); if (info == 0) { @@ -586,9 +616,9 @@ bool DenseMatrix::solveEig (DenseMatrix& B, RealArray& val, Matrix& vec, int nv, val.resize(n); vec.resize(n,nv); // Solve the eigenproblem - dsygvx_(1,'V','I','U',n,myMat.ptr(),n,B.myMat.ptr(),n, - dummy,dummy,1,nv,abstol,m,&val.front(),vec.ptr(),n, - work,Lwork,Iwork+n,Iwork,info); + dsygvx_(&itype,&jobz,&range,&uplo,&noncons,myMat.ptr(),&noncons,B.myMat.ptr(),&noncons, + &dummy,&dummy,&il,&nv,&abstol,&m,&val.front(),vec.ptr(),&noncons, + work,&Lwork,Iwork+n,Iwork,&info); delete[] work; delete[] Iwork; val.resize(nv); @@ -612,15 +642,19 @@ bool DenseMatrix::solveEig (DenseMatrix& B, RealArray& val, Matrix& vec, int nv, bool DenseMatrix::solveEigNon (RealArray& r_val, RealArray& c_val) { - const size_t n = myMat.rows(); + const int n = myMat.rows(); if (n < 1) return true; // No equations to solve -#ifdef USE_CBLAS +#if USE_BLAS int info = 0; Real dummy = 0.0; + char jobvl = 'N'; + int ldvl = 1; + int lwork = -1; + int noncons = n; // Invoke with Lwork = -1 to estimate work space size - dgeev_('N','N',n,myMat.ptr(),n,&r_val.front(),&c_val.front(), - &dummy,1,&dummy,1,&dummy,-1,info); + dgeev_(&jobvl,&jobvl,&noncons,myMat.ptr(),&noncons,&r_val.front(),&c_val.front(), + &dummy,&ldvl,&dummy,&ldvl,&dummy,&lwork,&info); if (info == 0) { @@ -630,8 +664,8 @@ bool DenseMatrix::solveEigNon (RealArray& r_val, RealArray& c_val) r_val.resize(n); c_val.resize(n); // Solve the eigenproblem - dgeev_('N','N',n,myMat.ptr(),n,&r_val.front(),&c_val.front(), - &dummy,1,&dummy,1,work,Lwork,info); + dgeev_(&jobvl,&jobvl,&noncons,myMat.ptr(),&noncons,&r_val.front(),&c_val.front(), + &dummy,&ldvl,&dummy,&ldvl,work,&Lwork,&info); delete[] work; if (info == 0) return true; } diff --git a/src/LinAlg/MatVec.C b/src/LinAlg/MatVec.C index cd1358d8..8520ddb6 100644 --- a/src/LinAlg/MatVec.C +++ b/src/LinAlg/MatVec.C @@ -24,18 +24,20 @@ #define dgetri_ dgetri #endif +#if defined(USE_CBLAS) || defined(USE_MKL) extern "C" { //! \brief Computes an LU factorization of a general M-by-N matrix A. //! \details This is a FORTRAN-77 subroutine in the LAPack library. //! \sa LAPack library documentation. -void dgetrf_(const int& M, const int& N, double* A, const int& LDA, - int* IPIV, int& INFO); +void dgetrf_(const int* M, const int* N, double* A, const int* LDA, + int* IPIV, int* INFO); //! \brief Computes the inverse of a matrix based on its LU factorization. //! \details This is a FORTRAN-77 subroutine in the LAPack library. //! \sa LAPack library documentation. -void dgetri_(const int& N, double* A, const int& LDA, - int* IPIV, double* WORK, const int& LWORK, int& INFO); +void dgetri_(const int* N, double* A, const int* LDA, + int* IPIV, double* WORK, const int* LWORK, int* INFO); } +#endif // Set default values on some global variables. @@ -131,11 +133,12 @@ bool utl::invert (Matrix& A) if (A.rows() <= 3) return A.inverse() > 0.0; -#ifdef USE_CBLAS +#if USE_BLAS // Use LAPack/BLAS for larger matrices int INFO, N = A.rows() > A.cols() ? A.rows() : A.cols(); int* IPIV = new int[N]; - dgetrf_(N,N,A.ptr(),A.rows(),IPIV,INFO); + int r = A.rows(); + dgetrf_(&N,&N,A.ptr(),&r,IPIV,&INFO); if (INFO != 0) { delete[] IPIV; @@ -143,9 +146,11 @@ bool utl::invert (Matrix& A) return false; } double NWORK; - dgetri_(N,A.ptr(),A.rows(),IPIV,&NWORK,-1,INFO); + int m1 = -1; + dgetri_(&N,A.ptr(),&r,IPIV,&NWORK,&m1,&INFO); double* WORK = new double[int(NWORK)]; - dgetri_(N,A.ptr(),A.rows(),IPIV,WORK,int(NWORK),INFO); + int nwork = NWORK; + dgetri_(&N,A.ptr(),&r,IPIV,WORK,&nwork,&INFO); delete[] IPIV; delete[] WORK; if (INFO == 0) return true; diff --git a/src/LinAlg/matrix.h b/src/LinAlg/matrix.h index 930929c1..89b8eec2 100644 --- a/src/LinAlg/matrix.h +++ b/src/LinAlg/matrix.h @@ -26,6 +26,8 @@ #include #ifdef USE_MKL #include +#elif defined(USE_ACCELERATE) +#include #elif defined(USE_CBLAS) extern "C" { @@ -33,6 +35,8 @@ extern "C" } #endif +#define USE_BLAS defined(USE_CBLAS) || defined(USE_MKL) || defined(USE_ACCELERATE) + #ifdef INDEX_CHECK #if INDEX_CHECK > 1 #define ABORT_ON_INDEX_CHECK abort() @@ -808,7 +812,7 @@ namespace utl //! General utility classes and functions. }; -#if defined(USE_CBLAS) || defined(USE_MKL) +#if USE_BLAS //============================================================================ //=== BLAS-implementation of the matrix/vector multiplication methods ==== //============================================================================ diff --git a/src/Utility/Tensor.C b/src/Utility/Tensor.C index b88f23e4..f4d66bf7 100644 --- a/src/Utility/Tensor.C +++ b/src/Utility/Tensor.C @@ -13,6 +13,7 @@ #include "Tensor.h" #include "Vec3.h" +#include "matrix.h" #include #include #include @@ -23,14 +24,16 @@ #define dsyev_ dsyev #endif +#if defined(USE_CBLAS) || defined(USE_MKL) extern "C" { //! \brief Solves the standard eigenproblem \a A*x=(lambda)*x. //! \details This is a FORTRAN-77 subroutine in the LAPack library. //! \sa LAPack library documentation. -void dsyev_(const char& jobz, const char& uplo, - const int& n, double* a, const int& lda, double* w, - double* work, const int& lwork, int& info); +void dsyev_(const char* jobz, const char* uplo, + const int* n, double* a, const int* lda, double* w, + double* work, const int* lwork, int* info); } +#endif #ifndef epsZ //! \brief Zero tolerance for the incremental rotations. @@ -1129,7 +1132,7 @@ bool SymmTensor::principal (Vec3& p) const bool SymmTensor::principal (Vec3& p, Vec3* pdir, int ndir) const { p = 0.0; -#ifdef USE_CBLAS +#ifdef USE_BLAS // Set up the upper triangle of the symmetric tensor double A[9], W[3]; if (n == 3) @@ -1154,7 +1157,11 @@ bool SymmTensor::principal (Vec3& p, Vec3* pdir, int ndir) const int info = 0; const int Lwork = 12; double Work[Lwork]; - dsyev_('V','U',n,A,n,W,Work,Lwork,info); + char jobz='V'; + char uplo='U'; + int noncons = n; + int Noncons = Lwork; + dsyev_(&jobz,&uplo,&noncons,A,&noncons,W,Work,&Noncons,&info); if (info) { std::cerr <<" *** LAPack::dsyev: info ="<< info;