Merge branch 'FOM' into FOM_dev

2021-06-27 19:46:49 -04:00
parent 803f907edd e1c347d3c1
commit 988eb36ac3
26 changed files with 3751 additions and 873 deletions
--- a/common/Array.cpp
+++ b/common/Array.cpp
@@ -1,117 +1,74 @@
+// clang-format off
 #include "common/Array.h"
 #include "common/Array.hpp"
+#include "common/Utilities.h"

 #include <complex>


-/********************************************************
- *  ArraySize                                            *
- ********************************************************/
-ArraySize::ArraySize( const std::vector<size_t>& N )
-{
-    d_ndim = N.size();
-    d_N[0] = 0;
-    d_N[1] = 1;
-    d_N[2] = 1;
-    d_N[3] = 1;
-    d_N[4] = 1;
-    for ( size_t i = 0; i < d_ndim; i++ )
-        d_N[i] = N[i];
-    d_length = 1;
-    for ( unsigned long i : d_N )
-        d_length *= i;
-    if ( d_ndim == 0 )
-        d_length = 0;
-}
-
-
 /********************************************************
 *  Explicit instantiations of Array                     *
 ********************************************************/
-template class Array<char, FunctionTable>;
-template class Array<uint8_t, FunctionTable>;
-template class Array<uint16_t, FunctionTable>;
-template class Array<uint32_t, FunctionTable>;
-template class Array<uint64_t, FunctionTable>;
-template class Array<int8_t, FunctionTable>;
-template class Array<int16_t, FunctionTable>;
-template class Array<int32_t, FunctionTable>;
-template class Array<int64_t, FunctionTable>;
-template class Array<float, FunctionTable>;
-template class Array<double, FunctionTable>;
+template class Array<char,FunctionTable>;
+template class Array<uint8_t,FunctionTable>;
+template class Array<uint16_t,FunctionTable>;
+template class Array<uint32_t,FunctionTable>;
+template class Array<uint64_t,FunctionTable>;
+template class Array<int8_t,FunctionTable>;
+template class Array<int16_t,FunctionTable>;
+template class Array<int32_t,FunctionTable>;
+template class Array<int64_t,FunctionTable>;
+template class Array<float,FunctionTable>;
+template class Array<double,FunctionTable>;
+template class Array<long double,FunctionTable>;


 /********************************************************
 *  Explicit instantiations of Array<bool>               *
 ********************************************************/
-// clang-format off
-template Array<bool, FunctionTable>::Array();
-template Array<bool, FunctionTable>::~Array();
-template Array<bool, FunctionTable>::Array( size_t );
-template Array<bool, FunctionTable>::Array( size_t, size_t );
-template Array<bool, FunctionTable>::Array( size_t, size_t, size_t );
-template Array<bool, FunctionTable>::Array( size_t, size_t, size_t, size_t );
-template Array<bool, FunctionTable>::Array( size_t, size_t, size_t, size_t, size_t );
-template Array<bool, FunctionTable>::Array( const std::vector<size_t>&, const bool* );
-template Array<bool, FunctionTable>::Array( std::string );
-template Array<bool, FunctionTable>::Array( std::initializer_list<bool> );
-template Array<bool, FunctionTable>::Array( const Array<bool, FunctionTable>& );
-template Array<bool, FunctionTable>::Array( Array<bool, FunctionTable>&& );
-template Array<bool, FunctionTable>& Array<bool, FunctionTable>::operator=( const Array<bool, FunctionTable>& );
-template Array<bool, FunctionTable>& Array<bool, FunctionTable>::operator=( Array<bool, FunctionTable>&& );
-template Array<bool, FunctionTable>& Array<bool, FunctionTable>::operator=( const std::vector<bool>& );
-template void Array<bool, FunctionTable>::fill(bool const&);
-template void Array<bool, FunctionTable>::clear();
-template bool Array<bool, FunctionTable>::operator==(Array<bool, FunctionTable> const&) const;
-template void Array<bool, FunctionTable>::resize( ArraySize const& );
-// clang-format on
+instantiateArrayConstructors( bool )
+template Array<bool,FunctionTable>& Array<bool,FunctionTable>::operator=( const std::vector<bool>& );
+template void Array<bool,FunctionTable>::clear();
+template bool Array<bool,FunctionTable>::operator==(Array<bool,FunctionTable> const&) const;
+template void Array<bool,FunctionTable>::resize( ArraySize const& );


 /********************************************************
 *  Explicit instantiations of Array<std::complex>       *
 ********************************************************/
-// clang-format off
-template Array<std::complex<double>, FunctionTable>::Array();
-template Array<std::complex<double>, FunctionTable>::~Array();
-template Array<std::complex<double>, FunctionTable>::Array( size_t );
-template Array<std::complex<double>, FunctionTable>::Array( size_t, size_t );
-template Array<std::complex<double>, FunctionTable>::Array( size_t, size_t, size_t );
-template Array<std::complex<double>, FunctionTable>::Array( size_t, size_t, size_t, size_t );
-template Array<std::complex<double>, FunctionTable>::Array( size_t, size_t, size_t, size_t, size_t );
-template Array<std::complex<double>, FunctionTable>::Array( const std::vector<size_t>&, const std::complex<double>* );
-template Array<std::complex<double>, FunctionTable>::Array( std::initializer_list<std::complex<double>> );
-template Array<std::complex<double>, FunctionTable>::Array( const Range<std::complex<double>>& range );
-template Array<std::complex<double>, FunctionTable>::Array( const Array<std::complex<double>, FunctionTable>& );
-template Array<std::complex<double>, FunctionTable>::Array( Array<std::complex<double>, FunctionTable>&& );
-template Array<std::complex<double>, FunctionTable>& Array<std::complex<double>, FunctionTable>::operator=( const Array<std::complex<double>, FunctionTable>& );
-template Array<std::complex<double>, FunctionTable>& Array<std::complex<double>, FunctionTable>::operator=( Array<std::complex<double>, FunctionTable>&& );
-template Array<std::complex<double>, FunctionTable>& Array<std::complex<double>, FunctionTable>::operator=( const std::vector<std::complex<double>>& );
-template void Array<std::complex<double>, FunctionTable>::resize( ArraySize const& );
-template void Array<std::complex<double>, FunctionTable>::viewRaw( ArraySize const&, std::complex<double>*, bool, bool );
-template void Array<std::complex<double>, FunctionTable>::fill(std::complex<double> const&);
-template void Array<std::complex<double>, FunctionTable>::clear();
-template bool Array<std::complex<double>, FunctionTable>::operator==(Array<std::complex<double>, FunctionTable> const&) const;
-template Array<std::complex<double>, FunctionTable> Array<std::complex<double>, FunctionTable>::repmat(std::vector<unsigned long, std::allocator<unsigned long> > const&) const;
-// clang-format on
+instantiateArrayConstructors( std::complex<float> )
+instantiateArrayConstructors( std::complex<double> )
+template void Array<std::complex<float>,FunctionTable>::resize( ArraySize const& );
+template void Array<std::complex<double>,FunctionTable>::resize( ArraySize const& );
+template Array<std::complex<double>,FunctionTable>& Array<std::complex<double>,FunctionTable>::operator=(std::vector<std::complex<double>> const&);
+template Array<std::complex<float>,FunctionTable>& Array<std::complex<float>,FunctionTable>::operator=(std::vector<std::complex<float>> const&);
+template void Array<std::complex<float>,FunctionTable>::clear();
+template void Array<std::complex<double>,FunctionTable>::clear();
+template bool Array<std::complex<float>,FunctionTable>::operator==(Array<std::complex<float>,FunctionTable> const&) const;
+template bool Array<std::complex<double>,FunctionTable>::operator==(Array<std::complex<double>,FunctionTable> const&) const;
+template Array<std::complex<float>,FunctionTable> Array<std::complex<float>,FunctionTable>::repmat(std::vector<unsigned long> const&) const;
+template Array<std::complex<double>,FunctionTable> Array<std::complex<double>,FunctionTable>::repmat(std::vector<unsigned long> const&) const;
+template void Array<std::complex<float>,FunctionTable>::copySubset(std::vector<unsigned long> const&, Array<std::complex<float>,FunctionTable> const&);
+template void Array<std::complex<double>,FunctionTable>::copySubset(std::vector<unsigned long> const&, Array<std::complex<double>,FunctionTable> const&);
+template Array<std::complex<float>,FunctionTable> Array<std::complex<float>,FunctionTable>::subset(std::vector<unsigned long> const&) const;
+template Array<std::complex<double>,FunctionTable> Array<std::complex<double>,FunctionTable>::subset(std::vector<unsigned long> const&) const;
+template bool Array<std::complex<float>,FunctionTable>::NaNs() const;
+template bool Array<std::complex<double>,FunctionTable>::NaNs() const;


 /********************************************************
 *  Explicit instantiations of Array<std::string>        *
 ********************************************************/
-// clang-format off
-template Array<std::string, FunctionTable>::Array();
-template Array<std::string, FunctionTable>::~Array();
-template Array<std::string, FunctionTable>::Array( size_t );
-template Array<std::string, FunctionTable>::Array( size_t, size_t );
-template Array<std::string, FunctionTable>::Array( size_t, size_t, size_t );
-template Array<std::string, FunctionTable>::Array( size_t, size_t, size_t, size_t );
-template Array<std::string, FunctionTable>::Array( size_t, size_t, size_t, size_t, size_t );
-template Array<std::string, FunctionTable>::Array( const std::vector<size_t>&, const std::string* );
-template Array<std::string, FunctionTable>::Array( std::initializer_list<std::string> );
-template Array<std::string, FunctionTable>::Array( const Array<std::string, FunctionTable>& );
-template Array<std::string, FunctionTable>::Array( Array<std::string, FunctionTable>&& );
-template Array<std::string, FunctionTable>& Array<std::string, FunctionTable>::operator=( const Array<std::string, FunctionTable>& );
-template Array<std::string, FunctionTable>& Array<std::string, FunctionTable>::operator=( Array<std::string, FunctionTable>&& );
-template Array<std::string, FunctionTable>& Array<std::string, FunctionTable>::operator=( const std::vector<std::string>& );
-template void Array<std::string, FunctionTable>::resize( ArraySize const& );
-// clang-format on
+instantiateArrayConstructors( std::string )
+template void Array<std::string,FunctionTable>::resize( ArraySize const& );
+template void Array<std::string,FunctionTable>::clear();
+template Array<std::string, FunctionTable> &Array<std::string, FunctionTable>::
+operator=( const std::vector<std::string> & );
+template bool Array<std::string>::operator==(Array<std::string> const&) const;
+
+
+#if defined( USING_ICC )
+ENABLE_WARNINGS
+#endif
+
+
--- a/common/Array.h
+++ b/common/Array.h
@@ -8,6 +8,7 @@
 #include <initializer_list>
 #include <iostream>
 #include <memory>
+#include <string>
 #include <vector>


@@ -74,13 +75,7 @@ public: // Constructors / assignment operators
     * @param N             Number of elements in each dimension
     * @param data          Optional raw array to copy the src data
     */
-    explicit Array( const std::vector<size_t> &N, const TYPE *data = NULL );
-
-    /*!
-     * Create a 1D Array with the range
-     * @param range         Range of the data
-     */
-    explicit Array( const Range<TYPE> &range );
+    explicit Array( const std::vector<size_t> &N, const TYPE *data = nullptr );

    /*!
     * Create a 1D Array using a string that mimic's MATLAB
@@ -94,6 +89,12 @@ public: // Constructors / assignment operators
     */
    Array( std::initializer_list<TYPE> data );

+    /*!
+     * Create a 2D Array with the given initializer lists
+     * @param data          Input data
+     */
+    Array( std::initializer_list<std::initializer_list<TYPE>> data );
+

    /*!
     * Copy constructor
@@ -144,7 +145,7 @@ public: // Views/copies/subset
     * @param N             Number of elements in each dimension
     * @param data          Pointer to the data
     */
-    static std::unique_ptr<Array> view( const ArraySize &N, std::shared_ptr<TYPE> &data );
+    static std::unique_ptr<Array> view( const ArraySize &N, std::shared_ptr<TYPE> data );


    /*!
@@ -152,8 +153,8 @@ public: // Views/copies/subset
     * @param N             Number of elements in each dimension
     * @param data          Pointer to the data
     */
-    static std::unique_ptr<const Array> constView(
-        const ArraySize &N, std::shared_ptr<const TYPE> const &data );
+    static std::unique_ptr<const Array> constView( const ArraySize &N,
+                                                   std::shared_ptr<const TYPE> const &data );


    /*!
@@ -167,7 +168,7 @@ public: // Views/copies/subset
     * @param N             Number of elements in each dimension
     * @param data          Pointer to the data
     */
-    void view2( const ArraySize &N, std::shared_ptr<TYPE> const &data );
+    void view2( const ArraySize &N, std::shared_ptr<TYPE> data );

    /*!
     * Make this object a view of the raw data (expert use only).
@@ -202,14 +203,30 @@ public: // Views/copies/subset
     */
    void viewRaw( const ArraySize &N, TYPE *data, bool isCopyable = true, bool isFixedSize = true );

+    /*!
+     * Create an array view of the given data (expert use only).
+     * Use view2( N, shared_ptr(data,[](TYPE*){}) ) instead.
+     *   Note: this interface is not recommended as it does not protect from
+     *   the src data being deleted while still being used by the Array.
+     *   Additionally for maximum performance it does not set the internal shared_ptr
+     *   so functions like getPtr and resize will not work correctly.
+     * @param N             Number of elements in each dimension
+     * @param data          Pointer to the data
+     */
+    static inline Array staticView( const ArraySize &N, TYPE *data )
+    {
+        Array x;
+        x.viewRaw( N, data, true, true );
+        return x;
+    }

    /*!
     * Convert an array of one type to another.  This may or may not allocate new memory.
     * @param array         Input array
     */
    template<class TYPE2>
-    static inline std::unique_ptr<Array<TYPE2, FUN, Allocator>> convert(
-        std::shared_ptr<Array<TYPE, FUN, Allocator>> array )
+    static inline std::unique_ptr<Array<TYPE2, FUN, Allocator>>
+    convert( std::shared_ptr<Array<TYPE, FUN, Allocator>> array )
    {
        auto array2 = std::make_unique<Array<TYPE2>>( array->size() );
        array2.copy( *array );
@@ -222,8 +239,8 @@ public: // Views/copies/subset
     * @param array         Input array
     */
    template<class TYPE2>
-    static inline std::unique_ptr<const Array<TYPE2, FUN, Allocator>> convert(
-        std::shared_ptr<const Array<TYPE, FUN, Allocator>> array )
+    static inline std::unique_ptr<const Array<TYPE2, FUN, Allocator>>
+    convert( std::shared_ptr<const Array<TYPE, FUN, Allocator>> array )
    {
        auto array2 = std::make_unique<Array<TYPE2>>( array->size() );
        array2.copy( *array );
@@ -235,8 +252,8 @@ public: // Views/copies/subset
     * Copy and convert data from another array to this array
     * @param array         Source array
     */
-    template<class TYPE2>
-    void inline copy( const Array<TYPE2, FUN, Allocator> &array )
+    template<class TYPE2, class FUN2, class Allocator2>
+    void inline copy( const Array<TYPE2, FUN2, Allocator2> &array )
    {
        resize( array.size() );
        copy( array.data() );
@@ -245,51 +262,55 @@ public: // Views/copies/subset
    /*!
     * Copy and convert data from a raw vector to this array.
     *    Note: The current array must be allocated to the proper size first.
-     * @param array         Source array
+     * @param data          Source data
     */
    template<class TYPE2>
-    void inline copy( const TYPE2 *data )
-    {
-        for ( size_t i = 0; i < d_size.length(); i++ )
-            d_data[i] = static_cast<TYPE>( data[i] );
-    }
+    inline void copy( const TYPE2 *data );

    /*!
     * Copy and convert data from this array to a raw vector.
-     * @param array         Source array
+     * @param data          Source data
     */
    template<class TYPE2>
-    void inline copyTo( TYPE2 *data ) const
-    {
-        for ( size_t i = 0; i < d_size.length(); i++ )
-            data[i] = static_cast<TYPE2>( d_data[i] );
-    }
+    inline void copyTo( TYPE2 *data ) const;

    /*!
     * Copy and convert data from this array to a new array
     */
    template<class TYPE2>
-    Array<TYPE2, FUN, Allocator> inline cloneTo() const
+    Array<TYPE2, FUN, std::allocator<TYPE2>> inline cloneTo() const
    {
-        Array<TYPE2, FUN> dst( this->size() );
+        Array<TYPE2, FUN, std::allocator<TYPE2>> dst( this->size() );
        copyTo( dst.data() );
        return dst;
    }

+
    /*! swap the raw data pointers for the Arrays after checking for compatibility */
    void swap( Array &other );

+
    /*!
     * Fill the array with the given value
-     * @param value         Value to fill
+     * @param y         Value to fill
     */
-    void fill( const TYPE &value );
+    inline void fill( const TYPE &y )
+    {
+        for ( auto &x : *this )
+            x = y;
+    }

    /*!
     * Scale the array by the given value
-     * @param scale         Value to scale by
+     * @param y         Value to scale by
     */
-    void scale( const TYPE &scale );
+    template<class TYPE2>
+    inline void scale( const TYPE2 &y )
+    {
+        for ( auto &x : *this )
+            x *= y;
+    }
+

    /*!
     * Set the values of this array to pow(base, exp)
@@ -298,6 +319,7 @@ public: // Views/copies/subset
     */
    void pow( const Array &base, const TYPE &exp );

+
    //! Destructor
    ~Array();

@@ -326,6 +348,10 @@ public: // Views/copies/subset
    inline bool empty() const { return d_size.length() == 0; }


+    //! Return true if the Array is not empty
+    inline operator bool() const { return d_size.length() != 0; }
+
+
    /*!
     * Resize the Array
     * @param N             NUmber of elements
@@ -371,6 +397,12 @@ public: // Views/copies/subset
    void reshape( const ArraySize &N );


+    /*!
+     * Remove singleton dimensions.
+     */
+    void squeeze();
+
+
    /*!
     * Reshape the Array so that the number of dimensions is the
     *    max of ndim and the largest dim>1.
@@ -499,8 +531,8 @@ public: // Accessors
     * @param i3            The third index
     * @param i4            The fourth index
     */
-    ARRAY_ATTRIBUTE inline const TYPE &operator()(
-        size_t i1, size_t i2, size_t i3, size_t i4 ) const
+    ARRAY_ATTRIBUTE inline const TYPE &
+    operator()( size_t i1, size_t i2, size_t i3, size_t i4 ) const
    {
        return d_data[d_size.index( i1, i2, i3, i4 )];
    }
@@ -526,8 +558,8 @@ public: // Accessors
     * @param i4            The fourth index
     * @param i5            The fifth index
     */
-    ARRAY_ATTRIBUTE inline const TYPE &operator()(
-        size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const
+    ARRAY_ATTRIBUTE inline const TYPE &
+    operator()( size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const
    {
        return d_data[d_size.index( i1, i2, i3, i4, i5 )];
    }
@@ -600,6 +632,12 @@ public: // Math operations
    //! Concatenates the arrays along the dimension dim.
    static Array cat( const std::vector<Array> &x, int dim = 0 );

+    //! Concatenates the arrays along the dimension dim.
+    static Array cat( const std::initializer_list<Array> &x, int dim = 0 );
+
+    //! Concatenates the arrays along the dimension dim.
+    static Array cat( size_t N_array, const Array *x, int dim );
+
    //! Concatenates a given array with the current array
    void cat( const Array &x, int dim = 0 );

@@ -655,20 +693,37 @@ public: // Math operations
    TYPE mean( const std::vector<Range<size_t>> &index ) const;

    //! Find all elements that match the operator
-    std::vector<size_t> find(
-        const TYPE &value, std::function<bool( const TYPE &, const TYPE & )> compare ) const;
+    std::vector<size_t> find( const TYPE &value,
+                              std::function<bool( const TYPE &, const TYPE & )> compare ) const;


    //! Print an array
-    void print(
-        std::ostream &os, const std::string &name = "A", const std::string &prefix = "" ) const;
-
-    //! Multiply two arrays
-    static Array multiply( const Array &a, const Array &b );
+    void
+    print( std::ostream &os, const std::string &name = "A", const std::string &prefix = "" ) const;

    //! Transpose an array
    Array reverseDim() const;

+    /*!
+     * @brief  Shift dimensions
+     * @details  Shifts the dimensions of the array by N.  When N is positive,
+     *    shiftDim shifts the dimensions to the left and wraps the
+     *    N leading dimensions to the end.  When N is negative,
+     *    shiftDim shifts the dimensions to the right and pads with singletons.
+     * @param N             Desired shift
+     */
+    Array shiftDim( int N ) const;
+
+    /*!
+     * @brief   Permute array dimensions
+     * @details  Rearranges the dimensions of the array so that they
+     *    are in the order specified by the vector index.
+     *    The array produced has the same values as A but the order of the subscripts
+     *    needed to access any particular element are rearranged as specified.
+     * @param index        Desired order of the subscripts
+     */
+    Array permute( const std::vector<uint8_t> &index ) const;
+
    //! Replicate an array a given number of times in each direction
    Array repmat( const std::vector<size_t> &N ) const;

@@ -676,8 +731,8 @@ public: // Math operations
    Array coarsen( const Array &filter ) const;

    //! Coarsen an array using the given filter
-    Array coarsen(
-        const std::vector<size_t> &ratio, std::function<TYPE( const Array & )> filter ) const;
+    Array coarsen( const std::vector<size_t> &ratio,
+                   std::function<TYPE( const Array & )> filter ) const;

    /*!
     * Perform a element-wise operation y = f(x)
@@ -692,8 +747,9 @@ public: // Math operations
     * @param[in] x             The first array
     * @param[in] y             The second array
     */
-    static Array transform(
-        std::function<TYPE( const TYPE &, const TYPE & )> fun, const Array &x, const Array &y );
+    static Array transform( std::function<TYPE( const TYPE &, const TYPE & )> fun,
+                            const Array &x,
+                            const Array &y );

    /*!
     * axpby operation: this = alpha*x + beta*this
@@ -707,7 +763,13 @@ public: // Math operations
     * Linear interpolation
     * @param[in] x             Position as a decimal index
     */
-    TYPE interp( const std::vector<double> &x ) const;
+    inline TYPE interp( const std::vector<double> &x ) const { return interp( x.data() ); }
+
+    /*!
+     * Linear interpolation
+     * @param[in] x             Position as a decimal index
+     */
+    TYPE interp( const double *x ) const;

    /**
     * \fn equals (Array & const rhs, TYPE tol )
@@ -730,8 +792,10 @@ private:
    inline void checkSubsetIndex( const std::vector<Range<size_t>> &range ) const;
    inline std::vector<Range<size_t>> convert( const std::vector<size_t> &index ) const;
    static inline void getSubsetArrays( const std::vector<Range<size_t>> &range,
-        std::array<size_t, 5> &first, std::array<size_t, 5> &last, std::array<size_t, 5> &inc,
-        std::array<size_t, 5> &N );
+                                        std::array<size_t, 5> &first,
+                                        std::array<size_t, 5> &last,
+                                        std::array<size_t, 5> &inc,
+                                        std::array<size_t, 5> &N );
 };


@@ -756,8 +820,8 @@ inline Array<TYPE, FUN, Allocator> operator+(
    const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
 {
    Array<TYPE, FUN, Allocator> c;
-    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a + b; };
-    FUN::transform( fun, a, b, c );
+    const auto &op = []( const TYPE &a, const TYPE &b ) { return a + b; };
+    FUN::transform( op, a, b, c );
    return c;
 }
 template<class TYPE, class FUN, class Allocator>
@@ -765,30 +829,78 @@ inline Array<TYPE, FUN, Allocator> operator-(
    const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
 {
    Array<TYPE, FUN, Allocator> c;
-    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a - b; };
-    FUN::transform( fun, a, b, c );
+    const auto &op = []( const TYPE &a, const TYPE &b ) { return a - b; };
+    FUN::transform( op, a, b, c );
    return c;
 }
 template<class TYPE, class FUN, class Allocator>
 inline Array<TYPE, FUN, Allocator> operator*(
    const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
 {
-    return Array<TYPE, FUN, Allocator>::multiply( a, b );
+    Array<TYPE, FUN, Allocator> c;
+    FUN::multiply( a, b, c );
+    return c;
 }
 template<class TYPE, class FUN, class Allocator>
 inline Array<TYPE, FUN, Allocator> operator*(
    const Array<TYPE, FUN, Allocator> &a, const std::vector<TYPE> &b )
 {
-    Array<TYPE, FUN, Allocator> b2;
+    Array<TYPE, FUN, Allocator> b2, c;
    b2.viewRaw( { b.size() }, const_cast<TYPE *>( b.data() ) );
-    return Array<TYPE, FUN, Allocator>::multiply( a, b2 );
+    FUN::multiply( a, b2, c );
+    return c;
+}
+template<class TYPE, class FUN, class Allocator>
+inline Array<TYPE, FUN, Allocator> operator*( const TYPE &a,
+                                              const Array<TYPE, FUN, Allocator> &b )
+{
+    auto c = b;
+    c.scale( a );
+    return c;
+}
+template<class TYPE, class FUN, class Allocator>
+inline Array<TYPE, FUN, Allocator> operator*( const Array<TYPE, FUN, Allocator> &a,
+                                              const TYPE &b )
+{
+    auto c = a;
+    c.scale( b );
+    return c;
+}
+
+
+/********************************************************
+ *  Copy array                                           *
+ ********************************************************/
+template<class TYPE, class FUN, class Allocator>
+template<class TYPE2>
+inline void Array<TYPE, FUN, Allocator>::copy( const TYPE2 *data )
+{
+    if ( std::is_same<TYPE, TYPE2>::value ) {
+        std::copy( data, data + d_size.length(), d_data );
+    } else {
+        for ( size_t i = 0; i < d_size.length(); i++ )
+            d_data[i] = static_cast<TYPE>( data[i] );
+    }
+}
+template<class TYPE, class FUN, class Allocator>
+template<class TYPE2>
+inline void Array<TYPE, FUN, Allocator>::copyTo( TYPE2 *data ) const
+{
+    if ( std::is_same<TYPE, TYPE2>::value ) {
+        std::copy( d_data, d_data + d_size.length(), data );
+    } else {
+        for ( size_t i = 0; i < d_size.length(); i++ )
+            data[i] = static_cast<TYPE2>( d_data[i] );
+    }
 }


 /********************************************************
 *  Convience typedefs                                   *
+ *  Copy array                                           *
 ********************************************************/
 typedef Array<double> DoubleArray;
 typedef Array<int> IntArray;

+
 #endif
--- a/common/Array.hpp
+++ b/common/Array.hpp
@@ -17,65 +17,46 @@
 /********************************************************
 *  External instantiations                              *
 ********************************************************/
-extern template class Array<char, FunctionTable>;
-extern template class Array<uint8_t, FunctionTable>;
-extern template class Array<uint16_t, FunctionTable>;
-extern template class Array<uint32_t, FunctionTable>;
-extern template class Array<uint64_t, FunctionTable>;
-extern template class Array<int8_t, FunctionTable>;
-extern template class Array<int16_t, FunctionTable>;
-extern template class Array<int32_t, FunctionTable>;
-extern template class Array<int64_t, FunctionTable>;
-extern template class Array<double, FunctionTable>;
-extern template class Array<float, FunctionTable>;
+extern template class Array<char>;
+extern template class Array<uint8_t>;
+extern template class Array<uint16_t>;
+extern template class Array<uint32_t>;
+extern template class Array<uint64_t>;
+extern template class Array<int8_t>;
+extern template class Array<int16_t>;
+extern template class Array<int32_t>;
+extern template class Array<int64_t>;
+extern template class Array<double>;
+extern template class Array<float>;


 /********************************************************
- *  Helper functions                                     *
+ *  Macros to help instantiate functions                 *
 ********************************************************/
-template<class TYPE>
-inline typename std::enable_if<std::is_integral<TYPE>::value, size_t>::type getRangeSize(
-    const Range<TYPE> &range )
-{
-    return ( static_cast<int64_t>( range.j ) - static_cast<int64_t>( range.i ) ) /
-           static_cast<int64_t>( range.k );
-}
-template<class TYPE>
-inline typename std::enable_if<std::is_floating_point<TYPE>::value, size_t>::type getRangeSize(
-    const Range<TYPE> &range )
-{
-    double tmp = static_cast<double>( ( range.j - range.i ) ) / static_cast<double>( range.k );
-    return static_cast<size_t>( floor( tmp + 1e-12 ) + 1 );
-}
-template<class TYPE>
-inline typename std::enable_if<std::is_same<TYPE, std::complex<float>>::value ||
-                                   std::is_same<TYPE, std::complex<double>>::value,
-    size_t>::type
-getRangeSize( const Range<TYPE> &range )
-{
-    double tmp = std::real( ( range.j - range.i ) / ( range.k ) );
-    return static_cast<size_t>( floor( tmp + 1e-12 ) + 1 );
-}
-template<class TYPE>
-inline typename std::enable_if<std::is_integral<TYPE>::value, TYPE>::type getRangeValue(
-    const Range<TYPE> &range, size_t index )
-{
-    return range.i + index * range.k;
-}
-template<class TYPE>
-inline typename std::enable_if<std::is_floating_point<TYPE>::value, TYPE>::type getRangeValue(
-    const Range<TYPE> &range, size_t index )
-{
-    return range.k * ( range.i / range.k + index );
-}
-template<class TYPE>
-inline typename std::enable_if<std::is_same<TYPE, std::complex<float>>::value ||
-                                   std::is_same<TYPE, std::complex<double>>::value,
-    TYPE>::type
-getRangeValue( const Range<TYPE> &range, size_t index )
-{
-    return range.k * ( range.i / range.k + static_cast<TYPE>( index ) );
-}
+// clang-format off
+#define instantiateArrayConstructors( TYPE )                                    \
+    template Array<TYPE>::Array();                                              \
+    template Array<TYPE>::~Array();                                             \
+    template Array<TYPE>::Array( const ArraySize & );                           \
+    template Array<TYPE>::Array( size_t );                                      \
+    template Array<TYPE>::Array( size_t, size_t );                              \
+    template Array<TYPE>::Array( size_t, size_t, size_t );                      \
+    template Array<TYPE>::Array( size_t, size_t, size_t, size_t );              \
+    template Array<TYPE>::Array( size_t, size_t, size_t, size_t, size_t );      \
+    template Array<TYPE>::Array( const std::vector<size_t> &, const TYPE * );   \
+    template Array<TYPE>::Array( std::initializer_list<TYPE> );                 \
+    template Array<TYPE>::Array( std::initializer_list<std::initializer_list<TYPE>> ); \
+    template Array<TYPE>::Array( const Array<TYPE> & );                         \
+    template Array<TYPE>::Array( Array<TYPE> && );                              \
+    template void Array<TYPE>::reshape( ArraySize const& );                     \
+    template void Array<TYPE>::squeeze();                                       \
+    template std::unique_ptr<const Array<TYPE>>                                 \
+        Array<TYPE>::constView(ArraySize const&, std::shared_ptr<TYPE const> const&); \
+    template void Array<TYPE>::viewRaw( ArraySize const&, TYPE*, bool, bool );  \
+    template void Array<TYPE>::view2(ArraySize const&, std::shared_ptr<TYPE> ); \
+    template Array<TYPE> &Array<TYPE>::operator=( const Array<TYPE> & );        \
+    template Array<TYPE> &Array<TYPE>::operator=( Array<TYPE> && );
+// clang-format on


 /********************************************************
@@ -126,19 +107,8 @@ Array<TYPE, FUN, Allocator>::Array( const std::vector<size_t> &N, const TYPE *da
    : d_isCopyable( true ), d_isFixedSize( false )
 {
    allocate( N );
-    if ( data ) {
-        for ( size_t i = 0; i < d_size.length(); i++ )
-            d_data[i] = data[i];
-    }
-}
-template<class TYPE, class FUN, class Allocator>
-Array<TYPE, FUN, Allocator>::Array( const Range<TYPE> &range )
-    : d_isCopyable( true ), d_isFixedSize( false )
-{
-    size_t N = getRangeSize( range );
-    allocate( { N } );
-    for ( size_t i = 0; i < N; i++ )
-        d_data[i] = getRangeValue( range, i );
+    if ( data )
+        copy( data );
 }
 template<class TYPE, class FUN, class Allocator>
 Array<TYPE, FUN, Allocator>::Array( std::string str ) : d_isCopyable( true ), d_isFixedSize( false )
@@ -216,8 +186,12 @@ Array<TYPE, FUN, Allocator>::Array( std::string str ) : d_isCopyable( true ), d_
            i2 = str.length();
    }
    allocate( data.size() );
-    for ( size_t i = 0; i < data.size(); i++ )
-        d_data[i] = data[i];
+    if ( std::is_same<TYPE, bool>::value ) {
+        for ( size_t i = 0; i < data.size(); i++ )
+            d_data[i] = data[i];
+    } else {
+        copy( data.data() );
+    }
 }
 template<class TYPE, class FUN, class Allocator>
 Array<TYPE, FUN, Allocator>::Array( std::initializer_list<TYPE> x )
@@ -229,19 +203,38 @@ Array<TYPE, FUN, Allocator>::Array( std::initializer_list<TYPE> x )
        d_data[i] = *it;
 }
 template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array( std::initializer_list<std::initializer_list<TYPE>> x )
+    : d_isCopyable( true ), d_isFixedSize( false )
+{
+    size_t Nx = x.size();
+    size_t Ny = 0;
+    for ( const auto y : x )
+        Ny = std::max<size_t>( Ny, y.size() );
+    allocate( { Nx, Ny } );
+    auto itx = x.begin();
+    for ( size_t i = 0; i < x.size(); ++i, ++itx ) {
+        auto ity = itx->begin();
+        for ( size_t j = 0; j < itx->size(); ++j, ++ity ) {
+            d_data[i + j * Nx] = *ity;
+        }
+    }
+}
+template<class TYPE, class FUN, class Allocator>
 void Array<TYPE, FUN, Allocator>::allocate( const ArraySize &N )
 {
    if ( d_isFixedSize )
        throw std::logic_error( "Array cannot be resized" );
    d_size      = N;
    auto length = d_size.length();
-    if ( length == 0 )
-        d_ptr.reset();
-    else
-        d_ptr.reset( new ( std::nothrow ) TYPE[length], []( TYPE *p ) { delete[] p; } );
-    d_data = d_ptr.get();
-    if ( length > 0 && d_data == nullptr )
-        throw std::logic_error( "Failed to allocate array" );
+    d_data      = nullptr;
+    if ( length > 0 ) {
+        try {
+            d_data = new TYPE[length];
+        } catch ( ... ) {
+            throw std::logic_error( "Failed to allocate array" );
+        }
+    }
+    d_ptr.reset( d_data, []( TYPE *p ) { delete[] p; } );
 }
 template<class TYPE, class FUN, class Allocator>
 Array<TYPE, FUN, Allocator>::Array( const Array &rhs )
@@ -250,18 +243,19 @@ Array<TYPE, FUN, Allocator>::Array( const Array &rhs )
    if ( !rhs.d_isCopyable )
        throw std::logic_error( "Array cannot be copied" );
    allocate( rhs.size() );
-    for ( size_t i = 0; i < d_size.length(); i++ )
-        d_data[i] = rhs.d_data[i];
+    copy( rhs.d_data );
 }
 template<class TYPE, class FUN, class Allocator>
 Array<TYPE, FUN, Allocator>::Array( Array &&rhs )
    : d_isCopyable( rhs.d_isCopyable ),
      d_isFixedSize( rhs.d_isFixedSize ),
      d_size( rhs.d_size ),
-      d_data( rhs.d_data )
+      d_data( rhs.d_data ),
+      d_ptr( std::move( rhs.d_ptr ) )
 {
+    rhs.d_size = ArraySize();
    rhs.d_data = nullptr;
-    d_ptr      = std::move( rhs.d_ptr );
+    rhs.d_ptr  = nullptr;
 }
 template<class TYPE, class FUN, class Allocator>
 Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator=( const Array &rhs )
@@ -270,9 +264,8 @@ Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator=( const Array
        return *this;
    if ( !rhs.d_isCopyable )
        throw std::logic_error( "Array cannot be copied" );
-    this->allocate( rhs.size() );
-    for ( size_t i = 0; i < d_size.length(); i++ )
-        this->d_data[i] = rhs.d_data[i];
+    allocate( rhs.size() );
+    copy( rhs.d_data );
    return *this;
 }
 template<class TYPE, class FUN, class Allocator>
@@ -285,15 +278,17 @@ Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator=( Array &&rhs
    d_size        = rhs.d_size;
    d_data        = rhs.d_data;
    d_ptr         = std::move( rhs.d_ptr );
+    rhs.d_size    = ArraySize();
    rhs.d_data    = nullptr;
+    rhs.d_ptr     = nullptr;
    return *this;
 }
 template<class TYPE, class FUN, class Allocator>
 Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator=( const std::vector<TYPE> &rhs )
 {
-    this->allocate( ArraySize( rhs.size() ) );
+    allocate( ArraySize( rhs.size() ) );
    for ( size_t i = 0; i < rhs.size(); i++ )
-        this->d_data[i] = rhs[i];
+        d_data[i] = rhs[i];
    return *this;
 }
 template<class TYPE, class FUN, class Allocator>
@@ -331,9 +326,9 @@ static inline void moveValues( const ArraySize &N1, const ArraySize &N2, TYPE *d
        }
    }
 }
-template<bool test, class TYPE>
-static inline typename std::enable_if<test, void>::type copyValues(
-    const ArraySize &N1, const ArraySize &N2, const TYPE *data1, TYPE *data2 )
+template<class TYPE>
+static inline void
+copyValues( const ArraySize &N1, const ArraySize &N2, const TYPE *data1, TYPE *data2 )
 {
    for ( size_t i5 = 0; i5 < std::min( N1[4], N2[4] ); i5++ ) {
        for ( size_t i4 = 0; i4 < std::min( N1[3], N2[3] ); i4++ ) {
@@ -349,12 +344,6 @@ static inline typename std::enable_if<test, void>::type copyValues(
        }
    }
 }
-template<bool test, class TYPE>
-static inline typename std::enable_if<!test, void>::type copyValues(
-    const ArraySize &, const ArraySize &, const TYPE *, TYPE * )
-{
-    throw std::logic_error( "No copy constructor" );
-}


 /********************************************************
@@ -381,9 +370,11 @@ void Array<TYPE, FUN, Allocator>::resize( const ArraySize &N )
        if ( data0.use_count() <= 1 ) {
            // We own the data, use std:move
            moveValues( N0, N, data0.get(), d_data );
-        } else {
+        } else if ( std::is_copy_constructible<TYPE>::value ) {
            // We do not own the data, copy
-            copyValues<std::is_copy_constructible<TYPE>::value, TYPE>( N0, N, data0.get(), d_data );
+            copyValues( N0, N, data0.get(), d_data );
+        } else {
+            throw std::logic_error( "No copy constructor" );
        }
    }
 }
@@ -412,7 +403,7 @@ void Array<TYPE, FUN, Allocator>::resizeDim( int dim, size_t N, const TYPE &valu


 /********************************************************
- *  Reshape the array                                     *
+ *  Reshape/squeeze the array                            *
 ********************************************************/
 template<class TYPE, class FUN, class Allocator>
 void Array<TYPE, FUN, Allocator>::reshape( const ArraySize &N )
@@ -421,6 +412,85 @@ void Array<TYPE, FUN, Allocator>::reshape( const ArraySize &N )
        throw std::logic_error( "reshape is not allowed to change the array size" );
    d_size = N;
 }
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::squeeze()
+{
+    d_size.squeeze();
+}
+
+
+/********************************************************
+ *  Shift/permute the array                              *
+ ********************************************************/
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::shiftDim( int N ) const
+{
+    if ( N > 0 )
+        N = N % d_size.ndim();
+    if ( N == 0 ) {
+        // No shift required
+        return *this;
+    } else if ( N > 0 ) {
+        // Shift to the left and wrap
+        std::vector<uint8_t> index( d_size.ndim() );
+        size_t i = 0;
+        for ( size_t j=N; j<index.size(); j++, i++)
+            index[i] = j;
+        for ( size_t j=0; i<index.size(); j++, i++)
+            index[i] = j;
+        return permute( index );
+    } else  {
+        // Shift to the right (padding with singletons)
+        N = -N;
+        ASSERT( d_size.ndim() + N < (int) ArraySize::maxDim() );
+        size_t dims[10] = { 1 };
+        for ( int i = 0; i < ndim(); i++ )
+            dims[N+i] = d_size[i];
+        auto y = *this;
+        y.reshape( ArraySize( ndim() + N, dims ) );
+        return y;
+    }
+}
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::permute( const std::vector<uint8_t> &index ) const
+{
+    // Check the permutation
+    ASSERT( (int) index.size() == ndim() );
+    for ( int i=0; i < ndim(); i++) {
+        ASSERT( index[i] < ndim() );
+        for ( int j=0; j < i; j++)
+            ASSERT( index[i] != index[j] );
+    }
+    // Create the new Array
+    size_t dims[5] = { 1u, 1u, 1u, 1u, 1u };
+    for ( size_t i=0; i<index.size(); i++)
+        dims[i] = d_size[index[i]];
+    Array y( ArraySize( ndim(), dims ) );
+    y.fill( -1 );
+    ASSERT( y.length() == this->length() );
+    // Fill the data
+    size_t N[5] = { 1u, 1u, 1u, 1u, 1u };
+    for ( int i=0; i < ndim(); i++) {
+        std::array<size_t, 5> ijk = { 0, 0, 0, 0, 0 };
+        ijk[index[i]] = 1;
+        N[i] = d_size.index( ijk );
+    }
+    size_t tmp = ( dims[0] - 1 ) * N[0] + ( dims[1] - 1 ) * N[1] + ( dims[2] - 1 ) * N[2] + ( dims[3] - 1 ) * N[3] + ( dims[4] - 1 ) * N[4] + 1;
+    ASSERT( tmp == length() );
+    for ( size_t i4 = 0; i4 < dims[4]; i4++ ) {
+        for ( size_t i3 = 0; i3 < dims[3]; i3++ ) {
+            for ( size_t i2 = 0; i2 < dims[2]; i2++ ) {
+                for ( size_t i1 = 0; i1 < dims[1]; i1++ ) {
+                    for ( size_t i0 = 0; i0 < dims[0]; i0++ ) {
+                        size_t index2 = i0 * N[0] + i1 * N[1] + i2 * N[2] + i3 * N[3] + i4 * N[4];
+                        y( i0, i1, i2, i3, i4 ) = d_data[index2];
+                    }
+                }
+            }
+        }
+    }
+    return y;
+}


 /********************************************************
@@ -428,8 +498,8 @@ void Array<TYPE, FUN, Allocator>::reshape( const ArraySize &N )
 ********************************************************/
 // Helper function to check subset indices
 template<class TYPE, class FUN, class Allocator>
-inline void Array<TYPE, FUN, Allocator>::checkSubsetIndex(
-    const std::vector<Range<size_t>> &range ) const
+inline void
+Array<TYPE, FUN, Allocator>::checkSubsetIndex( const std::vector<Range<size_t>> &range ) const
 {
    bool test = (int) range.size() == d_size.ndim();
    for ( size_t d = 0; d < range.size(); d++ )
@@ -438,8 +508,8 @@ inline void Array<TYPE, FUN, Allocator>::checkSubsetIndex(
        throw std::logic_error( "indices for subset are invalid" );
 }
 template<class TYPE, class FUN, class Allocator>
-std::vector<Range<size_t>> Array<TYPE, FUN, Allocator>::convert(
-    const std::vector<size_t> &index ) const
+std::vector<Range<size_t>>
+Array<TYPE, FUN, Allocator>::convert( const std::vector<size_t> &index ) const
 {
    std::vector<Range<size_t>> range( d_size.ndim() );
    if ( index.size() % 2 != 0 || static_cast<int>( index.size() / 2 ) < d_size.ndim() )
@@ -451,8 +521,10 @@ std::vector<Range<size_t>> Array<TYPE, FUN, Allocator>::convert(
 // Helper function to return dimensions for the subset array
 template<class TYPE, class FUN, class Allocator>
 void Array<TYPE, FUN, Allocator>::getSubsetArrays( const std::vector<Range<size_t>> &index,
-    std::array<size_t, 5> &first, std::array<size_t, 5> &last, std::array<size_t, 5> &inc,
-    std::array<size_t, 5> &N )
+                                                   std::array<size_t, 5> &first,
+                                                   std::array<size_t, 5> &last,
+                                                   std::array<size_t, 5> &inc,
+                                                   std::array<size_t, 5> &N )
 {
    first.fill( 0 );
    last.fill( 0 );
@@ -467,8 +539,8 @@ void Array<TYPE, FUN, Allocator>::getSubsetArrays( const std::vector<Range<size_
    }
 }
 template<class TYPE, class FUN, class Allocator>
-Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::subset(
-    const std::vector<Range<size_t>> &index ) const
+Array<TYPE, FUN, Allocator>
+Array<TYPE, FUN, Allocator>::subset( const std::vector<Range<size_t>> &index ) const
 {
    // Get the subset indicies
    checkSubsetIndex( index );
@@ -476,9 +548,8 @@ Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::subset(
    getSubsetArrays( index, first, last, inc, N1 );
    ArraySize S1( d_size.ndim(), N1.data() );
    // Create the new array
-    Array<TYPE> subset_array( S1 );
+    Array<TYPE, FUN, Allocator> subset_array( S1 );
    // Fill the new array
-    static_assert( ArraySize::maxDim() == 5, "Not programmed for more than 5 dimensions" );
    TYPE *subset_data = subset_array.data();
    for ( size_t i4 = first[4], k1 = 0; i4 <= last[4]; i4 += inc[4] ) {
        for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) {
@@ -495,22 +566,21 @@ Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::subset(
    return subset_array;
 }
 template<class TYPE, class FUN, class Allocator>
-Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::subset(
-    const std::vector<size_t> &index ) const
+Array<TYPE, FUN, Allocator>
+Array<TYPE, FUN, Allocator>::subset( const std::vector<size_t> &index ) const
 {
    auto range = convert( index );
    return subset( range );
 }
 template<class TYPE, class FUN, class Allocator>
-void Array<TYPE, FUN, Allocator>::copySubset(
-    const std::vector<Range<size_t>> &index, const Array<TYPE, FUN, Allocator> &subset )
+void Array<TYPE, FUN, Allocator>::copySubset( const std::vector<Range<size_t>> &index,
+                                              const Array<TYPE, FUN, Allocator> &subset )
 {
    // Get the subset indices
    checkSubsetIndex( index );
    std::array<size_t, 5> first, last, inc, N1;
    getSubsetArrays( index, first, last, inc, N1 );
    // Copy the sub-array
-    static_assert( ArraySize::maxDim() == 5, "Not programmed for more than 5 dimensions" );
    const TYPE *src_data = subset.data();
    for ( size_t i4 = first[4], k1 = 0; i4 <= last[4]; i4 += inc[4] ) {
        for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) {
@@ -526,15 +596,14 @@ void Array<TYPE, FUN, Allocator>::copySubset(
    }
 }
 template<class TYPE, class FUN, class Allocator>
-void Array<TYPE, FUN, Allocator>::addSubset(
-    const std::vector<Range<size_t>> &index, const Array<TYPE, FUN, Allocator> &subset )
+void Array<TYPE, FUN, Allocator>::addSubset( const std::vector<Range<size_t>> &index,
+                                             const Array<TYPE, FUN, Allocator> &subset )
 {
    // Get the subset indices
    checkSubsetIndex( index );
    std::array<size_t, 5> first, last, inc, N1;
    getSubsetArrays( index, first, last, inc, N1 );
    // add the sub-array
-    static_assert( ArraySize::maxDim() == 5, "Not programmed for more than 5 dimensions" );
    for ( size_t i4 = first[4], k1 = 0; i4 <= last[4]; i4 += inc[4] ) {
        for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) {
            for ( size_t i2 = first[2]; i2 <= last[2]; i2 += inc[2] ) {
@@ -549,16 +618,16 @@ void Array<TYPE, FUN, Allocator>::addSubset(
    }
 }
 template<class TYPE, class FUN, class Allocator>
-void Array<TYPE, FUN, Allocator>::copySubset(
-    const std::vector<size_t> &index, const Array<TYPE, FUN, Allocator> &subset )
+void Array<TYPE, FUN, Allocator>::copySubset( const std::vector<size_t> &index,
+                                              const Array<TYPE, FUN, Allocator> &subset )
 {
    auto range = convert( index );
    copySubset( range, subset );
 }

 template<class TYPE, class FUN, class Allocator>
-void Array<TYPE, FUN, Allocator>::addSubset(
-    const std::vector<size_t> &index, const Array<TYPE, FUN, Allocator> &subset )
+void Array<TYPE, FUN, Allocator>::addSubset( const std::vector<size_t> &index,
+                                             const Array<TYPE, FUN, Allocator> &subset )
 {
    auto range = convert( index );
    addSubset( range, subset );
@@ -586,8 +655,8 @@ bool Array<TYPE, FUN, Allocator>::operator==( const Array &rhs ) const
 *  Get a view of an C array                             *
 ********************************************************/
 template<class TYPE, class FUN, class Allocator>
-std::unique_ptr<Array<TYPE, FUN, Allocator>> Array<TYPE, FUN, Allocator>::view(
-    const ArraySize &N, std::shared_ptr<TYPE> &data )
+std::unique_ptr<Array<TYPE, FUN, Allocator>>
+Array<TYPE, FUN, Allocator>::view( const ArraySize &N, std::shared_ptr<TYPE> data )
 {
    auto array    = std::make_unique<Array<TYPE, FUN, Allocator>>();
    array->d_size = N;
@@ -596,8 +665,9 @@ std::unique_ptr<Array<TYPE, FUN, Allocator>> Array<TYPE, FUN, Allocator>::view(
    return array;
 }
 template<class TYPE, class FUN, class Allocator>
-std::unique_ptr<const Array<TYPE, FUN, Allocator>> Array<TYPE, FUN, Allocator>::constView(
-    const ArraySize &N, std::shared_ptr<const TYPE> const &data )
+std::unique_ptr<const Array<TYPE, FUN, Allocator>>
+Array<TYPE, FUN, Allocator>::constView( const ArraySize &N,
+                                        std::shared_ptr<const TYPE> const &data )
 {
    auto array    = std::make_unique<Array<TYPE, FUN, Allocator>>();
    array->d_size = N;
@@ -612,15 +682,17 @@ void Array<TYPE, FUN, Allocator>::view2( Array<TYPE, FUN, Allocator> &src )
    d_data = src.d_data;
 }
 template<class TYPE, class FUN, class Allocator>
-void Array<TYPE, FUN, Allocator>::view2( const ArraySize &N, std::shared_ptr<TYPE> const &data )
+void Array<TYPE, FUN, Allocator>::view2( const ArraySize &N, std::shared_ptr<TYPE> data )
 {
    d_size = N;
    d_ptr  = data;
    d_data = d_ptr.get();
 }
 template<class TYPE, class FUN, class Allocator>
-void Array<TYPE, FUN, Allocator>::viewRaw(
-    const ArraySize &N, TYPE *data, bool isCopyable, bool isFixedSize )
+void Array<TYPE, FUN, Allocator>::viewRaw( const ArraySize &N,
+                                           TYPE *data,
+                                           bool isCopyable,
+                                           bool isFixedSize )
 {
    d_isCopyable  = isCopyable;
    d_isFixedSize = isFixedSize;
@@ -644,20 +716,8 @@ void Array<TYPE, FUN, Allocator>::swap( Array &other )
    std::swap( d_ptr, other.d_ptr );
 }
 template<class TYPE, class FUN, class Allocator>
-void Array<TYPE, FUN, Allocator>::fill( const TYPE &value )
-{
-    for ( size_t i = 0; i < d_size.length(); i++ )
-        d_data[i] = value;
-}
-template<class TYPE, class FUN, class Allocator>
-void Array<TYPE, FUN, Allocator>::scale( const TYPE &value )
-{
-    for ( size_t i = 0; i < d_size.length(); i++ )
-        d_data[i] *= value;
-}
-template<class TYPE, class FUN, class Allocator>
-void Array<TYPE, FUN, Allocator>::pow(
-    const Array<TYPE, FUN, Allocator> &baseArray, const TYPE &exp )
+void Array<TYPE, FUN, Allocator>::pow( const Array<TYPE, FUN, Allocator> &baseArray,
+                                       const TYPE &exp )
 {
    // not insisting on the shapes being the same
    // but insisting on the total size being the same
@@ -674,8 +734,8 @@ void Array<TYPE, FUN, Allocator>::pow(
 *  Replicate the array                                  *
 ********************************************************/
 template<class TYPE, class FUN, class Allocator>
-Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::repmat(
-    const std::vector<size_t> &N_rep ) const
+Array<TYPE, FUN, Allocator>
+Array<TYPE, FUN, Allocator>::repmat( const std::vector<size_t> &N_rep ) const
 {
    std::vector<size_t> N2( d_size.begin(), d_size.end() );
    if ( N2.size() < N_rep.size() )
@@ -689,7 +749,6 @@ Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::repmat(
        N2[d] *= N_rep[d];
    }
    Array<TYPE, FUN, Allocator> y( N2 );
-    static_assert( ArraySize::maxDim() <= 5, "Not programmed for dimensions > 5" );
    TYPE *y2 = y.data();
    for ( size_t i4 = 0, index = 0; i4 < N1[4]; i4++ ) {
        for ( size_t j4 = 0; j4 < Nr[4]; j4++ ) {
@@ -731,7 +790,7 @@ bool Array<TYPE, FUN, Allocator>::NaNs() const
 template<class TYPE, class FUN, class Allocator>
 TYPE Array<TYPE, FUN, Allocator>::mean( void ) const
 {
-    TYPE x = this->sum() / d_size.length();
+    TYPE x = sum() / d_size.length();
    return x;
 }
 template<class TYPE, class FUN, class Allocator>
@@ -813,7 +872,6 @@ TYPE Array<TYPE, FUN, Allocator>::min( const std::vector<Range<size_t>> &range )
    checkSubsetIndex( range );
    std::array<size_t, 5> first, last, inc, N1;
    getSubsetArrays( range, first, last, inc, N1 );
-    static_assert( ArraySize::maxDim() <= 5, "Function programmed for more than 5 dimensions" );
    TYPE x = std::numeric_limits<TYPE>::max();
    for ( size_t i4 = first[4]; i4 <= last[4]; i4 += inc[4] ) {
        for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) {
@@ -836,7 +894,6 @@ TYPE Array<TYPE, FUN, Allocator>::max( const std::vector<Range<size_t>> &range )
    checkSubsetIndex( range );
    std::array<size_t, 5> first, last, inc, N1;
    getSubsetArrays( range, first, last, inc, N1 );
-    static_assert( ArraySize::maxDim() <= 5, "Function programmed for more than 5 dimensions" );
    TYPE x = std::numeric_limits<TYPE>::min();
    for ( size_t i4 = first[4]; i4 <= last[4]; i4 += inc[4] ) {
        for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) {
@@ -859,7 +916,6 @@ TYPE Array<TYPE, FUN, Allocator>::sum( const std::vector<Range<size_t>> &range )
    checkSubsetIndex( range );
    std::array<size_t, 5> first, last, inc, N1;
    getSubsetArrays( range, first, last, inc, N1 );
-    static_assert( ArraySize::maxDim() <= 5, "Function programmed for more than 5 dimensions" );
    TYPE x = 0;
    for ( size_t i4 = first[4]; i4 <= last[4]; i4 += inc[4] ) {
        for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) {
@@ -882,7 +938,6 @@ TYPE Array<TYPE, FUN, Allocator>::mean( const std::vector<Range<size_t>> &range
    checkSubsetIndex( range );
    std::array<size_t, 5> first, last, inc, N1;
    getSubsetArrays( range, first, last, inc, N1 );
-    static_assert( ArraySize::maxDim() <= 5, "Function programmed for more than 5 dimensions" );
    size_t n = 1;
    for ( auto &d : N1 )
        n *= d;
@@ -919,8 +974,9 @@ TYPE Array<TYPE, FUN, Allocator>::mean( const std::vector<size_t> &index ) const
 *  Find all elements that match the given operation     *
 ********************************************************/
 template<class TYPE, class FUN, class Allocator>
-std::vector<size_t> Array<TYPE, FUN, Allocator>::find(
-    const TYPE &value, std::function<bool( const TYPE &, const TYPE & )> compare ) const
+std::vector<size_t>
+Array<TYPE, FUN, Allocator>::find( const TYPE &value,
+                                   std::function<bool( const TYPE &, const TYPE & )> compare ) const
 {
    std::vector<size_t> result;
    result.reserve( d_size.length() );
@@ -936,8 +992,9 @@ std::vector<size_t> Array<TYPE, FUN, Allocator>::find(
 *  Print an array to an output stream                   *
 ********************************************************/
 template<class TYPE, class FUN, class Allocator>
-void Array<TYPE, FUN, Allocator>::print(
-    std::ostream &os, const std::string &name, const std::string &prefix ) const
+void Array<TYPE, FUN, Allocator>::print( std::ostream &os,
+                                         const std::string &name,
+                                         const std::string &prefix ) const
 {
    if ( d_size.ndim() == 1 ) {
        for ( size_t i = 0; i < d_size[0]; i++ )
@@ -961,12 +1018,11 @@ void Array<TYPE, FUN, Allocator>::print(
 template<class TYPE, class FUN, class Allocator>
 Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::reverseDim() const
 {
-    size_t N2[ArraySize::maxDim()];
+    size_t N2[5];
    for ( int d = 0; d < ArraySize::maxDim(); d++ )
        N2[d] = d_size[ArraySize::maxDim() - d - 1];
    ArraySize S2( ArraySize::maxDim(), N2 );
    Array<TYPE, FUN, Allocator> y( S2 );
-    static_assert( ArraySize::maxDim() == 5, "Not programmed for dimensions other than 5" );
    TYPE *y2 = y.data();
    for ( size_t i0 = 0; i0 < d_size[0]; i0++ ) {
        for ( size_t i1 = 0; i1 < d_size[1]; i1++ ) {
@@ -991,8 +1047,8 @@ Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::reverseDim() const
 *  Coarsen the array                                    *
 ********************************************************/
 template<class TYPE, class FUN, class Allocator>
-Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::coarsen(
-    const Array<TYPE, FUN, Allocator> &filter ) const
+Array<TYPE, FUN, Allocator>
+Array<TYPE, FUN, Allocator>::coarsen( const Array<TYPE, FUN, Allocator> &filter ) const
 {
    auto S2 = size();
    for ( size_t i = 0; i < S2.size(); i++ ) {
@@ -1012,8 +1068,9 @@ Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::coarsen(
                for ( size_t k2 = 0; k2 < Nh[2]; k2++ ) {
                    for ( size_t j2 = 0; j2 < Nh[1]; j2++ ) {
                        for ( size_t i2 = 0; i2 < Nh[0]; i2++ ) {
-                            tmp += filter( i2, j2, k2 ) * this->operator()( i1 *Nh[0] + i2,
-                                                              j1 * Nh[1] + j2, k1 * Nh[2] + k2 );
+                            tmp += filter( i2, j2, k2 ) * operator()( i1 *Nh[0] + i2,
+                                                                      j1 * Nh[1] + j2,
+                                                                      k1 * Nh[2] + k2 );
                        }
                    }
                }
@@ -1024,7 +1081,8 @@ Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::coarsen(
    return y;
 }
 template<class TYPE, class FUN, class Allocator>
-Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::coarsen( const std::vector<size_t> &ratio,
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::coarsen(
+    const std::vector<size_t> &ratio,
    std::function<TYPE( const Array<TYPE, FUN, Allocator> & )> filter ) const
 {
    if ( ratio.size() != d_size.ndim() )
@@ -1045,7 +1103,7 @@ Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::coarsen( const std::vec
                for ( size_t k2 = 0; k2 < ratio[2]; k2++ ) {
                    for ( size_t j2 = 0; j2 < ratio[1]; j2++ ) {
                        for ( size_t i2 = 0; i2 < ratio[0]; i2++ ) {
-                            tmp( i2, j2, k2 ) = this->operator()(
+                            tmp( i2, j2, k2 ) = operator()(
                                i1 *ratio[0] + i2, j1 * ratio[1] + j2, k1 * ratio[2] + k2 );
                        }
                    }
@@ -1070,13 +1128,25 @@ void Array<TYPE, FUN, Allocator>::cat( const Array<TYPE, FUN, Allocator> &x, int
    *this = cat( tmp, dim );
 }
 template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::cat( const std::initializer_list<Array> &x,
+                                                              int dim )
+{
+    return cat( x.size(), x.begin(), dim );
+}
+template<class TYPE, class FUN, class Allocator>
 Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::cat( const std::vector<Array> &x, int dim )
 {
-    if ( x.empty() )
+    return cat( x.size(), x.data(), dim );
+}
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>
+Array<TYPE, FUN, Allocator>::cat( size_t N_array, const Array *x, int dim )
+{
+    if ( N_array == 0 )
        return Array<TYPE, FUN, Allocator>();
    // Check that the dimensions match
    bool check = true;
-    for ( size_t i = 1; i < x.size(); i++ ) {
+    for ( size_t i = 1; i < N_array; i++ ) {
        check = check && x[i].ndim() == x[0].ndim();
        for ( int d = 0; d < x[0].ndim(); d++ )
            if ( d != dim )
@@ -1086,7 +1156,7 @@ Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::cat( const std::vector<
        throw std::logic_error( "Array dimensions do not match for concatenation" );
    // Create the output array
    auto size = x[0].d_size;
-    for ( size_t i = 1; i < x.size(); i++ )
+    for ( size_t i = 1; i < N_array; i++ )
        size.resize( dim, size[dim] + x[i].size( dim ) );
    Array<TYPE, FUN, Allocator> out( size );
    size_t N1 = 1;
@@ -1097,7 +1167,7 @@ Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::cat( const std::vector<
    for ( size_t d = dim + 1; d < size.ndim(); d++ )
        N3 *= size[d];
    TYPE *data = out.data();
-    for ( size_t i = 0, i0 = 0; i < x.size(); i++ ) {
+    for ( size_t i = 0, i0 = 0; i < N_array; i++ ) {
        const TYPE *src = x[i].data();
        size_t N22      = x[i].size( dim );
        for ( size_t j2 = 0; j2 < N3; j2++ ) {
@@ -1117,87 +1187,82 @@ Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::cat( const std::vector<
 *  Interpolate                                          *
 ********************************************************/
 template<class T>
-struct is_compatible_double
-    : std::integral_constant<bool, std::is_floating_point<T>::value || std::is_integral<T>::value> {
-};
-template<class TYPE>
-inline typename std::enable_if<is_compatible_double<TYPE>::value, TYPE>::type Array_interp_1D(
-    double x, int N, const TYPE *data )
+constexpr bool is_compatible_double()
 {
-    int i = floor( x );
-    i     = std::max( i, 0 );
-    i     = std::min( i, N - 2 );
-    return ( i + 1 - x ) * data[i] + ( x - i ) * data[i + 1];
+    return std::is_floating_point<T>::value || std::is_integral<T>::value;
 }
 template<class TYPE>
-inline typename std::enable_if<is_compatible_double<TYPE>::value, TYPE>::type Array_interp_2D(
-    double x, double y, int Nx, int Ny, const TYPE *data )
+inline TYPE Array_interp_1D( double x, int N, const TYPE *data )
 {
-    int i       = floor( x );
-    i           = std::max( i, 0 );
-    i           = std::min( i, Nx - 2 );
-    double dx   = x - i;
-    double dx2  = 1.0 - dx;
-    int j       = floor( y );
-    j           = std::max( j, 0 );
-    j           = std::min( j, Ny - 2 );
-    double dy   = y - j;
-    double dy2  = 1.0 - dy;
-    double f[4] = { (double) data[i + j * Nx], (double) data[i + 1 + j * Nx],
-        (double) data[i + ( j + 1 ) * Nx], (double) data[i + 1 + ( j + 1 ) * Nx] };
-    return ( dx * f[1] + dx2 * f[0] ) * dy2 + ( dx * f[3] + dx2 * f[2] ) * dy;
+    if ( is_compatible_double<TYPE>() ) {
+        int i = floor( x );
+        i     = std::max( i, 0 );
+        i     = std::min( i, N - 2 );
+        return ( i + 1 - x ) * data[i] + ( x - i ) * data[i + 1];
+    } else {
+        throw std::logic_error( "Invalid conversion" );
+    }
 }
 template<class TYPE>
-inline typename std::enable_if<is_compatible_double<TYPE>::value, TYPE>::type Array_interp_3D(
-    double x, double y, double z, int Nx, int Ny, int Nz, const TYPE *data )
+inline TYPE Array_interp_2D( double x, double y, int Nx, int Ny, const TYPE *data )
 {
-    int i       = floor( x );
-    i           = std::max( i, 0 );
-    i           = std::min( i, Nx - 2 );
-    double dx   = x - i;
-    double dx2  = 1.0 - dx;
-    int j       = floor( y );
-    j           = std::max( j, 0 );
-    j           = std::min( j, Ny - 2 );
-    double dy   = y - j;
-    double dy2  = 1.0 - dy;
-    int k       = floor( z );
-    k           = std::max( k, 0 );
-    k           = std::min( k, Nz - 2 );
-    double dz   = z - k;
-    double dz2  = 1.0 - dz;
-    double f[8] = { (double) data[i + j * Nx + k * Nx * Ny],
-        (double) data[i + 1 + j * Nx + k * Nx * Ny],
-        (double) data[i + ( j + 1 ) * Nx + k * Nx * Ny],
-        (double) data[i + 1 + ( j + 1 ) * Nx + k * Nx * Ny],
-        (double) data[i + j * Nx + ( k + 1 ) * Nx * Ny],
-        (double) data[i + 1 + j * Nx + ( k + 1 ) * Nx * Ny],
-        (double) data[i + ( j + 1 ) * Nx + ( k + 1 ) * Nx * Ny],
-        (double) data[i + 1 + ( j + 1 ) * Nx + ( k + 1 ) * Nx * Ny] };
-    double h0   = ( dx * f[1] + dx2 * f[0] ) * dy2 + ( dx * f[3] + dx2 * f[2] ) * dy;
-    double h1   = ( dx * f[5] + dx2 * f[4] ) * dy2 + ( dx * f[7] + dx2 * f[6] ) * dy;
-    return h0 * dz2 + h1 * dz;
+    if ( is_compatible_double<TYPE>() ) {
+        int i       = floor( x );
+        i           = std::max( i, 0 );
+        i           = std::min( i, Nx - 2 );
+        double dx   = x - i;
+        double dx2  = 1.0 - dx;
+        int j       = floor( y );
+        j           = std::max( j, 0 );
+        j           = std::min( j, Ny - 2 );
+        double dy   = y - j;
+        double dy2  = 1.0 - dy;
+        double f[4] = { (double) data[i + j * Nx],
+                        (double) data[i + 1 + j * Nx],
+                        (double) data[i + ( j + 1 ) * Nx],
+                        (double) data[i + 1 + ( j + 1 ) * Nx] };
+        return ( dx * f[1] + dx2 * f[0] ) * dy2 + ( dx * f[3] + dx2 * f[2] ) * dy;
+    } else {
+        throw std::logic_error( "Invalid conversion" );
+    }
 }
 template<class TYPE>
-inline typename std::enable_if<!is_compatible_double<TYPE>::value, TYPE>::type Array_interp_1D(
-    double, int, const TYPE * )
+inline TYPE
+Array_interp_3D( double x, double y, double z, int Nx, int Ny, int Nz, const TYPE *data )
 {
-    throw std::logic_error( "Invalid conversion" );
-}
-template<class TYPE>
-inline typename std::enable_if<!is_compatible_double<TYPE>::value, TYPE>::type Array_interp_2D(
-    double, double, int, int, const TYPE * )
-{
-    throw std::logic_error( "Invalid conversion" );
-}
-template<class TYPE>
-inline typename std::enable_if<!is_compatible_double<TYPE>::value, TYPE>::type Array_interp_3D(
-    double, double, double, int, int, int, const TYPE * )
-{
-    throw std::logic_error( "Invalid conversion" );
+    if ( is_compatible_double<TYPE>() ) {
+        int i       = floor( x );
+        i           = std::max( i, 0 );
+        i           = std::min( i, Nx - 2 );
+        double dx   = x - i;
+        double dx2  = 1.0 - dx;
+        int j       = floor( y );
+        j           = std::max( j, 0 );
+        j           = std::min( j, Ny - 2 );
+        double dy   = y - j;
+        double dy2  = 1.0 - dy;
+        int k       = floor( z );
+        k           = std::max( k, 0 );
+        k           = std::min( k, Nz - 2 );
+        double dz   = z - k;
+        double dz2  = 1.0 - dz;
+        double f[8] = { (double) data[i + j * Nx + k * Nx * Ny],
+                        (double) data[i + 1 + j * Nx + k * Nx * Ny],
+                        (double) data[i + ( j + 1 ) * Nx + k * Nx * Ny],
+                        (double) data[i + 1 + ( j + 1 ) * Nx + k * Nx * Ny],
+                        (double) data[i + j * Nx + ( k + 1 ) * Nx * Ny],
+                        (double) data[i + 1 + j * Nx + ( k + 1 ) * Nx * Ny],
+                        (double) data[i + ( j + 1 ) * Nx + ( k + 1 ) * Nx * Ny],
+                        (double) data[i + 1 + ( j + 1 ) * Nx + ( k + 1 ) * Nx * Ny] };
+        double h0   = ( dx * f[1] + dx2 * f[0] ) * dy2 + ( dx * f[3] + dx2 * f[2] ) * dy;
+        double h1   = ( dx * f[5] + dx2 * f[4] ) * dy2 + ( dx * f[7] + dx2 * f[6] ) * dy;
+        return h0 * dz2 + h1 * dz;
+    } else {
+        throw std::logic_error( "Invalid conversion" );
+    }
 }
 template<class TYPE, class FUN, class Allocator>
-TYPE Array<TYPE, FUN, Allocator>::interp( const std::vector<double> &x ) const
+TYPE Array<TYPE, FUN, Allocator>::interp( const double *x ) const
 {
    int ndim = 0, dim[5];
    double x2[5];
@@ -1233,81 +1298,75 @@ void Array<TYPE, FUN, Allocator>::rand()
    FUN::rand( *this );
 }
 template<class TYPE, class FUN, class Allocator>
-Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator+=(
-    const Array<TYPE, FUN, Allocator> &rhs )
+Array<TYPE, FUN, Allocator> &
+Array<TYPE, FUN, Allocator>::operator+=( const Array<TYPE, FUN, Allocator> &rhs )
 {
-    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a + b; };
-    FUN::transform( fun, *this, rhs, *this );
+    auto op = []( const TYPE &a, const TYPE &b ) { return a + b; };
+    FUN::transform( op, *this, rhs, *this );
    return *this;
 }
 template<class TYPE, class FUN, class Allocator>
-Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator-=(
-    const Array<TYPE, FUN, Allocator> &rhs )
+Array<TYPE, FUN, Allocator> &
+Array<TYPE, FUN, Allocator>::operator-=( const Array<TYPE, FUN, Allocator> &rhs )
 {
-    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a - b; };
-    FUN::transform( fun, *this, rhs, *this );
+    auto op = []( const TYPE &a, const TYPE &b ) { return a - b; };
+    FUN::transform( op, *this, rhs, *this );
    return *this;
 }
 template<class TYPE, class FUN, class Allocator>
 Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator+=( const TYPE &rhs )
 {
-    const auto &fun = [rhs]( const TYPE &x ) { return x + rhs; };
-    FUN::transform( fun, *this, *this );
+    auto op = [rhs]( const TYPE &x ) { return x + rhs; };
+    FUN::transform( op, *this, *this );
    return *this;
 }
 template<class TYPE, class FUN, class Allocator>
 Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator-=( const TYPE &rhs )
 {
-    const auto &fun = [rhs]( const TYPE &x ) { return x - rhs; };
-    FUN::transform( fun, *this, *this );
+    auto op = [rhs]( const TYPE &x ) { return x - rhs; };
+    FUN::transform( op, *this, *this );
    return *this;
 }
 template<class TYPE, class FUN, class Allocator>
 TYPE Array<TYPE, FUN, Allocator>::min() const
 {
-    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a < b ? a : b; };
-    return FUN::reduce( fun, *this );
+    const auto &op = []( const TYPE &a, const TYPE &b ) { return a < b ? a : b; };
+    return FUN::reduce( op, *this, d_data[0] );
 }
 template<class TYPE, class FUN, class Allocator>
 TYPE Array<TYPE, FUN, Allocator>::max() const
 {
-    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a > b ? a : b; };
-    return FUN::reduce( fun, *this );
+    const auto &op = []( const TYPE &a, const TYPE &b ) { return a > b ? a : b; };
+    return FUN::reduce( op, *this, d_data[0] );
 }
 template<class TYPE, class FUN, class Allocator>
 TYPE Array<TYPE, FUN, Allocator>::sum() const
 {
-    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a + b; };
-    return FUN::reduce( fun, *this );
+    const auto &op = []( const TYPE &a, const TYPE &b ) { return a + b; };
+    return FUN::reduce( op, *this, static_cast<TYPE>( 0 ) );
 }
 template<class TYPE, class FUN, class Allocator>
-Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::multiply(
-    const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
+void Array<TYPE, FUN, Allocator>::axpby( const TYPE &alpha,
+                                         const Array<TYPE, FUN, Allocator> &x,
+                                         const TYPE &beta )
 {
-    Array<TYPE, FUN, Allocator> c;
-    FUN::multiply( a, b, c );
-    return c;
+    const auto &op = [alpha, beta]( const TYPE &x, const TYPE &y ) { return alpha * x + beta * y; };
+    return FUN::transform( op, x, *this, *this );
 }
 template<class TYPE, class FUN, class Allocator>
-void Array<TYPE, FUN, Allocator>::axpby(
-    const TYPE &alpha, const Array<TYPE, FUN, Allocator> &x, const TYPE &beta )
-{
-    const auto &fun = [alpha, beta](
-                          const TYPE &x, const TYPE &y ) { return alpha * x + beta * y; };
-    return FUN::transform( fun, x, *this, *this );
-}
-template<class TYPE, class FUN, class Allocator>
-Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::transform(
-    std::function<TYPE( const TYPE & )> fun, const Array<TYPE, FUN, Allocator> &x )
+Array<TYPE, FUN, Allocator>
+Array<TYPE, FUN, Allocator>::transform( std::function<TYPE( const TYPE & )> fun,
+                                        const Array<TYPE, FUN, Allocator> &x )
 {
    Array<TYPE, FUN, Allocator> y;
    FUN::transform( fun, x, y );
    return y;
 }
 template<class TYPE, class FUN, class Allocator>
-Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::transform(
-    std::function<TYPE( const TYPE &, const TYPE & )> fun, const Array<TYPE, FUN, Allocator> &x,
-    const Array<TYPE, FUN, Allocator> &y )
+Array<TYPE, FUN, Allocator>
+Array<TYPE, FUN, Allocator>::transform( std::function<TYPE( const TYPE &, const TYPE & )> fun,
+                                        const Array<TYPE, FUN, Allocator> &x,
+                                        const Array<TYPE, FUN, Allocator> &y )
 {
    Array<TYPE, FUN, Allocator> z;
    FUN::transform( fun, x, y, z );
@@ -1319,4 +1378,5 @@ bool Array<TYPE, FUN, Allocator>::equals( const Array &rhs, TYPE tol ) const
    return FUN::equals( *this, rhs, tol );
 }

+
 #endif
--- a/common/ArraySize.h
+++ b/common/ArraySize.h
@@ -1,8 +1,12 @@
 #ifndef included_ArraySizeClass
 #define included_ArraySizeClass

+#include "common/Utilities.h"

 #include <array>
+#include <cmath>
+#include <complex>
+#include <cstdlib>
 #include <cstring>
 #include <initializer_list>
 #include <vector>
@@ -22,21 +26,22 @@


 #if ( defined( DEBUG ) || defined( _DEBUG ) ) && !defined( NDEBUG )
-#define CHECK_ARRAY_LENGTH( i )                                      \
+#define CHECK_ARRAY_LENGTH( i, length )                              \
    do {                                                             \
-        if ( i >= d_length )                                         \
+        if ( i >= length )                                           \
            throw std::out_of_range( "Index exceeds array bounds" ); \
    } while ( 0 )
 #else
-#define CHECK_ARRAY_LENGTH( i ) \
-    do {                        \
+#define CHECK_ARRAY_LENGTH( i, length ) \
+    do {                                \
    } while ( 0 )
 #endif


+
 // Forward declerations
 class FunctionTable;
-template<class TYPE, class FUN = FunctionTable, class Allocator = std::nullptr_t>
+template<class TYPE, class FUN = FunctionTable, class Allocator = std::allocator<TYPE>>
 class Array;


@@ -46,7 +51,7 @@ class Range final
 {
 public:
    //! Empty constructor
-    constexpr Range() : i( 0 ), j( -1 ), k( 1 ) {}
+    Range() : i( 0 ), j( -1 ), k( 1 ) {}

    /*!
     * Create a range i:k:j (or i:j)
@@ -54,8 +59,30 @@ public:
     * @param j_            Ending value
     * @param k_            Increment value
     */
-    constexpr Range( TYPE i_, TYPE j_, TYPE k_ = 1 ) : i( i_ ), j( j_ ), k( k_ ) {}
+    Range( const TYPE &i_, const TYPE &j_, const TYPE &k_ = 1 )
+        : i( i_ ), j( j_ ), k( k_ )
+    {
+    }

+    //! Get the number of values in the range
+    size_t size() const
+    {
+        if ( std::is_integral<TYPE>::value ) {
+            return ( static_cast<int64_t>( j ) - static_cast<int64_t>( i ) ) /
+                   static_cast<int64_t>( k );
+        } else if ( std::is_floating_point<TYPE>::value ) {
+            double tmp = static_cast<double>( ( j - i ) ) / static_cast<double>( k );
+            return static_cast<size_t>( floor( tmp + 1e-12 ) + 1 );
+        } else if ( std::is_same<TYPE, std::complex<float>>::value ||
+                                 std::is_same<TYPE, std::complex<double>>::value ) {
+            double tmp = std::real( ( j - i ) / ( k ) );
+            return static_cast<size_t>( floor( tmp + 1e-12 ) + 1 );
+        } else {
+            ERROR( "Unsupported type for range" );
+        }
+    }
+
+public:
    TYPE i, j, k;
 };

@@ -65,20 +92,20 @@ class ArraySize final
 {
 public:
    //! Empty constructor
-    constexpr ArraySize() : d_ndim( 1 ), d_length( 0 ), d_N{ 0, 1, 1, 1, 1 } {}
+    ArraySize() : d_ndim( 1 ), d_length( 0 ), d_N{ 0, 1, 1, 1, 1 } {}

    /*!
     * Create the vector size
     * @param N1            Number of elements in the first dimension
     */
-    constexpr ArraySize( size_t N1 ) : d_ndim( 1 ), d_length( N1 ), d_N{ N1, 1, 1, 1, 1 } {}
+    ArraySize( size_t N1 ) : d_ndim( 1 ), d_length( N1 ), d_N{ N1, 1, 1, 1, 1 } {}

    /*!
     * Create the vector size
     * @param N1            Number of elements in the first dimension
     * @param N2            Number of elements in the second dimension
     */
-    constexpr ArraySize( size_t N1, size_t N2 )
+    ArraySize( size_t N1, size_t N2 )
        : d_ndim( 2 ), d_length( N1 * N2 ), d_N{ N1, N2, 1, 1, 1 }
    {
    }
@@ -89,7 +116,7 @@ public:
     * @param N2            Number of elements in the second dimension
     * @param N3            Number of elements in the third dimension
     */
-    constexpr ArraySize( size_t N1, size_t N2, size_t N3 )
+    ArraySize( size_t N1, size_t N2, size_t N3 )
        : d_ndim( 3 ), d_length( N1 * N2 * N3 ), d_N{ N1, N2, N3, 1, 1 }
    {
    }
@@ -101,7 +128,7 @@ public:
     * @param N3            Number of elements in the third dimension
     * @param N4            Number of elements in the fourth dimension
     */
-    constexpr ArraySize( size_t N1, size_t N2, size_t N3, size_t N4 )
+    ArraySize( size_t N1, size_t N2, size_t N3, size_t N4 )
        : d_ndim( 4 ), d_length( N1 * N2 * N3 * N4 ), d_N{ N1, N2, N3, N4, 1 }
    {
    }
@@ -114,7 +141,7 @@ public:
     * @param N4            Number of elements in the fourth dimension
     * @param N5            Number of elements in the fifth dimension
     */
-    constexpr ArraySize( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 )
+    ArraySize( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 )
        : d_ndim( 5 ), d_length( N1 * N2 * N3 * N4 * N5 ), d_N{ N1, N2, N3, N4, N5 }
    {
    }
@@ -122,11 +149,14 @@ public:
    /*!
     * Create from initializer list
     * @param N             Size of the array
+     * @param ndim          Number of dimensions
     */
-    constexpr ArraySize( std::initializer_list<size_t> N )
+    ArraySize( std::initializer_list<size_t> N, int ndim = -1 )
        : d_ndim( N.size() ), d_length( 0 ), d_N{ 0, 1, 1, 1, 1 }
    {
-        if ( d_ndim > maxDim() )
+        if ( ndim >= 0 )
+            d_ndim = ndim;
+        if ( d_ndim > 5 )
            throw std::out_of_range( "Maximum number of dimensions exceeded" );
        auto it = N.begin();
        for ( size_t i = 0; i < d_ndim; i++, ++it )
@@ -144,10 +174,10 @@ public:
     * @param ndim          Number of dimensions
     * @param dims          Dimensions
     */
-    constexpr ArraySize( size_t ndim, const size_t *dims )
+    ArraySize( size_t ndim, const size_t *dims )
        : d_ndim( ndim ), d_length( 0 ), d_N{ 0, 1, 1, 1, 1 }
    {
-        if ( d_ndim > maxDim() )
+        if ( d_ndim > 5 )
            throw std::out_of_range( "Maximum number of dimensions exceeded" );
        for ( size_t i = 0; i < ndim; i++ )
            d_N[i] = dims[i];
@@ -158,35 +188,44 @@ public:
            d_length = 0;
    }

+    /*!
+     * Create from std::array
+     * @param N             Size of the array
+     */
+    template<std::size_t NDIM>
+    ArraySize( const std::array<size_t, NDIM> &N ) : ArraySize( NDIM, N.data() )
+    {
+    }
+
    /*!
     * Create from std::vector
     * @param N             Size of the array
     */
-    ArraySize( const std::vector<size_t> &N );
+    inline ArraySize( const std::vector<size_t> &N ) : ArraySize( N.size(), N.data() ) {}

    // Copy/assignment constructors
-    constexpr ArraySize( ArraySize &&rhs )      = default;
-    constexpr ArraySize( const ArraySize &rhs ) = default;
-    constexpr ArraySize &operator=( ArraySize &&rhs ) = default;
-    constexpr ArraySize &operator=( const ArraySize &rhs ) = default;
+    ArraySize( ArraySize &&rhs )      = default;
+    ArraySize( const ArraySize &rhs ) = default;
+    ArraySize &operator=( ArraySize &&rhs ) = default;
+    ArraySize &operator=( const ArraySize &rhs ) = default;

    /*!
     * Access the ith dimension
     * @param i             Index to access
     */
-    constexpr ARRAY_ATTRIBUTE size_t operator[]( size_t i ) const { return d_N[i]; }
+    ARRAY_ATTRIBUTE size_t operator[]( size_t i ) const { return d_N[i]; }

    //! Return the number of dimensions
-    constexpr ARRAY_ATTRIBUTE uint8_t ndim() const { return d_ndim; }
+    ARRAY_ATTRIBUTE uint8_t ndim() const { return d_ndim; }

    //! Return the number of dimensions
-    constexpr ARRAY_ATTRIBUTE size_t size() const { return d_ndim; }
+    ARRAY_ATTRIBUTE size_t size() const { return d_ndim; }

    //! Return the total number of elements in the array
-    constexpr ARRAY_ATTRIBUTE size_t length() const { return d_length; }
+    ARRAY_ATTRIBUTE size_t length() const { return d_length; }

    //! Resize the dimension
-    constexpr void resize( uint8_t dim, size_t N )
+    void resize( uint8_t dim, size_t N )
    {
        if ( dim >= d_ndim )
            throw std::out_of_range( "Invalid dimension" );
@@ -201,75 +240,141 @@ public:
     *    max of ndim and the largest dim>1.
     * @param ndim          Desired number of dimensions
     */
-    constexpr void setNdim( uint8_t ndim ) { d_ndim = std::max( ndim, d_ndim ); }
+    void setNdim( uint8_t ndim ) { d_ndim = std::max( ndim, d_ndim ); }
+
+    /*!
+     * Remove singleton dimensions
+     */
+    void squeeze()
+    {
+        d_ndim = 0;
+        for ( uint8_t i = 0; i < maxDim(); i++ ) {
+            if ( d_N[i] != 1 )
+                d_N[d_ndim++] = d_N[i];
+        }
+    }

    //! Returns an iterator to the beginning
-    constexpr const size_t *begin() const { return d_N; }
+    const size_t *begin() const { return d_N; }

    //! Returns an iterator to the end
-    constexpr const size_t *end() const { return d_N + d_ndim; }
+    const size_t *end() const { return d_N + d_ndim; }

    // Check if two array sizes are equal
-    constexpr ARRAY_ATTRIBUTE bool operator==( const ArraySize &rhs ) const
+    ARRAY_ATTRIBUTE bool operator==( const ArraySize &rhs ) const
    {
        return d_ndim == rhs.d_ndim && memcmp( d_N, rhs.d_N, sizeof( d_N ) ) == 0;
    }

    // Check if two array sizes are equal (ignoring the dimension)
-    constexpr ARRAY_ATTRIBUTE bool approxEqual( const ArraySize &rhs ) const
+    ARRAY_ATTRIBUTE bool approxEqual( const ArraySize &rhs ) const
    {
        return ( length() == 0 && rhs.length() == 0 ) || memcmp( d_N, rhs.d_N, sizeof( d_N ) ) == 0;
    }

    //! Check if two matrices are not equal
-    constexpr ARRAY_ATTRIBUTE bool operator!=( const ArraySize &rhs ) const
+    ARRAY_ATTRIBUTE bool operator!=( const ArraySize &rhs ) const
    {
        return d_ndim != rhs.d_ndim || memcmp( d_N, rhs.d_N, sizeof( d_N ) ) != 0;
    }

    //! Maximum supported dimension
-    constexpr ARRAY_ATTRIBUTE static uint8_t maxDim() { return 5u; }
+    ARRAY_ATTRIBUTE static uint8_t maxDim() { return 5; }

    //! Get the index
-    constexpr ARRAY_ATTRIBUTE size_t index( size_t i ) const
+    ARRAY_ATTRIBUTE size_t index( size_t i ) const
    {
-        CHECK_ARRAY_LENGTH( i );
+        CHECK_ARRAY_LENGTH( i, d_length );
        return i;
    }

    //! Get the index
-    constexpr ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2 ) const
+    ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2 ) const
    {
        size_t index = i1 + i2 * d_N[0];
-        CHECK_ARRAY_LENGTH( index );
+        CHECK_ARRAY_LENGTH( index, d_length );
        return index;
    }

    //! Get the index
-    constexpr ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2, size_t i3 ) const
+    ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2, size_t i3 ) const
    {
        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * i3 );
-        CHECK_ARRAY_LENGTH( index );
+        CHECK_ARRAY_LENGTH( index, d_length );
        return index;
    }

    //! Get the index
-    constexpr ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2, size_t i3, size_t i4 ) const
+    ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2, size_t i3, size_t i4 ) const
    {
        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * i4 ) );
-        CHECK_ARRAY_LENGTH( index );
+        CHECK_ARRAY_LENGTH( index, d_length );
        return index;
    }

    //! Get the index
-    constexpr ARRAY_ATTRIBUTE size_t index(
-        size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const
+    ARRAY_ATTRIBUTE size_t
+    index( size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const
    {
        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * ( i4 + d_N[3] * i5 ) ) );
-        CHECK_ARRAY_LENGTH( index );
+        CHECK_ARRAY_LENGTH( index, d_length );
        return index;
    }

+    //! Get the index
+    size_t index( const std::array<size_t, 5> &i ) const
+    {
+        size_t j = 0;
+        for ( size_t m = 0, N = 1; m < 5; m++ ) {
+            j += i[m] * N;
+            N *= d_N[m];
+        }
+        return j;
+    }
+
+    //! Get the index
+    size_t index( std::initializer_list<size_t> i ) const
+    {
+        size_t N = 1;
+        size_t j = 0;
+        size_t m = 0;
+        for ( size_t k : i ) {
+            j += k * N;
+            N *= d_N[m++];
+        }
+        return j;
+    }
+
+    //! Convert the index to ijk values
+    std::array<size_t, 5> ijk( size_t index ) const
+    {
+        CHECK_ARRAY_LENGTH( index, d_length );
+        size_t i0 = index % d_N[0];
+        index     = index / d_N[0];
+        size_t i1 = index % d_N[1];
+        index     = index / d_N[1];
+        size_t i2 = index % d_N[2];
+        index     = index / d_N[2];
+        size_t i3 = index % d_N[3];
+        index     = index / d_N[3];
+        return { i0, i1, i2, i3, index };
+    }
+
+    //! Convert the index to ijk values
+    void ijk( size_t index, size_t *x ) const
+    {
+        CHECK_ARRAY_LENGTH( index, d_length );
+        x[0]  = index % d_N[0];
+        index = index / d_N[0];
+        x[1]  = index % d_N[1];
+        index = index / d_N[1];
+        x[2]  = index % d_N[2];
+        index = index / d_N[2];
+        x[3]  = index % d_N[3];
+        index = index / d_N[3];
+        x[4]  = index;
+    }
+
 private:
    uint8_t d_ndim;
    size_t d_length;
@@ -278,11 +383,11 @@ private:


 // Function to concatenate dimensions of two array sizes
-constexpr ArraySize cat( const ArraySize &x, const ArraySize &y )
+inline ArraySize cat( const ArraySize &x, const ArraySize &y )
 {
-    if ( x.ndim() + y.ndim() > ArraySize::maxDim() )
+    if ( x.ndim() + y.ndim() > 5 )
        throw std::out_of_range( "Maximum number of dimensions exceeded" );
-    size_t N[ArraySize::maxDim()] = { 0 };
+    size_t N[5] = { 0 };
    for ( int i = 0; i < x.ndim(); i++ )
        N[i] = x[i];
    for ( int i = 0; i < y.ndim(); i++ )
@@ -291,4 +396,36 @@ constexpr ArraySize cat( const ArraySize &x, const ArraySize &y )
 }


+// Operator overloads
+inline ArraySize operator*( size_t v, const ArraySize &x )
+{
+    size_t N[5] = { v * x[0], v * x[1], v * x[2], v * x[3], v * x[4] };
+    return ArraySize( x.ndim(), N );
+}
+inline ArraySize operator*( const ArraySize &x, size_t v )
+{
+    size_t N[5] = { v * x[0], v * x[1], v * x[2], v * x[3], v * x[4] };
+    return ArraySize( x.ndim(), N );
+}
+inline ArraySize operator-( const ArraySize &x, size_t v )
+{
+    size_t N[5] = { x[0] - v, x[1] - v, x[2] - v, x[3] - v, x[4] - v };
+    return ArraySize( x.ndim(), N );
+}
+inline ArraySize operator+( const ArraySize &x, size_t v )
+{
+    size_t N[5] = { x[0] + v, x[1] + v, x[2] + v, x[3] + v, x[4] + v };
+    return ArraySize( x.ndim(), N );
+}
+inline ArraySize operator+( size_t v, const ArraySize &x )
+{
+    size_t N[5] = { x[0] + v, x[1] + v, x[2] + v, x[3] + v, x[4] + v };
+    return ArraySize( x.ndim(), N );
+}
+
+#if defined( USING_ICC )
+ENABLE_WARNINGS
+#endif
+
+
 #endif
--- a/common/FunctionTable.cpp
+++ b/common/FunctionTable.cpp
@@ -0,0 +1,147 @@
+#include "FunctionTable.hpp"
+
+
+/********************************************************
+ *  Random number generation                             *
+ ********************************************************/
+template<> char genRand<char>()
+{
+    static std::random_device rd;
+    static std::mt19937 gen( rd() );
+    static std::uniform_int_distribution<char> dis;
+    return dis( gen );
+}
+template<> int8_t genRand<int8_t>()
+{
+    static std::random_device rd;
+    static std::mt19937 gen( rd() );
+    static std::uniform_int_distribution<int8_t> dis;
+    return dis( gen );
+}
+template<> uint8_t genRand<uint8_t>()
+{
+    static std::random_device rd;
+    static std::mt19937 gen( rd() );
+    static std::uniform_int_distribution<uint8_t> dis;
+    return dis( gen );
+}
+template<> int16_t genRand<int16_t>()
+{
+    static std::random_device rd;
+    static std::mt19937 gen( rd() );
+    static std::uniform_int_distribution<int16_t> dis;
+    return dis( gen );
+}
+template<> uint16_t genRand<uint16_t>()
+{
+    static std::random_device rd;
+    static std::mt19937 gen( rd() );
+    static std::uniform_int_distribution<uint16_t> dis;
+    return dis( gen );
+}
+template<> int32_t genRand<int32_t>()
+{
+    static std::random_device rd;
+    static std::mt19937 gen( rd() );
+    static std::uniform_int_distribution<int32_t> dis;
+    return dis( gen );
+}
+template<> uint32_t genRand<uint32_t>()
+{
+    static std::random_device rd;
+    static std::mt19937 gen( rd() );
+    static std::uniform_int_distribution<uint32_t> dis;
+    return dis( gen );
+}
+template<> int64_t genRand<int64_t>()
+{
+    static std::random_device rd;
+    static std::mt19937 gen( rd() );
+    static std::uniform_int_distribution<int64_t> dis;
+    return dis( gen );
+}
+template<> uint64_t genRand<uint64_t>()
+{
+    static std::random_device rd;
+    static std::mt19937 gen( rd() );
+    static std::uniform_int_distribution<uint64_t> dis;
+    return dis( gen );
+}
+template<> float genRand<float>()
+{
+    static std::random_device rd;
+    static std::mt19937 gen( rd() );
+    static std::uniform_real_distribution<float> dis;
+    return dis( gen );
+}
+template<> double genRand<double>()
+{
+    static std::random_device rd;
+    static std::mt19937 gen( rd() );
+    static std::uniform_real_distribution<double> dis;
+    return dis( gen );
+}
+template<> long double genRand<long double>()
+{
+    static std::random_device rd;
+    static std::mt19937 gen( rd() );
+    static std::uniform_real_distribution<double> dis;
+    return dis( gen );
+}
+
+
+/********************************************************
+ *  axpy                                                 *
+ ********************************************************/
+template<>
+void call_axpy<float>( size_t N, const float alpha, const float *x, float *y )
+{
+    ERROR("Not finished");
+}
+template<>
+void call_axpy<double>( size_t N, const double alpha, const double *x, double *y )
+{
+    ERROR("Not finished");
+}
+
+
+/********************************************************
+ *  Multiply two arrays                                  *
+ ********************************************************/
+template<>
+void call_gemv<double>(
+    size_t M, size_t N, double alpha, double beta, const double *A, const double *x, double *y )
+{
+    ERROR("Not finished");
+}
+template<>
+void call_gemv<float>(
+    size_t M, size_t N, float alpha, float beta, const float *A, const float *x, float *y )
+{
+    ERROR("Not finished");
+}
+template<>
+void call_gemm<double>( size_t M,
+                        size_t N,
+                        size_t K,
+                        double alpha,
+                        double beta,
+                        const double *A,
+                        const double *B,
+                        double *C )
+{
+    ERROR("Not finished");
+}
+template<>
+void call_gemm<float>( size_t M,
+                       size_t N,
+                       size_t K,
+                       float alpha,
+                       float beta,
+                       const float *A,
+                       const float *B,
+                       float *C )
+{
+    ERROR("Not finished");
+}
+
--- a/common/FunctionTable.h
+++ b/common/FunctionTable.h
@@ -7,6 +7,7 @@
 #include <functional>


+
 /*!
 * Class FunctionTable is a serial function table class that defines
 *   a series of operations that can be performed on the Array class.
@@ -25,38 +26,55 @@ public:

    /*!
     * Perform a reduce operator y = f(x)
-     * @param[in] op        The function operation
-     *                      Note: the operator is a template parameter
-     *                      (compared to a std::function to improve performance)
-     * @param[in] A         The array to operate on
-     * @return              The reduction
+     * @param[in] op            The function operation
+     *                          Note: the operator is a template parameter to improve performance
+     * @param[in] A             The array to operate on
+     * @param[in] initialValue  The initial value for the reduction (0 for sum, +/- inf for min/max,
+     * ...)
+     * @return                  The reduction
     */
    template<class TYPE, class FUN, typename LAMBDA>
-    static inline TYPE reduce( LAMBDA &op, const Array<TYPE, FUN> &A );
+    static inline TYPE reduce( LAMBDA &op, const Array<TYPE, FUN> &A, const TYPE &initialValue );
+
+    /*!
+     * Perform a reduce operator z = f(x,y)
+     * @param[in] op            The function operation
+     *                          Note: the operator is a template parameter to improve performance
+     * @param[in] A             The first array to operate on
+     * @param[in] B             The second array to operate on
+     * @param[in] initialValue  The initial value for the reduction (0 for sum, +/- inf for min/max,
+     * ...)
+     * @return                  The reduction
+     */
+    template<class TYPE, class FUN, typename LAMBDA>
+    static inline TYPE reduce( LAMBDA &op,
+                               const Array<TYPE, FUN> &A,
+                               const Array<TYPE, FUN> &B,
+                               const TYPE &initialValue );

    /*!
     * Perform a element-wise operation y = f(x)
-     * @param[in] fun       The function operation
-     *                      Note: the operator is a template parameter
-     *                      (compared to a std::function to improve performance)
-     * @param[in] x         The input array to operate on
-     * @param[out] y        The output array
+     * @param[in] fun           The function operation
+     *                          Note: the function is a template parameter to improve performance
+     * @param[in,out] x         The array to operate on
+     * @param[out] y            The output array
     */
    template<class TYPE, class FUN, typename LAMBDA>
    static inline void transform( LAMBDA &fun, const Array<TYPE, FUN> &x, Array<TYPE, FUN> &y );

    /*!
     * Perform a element-wise operation z = f(x,y)
-     * @param[in] fun       The function operation
-     *                      Note: the operator is a template parameter
-     *                      (compared to a std::function to improve performance)
-     * @param[in] x         The first array
-     * @param[in] y         The second array
-     * @param[out] z        The result
+     * @param[in] fun           The function operation
+     *                          Note: the function is a template parameter to improve performance
+     * @param[in] x             The first array
+     * @param[in] y             The second array
+     * @param[out] z            The output array
     */
    template<class TYPE, class FUN, typename LAMBDA>
-    static inline void transform(
-        LAMBDA &fun, const Array<TYPE, FUN> &x, const Array<TYPE, FUN> &y, Array<TYPE, FUN> &z );
+    static inline void transform( LAMBDA &fun,
+                                  const Array<TYPE, FUN> &x,
+                                  const Array<TYPE, FUN> &y,
+                                  Array<TYPE, FUN> &z );

    /*!
     * Multiply two arrays
@@ -65,8 +83,8 @@ public:
     * @param[out] c            The output array
     */
    template<class TYPE, class FUN>
-    static inline void multiply(
-        const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, Array<TYPE, FUN> &c );
+    static void
+    multiply( const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, Array<TYPE, FUN> &c );

    /*!
     * Perform dgemv/dgemm equavalent operation ( C = alpha*A*B + beta*C )
@@ -74,11 +92,14 @@ public:
     * @param[in] A             The first array
     * @param[in] B             The second array
     * @param[in] beta          The scalar value alpha
-     * @param[in,out] c         The output array C
+     * @param[in,out] C         The output array C
     */
    template<class TYPE, class FUN>
-    static void gemm( const TYPE alpha, const Array<TYPE, FUN> &A, const Array<TYPE, FUN> &B,
-        const TYPE beta, Array<TYPE, FUN> &C );
+    static void gemm( const TYPE alpha,
+                      const Array<TYPE, FUN> &A,
+                      const Array<TYPE, FUN> &B,
+                      const TYPE beta,
+                      Array<TYPE, FUN> &C );

    /*!
     * Perform axpy equavalent operation ( y = alpha*x + y )
@@ -98,9 +119,84 @@ public:
    template<class TYPE, class FUN>
    static bool equals( const Array<TYPE, FUN> &A, const Array<TYPE, FUN> &B, TYPE tol );

+    template<class TYPE>
+    static inline void gemmWrapper( char TRANSA,
+                                    char TRANSB,
+                                    int M,
+                                    int N,
+                                    int K,
+                                    TYPE alpha,
+                                    const TYPE *A,
+                                    int LDA,
+                                    const TYPE *B,
+                                    int LDB,
+                                    TYPE beta,
+                                    TYPE *C,
+                                    int LDC );
+
+
+    /* Specialized Functions */
+
+    /*!
+     * Perform a element-wise operation y = max(x , 0)
+     * @param[in] A             The input array
+     * @param[out] B            The output array
+     */
+    template<class TYPE, class FUN, class ALLOC>
+    static void transformReLU( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B );
+
+    /*!
+     * Perform a element-wise operation B = |A|
+     * @param[in] A             The array to operate on
+     * @param[out] B            The output array
+     */
+    template<class TYPE, class FUN, class ALLOC>
+    static void transformAbs( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B );
+
+    /*!
+     * Perform a element-wise operation B = tanh(A)
+     * @param[in] A             The array to operate on
+     * @param[out] B            The output array
+     */
+    template<class TYPE, class FUN, class ALLOC>
+    static void transformTanh( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B );
+
+    /*!
+     * Perform a element-wise operation B = max(-1 , min(1 , A) )
+     * @param[in] A             The array to operate on
+     * @param[out] B            The output array
+     */
+    template<class TYPE, class FUN, class ALLOC>
+    static void transformHardTanh( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B );
+
+    /*!
+     * Perform a element-wise operation B = 1 / (1 + exp(-A))
+     * @param[in] A             The array to operate on
+     * @param[out] B            The output array
+     */
+    template<class TYPE, class FUN, class ALLOC>
+    static void transformSigmoid( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B );
+
+    /*!
+     * Perform a element-wise operation B = log(exp(A) + 1)
+     * @param[in] A             The array to operate on
+     * @param[out] B            The output array
+     */
+    template<class TYPE, class FUN, class ALLOC>
+    static void transformSoftPlus( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B );
+
+    /*!
+     * Sum the elements of the Array
+     * @param[in] A             The array to sum
+     */
+    template<class TYPE, class FUN, class ALLOC>
+    static TYPE sum( const Array<TYPE, FUN, ALLOC> &A );

 private:
    FunctionTable();
+
+    template<class T>
+    static inline void rand( size_t N, T *x );
 };


--- a/common/FunctionTable.hpp
+++ b/common/FunctionTable.hpp
@@ -2,7 +2,7 @@
 #define included_FunctionTable_hpp

 #include "common/FunctionTable.h"
-#include "common/Utilities.h"
+#include "common/UtilityMacros.h"

 #include <algorithm>
 #include <cstring>
@@ -10,33 +10,16 @@
 #include <random>


+
 /********************************************************
 *  Random number initialization                         *
 ********************************************************/
-template<class TYPE>
-static inline typename std::enable_if<std::is_integral<TYPE>::value>::type genRand(
-    size_t N, TYPE* x )
-{
-    std::random_device rd;
-    std::mt19937 gen( rd() );
-    std::uniform_int_distribution<TYPE> dis;
-    for ( size_t i = 0; i < N; i++ )
-        x[i] = dis( gen );
-}
-template<class TYPE>
-static inline typename std::enable_if<std::is_floating_point<TYPE>::value>::type genRand(
-    size_t N, TYPE* x )
-{
-    std::random_device rd;
-    std::mt19937 gen( rd() );
-    std::uniform_real_distribution<TYPE> dis( 0, 1 );
-    for ( size_t i = 0; i < N; i++ )
-        x[i] = dis( gen );
-}
+template<class TYPE> TYPE genRand();
 template<class TYPE, class FUN>
-inline void FunctionTable::rand( Array<TYPE, FUN>& x )
+inline void FunctionTable::rand( Array<TYPE, FUN> &x )
 {
-    genRand<TYPE>( x.length(), x.data() );
+    for ( size_t i = 0; i < x.length(); i++ )
+        x( i ) = genRand<TYPE>();
 }


@@ -44,24 +27,39 @@ inline void FunctionTable::rand( Array<TYPE, FUN>& x )
 *  Reduction                                            *
 ********************************************************/
 template<class TYPE, class FUN, typename LAMBDA>
-inline TYPE FunctionTable::reduce( LAMBDA& op, const Array<TYPE, FUN>& A )
+inline TYPE FunctionTable::reduce( LAMBDA &op, const Array<TYPE, FUN> &A, const TYPE &initialValue )
 {
    if ( A.length() == 0 )
        return TYPE();
-    const TYPE* x  = A.data();
-    TYPE y         = x[0];
-    const size_t N = A.length();
-    for ( size_t i = 1; i < N; i++ )
+    const TYPE *x = A.data();
+    TYPE y        = initialValue;
+    for ( size_t i = 0; i < A.length(); i++ )
        y = op( x[i], y );
    return y;
 }
+template<class TYPE, class FUN, typename LAMBDA>
+inline TYPE FunctionTable::reduce( LAMBDA &op,
+                                   const Array<TYPE, FUN> &A,
+                                   const Array<TYPE, FUN> &B,
+                                   const TYPE &initialValue )
+{
+    ARRAY_ASSERT( A.length() == B.length() );
+    if ( A.length() == 0 )
+        return TYPE();
+    const TYPE *x = A.data();
+    const TYPE *y = B.data();
+    TYPE z        = initialValue;
+    for ( size_t i = 0; i < A.length(); i++ )
+        z = op( x[i], y[i], z );
+    return z;
+}


 /********************************************************
 *  Unary transformation                                 *
 ********************************************************/
 template<class TYPE, class FUN, typename LAMBDA>
-inline void FunctionTable::transform( LAMBDA& fun, const Array<TYPE, FUN>& x, Array<TYPE, FUN>& y )
+inline void FunctionTable::transform( LAMBDA &fun, const Array<TYPE, FUN> &x, Array<TYPE, FUN> &y )
 {
    y.resize( x.size() );
    const size_t N = x.length();
@@ -69,8 +67,10 @@ inline void FunctionTable::transform( LAMBDA& fun, const Array<TYPE, FUN>& x, Ar
        y( i ) = fun( x( i ) );
 }
 template<class TYPE, class FUN, typename LAMBDA>
-inline void FunctionTable::transform(
-    LAMBDA& fun, const Array<TYPE, FUN>& x, const Array<TYPE, FUN>& y, Array<TYPE, FUN>& z )
+inline void FunctionTable::transform( LAMBDA &fun,
+                                      const Array<TYPE, FUN> &x,
+                                      const Array<TYPE, FUN> &y,
+                                      Array<TYPE, FUN> &z )
 {
    if ( x.size() != y.size() )
        throw std::logic_error( "Sizes of x and y do not match" );
@@ -85,25 +85,19 @@ inline void FunctionTable::transform(
 *  axpy                                                 *
 ********************************************************/
 template<class TYPE>
-inline void call_axpy( size_t N, const TYPE alpha, const TYPE* x, TYPE* y );
+void call_axpy( size_t N, const TYPE alpha, const TYPE *x, TYPE *y );
 template<>
-inline void call_axpy<float>( size_t, const float, const float*, float* )
-{
-    throw std::logic_error( "LapackWrappers not configured" );
-}
+void call_axpy<float>( size_t N, const float alpha, const float *x, float *y );
 template<>
-inline void call_axpy<double>( size_t, const double, const double*, double* )
-{
-    throw std::logic_error( "LapackWrappers not configured" );
-}
+void call_axpy<double>( size_t N, const double alpha, const double *x, double *y );
 template<class TYPE>
-inline void call_axpy( size_t N, const TYPE alpha, const TYPE* x, TYPE* y )
+void call_axpy( size_t N, const TYPE alpha, const TYPE *x, TYPE *y )
 {
    for ( size_t i = 0; i < N; i++ )
        y[i] += alpha * x[i];
 }
 template<class TYPE, class FUN>
-void FunctionTable::axpy( const TYPE alpha, const Array<TYPE, FUN>& x, Array<TYPE, FUN>& y )
+void FunctionTable::axpy( const TYPE alpha, const Array<TYPE, FUN> &x, Array<TYPE, FUN> &y )
 {
    if ( x.size() != y.size() )
        throw std::logic_error( "Array sizes do not match" );
@@ -115,21 +109,15 @@ void FunctionTable::axpy( const TYPE alpha, const Array<TYPE, FUN>& x, Array<TYP
 *  Multiply two arrays                                  *
 ********************************************************/
 template<class TYPE>
-inline void call_gemv( size_t M, size_t N, TYPE alpha, TYPE beta, const TYPE* A, const TYPE* x, TYPE* y );
+void call_gemv( size_t M, size_t N, TYPE alpha, TYPE beta, const TYPE *A, const TYPE *x, TYPE *y );
 template<>
-inline void call_gemv<double>(
-    size_t, size_t, double, double, const double*, const double*, double* )
-{
-    throw std::logic_error( "LapackWrappers not configured" );
-}
+void call_gemv<double>(
+    size_t M, size_t N, double alpha, double beta, const double *A, const double *x, double *y );
 template<>
-inline void call_gemv<float>( size_t, size_t, float, float, const float*, const float*, float* )
-{
-    throw std::logic_error( "LapackWrappers not configured" );
-}
+void call_gemv<float>(
+    size_t M, size_t N, float alpha, float beta, const float *A, const float *x, float *y );
 template<class TYPE>
-inline void call_gemv(
-    size_t M, size_t N, TYPE alpha, TYPE beta, const TYPE* A, const TYPE* x, TYPE* y )
+void call_gemv( size_t M, size_t N, TYPE alpha, TYPE beta, const TYPE *A, const TYPE *x, TYPE *y )
 {
    for ( size_t i = 0; i < M; i++ )
        y[i] = beta * y[i];
@@ -139,21 +127,29 @@ inline void call_gemv(
    }
 }
 template<class TYPE>
-inline void call_gemm(
-    size_t M, size_t N, size_t K, TYPE alpha, TYPE beta, const TYPE* A, const TYPE* B, TYPE* C );
+void call_gemm(
+    size_t M, size_t N, size_t K, TYPE alpha, TYPE beta, const TYPE *A, const TYPE *B, TYPE *C );
 template<>
-inline void call_gemm<double>( size_t, size_t, size_t, double, double, const double*, const double*, double* )
-{
-    throw std::logic_error( "LapackWrappers not configured" );
-}
+void call_gemm<double>( size_t M,
+                        size_t N,
+                        size_t K,
+                        double alpha,
+                        double beta,
+                        const double *A,
+                        const double *B,
+                        double *C );
 template<>
-inline void call_gemm<float>( size_t, size_t, size_t, float, float, const float*, const float*, float* )
-{
-    throw std::logic_error( "LapackWrappers not configured" );
-}
+void call_gemm<float>( size_t M,
+                       size_t N,
+                       size_t K,
+                       float alpha,
+                       float beta,
+                       const float *A,
+                       const float *B,
+                       float *C );
 template<class TYPE>
-inline void call_gemm(
-    size_t M, size_t N, size_t K, TYPE alpha, TYPE beta, const TYPE* A, const TYPE* B, TYPE* C )
+void call_gemm(
+    size_t M, size_t N, size_t K, TYPE alpha, TYPE beta, const TYPE *A, const TYPE *B, TYPE *C )
 {
    for ( size_t i = 0; i < K * M; i++ )
        C[i] = beta * C[i];
@@ -165,16 +161,17 @@ inline void call_gemm(
    }
 }
 template<class TYPE, class FUN>
-void FunctionTable::gemm( const TYPE alpha, const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b,
-    const TYPE beta, Array<TYPE, FUN>& c )
+void FunctionTable::gemm( const TYPE alpha,
+                          const Array<TYPE, FUN> &a,
+                          const Array<TYPE, FUN> &b,
+                          const TYPE beta,
+                          Array<TYPE, FUN> &c )
 {
+    if ( a.size( 1 ) != b.size( 0 ) )
+        throw std::logic_error( "Inner dimensions must match" );
    if ( a.ndim() == 2 && b.ndim() == 1 ) {
-        if ( a.size( 1 ) != b.size( 0 ) )
-            throw std::logic_error( "Inner dimensions must match" );
        call_gemv<TYPE>( a.size( 0 ), a.size( 1 ), alpha, beta, a.data(), b.data(), c.data() );
    } else if ( a.ndim() <= 2 && b.ndim() <= 2 ) {
-        if ( a.size( 1 ) != b.size( 0 ) )
-            throw std::logic_error( "Inner dimensions must match" );
        call_gemm<TYPE>(
            a.size( 0 ), a.size( 1 ), b.size( 1 ), alpha, beta, a.data(), b.data(), c.data() );
    } else {
@@ -182,17 +179,16 @@ void FunctionTable::gemm( const TYPE alpha, const Array<TYPE, FUN>& a, const Arr
    }
 }
 template<class TYPE, class FUN>
-void FunctionTable::multiply(
-    const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, Array<TYPE, FUN>& c )
+void FunctionTable::multiply( const Array<TYPE, FUN> &a,
+                              const Array<TYPE, FUN> &b,
+                              Array<TYPE, FUN> &c )
 {
+    if ( a.size( 1 ) != b.size( 0 ) )
+        throw std::logic_error( "Inner dimensions must match" );
    if ( a.ndim() == 2 && b.ndim() == 1 ) {
-        if ( a.size( 1 ) != b.size( 0 ) )
-            throw std::logic_error( "Inner dimensions must match" );
        c.resize( a.size( 0 ) );
        call_gemv<TYPE>( a.size( 0 ), a.size( 1 ), 1, 0, a.data(), b.data(), c.data() );
    } else if ( a.ndim() <= 2 && b.ndim() <= 2 ) {
-        if ( a.size( 1 ) != b.size( 0 ) )
-            throw std::logic_error( "Inner dimensions must match" );
        c.resize( a.size( 0 ), b.size( 1 ) );
        call_gemm<TYPE>(
            a.size( 0 ), a.size( 1 ), b.size( 1 ), 1, 0, a.data(), b.data(), c.data() );
@@ -206,8 +202,8 @@ void FunctionTable::multiply(
 *  Check if two arrays are equal                        *
 ********************************************************/
 template<class TYPE, class FUN>
-inline typename std::enable_if<!std::is_floating_point<TYPE>::value, bool>::type
-FunctionTableCompare( const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, TYPE )
+inline typename std::enable_if<std::is_integral<TYPE>::value, bool>::type
+FunctionTableCompare( const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, TYPE )
 {
    bool pass = true;
    if ( a.size() != b.size() )
@@ -218,7 +214,7 @@ FunctionTableCompare( const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, TYPE
 }
 template<class TYPE, class FUN>
 inline typename std::enable_if<std::is_floating_point<TYPE>::value, bool>::type
-FunctionTableCompare( const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, TYPE tol )
+FunctionTableCompare( const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, TYPE tol )
 {
    bool pass = true;
    if ( a.size() != b.size() )
@@ -228,10 +224,89 @@ FunctionTableCompare( const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, TYPE
    return pass;
 }
 template<class TYPE, class FUN>
-bool FunctionTable::equals( const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, TYPE tol )
+bool FunctionTable::equals( const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, TYPE tol )
 {
    return FunctionTableCompare( a, b, tol );
 }


+/********************************************************
+ *  Specialized Functions                                *
+ ********************************************************/
+template<class TYPE, class FUN, class ALLOC>
+void FunctionTable::transformReLU( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B )
+{
+    const auto &fun = []( const TYPE &a ) { return std::max( a, static_cast<TYPE>( 0 ) ); };
+    transform( fun, A, B );
+}
+
+template<class TYPE, class FUN, class ALLOC>
+void FunctionTable::transformAbs( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B )
+{
+    B.resize( A.size() );
+    const auto &fun = []( const TYPE &a ) { return std::abs( a ); };
+    transform( fun, A, B );
+}
+template<class TYPE, class FUN, class ALLOC>
+void FunctionTable::transformTanh( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B )
+{
+    B.resize( A.size() );
+    const auto &fun = []( const TYPE &a ) { return tanh( a ); };
+    transform( fun, A, B );
+}
+
+template<class TYPE, class FUN, class ALLOC>
+void FunctionTable::transformHardTanh( const Array<TYPE, FUN, ALLOC> &A,
+                                       Array<TYPE, FUN, ALLOC> &B )
+{
+    B.resize( A.size() );
+    const auto &fun = []( const TYPE &a ) {
+        return std::max( -static_cast<TYPE>( 1.0 ), std::min( static_cast<TYPE>( 1.0 ), a ) );
+    };
+    transform( fun, A, B );
+}
+
+template<class TYPE, class FUN, class ALLOC>
+void FunctionTable::transformSigmoid( const Array<TYPE, FUN, ALLOC> &A, Array<TYPE, FUN, ALLOC> &B )
+{
+    B.resize( A.size() );
+    const auto &fun = []( const TYPE &a ) { return 1.0 / ( 1.0 + exp( -a ) ); };
+    transform( fun, A, B );
+}
+
+template<class TYPE, class FUN, class ALLOC>
+void FunctionTable::transformSoftPlus( const Array<TYPE, FUN, ALLOC> &A,
+                                       Array<TYPE, FUN, ALLOC> &B )
+{
+    B.resize( A.size() );
+    const auto &fun = []( const TYPE &a ) { return log1p( exp( a ) ); };
+    transform( fun, A, B );
+}
+
+template<class TYPE, class FUN, class ALLOC>
+TYPE FunctionTable::sum( const Array<TYPE, FUN, ALLOC> &A )
+{
+    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a + b; };
+    return reduce( fun, A, (TYPE) 0 );
+}
+
+template<class TYPE>
+inline void FunctionTable::gemmWrapper( char TRANSA,
+                                        char TRANSB,
+                                        int M,
+                                        int N,
+                                        int K,
+                                        TYPE alpha,
+                                        const TYPE *A,
+                                        int LDA,
+                                        const TYPE *B,
+                                        int LDB,
+                                        TYPE beta,
+                                        TYPE *C,
+                                        int LDC )
+{
+    ERROR("Not finished");
+}
+
+
 #endif