diff --git a/CMakeLists.txt b/CMakeLists.txt
index de984c16..579043c6 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -35,10 +35,6 @@ IF ( NOT CMAKE_CXX_STANDARD )
 ENDIF()
 
 
-# Disable Link Time Optimization until we have a chance to test
-SET( DISABLE_LTO )
-
-
 # Set source/install paths
 SET( ${PROJ}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
 SET( ${PROJ}_BUILD_DIR  "${CMAKE_CURRENT_BINARY_DIR}" )
diff --git a/analysis/Minkowski.cpp b/analysis/Minkowski.cpp
index a58a87d7..650d30dc 100644
--- a/analysis/Minkowski.cpp
+++ b/analysis/Minkowski.cpp
@@ -11,6 +11,9 @@
 #include "IO/Reader.h"
 #include "IO/Writer.h"
 
+#include "ProfilerApp.h"
+
+
 #define PI 3.14159265359
 
 // Constructor
@@ -55,11 +58,11 @@ double Minkowski::X(){
 	return Xi_global;
 }
 
-void Minkowski::ComputeScalar(const DoubleArray Field, const double isovalue)
+void Minkowski::ComputeScalar(const DoubleArray& Field, const double isovalue)
 {
+    PROFILE_START("ComputeScalar");
 	Xi = Ji = Ai = 0.0;
 	DECL object;
-	Point P1,P2,P3;
 	int e1,e2,e3;
 	double s,s1,s2,s3;
 	double a1,a2,a3;
@@ -75,13 +78,13 @@ void Minkowski::ComputeScalar(const DoubleArray Field, const double isovalue)
 					e1 = object.Face(idx); 
 					e2 = object.halfedge.next(e1);
 					e3 = object.halfedge.next(e2);
-					P1 = object.vertex.coords(object.halfedge.v1(e1));
-					P2 = object.vertex.coords(object.halfedge.v1(e2));
-					P3 = object.vertex.coords(object.halfedge.v1(e3));
+					auto P1 = object.vertex.coords(object.halfedge.v1(e1));
+					auto P2 = object.vertex.coords(object.halfedge.v1(e2));
+					auto P3 = object.vertex.coords(object.halfedge.v1(e3));
 					// Surface area
-					s1 = sqrt((P1.x-P2.x)*(P1.x-P2.x)+(P1.y-P2.y)*(P1.y-P2.y)+(P1.z-P2.z)*(P1.z-P2.z));
-					s2 = sqrt((P2.x-P3.x)*(P2.x-P3.x)+(P2.y-P3.y)*(P2.y-P3.y)+(P2.z-P3.z)*(P2.z-P3.z));
-					s3 = sqrt((P1.x-P3.x)*(P1.x-P3.x)+(P1.y-P3.y)*(P1.y-P3.y)+(P1.z-P3.z)*(P1.z-P3.z));
+					s1 = Distance( P1, P2 );
+					s2 = Distance( P2, P3 );
+					s3 = Distance( P1, P3 );
 					s = 0.5*(s1+s2+s3);
 					Ai += sqrt(s*(s-s1)*(s-s2)*(s-s3));
 					// Mean curvature based on half edge angle
@@ -122,7 +125,7 @@ void Minkowski::ComputeScalar(const DoubleArray Field, const double isovalue)
 	MPI_Allreduce(&Ai,&Ai_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
 	MPI_Allreduce(&Ji,&Ji_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
 	MPI_Barrier(Dm->Comm);
-
+    PROFILE_STOP("ComputeScalar");
 }
 
 void Minkowski::PrintAll()
diff --git a/analysis/Minkowski.h b/analysis/Minkowski.h
index 7c1e138d..7527d62f 100644
--- a/analysis/Minkowski.h
+++ b/analysis/Minkowski.h
@@ -48,7 +48,7 @@ public:
 	//...........................................................................
 	Minkowski(std::shared_ptr <Domain> Dm);
 	~Minkowski();
-	void ComputeScalar(const DoubleArray Field, const double isovalue);
+	void ComputeScalar(const DoubleArray& Field, const double isovalue);
 	void PrintAll();
 
 };
diff --git a/analysis/analysis.cpp b/analysis/analysis.cpp
index 35675a34..7587f3c5 100644
--- a/analysis/analysis.cpp
+++ b/analysis/analysis.cpp
@@ -1,5 +1,7 @@
 #include "analysis/analysis.h"
 #include "ProfilerApp.h"
+
+#include <algorithm>
 #include <iostream>
 
 
diff --git a/analysis/dcel.cpp b/analysis/dcel.cpp
index b1530180..8267f09f 100644
--- a/analysis/dcel.cpp
+++ b/analysis/dcel.cpp
@@ -1,78 +1,5 @@
 #include "analysis/dcel.h"
 
-/* 
-Double connected edge list (DECL) 
-*/
-
-Vertex::Vertex(){
-	size_ = 0;
-	vertex_data.resize(36);
-}
-
-Vertex::~Vertex(){
-
-}
-
-void Vertex::add(Point P){
-	vertex_data.push_back(P.x);
-	vertex_data.push_back(P.y);
-	vertex_data.push_back(P.z);
-	size_++;
-}
-
-void Vertex::assign(int idx, Point P){
-	vertex_data[3*idx] = P.x;
-	vertex_data[3*idx+1] = P.y;
-	vertex_data[3*idx+2] = P.z;
-}
-
-int Vertex::size(){
-	return size_;
-}
-
-Point Vertex::coords(int idx){
-	Point P;
-	P.x = vertex_data[3*idx];
-	P.y = vertex_data[3*idx+1];
-	P.z = vertex_data[3*idx+2];
-	return P;
-}
-
-
-Halfedge::Halfedge(){
-	size_=0;
-}
-
-Halfedge::~Halfedge(){
-	
-}
-int Halfedge::v1(int edge){
-	return data(0,edge);
-}
-
-int Halfedge::v2(int edge){
-	return data(1,edge);
-}
-
-int Halfedge::face(int edge){
-	return data(2,edge);
-}
-
-int Halfedge::twin(int edge){
-	return data(3,edge);
-}
-
-int Halfedge::prev(int edge){
-	return data(4,edge);
-}
-
-int Halfedge::next(int edge){
-	return data(5,edge);
-}
-
-int Halfedge::size(){
-	return size_;
-}
 
 
 DECL::DECL(){
@@ -85,24 +12,20 @@ DECL::~DECL(){
 }
 
 int DECL::Face(int index){
-	return FaceData(index);
+	return FaceData[index];
 }
 
-void DECL::LocalIsosurface(const DoubleArray A, double value, const int i, const int j, const int k){
+void DECL::LocalIsosurface(const DoubleArray& A, double value, const int i, const int j, const int k){
 	Point P,Q;
 	Point PlaceHolder;
 	Point C0,C1,C2,C3,C4,C5,C6,C7;
 
-	int CubeIndex;
-	int nTris = 0;
-	int nVert =0;
-
 	Point VertexList[12];
 	Point NewVertexList[12];
 	int LocalRemap[12];
 
-	DTMutableList<Point> cellvertices = DTMutableList<Point>(20);
-	IntArray Triangles = IntArray(3,20);
+	Point cellvertices[20];
+    std::array<std::array<int,3>,20> Triangles;
 
 	// Values from array 'A' at the cube corners
 	double CubeValues[8];
@@ -129,7 +52,7 @@ void DECL::LocalIsosurface(const DoubleArray A, double value, const int i, const
 	
 	//Determine the index into the edge table which
 	//tells us which vertices are inside of the surface
-	CubeIndex = 0;
+	int CubeIndex = 0;
 	if (CubeValues[0] < 0.0f) CubeIndex |= 1;
 	if (CubeValues[1] < 0.0f) CubeIndex |= 2;
 	if (CubeValues[2] < 0.0f) CubeIndex |= 4;
@@ -222,31 +145,30 @@ void DECL::LocalIsosurface(const DoubleArray A, double value, const int i, const
 		//P.x  += i;
 		//P.y  += j;
 		//P.z  += k;
-		cellvertices(idx) = P;
+		cellvertices[idx] = P;
 	}
-	nVert = VertexCount;
 
 	TriangleCount = 0;
 	for (int idx=0;triTable[CubeIndex][idx]!=-1;idx+=3) {
-		Triangles(0,TriangleCount) = LocalRemap[triTable[CubeIndex][idx+0]];
-		Triangles(1,TriangleCount) = LocalRemap[triTable[CubeIndex][idx+1]];
-		Triangles(2,TriangleCount) = LocalRemap[triTable[CubeIndex][idx+2]];
+		Triangles[TriangleCount][0] = LocalRemap[triTable[CubeIndex][idx+0]];
+		Triangles[TriangleCount][1] = LocalRemap[triTable[CubeIndex][idx+1]];
+		Triangles[TriangleCount][2] = LocalRemap[triTable[CubeIndex][idx+2]];
 		TriangleCount++;
 	}
-	nTris = TriangleCount;
+	int nTris = TriangleCount;
 
 	// Now add the local values to the DECL data structure
 	if (nTris>0){
 		FaceData.resize(TriangleCount);
 	    //printf("Construct halfedge structure... \n");
 	    //printf("   Construct %i triangles \n",nTris);
-		halfedge.data.resize(6,nTris*3);
+		halfedge.resize(nTris*3);
 		int idx_edge=0;
 		for (int idx=0; idx<TriangleCount; idx++){
-			int V1 = Triangles(0,idx);
-			int V2 = Triangles(1,idx);
-			int V3 = Triangles(2,idx);
-			FaceData(idx) = idx_edge;
+			int V1 = Triangles[idx][0];
+			int V2 = Triangles[idx][1];
+			int V3 = Triangles[idx][2];
+			FaceData[idx] = idx_edge;
 			// first edge: V1->V2 
 			halfedge.data(0,idx_edge) = V1;  // first vertex
 			halfedge.data(1,idx_edge) = V2;  // second vertex
@@ -290,8 +212,8 @@ void DECL::LocalIsosurface(const DoubleArray A, double value, const int i, const
 				}
 			}	
 			// Use "ghost" twins if edge is on a cube face
-			P = cellvertices(V1);
-			Q = cellvertices(V2);
+			P = cellvertices[V1];
+			Q = cellvertices[V2];
 			if (P.x == 0.0 && Q.x == 0.0) halfedge.data(3,idx) = -1;  // ghost twin for x=0 face
 			if (P.x == 1.0 && Q.x == 1.0) halfedge.data(3,idx) = -4;  // ghost twin for x=1 face
 			if (P.y == 0.0 && Q.y == 0.0) halfedge.data(3,idx) = -2;  // ghost twin for y=0 face
@@ -303,7 +225,7 @@ void DECL::LocalIsosurface(const DoubleArray A, double value, const int i, const
 
 	// Map vertices to global coordinates
 	for (int idx=0;idx<VertexCount;idx++) {
-		P = cellvertices(idx);
+		P = cellvertices[idx];
 		P.x  += i;
 		P.y  += j;
 		P.z  += k;
@@ -358,8 +280,7 @@ Point DECL::TriNormal(int edge)
 double DECL::EdgeAngle(int edge)
 {
 	double angle;
-	double nx,ny,nz;
-	double dotprod,length,hypotenuse;
+	double dotprod;
 	Point P,Q,R; // triangle vertices
 	Point U,V,W; // normal vectors
 	int e2 = halfedge.next(edge);
@@ -372,14 +293,14 @@ double DECL::EdgeAngle(int edge)
 	if (halfedge.twin(edge) < 0 ){
 		// compute edge normal in plane of cube face
 		W = P - Q; // edge tangent vector
-		length = sqrt(W.x*W.x+W.y*W.y+W.z*W.z);
+		double length = sqrt(W.x*W.x+W.y*W.y+W.z*W.z);
 		W.x /= length;
 		W.y /= length;
 		W.z /= length;
 		// edge normal within the plane of the cube face
-		nx = W.y*V.z - W.z*V.y;
-		ny = W.z*V.x - W.x*V.z;
-		nz = W.x*V.y - W.y*V.x;
+		double nx = W.y*V.z - W.z*V.y;
+		double ny = W.z*V.x - W.x*V.z;
+		double nz = W.x*V.y - W.y*V.x;
 		length = sqrt(nx*nx+ny*ny+nz*nz);
 		// new value for V is this normal vector
 		V.x = nx/length; V.y = ny/length; V.z = nz/length;
@@ -431,20 +352,19 @@ void Isosurface(DoubleArray &A, const double &v)
 {
 	Point P,Q;
 	Point PlaceHolder;
-	double temp;
 	Point C0,C1,C2,C3,C4,C5,C6,C7;
 
 	int TriangleCount;
 	int VertexCount;
 	int CubeIndex;
-	int nTris, nVert;
 
 	Point VertexList[12];
 	Point NewVertexList[12];
 	int LocalRemap[12];
 
-	DTMutableList<Point> cellvertices = DTMutableList<Point>(20);
-	IntArray Triangles = IntArray(3,20);
+	Point cellvertices[20];
+    std::array<std::array<int,3>,20> Triangles;
+    Triangles.fill( { 0 } );
 
 	// Values from array 'A' at the cube corners
 	double CubeValues[8];
@@ -463,6 +383,7 @@ void Isosurface(DoubleArray &A, const double &v)
 	C6.x = 1.0; C6.y = 1.0; C6.z = 1.0;
 	C7.x = 0.0; C7.y = 1.0; C7.z = 1.0;							
 
+	std::vector<std::array<int,6>> HalfEdge;
 	for (int k=1; k<Nz-1; k++){		
 		for (int j=1; j<Ny-1; j++){
 			for (int i=1; i<Nx-1; i++){
@@ -569,83 +490,81 @@ void Isosurface(DoubleArray &A, const double &v)
 					//P.x  += i;
 					//P.y  += j;
 					//P.z  += k;
-					cellvertices(idx) = P;
+					cellvertices[idx] = P;
 				}
-				nVert = VertexCount;
 
 				TriangleCount = 0;
 				for (int idx=0;triTable[CubeIndex][idx]!=-1;idx+=3) {
-					Triangles(0,TriangleCount) = LocalRemap[triTable[CubeIndex][idx+0]];
-					Triangles(1,TriangleCount) = LocalRemap[triTable[CubeIndex][idx+1]];
-					Triangles(2,TriangleCount) = LocalRemap[triTable[CubeIndex][idx+2]];
+					Triangles[TriangleCount][0] = LocalRemap[triTable[CubeIndex][idx+0]];
+					Triangles[TriangleCount][1] = LocalRemap[triTable[CubeIndex][idx+1]];
+					Triangles[TriangleCount][2] = LocalRemap[triTable[CubeIndex][idx+2]];
 					TriangleCount++;
 				}
-				nTris = TriangleCount;
+				int nTris = TriangleCount;
 
 				// Now add the local values to the DECL data structure
-				IntArray HalfEdge(6,nTris*3);
-				DoubleArray EdgeAngles(nTris*3);
+				HalfEdge.resize(nTris*3);
 				int idx_edge=0;
 				for (int idx=0; idx<TriangleCount; idx++){
-					int V1 = Triangles(0,idx);
-					int V2 = Triangles(1,idx);
-					int V3 = Triangles(2,idx);
+					int V1 = Triangles[idx][0];
+					int V2 = Triangles[idx][1];
+					int V3 = Triangles[idx][2];
 					// first edge: V1->V2 
-					HalfEdge(0,idx_edge) = V1;  // first vertex
-					HalfEdge(1,idx_edge) = V2;  // second vertex
-					HalfEdge(2,idx_edge) = idx; // triangle
-					HalfEdge(3,idx_edge) = -1;  // twin
-					HalfEdge(4,idx_edge) = idx_edge+2;  // previous edge
-					HalfEdge(5,idx_edge) = idx_edge+1;  // next edge
+					HalfEdge[idx_edge][0] = V1;  // first vertex
+					HalfEdge[idx_edge][1] = V2;  // second vertex
+					HalfEdge[idx_edge][2] = idx; // triangle
+					HalfEdge[idx_edge][3] = -1;  // twin
+					HalfEdge[idx_edge][4] = idx_edge+2;  // previous edge
+					HalfEdge[idx_edge][5] = idx_edge+1;  // next edge
 					idx_edge++;
 					// second edge: V2->V3
-					HalfEdge(0,idx_edge) = V2;  // first vertex
-					HalfEdge(1,idx_edge) = V3;  // second vertex
-					HalfEdge(2,idx_edge) = idx; // triangle
-					HalfEdge(3,idx_edge) = -1;  // twin
-					HalfEdge(4,idx_edge) = idx_edge-1;  // previous edge
-					HalfEdge(5,idx_edge) = idx_edge+1;  // next edge
+					HalfEdge[idx_edge][0] = V2;  // first vertex
+					HalfEdge[idx_edge][1] = V3;  // second vertex
+					HalfEdge[idx_edge][2] = idx; // triangle
+					HalfEdge[idx_edge][3] = -1;  // twin
+					HalfEdge[idx_edge][4] = idx_edge-1;  // previous edge
+					HalfEdge[idx_edge][5] = idx_edge+1;  // next edge
 					idx_edge++;
 					// third edge: V3->V1
-					HalfEdge(0,idx_edge) = V3;  // first vertex
-					HalfEdge(1,idx_edge) = V1;  // second vertex
-					HalfEdge(2,idx_edge) = idx; // triangle
-					HalfEdge(3,idx_edge) = -1;  // twin
-					HalfEdge(4,idx_edge) = idx_edge-1;  // previous edge
-					HalfEdge(5,idx_edge) = idx_edge-2;  // next edge
+					HalfEdge[idx_edge][0] = V3;  // first vertex
+					HalfEdge[idx_edge][1] = V1;  // second vertex
+					HalfEdge[idx_edge][2] = idx; // triangle
+					HalfEdge[idx_edge][3] = -1;  // twin
+					HalfEdge[idx_edge][4] = idx_edge-1;  // previous edge
+					HalfEdge[idx_edge][5] = idx_edge-2;  // next edge
 					idx_edge++;
 				}
 				int EdgeCount=idx_edge;
 				for (int idx=0; idx<EdgeCount; idx++){
-					int V1=HalfEdge(0,idx);
-					int V2=HalfEdge(1,idx);
+					int V1=HalfEdge[idx][0];
+					int V2=HalfEdge[idx][1];
 					// Find all the twins within the cube
 					for (int jdx=0; idx<EdgeCount; jdx++){
-						if (HalfEdge(1,jdx) == V1 && HalfEdge(0,jdx) == V2){
+						if (HalfEdge[jdx][1] == V1 && HalfEdge[jdx][0] == V2){
 							// this is the pair
-							HalfEdge(3,idx) = jdx;
-							HalfEdge(3,jdx) = idx;
+							HalfEdge[idx][3] = jdx;
+							HalfEdge[jdx][3] = idx;
 						}
-						if (HalfEdge(1,jdx) == V2 && HalfEdge(0,jdx) == V1 && !(idx==jdx)){
+						if (HalfEdge[jdx][1] == V2 && HalfEdge[jdx][0] == V1 && !(idx==jdx)){
 							std::printf("WARNING: half edges with identical orientation! \n");
 						}
 					}	
 					// Use "ghost" twins if edge is on a cube face
-					P = cellvertices(V1);
-					Q = cellvertices(V2);
-					if (P.x == 0.0 && Q.x == 0.0) HalfEdge(3,idx_edge) = -1;  // ghost twin for x=0 face
-					if (P.x == 1.0 && Q.x == 1.0) HalfEdge(3,idx_edge) = -2;  // ghost twin for x=1 face
-					if (P.y == 0.0 && Q.y == 0.0) HalfEdge(3,idx_edge) = -3;  // ghost twin for y=0 face
-					if (P.y == 1.0 && Q.y == 1.0) HalfEdge(3,idx_edge) = -4;  // ghost twin for y=1 face
-					if (P.z == 0.0 && Q.z == 0.0) HalfEdge(3,idx_edge) = -5;  // ghost twin for z=0 face
-					if (P.z == 1.0 && Q.z == 1.0) HalfEdge(3,idx_edge) = -6;  // ghost twin for z=1 face
+					P = cellvertices[V1];
+					Q = cellvertices[V2];
+					if (P.x == 0.0 && Q.x == 0.0) HalfEdge[idx_edge][3] = -1;  // ghost twin for x=0 face
+					if (P.x == 1.0 && Q.x == 1.0) HalfEdge[idx_edge][3] = -2;  // ghost twin for x=1 face
+					if (P.y == 0.0 && Q.y == 0.0) HalfEdge[idx_edge][3] = -3;  // ghost twin for y=0 face
+					if (P.y == 1.0 && Q.y == 1.0) HalfEdge[idx_edge][3] = -4;  // ghost twin for y=1 face
+					if (P.z == 0.0 && Q.z == 0.0) HalfEdge[idx_edge][3] = -5;  // ghost twin for z=0 face
+					if (P.z == 1.0 && Q.z == 1.0) HalfEdge[idx_edge][3] = -6;  // ghost twin for z=1 face
 				}
 				// Find all the angles
 				for (int idx=0; idx<EdgeCount; idx++){
-					int V1=HalfEdge(0,idx);
-					int V2=HalfEdge(1,idx);
-					int T1= HalfEdge(2,idx_edge);
-					int twin=HalfEdge(3,idx_edge); 
+					int V1=HalfEdge[idx][0];
+					int V2=HalfEdge[idx][1];
+					int T1= HalfEdge[idx_edge][2];
+					int twin=HalfEdge[idx_edge][3];
 					if (twin == -1){
 						
 					}
@@ -653,11 +572,11 @@ void Isosurface(DoubleArray &A, const double &v)
 
 				// Map vertices to global coordinates
 				for (int idx=0;idx<VertexCount;idx++) {
-					P = cellvertices(idx);
+					P = cellvertices[idx];
 					P.x  += i;
 					P.y  += j;
 					P.z  += k;
-					cellvertices(idx) = P;
+					cellvertices[idx] = P;
 				}
 			}
 		}
diff --git a/analysis/dcel.h b/analysis/dcel.h
index 992ba8de..d8cbb1f7 100644
--- a/analysis/dcel.h
+++ b/analysis/dcel.h
@@ -8,36 +8,53 @@ Doubly-connected edge list (DECL)
 // Vertex structure
 class Vertex{
 public:
-	Vertex();
-	~Vertex();
-	void add(Point P);
-	void assign( int idx, Point P);
-	int size();
-	Point coords(int idx);
+	Vertex() { d_data.resize(12); }
+	~Vertex() = default;
+    Vertex( const Vertex& ) = delete;
+    Vertex operator=( const Vertex& ) = delete;
+
+    // Add/assign a point
+	inline void add( const Point& P ) { d_data.push_back( P ); }
+	inline void assign( int idx, const Point& P ) { d_data[idx] = P; }
+
+    // Get a point
+	inline Point& coords( int idx ) { return d_data[idx]; }
+	inline const Point& coords( int idx ) const { return d_data[idx]; }
+
 	int IncidentEdge();
+
+    // Return the number of points
+	inline int size() const { return d_data.size(); }
+
 private:
-	std::vector<double> vertex_data;
-	int size_;
+	std::vector<Point> d_data;
 };
 
+
 // Halfedge structure
 // Face
 class Halfedge{
 public:
-	Halfedge();
-	~Halfedge();
+	Halfedge() = default;
+	~Halfedge() = default;
+    Halfedge( const Halfedge& ) = delete;
+    Halfedge operator=( const Halfedge& ) = delete;
 
-	int v1(int edge);
-	int v2(int edge);
-	int twin(int edge);
-	int face(int edge);
-	int next(int edge);
-	int prev(int edge);
-	int size();
+	inline int v1(int edge) const { return d_data[edge][0]; }
+	inline int v2(int edge) const { return d_data[edge][1]; }
+	inline int face(int edge) const { return d_data[edge][2]; }
+	inline int twin(int edge) const { return d_data[edge][3]; }
+	inline int prev(int edge) const { return d_data[edge][4]; }
+	inline int next(int edge) const { return d_data[edge][5]; }
+
+	inline int size() const { return d_data.size(); }
+    inline void resize( int N ) { d_data.resize( N ); }
+
+    inline int& data( int i, int j ) { return d_data[j][i]; }
+    inline const int& data( int i, int j ) const { return d_data[j][i]; }
 
-	Array<int> data;
 private:
-	int size_;
+	std::vector<std::array<int,6>> d_data;
 };
 
 // DECL
@@ -49,15 +66,15 @@ public:
 	int face();
 	Vertex vertex;
 	Halfedge halfedge;
-	void LocalIsosurface(const DoubleArray A, double value, int i, int j, int k);
+	void LocalIsosurface(const DoubleArray& A, double value, int i, int j, int k);
 	int Face(int index);
 	
 	double origin(int edge);
 	double EdgeAngle(int edge);
 	Point TriNormal(int edge);
 	int TriangleCount;
-	int VertexCount;	
+	int VertexCount;
 
 private:
-	Array <int> FaceData;
+	std::vector<int> FaceData;
 };
diff --git a/analysis/distance.cpp b/analysis/distance.cpp
index 8bd10389..e297b435 100644
--- a/analysis/distance.cpp
+++ b/analysis/distance.cpp
@@ -1,6 +1,7 @@
 #include "analysis/distance.h"
 
 
+
 /******************************************************************
 * A fast distance calculation                                     *
 ******************************************************************/
diff --git a/analysis/distance.h b/analysis/distance.h
index b84a93f8..b3fc870e 100644
--- a/analysis/distance.h
+++ b/analysis/distance.h
@@ -2,6 +2,7 @@
 #define Distance_H_INC
 
 #include "common/Domain.h"
+#include "common/Array.hpp"
 
 
 struct Vec {
diff --git a/common/Array.cpp b/common/Array.cpp
new file mode 100644
index 00000000..ecb3470a
--- /dev/null
+++ b/common/Array.cpp
@@ -0,0 +1,117 @@
+#include "common/Array.h"
+#include "common/Array.hpp"
+
+#include <complex>
+
+
+/********************************************************
+ *  ArraySize                                            *
+ ********************************************************/
+ArraySize::ArraySize( const std::vector<size_t>& N )
+{
+    d_ndim = N.size();
+    d_N[0] = 0;
+    d_N[1] = 1;
+    d_N[2] = 1;
+    d_N[3] = 1;
+    d_N[4] = 1;
+    for ( size_t i = 0; i < d_ndim; i++ )
+        d_N[i] = N[i];
+    d_length = 1;
+    for ( unsigned long i : d_N )
+        d_length *= i;
+    if ( d_ndim == 0 )
+        d_length = 0;
+}
+
+
+/********************************************************
+ *  Explicit instantiations of Array                     *
+ ********************************************************/
+template class Array<char, FunctionTable>;
+template class Array<uint8_t, FunctionTable>;
+template class Array<uint16_t, FunctionTable>;
+template class Array<uint32_t, FunctionTable>;
+template class Array<uint64_t, FunctionTable>;
+template class Array<int8_t, FunctionTable>;
+template class Array<int16_t, FunctionTable>;
+template class Array<int32_t, FunctionTable>;
+template class Array<int64_t, FunctionTable>;
+template class Array<float, FunctionTable>;
+template class Array<double, FunctionTable>;
+
+
+/********************************************************
+ *  Explicit instantiations of Array<bool>               *
+ ********************************************************/
+// clang-format off
+template Array<bool, FunctionTable>::Array();
+template Array<bool, FunctionTable>::~Array();
+template Array<bool, FunctionTable>::Array( size_t );
+template Array<bool, FunctionTable>::Array( size_t, size_t );
+template Array<bool, FunctionTable>::Array( size_t, size_t, size_t );
+template Array<bool, FunctionTable>::Array( size_t, size_t, size_t, size_t );
+template Array<bool, FunctionTable>::Array( size_t, size_t, size_t, size_t, size_t );
+template Array<bool, FunctionTable>::Array( const std::vector<size_t>&, const bool* );
+template Array<bool, FunctionTable>::Array( std::string );
+template Array<bool, FunctionTable>::Array( std::initializer_list<bool> );
+template Array<bool, FunctionTable>::Array( const Array<bool, FunctionTable>& );
+template Array<bool, FunctionTable>::Array( Array<bool, FunctionTable>&& );
+template Array<bool, FunctionTable>& Array<bool, FunctionTable>::operator=( const Array<bool, FunctionTable>& );
+template Array<bool, FunctionTable>& Array<bool, FunctionTable>::operator=( Array<bool, FunctionTable>&& );
+template Array<bool, FunctionTable>& Array<bool, FunctionTable>::operator=( const std::vector<bool>& );
+template void Array<bool, FunctionTable>::fill(bool const&);
+template void Array<bool, FunctionTable>::clear();
+template bool Array<bool, FunctionTable>::operator==(Array<bool, FunctionTable> const&) const;
+template void Array<bool, FunctionTable>::resize( ArraySize const& );
+// clang-format on
+
+
+/********************************************************
+ *  Explicit instantiations of Array<std::complex>       *
+ ********************************************************/
+// clang-format off
+template Array<std::complex<double>, FunctionTable>::Array();
+template Array<std::complex<double>, FunctionTable>::~Array();
+template Array<std::complex<double>, FunctionTable>::Array( size_t );
+template Array<std::complex<double>, FunctionTable>::Array( size_t, size_t );
+template Array<std::complex<double>, FunctionTable>::Array( size_t, size_t, size_t );
+template Array<std::complex<double>, FunctionTable>::Array( size_t, size_t, size_t, size_t );
+template Array<std::complex<double>, FunctionTable>::Array( size_t, size_t, size_t, size_t, size_t );
+template Array<std::complex<double>, FunctionTable>::Array( const std::vector<size_t>&, const std::complex<double>* );
+template Array<std::complex<double>, FunctionTable>::Array( std::initializer_list<std::complex<double>> );
+template Array<std::complex<double>, FunctionTable>::Array( const Range<std::complex<double>>& range );
+template Array<std::complex<double>, FunctionTable>::Array( const Array<std::complex<double>, FunctionTable>& );
+template Array<std::complex<double>, FunctionTable>::Array( Array<std::complex<double>, FunctionTable>&& );
+template Array<std::complex<double>, FunctionTable>& Array<std::complex<double>, FunctionTable>::operator=( const Array<std::complex<double>, FunctionTable>& );
+template Array<std::complex<double>, FunctionTable>& Array<std::complex<double>, FunctionTable>::operator=( Array<std::complex<double>, FunctionTable>&& );
+template Array<std::complex<double>, FunctionTable>& Array<std::complex<double>, FunctionTable>::operator=( const std::vector<std::complex<double>>& );
+template void Array<std::complex<double>, FunctionTable>::resize( ArraySize const& );
+template void Array<std::complex<double>, FunctionTable>::viewRaw( ArraySize const&, std::complex<double>*, bool, bool );
+template void Array<std::complex<double>, FunctionTable>::fill(std::complex<double> const&);
+template void Array<std::complex<double>, FunctionTable>::clear();
+template bool Array<std::complex<double>, FunctionTable>::operator==(Array<std::complex<double>, FunctionTable> const&) const;
+template Array<std::complex<double>, FunctionTable> Array<std::complex<double>, FunctionTable>::repmat(std::vector<unsigned long, std::allocator<unsigned long> > const&) const;
+// clang-format on
+
+
+/********************************************************
+ *  Explicit instantiations of Array<std::string>        *
+ ********************************************************/
+// clang-format off
+template Array<std::string, FunctionTable>::Array();
+template Array<std::string, FunctionTable>::~Array();
+template Array<std::string, FunctionTable>::Array( size_t );
+template Array<std::string, FunctionTable>::Array( size_t, size_t );
+template Array<std::string, FunctionTable>::Array( size_t, size_t, size_t );
+template Array<std::string, FunctionTable>::Array( size_t, size_t, size_t, size_t );
+template Array<std::string, FunctionTable>::Array( size_t, size_t, size_t, size_t, size_t );
+template Array<std::string, FunctionTable>::Array( const std::vector<size_t>&, const std::string* );
+template Array<std::string, FunctionTable>::Array( std::initializer_list<std::string> );
+template Array<std::string, FunctionTable>::Array( const Array<std::string, FunctionTable>& );
+template Array<std::string, FunctionTable>::Array( Array<std::string, FunctionTable>&& );
+template Array<std::string, FunctionTable>& Array<std::string, FunctionTable>::operator=( const Array<std::string, FunctionTable>& );
+template Array<std::string, FunctionTable>& Array<std::string, FunctionTable>::operator=( Array<std::string, FunctionTable>&& );
+template Array<std::string, FunctionTable>& Array<std::string, FunctionTable>::operator=( const std::vector<std::string>& );
+template void Array<std::string, FunctionTable>::resize( ArraySize const& );
+// clang-format on
diff --git a/common/Array.h b/common/Array.h
index edfa687a..2dccfde8 100644
--- a/common/Array.h
+++ b/common/Array.h
@@ -1,247 +1,20 @@
 #ifndef included_ArrayClass
 #define included_ArrayClass
 
+#include "common/ArraySize.h"
+
 #include <array>
-#include <cstring>
 #include <functional>
 #include <initializer_list>
 #include <iostream>
 #include <memory>
 #include <vector>
 
-#include "Utilities.h"
-
-
-#if defined( __CUDA_ARCH__ )
-#include <cuda.h>
-#define HOST_DEVICE __host__ __device__
-#else
-#define HOST_DEVICE
-#endif
-#if defined( USING_GCC ) || defined( USING_CLANG )
-#define ATTRIBUTE_INLINE __attribute__( ( always_inline ) )
-#else
-#define ATTRIBUTE_INLINE
-#endif
-
-
-#if ( defined( DEBUG ) || defined( _DEBUG ) ) && !defined( NDEBUG )
-#define CHECK_ARRAY_LENGTH( i )                                      \
-    do {                                                             \
-        if ( i >= d_length )                                         \
-            throw std::length_error( "Index exceeds array bounds" ); \
-    } while ( 0 )
-#else
-#define CHECK_ARRAY_LENGTH( i ) \
-    do {                        \
-    } while ( 0 )
-#endif
-
-
-// Forward decleration
-class FunctionTable;
-
-
-//! Simple range class
-template<class TYPE = size_t>
-class Range final
-{
-public:
-    //! Empty constructor
-    Range() : i( 0 ), j( -1 ), k( 1 ) {}
-
-    /*!
-     * Create a range i:k:j (or i:j)
-     * @param i_            Starting value
-     * @param j_            Ending value
-     * @param k_            Increment value
-     */
-    Range( TYPE i_, TYPE j_, TYPE k_ = 1 ) : i( i_ ), j( j_ ), k( k_ ) {}
-
-    TYPE i, j, k;
-};
-
-
-//! Simple class to store the array dimensions
-class ArraySize final
-{
-public:
-    //! Empty constructor
-    inline ArraySize();
-
-    /*!
-     * Create the vector size
-     * @param N1            Number of elements in the first dimension
-     */
-    inline ArraySize( size_t N1 );
-
-    /*!
-     * Create the vector size
-     * @param N1            Number of elements in the first dimension
-     * @param N2            Number of elements in the second dimension
-     */
-    inline ArraySize( size_t N1, size_t N2 );
-
-    /*!
-     * Create the vector size
-     * @param N1            Number of elements in the first dimension
-     * @param N2            Number of elements in the second dimension
-     * @param N3            Number of elements in the third dimension
-     */
-    inline ArraySize( size_t N1, size_t N2, size_t N3 );
-
-    /*!
-     * Create the vector size
-     * @param N1            Number of elements in the first dimension
-     * @param N2            Number of elements in the second dimension
-     * @param N3            Number of elements in the third dimension
-     * @param N4            Number of elements in the fourth dimension
-     */
-    inline ArraySize( size_t N1, size_t N2, size_t N3, size_t N4 );
-
-    /*!
-     * Create the vector size
-     * @param N1            Number of elements in the first dimension
-     * @param N2            Number of elements in the second dimension
-     * @param N3            Number of elements in the third dimension
-     * @param N4            Number of elements in the fourth dimension
-     * @param N5            Number of elements in the fifth dimension
-     */
-    inline ArraySize( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 );
-
-    /*!
-     * Create from initializer list
-     * @param N             Size of the array
-     */
-    inline ArraySize( std::initializer_list<size_t> N );
-
-    /*!
-     * Create from raw pointer
-     * @param ndim          Number of dimensions
-     * @param ndim          Dimensions
-     */
-    inline ArraySize( size_t ndim, const size_t *dims );
-
-    /*!
-     * Create from std::vector
-     * @param N             Size of the array
-     */
-    inline ArraySize( const std::vector<size_t> &N );
-
-    /*!
-     * Copy constructor
-     * @param rhs           Array to copy
-     */
-    inline ArraySize( const ArraySize &rhs );
-
-    /*!
-     * Move constructor
-     * @param rhs           Array to copy
-     */
-    inline ArraySize( ArraySize &&rhs );
-
-    /*!
-     * Assignment operator
-     * @param rhs           Array to copy
-     */
-    inline ArraySize &operator=( const ArraySize &rhs );
-
-    /*!
-     * Move assignment operator
-     * @param rhs           Array to copy
-     */
-    inline ArraySize &operator=( ArraySize &&rhs );
-
-    /*!
-     * Access the ith dimension
-     * @param i             Index to access
-     */
-    inline size_t operator[]( size_t i ) const { return d_N[i]; }
-
-    //! Sum the elements
-    inline uint8_t ndim() const ATTRIBUTE_INLINE { return d_ndim; }
-
-    //! Sum the elements
-    inline size_t size() const ATTRIBUTE_INLINE { return d_ndim; }
-
-    //! Sum the elements
-    inline size_t length() const ATTRIBUTE_INLINE { return d_length; }
-
-    //! Sum the elements
-    inline void resize( uint8_t dim, size_t N );
-
-    //! Returns an iterator to the beginning
-    inline const size_t *begin() const ATTRIBUTE_INLINE { return d_N; }
-
-    //! Returns an iterator to the end
-    inline const size_t *end() const ATTRIBUTE_INLINE { return d_N + d_ndim; }
-
-    // Check if two matrices are equal
-    inline bool operator==( const ArraySize &rhs ) const ATTRIBUTE_INLINE
-    {
-        return d_ndim == rhs.d_ndim && memcmp( d_N, rhs.d_N, sizeof( d_N ) ) == 0;
-    }
-
-    //! Check if two matrices are not equal
-    inline bool operator!=( const ArraySize &rhs ) const ATTRIBUTE_INLINE
-    {
-        return d_ndim != rhs.d_ndim || memcmp( d_N, rhs.d_N, sizeof( d_N ) ) != 0;
-    }
-
-    //! Maximum supported dimension
-    constexpr static uint8_t maxDim() ATTRIBUTE_INLINE { return 5u; }
-
-    //! Get the index
-    inline size_t index( size_t i ) const ATTRIBUTE_INLINE
-    {
-        CHECK_ARRAY_LENGTH( i );
-        return i;
-    }
-
-    //! Get the index
-    inline size_t index( size_t i1, size_t i2 ) const ATTRIBUTE_INLINE
-    {
-        size_t index = i1 + i2 * d_N[0];
-        CHECK_ARRAY_LENGTH( index );
-        return index;
-    }
-
-    //! Get the index
-    inline size_t index( size_t i1, size_t i2, size_t i3 ) const ATTRIBUTE_INLINE
-    {
-        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * i3 );
-        CHECK_ARRAY_LENGTH( index );
-        return index;
-    }
-
-    //! Get the index
-    inline size_t index( size_t i1, size_t i2, size_t i3, size_t i4 ) const ATTRIBUTE_INLINE
-    {
-        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * i4 ) );
-        CHECK_ARRAY_LENGTH( index );
-        return index;
-    }
-
-    //! Get the index
-    inline size_t index(
-        size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const ATTRIBUTE_INLINE
-    {
-        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * ( i4 + d_N[3] * i5 ) ) );
-        CHECK_ARRAY_LENGTH( index );
-        return index;
-    }
-
-private:
-    uint8_t d_ndim;
-    size_t d_length;
-    size_t d_N[5];
-};
-
 
 /*!
  * Class Array is a multi-dimensional array class written by Mark Berrill
  */
-template<class TYPE, class FUN = FunctionTable>
+template<class TYPE, class FUN, class Allocator>
 class Array final
 {
 public: // Constructors / assignment operators
@@ -309,6 +82,12 @@ public: // Constructors / assignment operators
      */
     explicit Array( const Range<TYPE> &range );
 
+    /*!
+     * Create a 1D Array using a string that mimic's MATLAB
+     * @param range         Range of the data
+     */
+    explicit Array( std::string range );
+
     /*!
      * Create a 1D Array with the given initializer list
      * @param data          Input data
@@ -346,74 +125,34 @@ public: // Constructors / assignment operators
      */
     Array &operator=( const std::vector<TYPE> &rhs );
 
+    //! Is copyable?
+    inline bool isCopyable() const { return d_isCopyable; }
+
+    //! Set is copyable
+    inline void setCopyable( bool flag ) { d_isCopyable = flag; }
+
+    //! Is fixed size?
+    inline bool isFixedSize() const { return d_isFixedSize; }
+
+    //! Set is copyable
+    inline void setFixedSize( bool flag ) { d_isFixedSize = flag; }
+
+
 public: // Views/copies/subset
     /*!
-     * Create a 1D Array view to a raw block of data
-     * @param N             Number of elements in the array
+     * Create a multi-dimensional Array view to a raw block of data
+     * @param N             Number of elements in each dimension
      * @param data          Pointer to the data
      */
-    static std::shared_ptr<Array> view( size_t N, std::shared_ptr<TYPE> const &data );
+    static std::unique_ptr<Array> view( const ArraySize &N, std::shared_ptr<TYPE> &data );
 
-    /*!
-     * Create a new 2D Array with the given number of rows and columns
-     * @param N_rows        Number of rows
-     * @param N_columns     Number of columns
-     * @param data          Pointer to the data
-     */
-    static std::shared_ptr<Array> view(
-        size_t N_rows, size_t N_columns, std::shared_ptr<TYPE> const &data );
-
-    /*!
-     * Create a new 3D Array view to a raw block of data
-     * @param N1            Number of rows
-     * @param N2            Number of columns
-     * @param N3            Number of elements in the third dimension
-     * @param data          Pointer to the data
-     */
-    static std::shared_ptr<Array> view(
-        size_t N1, size_t N2, size_t N3, std::shared_ptr<TYPE> const &data );
 
     /*!
      * Create a multi-dimensional Array view to a raw block of data
      * @param N             Number of elements in each dimension
      * @param data          Pointer to the data
      */
-    static std::shared_ptr<Array> view( const ArraySize &N, std::shared_ptr<TYPE> const &data );
-
-
-    /*!
-     * Create a 1D Array view to a raw block of data
-     * @param N             Number of elements in the array
-     * @param data          Pointer to the data
-     */
-    static std::shared_ptr<const Array> constView(
-        size_t N, std::shared_ptr<const TYPE> const &data );
-
-    /*!
-     * Create a new 2D Array with the given number of rows and columns
-     * @param N_rows        Number of rows
-     * @param N_columns     Number of columns
-     * @param data          Pointer to the data
-     */
-    static std::shared_ptr<const Array> constView(
-        size_t N_rows, size_t N_columns, std::shared_ptr<const TYPE> const &data );
-
-    /*!
-     * Create a new 3D Array view to a raw block of data
-     * @param N1            Number of rows
-     * @param N2            Number of columns
-     * @param N3            Number of elements in the third dimension
-     * @param data          Pointer to the data
-     */
-    static std::shared_ptr<const Array> constView(
-        size_t N1, size_t N2, size_t N3, std::shared_ptr<const TYPE> const &data );
-
-    /*!
-     * Create a multi-dimensional Array view to a raw block of data
-     * @param N             Number of elements in each dimension
-     * @param data          Pointer to the data
-     */
-    static std::shared_ptr<const Array> constView(
+    static std::unique_ptr<const Array> constView(
         const ArraySize &N, std::shared_ptr<const TYPE> const &data );
 
 
@@ -432,7 +171,7 @@ public: // Views/copies/subset
 
     /*!
      * Make this object a view of the raw data (expert use only).
-     * Use view2( N, std::shared_ptr(data,[](TYPE*){}) ) instead.
+     * Use view2( N, shared_ptr(data,[](TYPE*){}) ) instead.
      *   Note: this interface is not recommended as it does not protect from
      *   the src data being deleted while still being used by the Array.
      *   Additionally for maximum performance it does not set the internal shared_ptr
@@ -440,27 +179,28 @@ public: // Views/copies/subset
      * @param ndim          Number of dimensions
      * @param dims          Number of elements in each dimension
      * @param data          Pointer to the data
+     * @param isCopyable    Once the view is created, can the array be copied
+     * @param isFixedSize   Once the view is created, is the array size fixed
      */
-    void viewRaw( int ndim, const size_t *dims, TYPE *data );
+    inline void viewRaw(
+        int ndim, const size_t *dims, TYPE *data, bool isCopyable = true, bool isFixedSize = true )
+    {
+        viewRaw( ArraySize( ndim, dims ), data, isCopyable, isFixedSize );
+    }
 
     /*!
      * Make this object a view of the raw data (expert use only).
-     * Use view2( N, std::shared_ptr(data,[](TYPE*){}) ) instead.
+     * Use view2( N, shared_ptr(data,[](TYPE*){}) ) instead.
      *   Note: this interface is not recommended as it does not protect from
      *   the src data being deleted while still being used by the Array.
      *   Additionally for maximum performance it does not set the internal shared_ptr
      *   so functions like getPtr and resize will not work correctly.
      * @param N             Number of elements in each dimension
      * @param data          Pointer to the data
+     * @param isCopyable    Once the view is created, can the array be copied
+     * @param isFixedSize   Once the view is created, is the array size fixed
      */
-    void viewRaw( const ArraySize &N, TYPE *data );
-
-    /*!
-     * Convert an array of one type to another.  This may or may not allocate new memory.
-     * @param array         Input array
-     */
-    template<class TYPE2>
-    static std::shared_ptr<Array<TYPE2>> convert( std::shared_ptr<Array<TYPE, FUN>> array );
+    void viewRaw( const ArraySize &N, TYPE *data, bool isCopyable = true, bool isFixedSize = true );
 
 
     /*!
@@ -468,8 +208,27 @@ public: // Views/copies/subset
      * @param array         Input array
      */
     template<class TYPE2>
-    static std::shared_ptr<const Array<TYPE2>> convert(
-        std::shared_ptr<const Array<TYPE, FUN>> array );
+    static inline std::unique_ptr<Array<TYPE2, FUN, Allocator>> convert(
+        std::shared_ptr<Array<TYPE, FUN, Allocator>> array )
+    {
+        auto array2 = std::make_unique<Array<TYPE2>>( array->size() );
+        array2.copy( *array );
+        return array2;
+    }
+
+
+    /*!
+     * Convert an array of one type to another.  This may or may not allocate new memory.
+     * @param array         Input array
+     */
+    template<class TYPE2>
+    static inline std::unique_ptr<const Array<TYPE2, FUN, Allocator>> convert(
+        std::shared_ptr<const Array<TYPE, FUN, Allocator>> array )
+    {
+        auto array2 = std::make_unique<Array<TYPE2>>( array->size() );
+        array2.copy( *array );
+        return array2;
+    }
 
 
     /*!
@@ -477,7 +236,11 @@ public: // Views/copies/subset
      * @param array         Source array
      */
     template<class TYPE2>
-    void copy( const Array<TYPE2> &array );
+    void inline copy( const Array<TYPE2, FUN, Allocator> &array )
+    {
+        resize( array.size() );
+        copy( array.data() );
+    }
 
     /*!
      * Copy and convert data from a raw vector to this array.
@@ -485,22 +248,36 @@ public: // Views/copies/subset
      * @param array         Source array
      */
     template<class TYPE2>
-    void copy( const TYPE2 *array );
+    void inline copy( const TYPE2 *data )
+    {
+        for ( size_t i = 0; i < d_size.length(); i++ )
+            d_data[i] = static_cast<TYPE>( data[i] );
+    }
 
     /*!
      * Copy and convert data from this array to a raw vector.
      * @param array         Source array
      */
     template<class TYPE2>
-    void copyTo( TYPE2 *array ) const;
+    void inline copyTo( TYPE2 *data ) const
+    {
+        for ( size_t i = 0; i < d_size.length(); i++ )
+            data[i] = static_cast<TYPE2>( d_data[i] );
+    }
 
     /*!
-     * Copy and convert data from this array to a raw vector.
-     * @param array         Source array
+     * Copy and convert data from this array to a new array
      */
     template<class TYPE2>
-    Array<TYPE2, FUN> cloneTo() const;
+    Array<TYPE2, FUN, Allocator> inline cloneTo() const
+    {
+        Array<TYPE2, FUN> dst( this->size() );
+        copyTo( dst.data() );
+        return dst;
+    }
 
+    /*! swap the raw data pointers for the Arrays after checking for compatibility */
+    void swap( Array &other );
 
     /*!
      * Fill the array with the given value
@@ -519,7 +296,7 @@ public: // Views/copies/subset
      * @param base        Base array
      * @param exp         Exponent value
      */
-    void pow( const Array<TYPE, FUN> &base, const TYPE &exp );
+    void pow( const Array &base, const TYPE &exp );
 
     //! Destructor
     ~Array();
@@ -534,11 +311,7 @@ public: // Views/copies/subset
 
 
     //! Return the size of the Array
-    inline ArraySize &size() { return d_size; }
-
-
-    //! Return the size of the Array
-    inline ArraySize size() const { return d_size; }
+    inline const ArraySize &size() const { return d_size; }
 
 
     //! Return the size of the Array
@@ -557,14 +330,15 @@ public: // Views/copies/subset
      * Resize the Array
      * @param N             NUmber of elements
      */
-    void resize( size_t N );
+    inline void resize( size_t N ) { resize( ArraySize( N ) ); }
+
 
     /*!
      * Resize the Array
-     * @param N_rows        Number of rows
-     * @param N_columns     Number of columns
+     * @param N_row         Number of rows
+     * @param N_col         Number of columns
      */
-    void resize( size_t N_rows, size_t N_columns );
+    inline void resize( size_t N_row, size_t N_col ) { resize( ArraySize( N_row, N_col ) ); }
 
     /*!
      * Resize the Array
@@ -572,7 +346,7 @@ public: // Views/copies/subset
      * @param N2            Number of columns
      * @param N3            Number of elements in the third dimension
      */
-    void resize( size_t N1, size_t N2, size_t N3 );
+    inline void resize( size_t N1, size_t N2, size_t N3 ) { resize( ArraySize( N1, N2, N3 ) ); }
 
     /*!
      * Resize the Array
@@ -598,19 +372,25 @@ public: // Views/copies/subset
 
 
     /*!
-     * Subset the Array (total size of array will not change)
+     * Reshape the Array so that the number of dimensions is the
+     *    max of ndim and the largest dim>1.
+     * @param ndim          Desired number of dimensions
+     */
+    inline void setNdim( int ndim ) { d_size.setNdim( ndim ); }
+
+
+    /*!
+     * Subset the Array
      * @param index         Index to subset (imin,imax,jmin,jmax,kmin,kmax,...)
      */
-    template<class TYPE2 = TYPE>
-    Array<TYPE2, FUN> subset( const std::vector<size_t> &index ) const;
+    Array subset( const std::vector<size_t> &index ) const;
 
 
     /*!
-     * Subset the Array (total size of array will not change)
+     * Subset the Array
      * @param index         Index to subset (ix:kx:jx,iy:ky:jy,...)
      */
-    template<class TYPE2 = TYPE>
-    Array<TYPE2, FUN> subset( const std::vector<Range<size_t>> &index ) const;
+    Array subset( const std::vector<Range<size_t>> &index ) const;
 
 
     /*!
@@ -618,30 +398,28 @@ public: // Views/copies/subset
      * @param index         Index of the subset (imin,imax,jmin,jmax,kmin,kmax,...)
      * @param subset        The subset array to copy from
      */
-    template<class TYPE2>
-    void copySubset( const std::vector<size_t> &index, const Array<TYPE2, FUN> &subset );
+    void copySubset( const std::vector<size_t> &index, const Array &subset );
 
     /*!
      * Copy data from an array into a subset of this array
      * @param index         Index of the subset
      * @param subset        The subset array to copy from
      */
-    template<class TYPE2>
-    void copySubset( const std::vector<Range<size_t>> &index, const Array<TYPE2, FUN> &subset );
+    void copySubset( const std::vector<Range<size_t>> &index, const Array &subset );
 
     /*!
      * Add data from an array into a subset of this array
      * @param index         Index of the subset (imin,imax,jmin,jmax,kmin,kmax,...)
      * @param subset        The subset array to add from
      */
-    void addSubset( const std::vector<size_t> &index, const Array<TYPE, FUN> &subset );
+    void addSubset( const std::vector<size_t> &index, const Array &subset );
 
     /*!
      * Add data from an array into a subset of this array
      * @param index         Index of the subset
      * @param subset        The subset array to add from
      */
-    void addSubset( const std::vector<Range<size_t>> &index, const Array<TYPE, FUN> &subset );
+    void addSubset( const std::vector<Range<size_t>> &index, const Array &subset );
 
 
 public: // Accessors
@@ -649,16 +427,13 @@ public: // Accessors
      * Access the desired element
      * @param i             The row index
      */
-    HOST_DEVICE inline TYPE &operator()( size_t i ) ATTRIBUTE_INLINE
-    {
-        return d_data[d_size.index( i )];
-    }
+    ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i ) { return d_data[d_size.index( i )]; }
 
     /*!
      * Access the desired element
      * @param i             The row index
      */
-    HOST_DEVICE inline const TYPE &operator()( size_t i ) const ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline const TYPE &operator()( size_t i ) const
     {
         return d_data[d_size.index( i )];
     }
@@ -668,7 +443,7 @@ public: // Accessors
      * @param i             The row index
      * @param j             The column index
      */
-    HOST_DEVICE inline TYPE &operator()( size_t i, size_t j ) ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i, size_t j )
     {
         return d_data[d_size.index( i, j )];
     }
@@ -678,7 +453,7 @@ public: // Accessors
      * @param i             The row index
      * @param j             The column index
      */
-    HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j ) const ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline const TYPE &operator()( size_t i, size_t j ) const
     {
         return d_data[d_size.index( i, j )];
     }
@@ -689,7 +464,7 @@ public: // Accessors
      * @param j             The column index
      * @param k             The third index
      */
-    HOST_DEVICE inline TYPE &operator()( size_t i, size_t j, size_t k ) ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i, size_t j, size_t k )
     {
         return d_data[d_size.index( i, j, k )];
     }
@@ -700,7 +475,7 @@ public: // Accessors
      * @param j             The column index
      * @param k             The third index
      */
-    HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j, size_t k ) const ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline const TYPE &operator()( size_t i, size_t j, size_t k ) const
     {
         return d_data[d_size.index( i, j, k )];
     }
@@ -712,8 +487,7 @@ public: // Accessors
      * @param i3            The third index
      * @param i4            The fourth index
      */
-    HOST_DEVICE inline TYPE &operator()(
-        size_t i1, size_t i2, size_t i3, size_t i4 ) ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i1, size_t i2, size_t i3, size_t i4 )
     {
         return d_data[d_size.index( i1, i2, i3, i4 )];
     }
@@ -725,8 +499,8 @@ public: // Accessors
      * @param i3            The third index
      * @param i4            The fourth index
      */
-    HOST_DEVICE inline const TYPE &operator()(
-        size_t i1, size_t i2, size_t i3, size_t i4 ) const ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline const TYPE &operator()(
+        size_t i1, size_t i2, size_t i3, size_t i4 ) const
     {
         return d_data[d_size.index( i1, i2, i3, i4 )];
     }
@@ -739,8 +513,7 @@ public: // Accessors
      * @param i4            The fourth index
      * @param i5            The fifth index
      */
-    HOST_DEVICE inline TYPE &operator()(
-        size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 )
     {
         return d_data[d_size.index( i1, i2, i3, i4, i5 )];
     }
@@ -753,8 +526,8 @@ public: // Accessors
      * @param i4            The fourth index
      * @param i5            The fifth index
      */
-    HOST_DEVICE inline const TYPE &operator()(
-        size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline const TYPE &operator()(
+        size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const
     {
         return d_data[d_size.index( i1, i2, i3, i4, i5 )];
     }
@@ -763,7 +536,7 @@ public: // Accessors
      * Access the desired element as a raw pointer
      * @param i             The global index
      */
-    HOST_DEVICE inline TYPE *ptr( size_t i ) ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline TYPE *ptr( size_t i )
     {
         return i >= d_size.length() ? nullptr : &d_data[i];
     }
@@ -772,34 +545,34 @@ public: // Accessors
      * Access the desired element as a raw pointer
      * @param i             The global index
      */
-    HOST_DEVICE inline const TYPE *ptr( size_t i ) const ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline const TYPE *ptr( size_t i ) const
     {
         return i >= d_size.length() ? nullptr : &d_data[i];
     }
 
     //! Get iterator to beginning of data
-    inline TYPE *begin() ATTRIBUTE_INLINE { return d_data; }
+    inline TYPE *begin() { return d_data; }
 
     //! Get iterator to beginning of data
-    inline const TYPE *begin() const ATTRIBUTE_INLINE { return d_data; }
+    inline const TYPE *begin() const { return d_data; }
 
     //! Get iterator to beginning of data
-    inline TYPE *end() ATTRIBUTE_INLINE { return d_data + d_size.length(); }
+    inline TYPE *end() { return d_data + d_size.length(); }
 
     //! Get iterator to beginning of data
-    inline const TYPE *end() const ATTRIBUTE_INLINE { return d_data + d_size.length(); }
+    inline const TYPE *end() const { return d_data + d_size.length(); }
 
     //! Return the pointer to the raw data
-    inline std::shared_ptr<TYPE> getPtr() ATTRIBUTE_INLINE { return d_ptr; }
+    inline std::shared_ptr<TYPE> getPtr() { return d_ptr; }
 
     //! Return the pointer to the raw data
-    inline std::shared_ptr<const TYPE> getPtr() const ATTRIBUTE_INLINE { return d_ptr; }
+    inline std::shared_ptr<const TYPE> getPtr() const { return d_ptr; }
 
     //! Return the pointer to the raw data
-    HOST_DEVICE inline TYPE *data() ATTRIBUTE_INLINE { return d_data; }
+    ARRAY_ATTRIBUTE inline TYPE *data() { return d_data; }
 
     //! Return the pointer to the raw data
-    HOST_DEVICE inline const TYPE *data() const ATTRIBUTE_INLINE { return d_data; }
+    ARRAY_ATTRIBUTE inline const TYPE *data() const { return d_data; }
 
 
 public: // Operator overloading
@@ -834,52 +607,52 @@ public: // Math operations
     void rand();
 
     //! Return true if NaNs are present
-    inline bool NaNs() const;
+    bool NaNs() const;
 
     //! Return the smallest value
-    inline TYPE min() const;
+    TYPE min() const;
 
     //! Return the largest value
-    inline TYPE max() const;
+    TYPE max() const;
 
     //! Return the sum of all elements
-    inline TYPE sum() const;
+    TYPE sum() const;
 
     //! Return the mean of all elements
-    inline TYPE mean() const;
+    TYPE mean() const;
 
     //! Return the min of all elements in a given direction
-    Array<TYPE, FUN> min( int dir ) const;
+    Array min( int dir ) const;
 
     //! Return the max of all elements in a given direction
-    Array<TYPE, FUN> max( int dir ) const;
+    Array max( int dir ) const;
 
     //! Return the sum of all elements in a given direction
-    Array<TYPE, FUN> sum( int dir ) const;
+    Array sum( int dir ) const;
 
     //! Return the smallest value
-    inline TYPE min( const std::vector<size_t> &index ) const;
+    TYPE min( const std::vector<size_t> &index ) const;
 
     //! Return the largest value
-    inline TYPE max( const std::vector<size_t> &index ) const;
+    TYPE max( const std::vector<size_t> &index ) const;
 
     //! Return the sum of all elements
-    inline TYPE sum( const std::vector<size_t> &index ) const;
+    TYPE sum( const std::vector<size_t> &index ) const;
 
     //! Return the mean of all elements
-    inline TYPE mean( const std::vector<size_t> &index ) const;
+    TYPE mean( const std::vector<size_t> &index ) const;
 
     //! Return the smallest value
-    inline TYPE min( const std::vector<Range<size_t>> &index ) const;
+    TYPE min( const std::vector<Range<size_t>> &index ) const;
 
     //! Return the largest value
-    inline TYPE max( const std::vector<Range<size_t>> &index ) const;
+    TYPE max( const std::vector<Range<size_t>> &index ) const;
 
     //! Return the sum of all elements
-    inline TYPE sum( const std::vector<Range<size_t>> &index ) const;
+    TYPE sum( const std::vector<Range<size_t>> &index ) const;
 
     //! Return the mean of all elements
-    inline TYPE mean( const std::vector<Range<size_t>> &index ) const;
+    TYPE mean( const std::vector<Range<size_t>> &index ) const;
 
     //! Find all elements that match the operator
     std::vector<size_t> find(
@@ -894,17 +667,17 @@ public: // Math operations
     static Array multiply( const Array &a, const Array &b );
 
     //! Transpose an array
-    Array<TYPE, FUN> reverseDim() const;
+    Array reverseDim() const;
 
     //! Replicate an array a given number of times in each direction
-    Array<TYPE, FUN> repmat( const std::vector<size_t> &N ) const;
+    Array repmat( const std::vector<size_t> &N ) const;
 
     //! Coarsen an array using the given filter
-    Array<TYPE, FUN> coarsen( const Array<TYPE, FUN> &filter ) const;
+    Array coarsen( const Array &filter ) const;
 
     //! Coarsen an array using the given filter
-    Array<TYPE, FUN> coarsen( const std::vector<size_t> &ratio,
-        std::function<TYPE( const Array<TYPE, FUN> & )> filter ) const;
+    Array coarsen(
+        const std::vector<size_t> &ratio, std::function<TYPE( const Array & )> filter ) const;
 
     /*!
      * Perform a element-wise operation y = f(x)
@@ -928,21 +701,31 @@ public: // Math operations
      * @param[in] x             x
      * @param[in] beta          beta
      */
-    void axpby( const TYPE &alpha, const Array<TYPE, FUN> &x, const TYPE &beta );
+    void axpby( const TYPE &alpha, const Array &x, const TYPE &beta );
+
+    /*!
+     * Linear interpolation
+     * @param[in] x             Position as a decimal index
+     */
+    TYPE interp( const std::vector<double> &x ) const;
+
+    /**
+     * \fn equals (Array & const rhs, TYPE tol )
+     * \brief  Determine if two Arrays are equal using an absolute tolerance
+     * \param[in] rhs Vector to compare to
+     * \param[in] tol Tolerance of comparison
+     * \return  True iff \f$||\mathit{rhs} - x||_\infty < \mathit{tol}\f$
+     */
+    bool equals( const Array &rhs, TYPE tol = 0.000001 ) const;
 
 private:
+    bool d_isCopyable;           // Can the array be copied
+    bool d_isFixedSize;          // Can the array be resized
     ArraySize d_size;            // Size of each dimension
     TYPE *d_data;                // Raw pointer to data in array
     std::shared_ptr<TYPE> d_ptr; // Shared pointer to data in array
     void allocate( const ArraySize &N );
 
-public:
-    template<class TYPE2, class FUN2>
-    inline bool sizeMatch( const Array<TYPE2, FUN2> &rhs ) const
-    {
-        return d_size == rhs.d_size;
-    }
-
 private:
     inline void checkSubsetIndex( const std::vector<Range<size_t>> &range ) const;
     inline std::vector<Range<size_t>> convert( const std::vector<size_t> &index ) const;
@@ -952,11 +735,60 @@ private:
 };
 
 
+/********************************************************
+ *  ostream operator                                     *
+ ********************************************************/
+inline std::ostream &operator<<( std::ostream &out, const ArraySize &s )
+{
+    out << "[" << s[0];
+    for ( size_t i = 1; i < s.ndim(); i++ )
+        out << "," << s[i];
+    out << "]";
+    return out;
+}
+
+
+/********************************************************
+ *  Math operations                                      *
+ ********************************************************/
+template<class TYPE, class FUN, class Allocator>
+inline Array<TYPE, FUN, Allocator> operator+(
+    const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
+{
+    Array<TYPE, FUN, Allocator> c;
+    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a + b; };
+    FUN::transform( fun, a, b, c );
+    return c;
+}
+template<class TYPE, class FUN, class Allocator>
+inline Array<TYPE, FUN, Allocator> operator-(
+    const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
+{
+    Array<TYPE, FUN, Allocator> c;
+    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a - b; };
+    FUN::transform( fun, a, b, c );
+    return c;
+}
+template<class TYPE, class FUN, class Allocator>
+inline Array<TYPE, FUN, Allocator> operator*(
+    const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
+{
+    return Array<TYPE, FUN, Allocator>::multiply( a, b );
+}
+template<class TYPE, class FUN, class Allocator>
+inline Array<TYPE, FUN, Allocator> operator*(
+    const Array<TYPE, FUN, Allocator> &a, const std::vector<TYPE> &b )
+{
+    Array<TYPE, FUN, Allocator> b2;
+    b2.viewRaw( { b.size() }, const_cast<TYPE *>( b.data() ) );
+    return Array<TYPE, FUN, Allocator>::multiply( a, b2 );
+}
+
+
+/********************************************************
+ *  Convience typedefs                                   *
+ ********************************************************/
 typedef Array<double> DoubleArray;
-typedef Array<float> FloatArray;
 typedef Array<int> IntArray;
 
-
-#include "common/Array.hpp"
-
 #endif
diff --git a/common/Array.hpp b/common/Array.hpp
index 60a8987c..f7d4398c 100644
--- a/common/Array.hpp
+++ b/common/Array.hpp
@@ -3,190 +3,127 @@
 
 #include "common/Array.h"
 #include "common/FunctionTable.h"
+#include "common/FunctionTable.hpp"
 #include "common/Utilities.h"
+
 #include <algorithm>
 #include <cmath>
+#include <complex>
 #include <cstring>
 #include <limits>
+#include <memory>
 
 
 /********************************************************
- *  ArraySize                                            *
+ *  External instantiations                              *
  ********************************************************/
-inline ArraySize::ArraySize()
+extern template class Array<char, FunctionTable>;
+extern template class Array<uint8_t, FunctionTable>;
+extern template class Array<uint16_t, FunctionTable>;
+extern template class Array<uint32_t, FunctionTable>;
+extern template class Array<uint64_t, FunctionTable>;
+extern template class Array<int8_t, FunctionTable>;
+extern template class Array<int16_t, FunctionTable>;
+extern template class Array<int32_t, FunctionTable>;
+extern template class Array<int64_t, FunctionTable>;
+extern template class Array<double, FunctionTable>;
+extern template class Array<float, FunctionTable>;
+
+
+/********************************************************
+ *  Helper functions                                     *
+ ********************************************************/
+template<class TYPE>
+inline typename std::enable_if<std::is_integral<TYPE>::value, size_t>::type getRangeSize(
+    const Range<TYPE> &range )
 {
-    d_ndim   = 1;
-    d_N[0]   = 0;
-    d_N[1]   = 1;
-    d_N[2]   = 1;
-    d_N[3]   = 1;
-    d_N[4]   = 1;
-    d_length = 0;
+    return ( static_cast<int64_t>( range.j ) - static_cast<int64_t>( range.i ) ) /
+           static_cast<int64_t>( range.k );
 }
-inline ArraySize::ArraySize( size_t N1 )
+template<class TYPE>
+inline typename std::enable_if<std::is_floating_point<TYPE>::value, size_t>::type getRangeSize(
+    const Range<TYPE> &range )
 {
-    d_ndim   = 1;
-    d_N[0]   = N1;
-    d_N[1]   = 1;
-    d_N[2]   = 1;
-    d_N[3]   = 1;
-    d_N[4]   = 1;
-    d_length = N1;
+    double tmp = static_cast<double>( ( range.j - range.i ) ) / static_cast<double>( range.k );
+    return static_cast<size_t>( floor( tmp + 1e-12 ) + 1 );
 }
-inline ArraySize::ArraySize( size_t N1, size_t N2 )
+template<class TYPE>
+inline typename std::enable_if<std::is_same<TYPE, std::complex<float>>::value ||
+                                   std::is_same<TYPE, std::complex<double>>::value,
+    size_t>::type
+getRangeSize( const Range<TYPE> &range )
 {
-    d_ndim   = 2;
-    d_N[0]   = N1;
-    d_N[1]   = N2;
-    d_N[2]   = 1;
-    d_N[3]   = 1;
-    d_N[4]   = 1;
-    d_length = N1 * N2;
+    double tmp = std::real( ( range.j - range.i ) / ( range.k ) );
+    return static_cast<size_t>( floor( tmp + 1e-12 ) + 1 );
 }
-inline ArraySize::ArraySize( size_t N1, size_t N2, size_t N3 )
+template<class TYPE>
+inline typename std::enable_if<std::is_integral<TYPE>::value, TYPE>::type getRangeValue(
+    const Range<TYPE> &range, size_t index )
 {
-    d_ndim   = 3;
-    d_N[0]   = N1;
-    d_N[1]   = N2;
-    d_N[2]   = N3;
-    d_N[3]   = 1;
-    d_N[4]   = 1;
-    d_length = N1 * N2 * N3;
+    return range.i + index * range.k;
 }
-inline ArraySize::ArraySize( size_t N1, size_t N2, size_t N3, size_t N4 )
+template<class TYPE>
+inline typename std::enable_if<std::is_floating_point<TYPE>::value, TYPE>::type getRangeValue(
+    const Range<TYPE> &range, size_t index )
 {
-    d_ndim   = 4;
-    d_N[0]   = N1;
-    d_N[1]   = N2;
-    d_N[2]   = N3;
-    d_N[3]   = N4;
-    d_N[4]   = 1;
-    d_length = N1 * N2 * N3 * N4;
+    return range.k * ( range.i / range.k + index );
 }
-inline ArraySize::ArraySize( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 )
+template<class TYPE>
+inline typename std::enable_if<std::is_same<TYPE, std::complex<float>>::value ||
+                                   std::is_same<TYPE, std::complex<double>>::value,
+    TYPE>::type
+getRangeValue( const Range<TYPE> &range, size_t index )
 {
-    d_ndim   = 5;
-    d_N[0]   = N1;
-    d_N[1]   = N2;
-    d_N[2]   = N3;
-    d_N[3]   = N4;
-    d_N[4]   = N5;
-    d_length = N1 * N2 * N3 * N4 * N5;
-}
-inline ArraySize::ArraySize( std::initializer_list<size_t> N )
-{
-    d_ndim  = N.size();
-    d_N[0]  = 0;
-    d_N[1]  = 1;
-    d_N[2]  = 1;
-    d_N[3]  = 1;
-    d_N[4]  = 1;
-    auto it = N.begin();
-    for ( size_t i = 0; i < d_ndim; i++, ++it )
-        d_N[i] = *it;
-    d_length = 1;
-    for ( size_t i = 0; i < maxDim(); i++ )
-        d_length *= d_N[i];
-    if ( d_ndim == 0 )
-        d_length = 0;
-}
-inline ArraySize::ArraySize( size_t ndim, const size_t *dims )
-{
-    d_ndim = ndim;
-    d_N[0] = 0;
-    d_N[1] = 1;
-    d_N[2] = 1;
-    d_N[3] = 1;
-    d_N[4] = 1;
-    for ( size_t i = 0; i < ndim; i++ )
-        d_N[i] = dims[i];
-    d_length = 1;
-    for ( size_t i = 0; i < maxDim(); i++ )
-        d_length *= d_N[i];
-    if ( d_ndim == 0 )
-        d_length = 0;
-}
-inline ArraySize::ArraySize( const std::vector<size_t> &N )
-{
-    d_ndim = N.size();
-    d_N[0] = 0;
-    d_N[1] = 1;
-    d_N[2] = 1;
-    d_N[3] = 1;
-    d_N[4] = 1;
-    for ( size_t i = 0; i < d_ndim; i++ )
-        d_N[i] = N[i];
-    d_length = 1;
-    for ( size_t i = 0; i < maxDim(); i++ )
-        d_length *= d_N[i];
-    if ( d_ndim == 0 )
-        d_length = 0;
-}
-inline ArraySize::ArraySize( const ArraySize &rhs ) { memcpy( this, &rhs, sizeof( *this ) ); }
-inline ArraySize::ArraySize( ArraySize &&rhs ) { memcpy( this, &rhs, sizeof( *this ) ); }
-inline ArraySize &ArraySize::operator=( const ArraySize &rhs )
-{
-    if ( this != &rhs )
-        memcpy( this, &rhs, sizeof( *this ) );
-    return *this;
-}
-inline ArraySize &ArraySize::operator=( ArraySize &&rhs )
-{
-    if ( this != &rhs )
-        memcpy( this, &rhs, sizeof( *this ) );
-    return *this;
-}
-inline void ArraySize::resize( uint8_t dim, size_t N )
-{
-    if ( dim >= d_ndim )
-        throw std::out_of_range( "Invalid dimension" );
-    d_N[dim] = N;
-    d_length = 1;
-    for ( size_t i = 0; i < maxDim(); i++ )
-        d_length *= d_N[i];
+    return range.k * ( range.i / range.k + static_cast<TYPE>( index ) );
 }
 
 
 /********************************************************
  *  Constructors                                         *
  ********************************************************/
-template<class TYPE, class FUN>
-Array<TYPE, FUN>::Array()
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array()
+    : d_isCopyable( true ), d_isFixedSize( false ), d_data( nullptr )
 {
-    d_data = nullptr;
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN>::Array( const ArraySize &N )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array( const ArraySize &N )
+    : d_isCopyable( true ), d_isFixedSize( false )
 {
     allocate( N );
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN>::Array( size_t N )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array( size_t N ) : d_isCopyable( true ), d_isFixedSize( false )
 {
     allocate( ArraySize( N ) );
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN>::Array( size_t N_rows, size_t N_cols )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array( size_t N_rows, size_t N_cols )
+    : d_isCopyable( true ), d_isFixedSize( false )
 {
     allocate( ArraySize( N_rows, N_cols ) );
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN>::Array( size_t N1, size_t N2, size_t N3 )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array( size_t N1, size_t N2, size_t N3 )
+    : d_isCopyable( true ), d_isFixedSize( false )
 {
     allocate( ArraySize( N1, N2, N3 ) );
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN>::Array( size_t N1, size_t N2, size_t N3, size_t N4 )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array( size_t N1, size_t N2, size_t N3, size_t N4 )
+    : d_isCopyable( true ), d_isFixedSize( false )
 {
     allocate( ArraySize( N1, N2, N3, N4 ) );
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN>::Array( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 )
+    : d_isCopyable( true ), d_isFixedSize( false )
 {
     allocate( ArraySize( N1, N2, N3, N4, N5 ) );
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN>::Array( const std::vector<size_t> &N, const TYPE *data )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array( const std::vector<size_t> &N, const TYPE *data )
+    : d_isCopyable( true ), d_isFixedSize( false )
 {
     allocate( N );
     if ( data ) {
@@ -194,26 +131,108 @@ Array<TYPE, FUN>::Array( const std::vector<size_t> &N, const TYPE *data )
             d_data[i] = data[i];
     }
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN>::Array( const Range<TYPE> &range )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array( const Range<TYPE> &range )
+    : d_isCopyable( true ), d_isFixedSize( false )
 {
-    double tmp = static_cast<double>( ( range.j - range.i ) ) / static_cast<double>( range.k );
-    size_t N   = static_cast<size_t>( floor( tmp + 1e-12 ) + 1 );
+    size_t N = getRangeSize( range );
     allocate( { N } );
     for ( size_t i = 0; i < N; i++ )
-        d_data[i] = range.k * ( range.i / range.k + i );
+        d_data[i] = getRangeValue( range, i );
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN>::Array( std::initializer_list<TYPE> x )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array( std::string str ) : d_isCopyable( true ), d_isFixedSize( false )
+{
+    allocate( 0 );
+    if ( (int) std::count( str.begin(), str.end(), ' ' ) == (int) str.length() ) {
+        // Empty string
+        return;
+    }
+    // Remove unnecessary whitespace
+    while ( str.front() == ' ' )
+        str.erase( 0, 1 );
+    while ( str.back() == ' ' )
+        str.resize( str.length() - 1 );
+    while ( str.find( ',' ) != std::string::npos )
+        str[str.find( ',' )] = ' ';
+    while ( str.find( "  " ) != std::string::npos )
+        str.replace( str.find( "  " ), 2, " " );
+    // Check if the string is of the format [...]
+    if ( str.front() == '[' && str.back() == ']' ) {
+        str.erase( 0, 1 );
+        str.resize( str.length() - 1 );
+        *this = Array( str );
+        return;
+    }
+    // Check if we are dealing with a 2D array
+    if ( str.find( ';' ) != std::string::npos ) {
+        size_t i1 = 0;
+        size_t i2 = str.find( ';' );
+        std::vector<Array> x;
+        while ( i2 > i1 ) {
+            auto tmp = str.substr( i1, i2 - i1 );
+            x.emplace_back( Array( tmp ) );
+            i1 = i2 + 1;
+            i2 = str.find( ';', i1 + 1 );
+            if ( i2 == std::string::npos )
+                i2 = str.length();
+        }
+        for ( auto &y : x )
+            y.reshape( { 1, y.length() } );
+        *this = cat( x, 0 );
+        return;
+    }
+    // Begin parsing the array constructor
+    size_t i1 = 0;
+    size_t i2 = str.find( ' ' );
+    std::vector<TYPE> data;
+    while ( i2 > i1 ) {
+        auto tmp = str.substr( i1, i2 - i1 );
+        int type = std::count( tmp.begin(), tmp.end(), ':' );
+        if ( type == 0 ) {
+            data.push_back( std::stod( tmp ) );
+        } else if ( type == 1 ) {
+            size_t k   = tmp.find( ':' );
+            TYPE x1    = std::stod( tmp.substr( 0, k ) );
+            TYPE x2    = std::stod( tmp.substr( k + 1 ) );
+            double tol = 1e-8 * ( x2 - x1 );
+            for ( TYPE x = x1; x <= x2 + tol; x += 1 )
+                data.push_back( x );
+        } else if ( type == 2 ) {
+            size_t k1  = tmp.find( ':' );
+            size_t k2  = tmp.find( ':', k1 + 1 );
+            TYPE x1    = std::stod( tmp.substr( 0, k1 ) );
+            TYPE dx    = std::stod( tmp.substr( k1 + 1, k2 - k1 - 1 ) );
+            TYPE x2    = std::stod( tmp.substr( k2 + 1 ) );
+            double tol = 1e-8 * ( x2 - x1 );
+            for ( TYPE x = x1; x <= x2 + tol; x += dx )
+                data.push_back( x );
+        } else {
+            throw std::logic_error( "Failed to parse string constructor: " + str );
+        }
+        i1 = i2;
+        i2 = str.find( ' ', i1 + 1 );
+        if ( i2 == std::string::npos )
+            i2 = str.length();
+    }
+    allocate( data.size() );
+    for ( size_t i = 0; i < data.size(); i++ )
+        d_data[i] = data[i];
+}
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array( std::initializer_list<TYPE> x )
+    : d_isCopyable( true ), d_isFixedSize( false )
 {
     allocate( { x.size() } );
     auto it = x.begin();
     for ( size_t i = 0; i < x.size(); ++i, ++it )
         d_data[i] = *it;
 }
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::allocate( const ArraySize &N )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::allocate( const ArraySize &N )
 {
+    if ( d_isFixedSize )
+        throw std::logic_error( "Array cannot be resized" );
     d_size      = N;
     auto length = d_size.length();
     if ( length == 0 )
@@ -224,73 +243,79 @@ void Array<TYPE, FUN>::allocate( const ArraySize &N )
     if ( length > 0 && d_data == nullptr )
         throw std::logic_error( "Failed to allocate array" );
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN>::Array( const Array &rhs ) : d_size( rhs.d_size ), d_data( nullptr )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array( const Array &rhs )
+    : d_isCopyable( true ), d_isFixedSize( false )
 {
+    if ( !rhs.d_isCopyable )
+        throw std::logic_error( "Array cannot be copied" );
     allocate( rhs.size() );
     for ( size_t i = 0; i < d_size.length(); i++ )
         d_data[i] = rhs.d_data[i];
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN>::Array( Array &&rhs ) : d_size( rhs.d_size ), d_data( rhs.d_data )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::Array( Array &&rhs )
+    : d_isCopyable( rhs.d_isCopyable ),
+      d_isFixedSize( rhs.d_isFixedSize ),
+      d_size( rhs.d_size ),
+      d_data( rhs.d_data )
 {
-    rhs.d_size = ArraySize();
     rhs.d_data = nullptr;
     d_ptr      = std::move( rhs.d_ptr );
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> &Array<TYPE, FUN>::operator=( const Array &rhs )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator=( const Array &rhs )
 {
     if ( this == &rhs )
         return *this;
+    if ( !rhs.d_isCopyable )
+        throw std::logic_error( "Array cannot be copied" );
     this->allocate( rhs.size() );
     for ( size_t i = 0; i < d_size.length(); i++ )
         this->d_data[i] = rhs.d_data[i];
     return *this;
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> &Array<TYPE, FUN>::operator=( Array &&rhs )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator=( Array &&rhs )
 {
     if ( this == &rhs )
         return *this;
-    d_size     = rhs.d_size;
-    rhs.d_size = ArraySize();
-    d_data     = rhs.d_data;
-    rhs.d_data = nullptr;
-    d_ptr      = std::move( rhs.d_ptr );
+    d_isCopyable  = rhs.d_isCopyable;
+    d_isFixedSize = rhs.d_isFixedSize;
+    d_size        = rhs.d_size;
+    d_data        = rhs.d_data;
+    d_ptr         = std::move( rhs.d_ptr );
+    rhs.d_data    = nullptr;
     return *this;
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> &Array<TYPE, FUN>::operator=( const std::vector<TYPE> &rhs )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator=( const std::vector<TYPE> &rhs )
 {
     this->allocate( ArraySize( rhs.size() ) );
     for ( size_t i = 0; i < rhs.size(); i++ )
         this->d_data[i] = rhs[i];
     return *this;
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN>::~Array()
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator>::~Array()
 {
 }
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::clear()
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::clear()
 {
-    d_size = ArraySize();
+    d_isCopyable  = true;
+    d_isFixedSize = false;
+    d_size        = ArraySize();
     d_ptr.reset();
     d_data = nullptr;
 }
 
 
-/********************************************************
- *  Access elements                                      *
- ********************************************************/
-
-
 /********************************************************
  *  Copy/move values from one array to another (resize)  *
  ********************************************************/
 template<class TYPE>
-inline void moveValues( const ArraySize &N1, const ArraySize &N2, TYPE *data1, TYPE *data2 )
+static inline void moveValues( const ArraySize &N1, const ArraySize &N2, TYPE *data1, TYPE *data2 )
 {
     for ( size_t i5 = 0; i5 < std::min( N1[4], N2[4] ); i5++ ) {
         for ( size_t i4 = 0; i4 < std::min( N1[3], N2[3] ); i4++ ) {
@@ -307,7 +332,7 @@ inline void moveValues( const ArraySize &N1, const ArraySize &N2, TYPE *data1, T
     }
 }
 template<bool test, class TYPE>
-inline typename std::enable_if<test, void>::type copyValues(
+static inline typename std::enable_if<test, void>::type copyValues(
     const ArraySize &N1, const ArraySize &N2, const TYPE *data1, TYPE *data2 )
 {
     for ( size_t i5 = 0; i5 < std::min( N1[4], N2[4] ); i5++ ) {
@@ -325,7 +350,7 @@ inline typename std::enable_if<test, void>::type copyValues(
     }
 }
 template<bool test, class TYPE>
-inline typename std::enable_if<!test, void>::type copyValues(
+static inline typename std::enable_if<!test, void>::type copyValues(
     const ArraySize &, const ArraySize &, const TYPE *, TYPE * )
 {
     throw std::logic_error( "No copy constructor" );
@@ -335,24 +360,8 @@ inline typename std::enable_if<!test, void>::type copyValues(
 /********************************************************
  *  Resize the array                                     *
  ********************************************************/
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::resize( size_t N )
-{
-    resize( ArraySize( N ) );
-}
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::resize( size_t N1, size_t N2 )
-{
-    resize( ArraySize( N1, N2 ) );
-}
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::resize( size_t N1, size_t N2, size_t N3 )
-{
-    resize( ArraySize( N1, N2, N3 ) );
-}
-
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::resize( const ArraySize &N )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::resize( const ArraySize &N )
 {
     // Check if the array actually changed size
     bool equal = true;
@@ -378,8 +387,8 @@ void Array<TYPE, FUN>::resize( const ArraySize &N )
         }
     }
 }
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::resizeDim( int dim, size_t N, const TYPE &value )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::resizeDim( int dim, size_t N, const TYPE &value )
 {
     if ( dim < 0 || dim > d_size.ndim() )
         throw std::out_of_range( "Invalid dimension" );
@@ -405,8 +414,8 @@ void Array<TYPE, FUN>::resizeDim( int dim, size_t N, const TYPE &value )
 /********************************************************
  *  Reshape the array                                     *
  ********************************************************/
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::reshape( const ArraySize &N )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::reshape( const ArraySize &N )
 {
     if ( N.length() != d_size.length() )
         throw std::logic_error( "reshape is not allowed to change the array size" );
@@ -418,17 +427,18 @@ void Array<TYPE, FUN>::reshape( const ArraySize &N )
  *  Subset the array                                     *
  ********************************************************/
 // Helper function to check subset indices
-template<class TYPE, class FUN>
-inline void Array<TYPE, FUN>::checkSubsetIndex( const std::vector<Range<size_t>> &range ) const
+template<class TYPE, class FUN, class Allocator>
+inline void Array<TYPE, FUN, Allocator>::checkSubsetIndex(
+    const std::vector<Range<size_t>> &range ) const
 {
     bool test = (int) range.size() == d_size.ndim();
     for ( size_t d = 0; d < range.size(); d++ )
-        test = test && range[d].i >= 0 && range[d].j <= d_size[d];
+        test = test && range[d].j <= d_size[d];
     if ( !test )
         throw std::logic_error( "indices for subset are invalid" );
 }
-template<class TYPE, class FUN>
-inline std::vector<Range<size_t>> Array<TYPE, FUN>::convert(
+template<class TYPE, class FUN, class Allocator>
+std::vector<Range<size_t>> Array<TYPE, FUN, Allocator>::convert(
     const std::vector<size_t> &index ) const
 {
     std::vector<Range<size_t>> range( d_size.ndim() );
@@ -439,8 +449,8 @@ inline std::vector<Range<size_t>> Array<TYPE, FUN>::convert(
     return range;
 }
 // Helper function to return dimensions for the subset array
-template<class TYPE, class FUN>
-inline void Array<TYPE, FUN>::getSubsetArrays( const std::vector<Range<size_t>> &index,
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::getSubsetArrays( const std::vector<Range<size_t>> &index,
     std::array<size_t, 5> &first, std::array<size_t, 5> &last, std::array<size_t, 5> &inc,
     std::array<size_t, 5> &N )
 {
@@ -456,9 +466,9 @@ inline void Array<TYPE, FUN>::getSubsetArrays( const std::vector<Range<size_t>>
         N[d]     = ( last[d] - first[d] + inc[d] ) / inc[d];
     }
 }
-template<class TYPE, class FUN>
-template<class TYPE2>
-Array<TYPE2, FUN> Array<TYPE, FUN>::subset( const std::vector<Range<size_t>> &index ) const
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::subset(
+    const std::vector<Range<size_t>> &index ) const
 {
     // Get the subset indicies
     checkSubsetIndex( index );
@@ -466,17 +476,17 @@ Array<TYPE2, FUN> Array<TYPE, FUN>::subset( const std::vector<Range<size_t>> &in
     getSubsetArrays( index, first, last, inc, N1 );
     ArraySize S1( d_size.ndim(), N1.data() );
     // Create the new array
-    Array<TYPE2> subset_array( S1 );
+    Array<TYPE> subset_array( S1 );
     // Fill the new array
     static_assert( ArraySize::maxDim() == 5, "Not programmed for more than 5 dimensions" );
-    TYPE2 *subset_data = subset_array.data();
+    TYPE *subset_data = subset_array.data();
     for ( size_t i4 = first[4], k1 = 0; i4 <= last[4]; i4 += inc[4] ) {
         for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) {
             for ( size_t i2 = first[2]; i2 <= last[2]; i2 += inc[2] ) {
                 for ( size_t i1 = first[1]; i1 <= last[1]; i1 += inc[1] ) {
                     for ( size_t i0 = first[0]; i0 <= last[0]; i0 += inc[0], k1++ ) {
                         size_t k2       = d_size.index( i0, i1, i2, i3, i4 );
-                        subset_data[k1] = static_cast<TYPE2>( d_data[k2] );
+                        subset_data[k1] = d_data[k2];
                     }
                 }
             }
@@ -484,17 +494,16 @@ Array<TYPE2, FUN> Array<TYPE, FUN>::subset( const std::vector<Range<size_t>> &in
     }
     return subset_array;
 }
-template<class TYPE, class FUN>
-template<class TYPE2>
-Array<TYPE2, FUN> Array<TYPE, FUN>::subset( const std::vector<size_t> &index ) const
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::subset(
+    const std::vector<size_t> &index ) const
 {
     auto range = convert( index );
     return subset( range );
 }
-template<class TYPE, class FUN>
-template<class TYPE2>
-void Array<TYPE, FUN>::copySubset(
-    const std::vector<Range<size_t>> &index, const Array<TYPE2, FUN> &subset )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::copySubset(
+    const std::vector<Range<size_t>> &index, const Array<TYPE, FUN, Allocator> &subset )
 {
     // Get the subset indices
     checkSubsetIndex( index );
@@ -502,24 +511,23 @@ void Array<TYPE, FUN>::copySubset(
     getSubsetArrays( index, first, last, inc, N1 );
     // Copy the sub-array
     static_assert( ArraySize::maxDim() == 5, "Not programmed for more than 5 dimensions" );
-    const TYPE2 *src_data = subset.data();
+    const TYPE *src_data = subset.data();
     for ( size_t i4 = first[4], k1 = 0; i4 <= last[4]; i4 += inc[4] ) {
         for ( size_t i3 = first[3]; i3 <= last[3]; i3 += inc[3] ) {
             for ( size_t i2 = first[2]; i2 <= last[2]; i2 += inc[2] ) {
                 for ( size_t i1 = first[1]; i1 <= last[1]; i1 += inc[1] ) {
                     for ( size_t i0 = first[0]; i0 <= last[0]; i0 += inc[0], k1++ ) {
                         size_t k2  = d_size.index( i0, i1, i2, i3, i4 );
-                        d_data[k2] = static_cast<TYPE>( src_data[k1] );
+                        d_data[k2] = src_data[k1];
                     }
                 }
             }
         }
     }
 }
-
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::addSubset(
-    const std::vector<Range<size_t>> &index, const Array<TYPE, FUN> &subset )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::addSubset(
+    const std::vector<Range<size_t>> &index, const Array<TYPE, FUN, Allocator> &subset )
 {
     // Get the subset indices
     checkSubsetIndex( index );
@@ -540,17 +548,17 @@ void Array<TYPE, FUN>::addSubset(
         }
     }
 }
-template<class TYPE, class FUN>
-template<class TYPE2>
-void Array<TYPE, FUN>::copySubset(
-    const std::vector<size_t> &index, const Array<TYPE2, FUN> &subset )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::copySubset(
+    const std::vector<size_t> &index, const Array<TYPE, FUN, Allocator> &subset )
 {
     auto range = convert( index );
     copySubset( range, subset );
 }
 
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::addSubset( const std::vector<size_t> &index, const Array<TYPE, FUN> &subset )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::addSubset(
+    const std::vector<size_t> &index, const Array<TYPE, FUN, Allocator> &subset )
 {
     auto range = convert( index );
     addSubset( range, subset );
@@ -560,8 +568,8 @@ void Array<TYPE, FUN>::addSubset( const std::vector<size_t> &index, const Array<
 /********************************************************
  *  Operator overloading                                 *
  ********************************************************/
-template<class TYPE, class FUN>
-bool Array<TYPE, FUN>::operator==( const Array &rhs ) const
+template<class TYPE, class FUN, class Allocator>
+bool Array<TYPE, FUN, Allocator>::operator==( const Array &rhs ) const
 {
     if ( this == &rhs )
         return true;
@@ -577,158 +585,79 @@ bool Array<TYPE, FUN>::operator==( const Array &rhs ) const
 /********************************************************
  *  Get a view of an C array                             *
  ********************************************************/
-template<class TYPE, class FUN>
-std::shared_ptr<Array<TYPE, FUN>> Array<TYPE, FUN>::view(
-    size_t N, std::shared_ptr<TYPE> const &data )
+template<class TYPE, class FUN, class Allocator>
+std::unique_ptr<Array<TYPE, FUN, Allocator>> Array<TYPE, FUN, Allocator>::view(
+    const ArraySize &N, std::shared_ptr<TYPE> &data )
 {
-    return view( ArraySize( N ), data );
-}
-template<class TYPE, class FUN>
-std::shared_ptr<Array<TYPE, FUN>> Array<TYPE, FUN>::view(
-    size_t N1, size_t N2, std::shared_ptr<TYPE> const &data )
-{
-    return view( ArraySize( N1, N2 ), data );
-}
-template<class TYPE, class FUN>
-std::shared_ptr<Array<TYPE, FUN>> Array<TYPE, FUN>::view(
-    size_t N1, size_t N2, size_t N3, std::shared_ptr<TYPE> const &data )
-{
-    return view( ArraySize( N1, N2, N3 ), data );
-}
-template<class TYPE, class FUN>
-std::shared_ptr<const Array<TYPE, FUN>> Array<TYPE, FUN>::constView(
-    size_t N, std::shared_ptr<const TYPE> const &data )
-{
-    return constView( ArraySize( N ), data );
-}
-template<class TYPE, class FUN>
-std::shared_ptr<const Array<TYPE, FUN>> Array<TYPE, FUN>::constView(
-    size_t N1, size_t N2, std::shared_ptr<const TYPE> const &data )
-{
-    return constView( ArraySize( N1, N2 ), data );
-}
-template<class TYPE, class FUN>
-std::shared_ptr<const Array<TYPE, FUN>> Array<TYPE, FUN>::constView(
-    size_t N1, size_t N2, size_t N3, std::shared_ptr<const TYPE> const &data )
-{
-    return constView( ArraySize( N1, N2, N3 ), data );
-}
-template<class TYPE, class FUN>
-std::shared_ptr<Array<TYPE, FUN>> Array<TYPE, FUN>::view(
-    const ArraySize &N, std::shared_ptr<TYPE> const &data )
-{
-    std::shared_ptr<Array<TYPE, FUN>> array( new Array<TYPE, FUN>() );
+    auto array    = std::make_unique<Array<TYPE, FUN, Allocator>>();
     array->d_size = N;
     array->d_ptr  = data;
     array->d_data = array->d_ptr.get();
     return array;
 }
-template<class TYPE, class FUN>
-std::shared_ptr<const Array<TYPE, FUN>> Array<TYPE, FUN>::constView(
+template<class TYPE, class FUN, class Allocator>
+std::unique_ptr<const Array<TYPE, FUN, Allocator>> Array<TYPE, FUN, Allocator>::constView(
     const ArraySize &N, std::shared_ptr<const TYPE> const &data )
 {
-    std::shared_ptr<Array<TYPE, FUN>> array( new Array<TYPE, FUN>() );
+    auto array    = std::make_unique<Array<TYPE, FUN, Allocator>>();
     array->d_size = N;
-    array->d_ptr  = data;
+    array->d_ptr  = std::const_pointer_cast<TYPE>( data );
     array->d_data = array->d_ptr.get();
     return array;
 }
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::view2( Array<TYPE, FUN> &src )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::view2( Array<TYPE, FUN, Allocator> &src )
 {
     view2( src.size(), src.getPtr() );
     d_data = src.d_data;
 }
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::view2( const ArraySize &N, std::shared_ptr<TYPE> const &data )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::view2( const ArraySize &N, std::shared_ptr<TYPE> const &data )
 {
     d_size = N;
     d_ptr  = data;
     d_data = d_ptr.get();
 }
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::viewRaw( int ndim, const size_t *dims, TYPE *data )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::viewRaw(
+    const ArraySize &N, TYPE *data, bool isCopyable, bool isFixedSize )
 {
-    d_size = ArraySize( ndim, dims );
+    d_isCopyable  = isCopyable;
+    d_isFixedSize = isFixedSize;
     d_ptr.reset();
-    d_data = data;
-}
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::viewRaw( const ArraySize &N, TYPE *data )
-{
     d_size = N;
-    d_ptr.reset();
     d_data = data;
 }
 
 
 /********************************************************
- *  Convert array types                                  *
+ *  Basic functions                                      *
  ********************************************************/
-template<class TYPE, class FUN>
-template<class TYPE2>
-std::shared_ptr<Array<TYPE2>> Array<TYPE, FUN>::convert( std::shared_ptr<Array<TYPE, FUN>> array )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::swap( Array &other )
 {
-    if ( std::is_same<TYPE, TYPE2>() )
-        return array;
-    std::shared_ptr<Array<TYPE2>> array2( new Array<TYPE2>( array->size() ) );
-    array2.copy( *array );
-    return array2;
+    // check that dimensions match
+    if ( d_size != other.d_size )
+        throw std::logic_error( "length of arrays do not match" );
+    // swap the data
+    std::swap( d_data, other.d_data );
+    std::swap( d_ptr, other.d_ptr );
 }
-template<class TYPE, class FUN>
-template<class TYPE2>
-std::shared_ptr<const Array<TYPE2>> Array<TYPE, FUN>::convert(
-    std::shared_ptr<const Array<TYPE, FUN>> array )
-{
-    return Array<TYPE, FUN>::convert( std::const_pointer_cast<Array<TYPE2>>( array ) );
-}
-template<class TYPE, class FUN>
-template<class TYPE2>
-void Array<TYPE, FUN>::copy( const Array<TYPE2> &array )
-{
-    resize( array.size() );
-    const TYPE2 *src = array.data();
-    for ( size_t i = 0; i < d_size.length(); i++ )
-        d_data[i] = static_cast<TYPE>( src[i] );
-}
-template<class TYPE, class FUN>
-template<class TYPE2>
-void Array<TYPE, FUN>::copy( const TYPE2 *src )
-{
-    for ( size_t i = 0; i < d_size.length(); i++ )
-        d_data[i] = static_cast<TYPE>( src[i] );
-}
-template<class TYPE, class FUN>
-template<class TYPE2>
-void Array<TYPE, FUN>::copyTo( TYPE2 *dst ) const
-{
-    for ( size_t i = 0; i < d_size.length(); i++ )
-        dst[i] = static_cast<TYPE2>( d_data[i] );
-}
-template<class TYPE, class FUN>
-template<class TYPE2>
-Array<TYPE2, FUN> Array<TYPE, FUN>::cloneTo() const
-{
-    Array<TYPE2, FUN> dst( this->size() );
-    auto dst_data = dst.data();
-    for ( size_t i = 0; i < d_size.length(); i++ )
-        dst_data[i] = static_cast<TYPE2>( d_data[i] );
-    return dst;
-}
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::fill( const TYPE &value )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::fill( const TYPE &value )
 {
     for ( size_t i = 0; i < d_size.length(); i++ )
         d_data[i] = value;
 }
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::scale( const TYPE &value )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::scale( const TYPE &value )
 {
     for ( size_t i = 0; i < d_size.length(); i++ )
         d_data[i] *= value;
 }
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::pow( const Array<TYPE, FUN> &baseArray, const TYPE &exp )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::pow(
+    const Array<TYPE, FUN, Allocator> &baseArray, const TYPE &exp )
 {
     // not insisting on the shapes being the same
     // but insisting on the total size being the same
@@ -744,8 +673,9 @@ void Array<TYPE, FUN>::pow( const Array<TYPE, FUN> &baseArray, const TYPE &exp )
 /********************************************************
  *  Replicate the array                                  *
  ********************************************************/
-template<class TYPE, class FUN>
-Array<TYPE, FUN> Array<TYPE, FUN>::repmat( const std::vector<size_t> &N_rep ) const
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::repmat(
+    const std::vector<size_t> &N_rep ) const
 {
     std::vector<size_t> N2( d_size.begin(), d_size.end() );
     if ( N2.size() < N_rep.size() )
@@ -758,7 +688,7 @@ Array<TYPE, FUN> Array<TYPE, FUN>::repmat( const std::vector<size_t> &N_rep ) co
         Nr[d] = N_rep[d];
         N2[d] *= N_rep[d];
     }
-    Array<TYPE, FUN> y( N2 );
+    Array<TYPE, FUN, Allocator> y( N2 );
     static_assert( ArraySize::maxDim() <= 5, "Not programmed for dimensions > 5" );
     TYPE *y2 = y.data();
     for ( size_t i4 = 0, index = 0; i4 < N1[4]; i4++ ) {
@@ -790,27 +720,26 @@ Array<TYPE, FUN> Array<TYPE, FUN>::repmat( const std::vector<size_t> &N_rep ) co
 /********************************************************
  *  Simple math operations                               *
  ********************************************************/
-template<class TYPE, class FUN>
-bool Array<TYPE, FUN>::NaNs() const
+template<class TYPE, class FUN, class Allocator>
+bool Array<TYPE, FUN, Allocator>::NaNs() const
 {
     bool test = false;
     for ( size_t i = 0; i < d_size.length(); i++ )
         test = test || d_data[i] != d_data[i];
     return test;
 }
-
-template<class TYPE, class FUN>
-TYPE Array<TYPE, FUN>::mean( void ) const
+template<class TYPE, class FUN, class Allocator>
+TYPE Array<TYPE, FUN, Allocator>::mean( void ) const
 {
     TYPE x = this->sum() / d_size.length();
     return x;
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> Array<TYPE, FUN>::min( int dir ) const
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::min( int dir ) const
 {
     auto size_ans = d_size;
     size_ans.resize( dir, 1 );
-    Array<TYPE, FUN> ans( size_ans );
+    Array<TYPE, FUN, Allocator> ans( size_ans );
     size_t N1 = 1, N2 = 1, N3 = 1;
     for ( int d = 0; d < std::min<int>( dir, d_size.ndim() ); d++ )
         N1 *= d_size[d];
@@ -828,12 +757,12 @@ Array<TYPE, FUN> Array<TYPE, FUN>::min( int dir ) const
     }
     return ans;
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> Array<TYPE, FUN>::max( int dir ) const
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::max( int dir ) const
 {
     auto size_ans = d_size;
     size_ans.resize( dir, 1 );
-    Array<TYPE, FUN> ans( size_ans );
+    Array<TYPE, FUN, Allocator> ans( size_ans );
     size_t N1 = 1, N2 = 1, N3 = 1;
     for ( int d = 0; d < std::min<int>( dir, d_size.ndim() ); d++ )
         N1 *= d_size[d];
@@ -852,12 +781,12 @@ Array<TYPE, FUN> Array<TYPE, FUN>::max( int dir ) const
     }
     return ans;
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> Array<TYPE, FUN>::sum( int dir ) const
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::sum( int dir ) const
 {
     auto size_ans = d_size;
     size_ans.resize( dir, 1 );
-    Array<TYPE, FUN> ans( size_ans );
+    Array<TYPE, FUN, Allocator> ans( size_ans );
     size_t N1 = 1, N2 = 1, N3 = 1;
     for ( int d = 0; d < std::min<int>( dir, d_size.ndim() ); d++ )
         N1 *= d_size[d];
@@ -877,8 +806,8 @@ Array<TYPE, FUN> Array<TYPE, FUN>::sum( int dir ) const
     }
     return ans;
 }
-template<class TYPE, class FUN>
-TYPE Array<TYPE, FUN>::min( const std::vector<Range<size_t>> &range ) const
+template<class TYPE, class FUN, class Allocator>
+TYPE Array<TYPE, FUN, Allocator>::min( const std::vector<Range<size_t>> &range ) const
 {
     // Get the subset indicies
     checkSubsetIndex( range );
@@ -900,8 +829,8 @@ TYPE Array<TYPE, FUN>::min( const std::vector<Range<size_t>> &range ) const
     }
     return x;
 }
-template<class TYPE, class FUN>
-TYPE Array<TYPE, FUN>::max( const std::vector<Range<size_t>> &range ) const
+template<class TYPE, class FUN, class Allocator>
+TYPE Array<TYPE, FUN, Allocator>::max( const std::vector<Range<size_t>> &range ) const
 {
     // Get the subset indicies
     checkSubsetIndex( range );
@@ -923,8 +852,8 @@ TYPE Array<TYPE, FUN>::max( const std::vector<Range<size_t>> &range ) const
     }
     return x;
 }
-template<class TYPE, class FUN>
-TYPE Array<TYPE, FUN>::sum( const std::vector<Range<size_t>> &range ) const
+template<class TYPE, class FUN, class Allocator>
+TYPE Array<TYPE, FUN, Allocator>::sum( const std::vector<Range<size_t>> &range ) const
 {
     // Get the subset indicies
     checkSubsetIndex( range );
@@ -946,8 +875,8 @@ TYPE Array<TYPE, FUN>::sum( const std::vector<Range<size_t>> &range ) const
     }
     return x;
 }
-template<class TYPE, class FUN>
-TYPE Array<TYPE, FUN>::mean( const std::vector<Range<size_t>> &range ) const
+template<class TYPE, class FUN, class Allocator>
+TYPE Array<TYPE, FUN, Allocator>::mean( const std::vector<Range<size_t>> &range ) const
 {
     // Get the subset indicies
     checkSubsetIndex( range );
@@ -960,26 +889,26 @@ TYPE Array<TYPE, FUN>::mean( const std::vector<Range<size_t>> &range ) const
     TYPE x = sum( range ) / n;
     return x;
 }
-template<class TYPE, class FUN>
-TYPE Array<TYPE, FUN>::min( const std::vector<size_t> &index ) const
+template<class TYPE, class FUN, class Allocator>
+TYPE Array<TYPE, FUN, Allocator>::min( const std::vector<size_t> &index ) const
 {
     auto range = convert( index );
     return min( range );
 }
-template<class TYPE, class FUN>
-TYPE Array<TYPE, FUN>::max( const std::vector<size_t> &index ) const
+template<class TYPE, class FUN, class Allocator>
+TYPE Array<TYPE, FUN, Allocator>::max( const std::vector<size_t> &index ) const
 {
     auto range = convert( index );
     return max( range );
 }
-template<class TYPE, class FUN>
-TYPE Array<TYPE, FUN>::sum( const std::vector<size_t> &index ) const
+template<class TYPE, class FUN, class Allocator>
+TYPE Array<TYPE, FUN, Allocator>::sum( const std::vector<size_t> &index ) const
 {
     auto range = convert( index );
     return sum( range );
 }
-template<class TYPE, class FUN>
-TYPE Array<TYPE, FUN>::mean( const std::vector<size_t> &index ) const
+template<class TYPE, class FUN, class Allocator>
+TYPE Array<TYPE, FUN, Allocator>::mean( const std::vector<size_t> &index ) const
 {
     auto range = convert( index );
     return mean( range );
@@ -989,8 +918,8 @@ TYPE Array<TYPE, FUN>::mean( const std::vector<size_t> &index ) const
 /********************************************************
  *  Find all elements that match the given operation     *
  ********************************************************/
-template<class TYPE, class FUN>
-std::vector<size_t> Array<TYPE, FUN>::find(
+template<class TYPE, class FUN, class Allocator>
+std::vector<size_t> Array<TYPE, FUN, Allocator>::find(
     const TYPE &value, std::function<bool( const TYPE &, const TYPE & )> compare ) const
 {
     std::vector<size_t> result;
@@ -1006,8 +935,8 @@ std::vector<size_t> Array<TYPE, FUN>::find(
 /********************************************************
  *  Print an array to an output stream                   *
  ********************************************************/
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::print(
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::print(
     std::ostream &os, const std::string &name, const std::string &prefix ) const
 {
     if ( d_size.ndim() == 1 ) {
@@ -1029,14 +958,14 @@ void Array<TYPE, FUN>::print(
 /********************************************************
  *  Reverse dimensions (transpose)                       *
  ********************************************************/
-template<class TYPE, class FUN>
-Array<TYPE, FUN> Array<TYPE, FUN>::reverseDim() const
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::reverseDim() const
 {
     size_t N2[ArraySize::maxDim()];
     for ( int d = 0; d < ArraySize::maxDim(); d++ )
         N2[d] = d_size[ArraySize::maxDim() - d - 1];
     ArraySize S2( ArraySize::maxDim(), N2 );
-    Array<TYPE, FUN> y( S2 );
+    Array<TYPE, FUN, Allocator> y( S2 );
     static_assert( ArraySize::maxDim() == 5, "Not programmed for dimensions other than 5" );
     TYPE *y2 = y.data();
     for ( size_t i0 = 0; i0 < d_size[0]; i0++ ) {
@@ -1061,19 +990,21 @@ Array<TYPE, FUN> Array<TYPE, FUN>::reverseDim() const
 /********************************************************
  *  Coarsen the array                                    *
  ********************************************************/
-template<class TYPE, class FUN>
-Array<TYPE, FUN> Array<TYPE, FUN>::coarsen( const Array<TYPE, FUN> &filter ) const
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::coarsen(
+    const Array<TYPE, FUN, Allocator> &filter ) const
 {
-  auto S2 = size();
-  for ( size_t i = 0; i < S2.size(); i++ ) {
-    S2.resize( i, S2[i] / filter.size(i) );
-    if ( S2[i] * filter.size( i ) != size( i ) )
-      throw std::invalid_argument( "Array must be multiple of filter size" );
-  }
-  Array<TYPE, FUN> y( S2 );
-  if ( d_size.ndim() <= 3 )
-    throw std::logic_error( "Function programmed for more than 3 dimensions" );
-    const auto& Nh = filter.d_size;
+    auto S2 = size();
+    for ( size_t i = 0; i < S2.size(); i++ ) {
+        size_t s = S2[i] / filter.size( i );
+        S2.resize( i, s );
+        if ( S2[i] * filter.size( i ) != size( i ) )
+            throw std::invalid_argument( "Array must be multiple of filter size" );
+    }
+    Array<TYPE, FUN, Allocator> y( S2 );
+    if ( d_size.ndim() <= 3 )
+        throw std::logic_error( "Function programmed for more than 3 dimensions" );
+    const auto &Nh = filter.d_size;
     for ( size_t k1 = 0; k1 < y.d_size[2]; k1++ ) {
         for ( size_t j1 = 0; j1 < y.d_size[1]; j1++ ) {
             for ( size_t i1 = 0; i1 < y.d_size[0]; i1++ ) {
@@ -1092,20 +1023,20 @@ Array<TYPE, FUN> Array<TYPE, FUN>::coarsen( const Array<TYPE, FUN> &filter ) con
     }
     return y;
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> Array<TYPE, FUN>::coarsen(
-    const std::vector<size_t> &ratio, std::function<TYPE( const Array<TYPE, FUN> & )> filter ) const
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::coarsen( const std::vector<size_t> &ratio,
+    std::function<TYPE( const Array<TYPE, FUN, Allocator> & )> filter ) const
 {
-  //if ( ratio.size() != d_size.ndim() )
-  //     throw std::logic_error( "ratio size does not match ndim" );
+    if ( ratio.size() != d_size.ndim() )
+        throw std::logic_error( "ratio size does not match ndim" );
     auto S2 = size();
     for ( size_t i = 0; i < S2.size(); i++ ) {
         S2.resize( i, S2[i] / ratio[i] );
         if ( S2[i] * ratio[i] != size( i ) )
             throw std::invalid_argument( "Array must be multiple of filter size" );
     }
-    Array<TYPE, FUN> tmp( ratio );
-    Array<TYPE, FUN> y( S2 );
+    Array<TYPE, FUN, Allocator> tmp( ratio );
+    Array<TYPE, FUN, Allocator> y( S2 );
     if ( d_size.ndim() <= 3 )
         throw std::logic_error( "Function programmed for more than 3 dimensions" );
     for ( size_t k1 = 0; k1 < y.d_size[2]; k1++ ) {
@@ -1130,25 +1061,26 @@ Array<TYPE, FUN> Array<TYPE, FUN>::coarsen(
 /********************************************************
  *  Concatenates the arrays                              *
  ********************************************************/
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::cat( const Array<TYPE, FUN> &x, int dim )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::cat( const Array<TYPE, FUN, Allocator> &x, int dim )
 {
-    std::vector<Array<TYPE, FUN>> tmp( 2 );
+    std::vector<Array<TYPE, FUN, Allocator>> tmp( 2 );
     tmp[0].view2( *this );
-    tmp[1].view2( const_cast<Array<TYPE, FUN> &>( x ) );
+    tmp[1].view2( const_cast<Array<TYPE, FUN, Allocator> &>( x ) );
     *this = cat( tmp, dim );
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> Array<TYPE, FUN>::cat( const std::vector<Array> &x, int dim )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::cat( const std::vector<Array> &x, int dim )
 {
     if ( x.empty() )
-        return Array<TYPE, FUN>();
+        return Array<TYPE, FUN, Allocator>();
     // Check that the dimensions match
     bool check = true;
     for ( size_t i = 1; i < x.size(); i++ ) {
         check = check && x[i].ndim() == x[0].ndim();
         for ( int d = 0; d < x[0].ndim(); d++ )
-            check = check && d == dim;
+            if ( d != dim )
+                check = check && x[i].size( d ) == x[0].size( d );
     }
     if ( !check )
         throw std::logic_error( "Array dimensions do not match for concatenation" );
@@ -1156,7 +1088,7 @@ Array<TYPE, FUN> Array<TYPE, FUN>::cat( const std::vector<Array> &x, int dim )
     auto size = x[0].d_size;
     for ( size_t i = 1; i < x.size(); i++ )
         size.resize( dim, size[dim] + x[i].size( dim ) );
-    Array<TYPE, FUN> out( size );
+    Array<TYPE, FUN, Allocator> out( size );
     size_t N1 = 1;
     size_t N2 = size[dim];
     size_t N3 = 1;
@@ -1181,118 +1113,211 @@ Array<TYPE, FUN> Array<TYPE, FUN>::cat( const std::vector<Array> &x, int dim )
 }
 
 
+/********************************************************
+ *  Interpolate                                          *
+ ********************************************************/
+template<class T>
+struct is_compatible_double : std::integral_constant<bool,
+                                  std::is_floating_point<typename std::remove_cv<T>::type>::value ||
+                                      std::is_integral<typename std::remove_cv<T>::type>::value> {
+};
+template<class TYPE>
+inline typename std::enable_if<is_compatible_double<TYPE>::value, TYPE>::type Array_interp_1D(
+    double x, int N, const TYPE *data )
+{
+    int i = floor( x );
+    i     = std::max( i, 0 );
+    i     = std::min( i, N - 2 );
+    return ( i + 1 - x ) * data[i] + ( x - i ) * data[i + 1];
+}
+template<class TYPE>
+inline typename std::enable_if<is_compatible_double<TYPE>::value, TYPE>::type Array_interp_2D(
+    double x, double y, int Nx, int Ny, const TYPE *data )
+{
+    int i       = floor( x );
+    i           = std::max( i, 0 );
+    i           = std::min( i, Nx - 2 );
+    double dx   = x - i;
+    double dx2  = 1.0 - dx;
+    int j       = floor( y );
+    j           = std::max( j, 0 );
+    j           = std::min( j, Ny - 2 );
+    double dy   = y - j;
+    double dy2  = 1.0 - dy;
+    double f[4] = { (double) data[i + j * Nx], (double) data[i + 1 + j * Nx],
+        (double) data[i + ( j + 1 ) * Nx], (double) data[i + 1 + ( j + 1 ) * Nx] };
+    return ( dx * f[1] + dx2 * f[0] ) * dy2 + ( dx * f[3] + dx2 * f[2] ) * dy;
+}
+template<class TYPE>
+inline typename std::enable_if<is_compatible_double<TYPE>::value, TYPE>::type Array_interp_3D(
+    double x, double y, double z, int Nx, int Ny, int Nz, const TYPE *data )
+{
+    int i       = floor( x );
+    i           = std::max( i, 0 );
+    i           = std::min( i, Nx - 2 );
+    double dx   = x - i;
+    double dx2  = 1.0 - dx;
+    int j       = floor( y );
+    j           = std::max( j, 0 );
+    j           = std::min( j, Ny - 2 );
+    double dy   = y - j;
+    double dy2  = 1.0 - dy;
+    int k       = floor( z );
+    k           = std::max( k, 0 );
+    k           = std::min( k, Nz - 2 );
+    double dz   = z - k;
+    double dz2  = 1.0 - dz;
+    double f[8] = { (double) data[i + j * Nx + k * Nx * Ny],
+        (double) data[i + 1 + j * Nx + k * Nx * Ny],
+        (double) data[i + ( j + 1 ) * Nx + k * Nx * Ny],
+        (double) data[i + 1 + ( j + 1 ) * Nx + k * Nx * Ny],
+        (double) data[i + j * Nx + ( k + 1 ) * Nx * Ny],
+        (double) data[i + 1 + j * Nx + ( k + 1 ) * Nx * Ny],
+        (double) data[i + ( j + 1 ) * Nx + ( k + 1 ) * Nx * Ny],
+        (double) data[i + 1 + ( j + 1 ) * Nx + ( k + 1 ) * Nx * Ny] };
+    double h0   = ( dx * f[1] + dx2 * f[0] ) * dy2 + ( dx * f[3] + dx2 * f[2] ) * dy;
+    double h1   = ( dx * f[5] + dx2 * f[4] ) * dy2 + ( dx * f[7] + dx2 * f[6] ) * dy;
+    return h0 * dz2 + h1 * dz;
+}
+template<class TYPE>
+inline typename std::enable_if<!is_compatible_double<TYPE>::value, TYPE>::type Array_interp_1D(
+    double, int, const TYPE * )
+{
+    throw std::logic_error( "Invalid conversion" );
+}
+template<class TYPE>
+inline typename std::enable_if<!is_compatible_double<TYPE>::value, TYPE>::type Array_interp_2D(
+    double, double, int, int, const TYPE * )
+{
+    throw std::logic_error( "Invalid conversion" );
+}
+template<class TYPE>
+inline typename std::enable_if<!is_compatible_double<TYPE>::value, TYPE>::type Array_interp_3D(
+    double, double, double, int, int, int, const TYPE * )
+{
+    throw std::logic_error( "Invalid conversion" );
+}
+template<class TYPE, class FUN, class Allocator>
+TYPE Array<TYPE, FUN, Allocator>::interp( const std::vector<double> &x ) const
+{
+    int ndim = 0, dim[5];
+    double x2[5];
+    for ( int d = 0; d < d_size.ndim(); d++ ) {
+        if ( d_size[d] > 1 ) {
+            x2[ndim]  = x[d];
+            dim[ndim] = d_size[d];
+            ndim++;
+        }
+    }
+    TYPE f = 0;
+    if ( ndim == 0 ) {
+        // No data, do nothing
+    } else if ( ndim == 1 ) {
+        f = Array_interp_1D( x2[0], dim[0], d_data );
+    } else if ( ndim == 2 ) {
+        f = Array_interp_2D( x2[0], x2[1], dim[0], dim[1], d_data );
+    } else if ( ndim == 3 ) {
+        f = Array_interp_3D( x2[0], x2[1], x2[2], dim[0], dim[1], dim[2], d_data );
+    } else {
+        throw std::logic_error( "Not finished" );
+    }
+    return f;
+}
+
+
 /********************************************************
  *  Math operations (should call the Math class)         *
  ********************************************************/
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::rand()
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::rand()
 {
     FUN::rand( *this );
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> &Array<TYPE, FUN>::operator+=( const Array<TYPE, FUN> &rhs )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator+=(
+    const Array<TYPE, FUN, Allocator> &rhs )
 {
     const auto &fun = []( const TYPE &a, const TYPE &b ) { return a + b; };
     FUN::transform( fun, *this, rhs, *this );
     return *this;
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> &Array<TYPE, FUN>::operator-=( const Array<TYPE, FUN> &rhs )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator-=(
+    const Array<TYPE, FUN, Allocator> &rhs )
 {
     const auto &fun = []( const TYPE &a, const TYPE &b ) { return a - b; };
     FUN::transform( fun, *this, rhs, *this );
     return *this;
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> &Array<TYPE, FUN>::operator+=( const TYPE &rhs )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator+=( const TYPE &rhs )
 {
     const auto &fun = [rhs]( const TYPE &x ) { return x + rhs; };
     FUN::transform( fun, *this, *this );
     return *this;
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> &Array<TYPE, FUN>::operator-=( const TYPE &rhs )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> &Array<TYPE, FUN, Allocator>::operator-=( const TYPE &rhs )
 {
     const auto &fun = [rhs]( const TYPE &x ) { return x - rhs; };
     FUN::transform( fun, *this, *this );
     return *this;
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> operator+( const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b )
-{
-    Array<TYPE, FUN> c;
-    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a + b; };
-    FUN::transform( fun, a, b, c );
-    return c;
-}
-template<class TYPE, class FUN>
-Array<TYPE, FUN> operator-( const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b )
-{
-    Array<TYPE, FUN> c;
-    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a - b; };
-    FUN::transform( fun, a, b, c );
-    return c;
-}
-template<class TYPE, class FUN>
-Array<TYPE, FUN> operator*( const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b )
-{
-    return Array<TYPE, FUN>::multiply( a, b );
-}
-template<class TYPE, class FUN>
-inline Array<TYPE, FUN> operator*( const Array<TYPE, FUN> &a, const std::vector<TYPE> &b )
-{
-    Array<TYPE, FUN> b2;
-    b2.viewRaw( { b.size() }, const_cast<TYPE *>( b.data() ) );
-    return Array<TYPE, FUN>::multiply( a, b2 );
-}
-template<class TYPE, class FUN>
-TYPE Array<TYPE, FUN>::min() const
+template<class TYPE, class FUN, class Allocator>
+TYPE Array<TYPE, FUN, Allocator>::min() const
 {
     const auto &fun = []( const TYPE &a, const TYPE &b ) { return a < b ? a : b; };
     return FUN::reduce( fun, *this );
 }
-template<class TYPE, class FUN>
-TYPE Array<TYPE, FUN>::max() const
+template<class TYPE, class FUN, class Allocator>
+TYPE Array<TYPE, FUN, Allocator>::max() const
 {
     const auto &fun = []( const TYPE &a, const TYPE &b ) { return a > b ? a : b; };
     return FUN::reduce( fun, *this );
 }
-template<class TYPE, class FUN>
-TYPE Array<TYPE, FUN>::sum() const
+template<class TYPE, class FUN, class Allocator>
+TYPE Array<TYPE, FUN, Allocator>::sum() const
 {
     const auto &fun = []( const TYPE &a, const TYPE &b ) { return a + b; };
     return FUN::reduce( fun, *this );
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> Array<TYPE, FUN>::multiply( const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::multiply(
+    const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
 {
-    Array<TYPE, FUN> c;
+    Array<TYPE, FUN, Allocator> c;
     FUN::multiply( a, b, c );
     return c;
 }
-template<class TYPE, class FUN>
-void Array<TYPE, FUN>::axpby( const TYPE &alpha, const Array<TYPE, FUN> &x, const TYPE &beta )
+template<class TYPE, class FUN, class Allocator>
+void Array<TYPE, FUN, Allocator>::axpby(
+    const TYPE &alpha, const Array<TYPE, FUN, Allocator> &x, const TYPE &beta )
 {
     const auto &fun = [alpha, beta](
                           const TYPE &x, const TYPE &y ) { return alpha * x + beta * y; };
-    return FUN::transform( fun, x, *this );
+    return FUN::transform( fun, x, *this, *this );
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> Array<TYPE, FUN>::transform(
-    std::function<TYPE( const TYPE & )> fun, const Array<TYPE, FUN> &x )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::transform(
+    std::function<TYPE( const TYPE & )> fun, const Array<TYPE, FUN, Allocator> &x )
 {
-    Array<TYPE, FUN> y;
+    Array<TYPE, FUN, Allocator> y;
     FUN::transform( fun, x, y );
     return y;
 }
-template<class TYPE, class FUN>
-Array<TYPE, FUN> Array<TYPE, FUN>::transform( std::function<TYPE( const TYPE &, const TYPE & )> fun,
-    const Array<TYPE, FUN> &x, const Array<TYPE, FUN> &y )
+template<class TYPE, class FUN, class Allocator>
+Array<TYPE, FUN, Allocator> Array<TYPE, FUN, Allocator>::transform(
+    std::function<TYPE( const TYPE &, const TYPE & )> fun, const Array<TYPE, FUN, Allocator> &x,
+    const Array<TYPE, FUN, Allocator> &y )
 {
-    Array<TYPE, FUN> z;
+    Array<TYPE, FUN, Allocator> z;
     FUN::transform( fun, x, y, z );
     return z;
 }
-
+template<class TYPE, class FUN, class Allocator>
+bool Array<TYPE, FUN, Allocator>::equals( const Array &rhs, TYPE tol ) const
+{
+    return FUN::equals( *this, rhs, tol );
+}
 
 #endif
diff --git a/common/ArraySize.h b/common/ArraySize.h
new file mode 100644
index 00000000..a3cb78c3
--- /dev/null
+++ b/common/ArraySize.h
@@ -0,0 +1,294 @@
+#ifndef included_ArraySizeClass
+#define included_ArraySizeClass
+
+
+#include <array>
+#include <cstring>
+#include <initializer_list>
+#include <vector>
+
+
+#if defined( __CUDA_ARCH__ )
+#include <cuda.h>
+#define HOST_DEVICE __host__ __device__
+#else
+#define HOST_DEVICE
+#endif
+#if defined( USING_GCC ) || defined( USING_CLANG )
+#define ARRAY_ATTRIBUTE HOST_DEVICE __attribute__( ( always_inline ) )
+#else
+#define ARRAY_ATTRIBUTE HOST_DEVICE
+#endif
+
+
+#if ( defined( DEBUG ) || defined( _DEBUG ) ) && !defined( NDEBUG )
+#define CHECK_ARRAY_LENGTH( i )                                      \
+    do {                                                             \
+        if ( i >= d_length )                                         \
+            throw std::out_of_range( "Index exceeds array bounds" ); \
+    } while ( 0 )
+#else
+#define CHECK_ARRAY_LENGTH( i ) \
+    do {                        \
+    } while ( 0 )
+#endif
+
+
+// Forward declerations
+class FunctionTable;
+template<class TYPE, class FUN = FunctionTable, class Allocator = std::nullptr_t>
+class Array;
+
+
+//! Simple range class
+template<class TYPE = size_t>
+class Range final
+{
+public:
+    //! Empty constructor
+    constexpr Range() : i( 0 ), j( -1 ), k( 1 ) {}
+
+    /*!
+     * Create a range i:k:j (or i:j)
+     * @param i_            Starting value
+     * @param j_            Ending value
+     * @param k_            Increment value
+     */
+    constexpr Range( TYPE i_, TYPE j_, TYPE k_ = 1 ) : i( i_ ), j( j_ ), k( k_ ) {}
+
+    TYPE i, j, k;
+};
+
+
+//! Simple class to store the array dimensions
+class ArraySize final
+{
+public:
+    //! Empty constructor
+    constexpr ArraySize() : d_ndim( 1 ), d_length( 0 ), d_N{ 0, 1, 1, 1, 1 } {}
+
+    /*!
+     * Create the vector size
+     * @param N1            Number of elements in the first dimension
+     */
+    constexpr ArraySize( size_t N1 ) : d_ndim( 1 ), d_length( N1 ), d_N{ N1, 1, 1, 1, 1 } {}
+
+    /*!
+     * Create the vector size
+     * @param N1            Number of elements in the first dimension
+     * @param N2            Number of elements in the second dimension
+     */
+    constexpr ArraySize( size_t N1, size_t N2 )
+        : d_ndim( 2 ), d_length( N1 * N2 ), d_N{ N1, N2, 1, 1, 1 }
+    {
+    }
+
+    /*!
+     * Create the vector size
+     * @param N1            Number of elements in the first dimension
+     * @param N2            Number of elements in the second dimension
+     * @param N3            Number of elements in the third dimension
+     */
+    constexpr ArraySize( size_t N1, size_t N2, size_t N3 )
+        : d_ndim( 3 ), d_length( N1 * N2 * N3 ), d_N{ N1, N2, N3, 1, 1 }
+    {
+    }
+
+    /*!
+     * Create the vector size
+     * @param N1            Number of elements in the first dimension
+     * @param N2            Number of elements in the second dimension
+     * @param N3            Number of elements in the third dimension
+     * @param N4            Number of elements in the fourth dimension
+     */
+    constexpr ArraySize( size_t N1, size_t N2, size_t N3, size_t N4 )
+        : d_ndim( 4 ), d_length( N1 * N2 * N3 * N4 ), d_N{ N1, N2, N3, N4, 1 }
+    {
+    }
+
+    /*!
+     * Create the vector size
+     * @param N1            Number of elements in the first dimension
+     * @param N2            Number of elements in the second dimension
+     * @param N3            Number of elements in the third dimension
+     * @param N4            Number of elements in the fourth dimension
+     * @param N5            Number of elements in the fifth dimension
+     */
+    constexpr ArraySize( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 )
+        : d_ndim( 5 ), d_length( N1 * N2 * N3 * N4 * N5 ), d_N{ N1, N2, N3, N4, N5 }
+    {
+    }
+
+    /*!
+     * Create from initializer list
+     * @param N             Size of the array
+     */
+    constexpr ArraySize( std::initializer_list<size_t> N )
+        : d_ndim( N.size() ), d_length( 0 ), d_N{ 0, 1, 1, 1, 1 }
+    {
+        if ( d_ndim > maxDim() )
+            throw std::out_of_range( "Maximum number of dimensions exceeded" );
+        auto it = N.begin();
+        for ( size_t i = 0; i < d_ndim; i++, ++it )
+            d_N[i] = *it;
+        d_length = 1;
+        for ( unsigned long i : d_N )
+            d_length *= i;
+        if ( d_ndim == 0 )
+            d_length = 0;
+    }
+
+
+    /*!
+     * Create from raw pointer
+     * @param ndim          Number of dimensions
+     * @param dims          Dimensions
+     */
+    constexpr ArraySize( size_t ndim, const size_t *dims )
+        : d_ndim( ndim ), d_length( 0 ), d_N{ 0, 1, 1, 1, 1 }
+    {
+        if ( d_ndim > maxDim() )
+            throw std::out_of_range( "Maximum number of dimensions exceeded" );
+        for ( size_t i = 0; i < ndim; i++ )
+            d_N[i] = dims[i];
+        d_length = 1;
+        for ( unsigned long i : d_N )
+            d_length *= i;
+        if ( d_ndim == 0 )
+            d_length = 0;
+    }
+
+    /*!
+     * Create from std::vector
+     * @param N             Size of the array
+     */
+    ArraySize( const std::vector<size_t> &N );
+
+    // Copy/assignment constructors
+    constexpr ArraySize( ArraySize &&rhs )      = default;
+    constexpr ArraySize( const ArraySize &rhs ) = default;
+    constexpr ArraySize &operator=( ArraySize &&rhs ) = default;
+    constexpr ArraySize &operator=( const ArraySize &rhs ) = default;
+
+    /*!
+     * Access the ith dimension
+     * @param i             Index to access
+     */
+    constexpr ARRAY_ATTRIBUTE size_t operator[]( size_t i ) const { return d_N[i]; }
+
+    //! Return the number of dimensions
+    constexpr ARRAY_ATTRIBUTE uint8_t ndim() const { return d_ndim; }
+
+    //! Return the number of dimensions
+    constexpr ARRAY_ATTRIBUTE size_t size() const { return d_ndim; }
+
+    //! Return the total number of elements in the array
+    constexpr ARRAY_ATTRIBUTE size_t length() const { return d_length; }
+
+    //! Resize the dimension
+    constexpr void resize( uint8_t dim, size_t N )
+    {
+        if ( dim >= d_ndim )
+            throw std::out_of_range( "Invalid dimension" );
+        d_N[dim] = N;
+        d_length = 1;
+        for ( unsigned long i : d_N )
+            d_length *= i;
+    }
+
+    /*!
+     * Reshape the Array so that the number of dimensions is the
+     *    max of ndim and the largest dim>1.
+     * @param ndim          Desired number of dimensions
+     */
+    constexpr void setNdim( uint8_t ndim ) { d_ndim = std::max( ndim, d_ndim ); }
+
+    //! Returns an iterator to the beginning
+    constexpr const size_t *begin() const { return d_N; }
+
+    //! Returns an iterator to the end
+    constexpr const size_t *end() const { return d_N + d_ndim; }
+
+    // Check if two array sizes are equal
+    constexpr ARRAY_ATTRIBUTE bool operator==( const ArraySize &rhs ) const
+    {
+        return d_ndim == rhs.d_ndim && memcmp( d_N, rhs.d_N, sizeof( d_N ) ) == 0;
+    }
+
+    // Check if two array sizes are equal (ignoring the dimension)
+    constexpr ARRAY_ATTRIBUTE bool approxEqual( const ArraySize &rhs ) const
+    {
+        return ( length() == 0 && rhs.length() == 0 ) || memcmp( d_N, rhs.d_N, sizeof( d_N ) ) == 0;
+    }
+
+    //! Check if two matrices are not equal
+    constexpr ARRAY_ATTRIBUTE bool operator!=( const ArraySize &rhs ) const
+    {
+        return d_ndim != rhs.d_ndim || memcmp( d_N, rhs.d_N, sizeof( d_N ) ) != 0;
+    }
+
+    //! Maximum supported dimension
+    constexpr ARRAY_ATTRIBUTE static uint8_t maxDim() { return 5u; }
+
+    //! Get the index
+    constexpr ARRAY_ATTRIBUTE size_t index( size_t i ) const
+    {
+        CHECK_ARRAY_LENGTH( i );
+        return i;
+    }
+
+    //! Get the index
+    constexpr ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2 ) const
+    {
+        size_t index = i1 + i2 * d_N[0];
+        CHECK_ARRAY_LENGTH( index );
+        return index;
+    }
+
+    //! Get the index
+    constexpr ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2, size_t i3 ) const
+    {
+        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * i3 );
+        CHECK_ARRAY_LENGTH( index );
+        return index;
+    }
+
+    //! Get the index
+    constexpr ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2, size_t i3, size_t i4 ) const
+    {
+        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * i4 ) );
+        CHECK_ARRAY_LENGTH( index );
+        return index;
+    }
+
+    //! Get the index
+    constexpr ARRAY_ATTRIBUTE size_t index(
+        size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const
+    {
+        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * ( i4 + d_N[3] * i5 ) ) );
+        CHECK_ARRAY_LENGTH( index );
+        return index;
+    }
+
+private:
+    uint8_t d_ndim;
+    size_t d_length;
+    size_t d_N[5];
+};
+
+
+// Function to concatenate dimensions of two array sizes
+constexpr ArraySize cat( const ArraySize &x, const ArraySize &y )
+{
+    if ( x.ndim() + y.ndim() > ArraySize::maxDim() )
+        throw std::out_of_range( "Maximum number of dimensions exceeded" );
+    size_t N[ArraySize::maxDim()] = { 0 };
+    for ( int i = 0; i < x.ndim(); i++ )
+        N[i] = x[i];
+    for ( int i = 0; i < y.ndim(); i++ )
+        N[i + x.ndim()] = y[i];
+    return ArraySize( x.ndim() + y.ndim(), N );
+}
+
+
+#endif
diff --git a/common/FunctionTable.h b/common/FunctionTable.h
index e2bdcb67..d0e8565d 100644
--- a/common/FunctionTable.h
+++ b/common/FunctionTable.h
@@ -2,7 +2,7 @@
 #define included_FunctionTable
 
 
-#include "common/Array.h"
+#include "common/ArraySize.h"
 
 #include <functional>
 
@@ -65,17 +65,43 @@ public:
      * @param[out] c            The output array
      */
     template<class TYPE, class FUN>
-    static void multiply(
+    static inline void multiply(
         const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, Array<TYPE, FUN> &c );
 
+    /*!
+     * Perform dgemv/dgemm equavalent operation ( C = alpha*A*B + beta*C )
+     * @param[in] alpha         The scalar value alpha
+     * @param[in] A             The first array
+     * @param[in] B             The second array
+     * @param[in] beta          The scalar value alpha
+     * @param[in,out] c         The output array C
+     */
+    template<class TYPE, class FUN>
+    static void gemm( const TYPE alpha, const Array<TYPE, FUN> &A, const Array<TYPE, FUN> &B,
+        const TYPE beta, Array<TYPE, FUN> &C );
+
+    /*!
+     * Perform axpy equavalent operation ( y = alpha*x + y )
+     * @param[in] alpha         The scalar value alpha
+     * @param[in] x             The input array x
+     * @param[in,out] y         The output array y
+     */
+    template<class TYPE, class FUN>
+    static void axpy( const TYPE alpha, const Array<TYPE, FUN> &x, Array<TYPE, FUN> &y );
+
+    /*!
+     * Check if two arrays are approximately equal
+     * @param[in] A             The first array
+     * @param[in] B             The second array
+     * @param[in] tol           The tolerance
+     */
+    template<class TYPE, class FUN>
+    static bool equals( const Array<TYPE, FUN> &A, const Array<TYPE, FUN> &B, TYPE tol );
+
 
 private:
     FunctionTable();
-
-    template<class T>
-    static inline void rand( size_t N, T *x );
 };
 
-#include "common/FunctionTable.hpp"
 
 #endif
diff --git a/common/FunctionTable.hpp b/common/FunctionTable.hpp
index 52897d5c..e5fc7786 100644
--- a/common/FunctionTable.hpp
+++ b/common/FunctionTable.hpp
@@ -13,37 +13,30 @@
 /********************************************************
  *  Random number initialization                         *
  ********************************************************/
+template<class TYPE>
+static inline typename std::enable_if<std::is_integral<TYPE>::value>::type genRand(
+    size_t N, TYPE* x )
+{
+    std::random_device rd;
+    std::mt19937 gen( rd() );
+    std::uniform_int_distribution<TYPE> dis;
+    for ( size_t i = 0; i < N; i++ )
+        x[i] = dis( gen );
+}
+template<class TYPE>
+static inline typename std::enable_if<std::is_floating_point<TYPE>::value>::type genRand(
+    size_t N, TYPE* x )
+{
+    std::random_device rd;
+    std::mt19937 gen( rd() );
+    std::uniform_real_distribution<TYPE> dis( 0, 1 );
+    for ( size_t i = 0; i < N; i++ )
+        x[i] = dis( gen );
+}
 template<class TYPE, class FUN>
-void FunctionTable::rand( Array<TYPE, FUN> &x )
+inline void FunctionTable::rand( Array<TYPE, FUN>& x )
 {
-    FunctionTable::rand<TYPE>( x.length(), x.data() );
-}
-template<>
-inline void FunctionTable::rand<double>( size_t N, double *x )
-{
-    std::random_device rd;
-    std::mt19937 gen( rd() );
-    std::uniform_real_distribution<> dis( 0, 1 );
-    for ( size_t i = 0; i < N; i++ )
-        x[i] = dis( gen );
-}
-template<>
-inline void FunctionTable::rand<float>( size_t N, float *x )
-{
-    std::random_device rd;
-    std::mt19937 gen( rd() );
-    std::uniform_real_distribution<> dis( 0, 1 );
-    for ( size_t i = 0; i < N; i++ )
-        x[i] = dis( gen );
-}
-template<>
-inline void FunctionTable::rand<int>( size_t N, int *x )
-{
-    std::random_device rd;
-    std::mt19937 gen( rd() );
-    std::uniform_int_distribution<> dis;
-    for ( size_t i = 0; i < N; i++ )
-        x[i] = dis( gen );
+    genRand<TYPE>( x.length(), x.data() );
 }
 
 
@@ -51,11 +44,11 @@ inline void FunctionTable::rand<int>( size_t N, int *x )
  *  Reduction                                            *
  ********************************************************/
 template<class TYPE, class FUN, typename LAMBDA>
-inline TYPE FunctionTable::reduce( LAMBDA &op, const Array<TYPE, FUN> &A )
+inline TYPE FunctionTable::reduce( LAMBDA& op, const Array<TYPE, FUN>& A )
 {
     if ( A.length() == 0 )
         return TYPE();
-    const TYPE *x  = A.data();
+    const TYPE* x  = A.data();
     TYPE y         = x[0];
     const size_t N = A.length();
     for ( size_t i = 1; i < N; i++ )
@@ -68,7 +61,7 @@ inline TYPE FunctionTable::reduce( LAMBDA &op, const Array<TYPE, FUN> &A )
  *  Unary transformation                                 *
  ********************************************************/
 template<class TYPE, class FUN, typename LAMBDA>
-inline void FunctionTable::transform( LAMBDA &fun, const Array<TYPE, FUN> &x, Array<TYPE, FUN> &y )
+inline void FunctionTable::transform( LAMBDA& fun, const Array<TYPE, FUN>& x, Array<TYPE, FUN>& y )
 {
     y.resize( x.size() );
     const size_t N = x.length();
@@ -77,9 +70,9 @@ inline void FunctionTable::transform( LAMBDA &fun, const Array<TYPE, FUN> &x, Ar
 }
 template<class TYPE, class FUN, typename LAMBDA>
 inline void FunctionTable::transform(
-    LAMBDA &fun, const Array<TYPE, FUN> &x, const Array<TYPE, FUN> &y, Array<TYPE, FUN> &z )
+    LAMBDA& fun, const Array<TYPE, FUN>& x, const Array<TYPE, FUN>& y, Array<TYPE, FUN>& z )
 {
-    if ( !x.sizeMatch( y ) )
+    if ( x.size() != y.size() )
         throw std::logic_error( "Sizes of x and y do not match" );
     z.resize( x.size() );
     const size_t N = x.length();
@@ -88,29 +81,157 @@ inline void FunctionTable::transform(
 }
 
 
+/********************************************************
+ *  axpy                                                 *
+ ********************************************************/
+template<class TYPE>
+inline void call_axpy( size_t N, const TYPE alpha, const TYPE* x, TYPE* y );
+template<>
+inline void call_axpy<float>( size_t, const float, const float*, float* )
+{
+    throw std::logic_error( "LapackWrappers not configured" );
+}
+template<>
+inline void call_axpy<double>( size_t, const double, const double*, double* )
+{
+    throw std::logic_error( "LapackWrappers not configured" );
+}
+template<class TYPE>
+inline void call_axpy( size_t N, const TYPE alpha, const TYPE* x, TYPE* y )
+{
+    for ( size_t i = 0; i < N; i++ )
+        y[i] += alpha * x[i];
+}
+template<class TYPE, class FUN>
+void FunctionTable::axpy( const TYPE alpha, const Array<TYPE, FUN>& x, Array<TYPE, FUN>& y )
+{
+    if ( x.size() != y.size() )
+        throw std::logic_error( "Array sizes do not match" );
+    call_axpy( x.length(), alpha, x.data(), y.data() );
+}
+
+
 /********************************************************
  *  Multiply two arrays                                  *
  ********************************************************/
+template<class TYPE>
+inline void call_gemv( size_t M, size_t N, TYPE alpha, TYPE beta, const TYPE* A, const TYPE* x, TYPE* y );
+template<>
+inline void call_gemv<double>(
+    size_t, size_t, double, double, const double*, const double*, double* )
+{
+    throw std::logic_error( "LapackWrappers not configured" );
+}
+template<>
+inline void call_gemv<float>( size_t, size_t, float, float, const float*, const float*, float* )
+{
+    throw std::logic_error( "LapackWrappers not configured" );
+}
+template<class TYPE>
+inline void call_gemv(
+    size_t M, size_t N, TYPE alpha, TYPE beta, const TYPE* A, const TYPE* x, TYPE* y )
+{
+    for ( size_t i = 0; i < M; i++ )
+        y[i] = beta * y[i];
+    for ( size_t j = 0; j < N; j++ ) {
+        for ( size_t i = 0; i < M; i++ )
+            y[i] += alpha * A[i + j * M] * x[j];
+    }
+}
+template<class TYPE>
+inline void call_gemm(
+    size_t M, size_t N, size_t K, TYPE alpha, TYPE beta, const TYPE* A, const TYPE* B, TYPE* C );
+template<>
+inline void call_gemm<double>( size_t, size_t, size_t, double, double, const double*, const double*, double* )
+{
+    throw std::logic_error( "LapackWrappers not configured" );
+}
+template<>
+inline void call_gemm<float>( size_t, size_t, size_t, float, float, const float*, const float*, float* )
+{
+    throw std::logic_error( "LapackWrappers not configured" );
+}
+template<class TYPE>
+inline void call_gemm(
+    size_t M, size_t N, size_t K, TYPE alpha, TYPE beta, const TYPE* A, const TYPE* B, TYPE* C )
+{
+    for ( size_t i = 0; i < K * M; i++ )
+        C[i] = beta * C[i];
+    for ( size_t k = 0; k < K; k++ ) {
+        for ( size_t j = 0; j < N; j++ ) {
+            for ( size_t i = 0; i < M; i++ )
+                C[i + k * M] += alpha * A[i + j * M] * B[j + k * N];
+        }
+    }
+}
+template<class TYPE, class FUN>
+void FunctionTable::gemm( const TYPE alpha, const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b,
+    const TYPE beta, Array<TYPE, FUN>& c )
+{
+    if ( a.ndim() == 2 && b.ndim() == 1 ) {
+        if ( a.size( 1 ) != b.size( 0 ) )
+            throw std::logic_error( "Inner dimensions must match" );
+        call_gemv<TYPE>( a.size( 0 ), a.size( 1 ), alpha, beta, a.data(), b.data(), c.data() );
+    } else if ( a.ndim() <= 2 && b.ndim() <= 2 ) {
+        if ( a.size( 1 ) != b.size( 0 ) )
+            throw std::logic_error( "Inner dimensions must match" );
+        call_gemm<TYPE>(
+            a.size( 0 ), a.size( 1 ), b.size( 1 ), alpha, beta, a.data(), b.data(), c.data() );
+    } else {
+        throw std::logic_error( "Not finished yet" );
+    }
+}
 template<class TYPE, class FUN>
 void FunctionTable::multiply(
-    const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, Array<TYPE, FUN> &c )
+    const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, Array<TYPE, FUN>& c )
 {
-    if ( a.ndim() <= 2 && b.ndim() <= 2 ) {
+    if ( a.ndim() == 2 && b.ndim() == 1 ) {
+        if ( a.size( 1 ) != b.size( 0 ) )
+            throw std::logic_error( "Inner dimensions must match" );
+        c.resize( a.size( 0 ) );
+        call_gemv<TYPE>( a.size( 0 ), a.size( 1 ), 1, 0, a.data(), b.data(), c.data() );
+    } else if ( a.ndim() <= 2 && b.ndim() <= 2 ) {
         if ( a.size( 1 ) != b.size( 0 ) )
             throw std::logic_error( "Inner dimensions must match" );
         c.resize( a.size( 0 ), b.size( 1 ) );
-        c.fill( 0 );
-        for ( size_t k = 0; k < b.size( 1 ); k++ ) {
-            for ( size_t j = 0; j < a.size( 1 ); j++ ) {
-                for ( size_t i = 0; i < a.size( 0 ); i++ ) {
-                    c( i, k ) += a( i, j ) * b( j, k );
-                }
-            }
-        }
+        call_gemm<TYPE>(
+            a.size( 0 ), a.size( 1 ), b.size( 1 ), 1, 0, a.data(), b.data(), c.data() );
     } else {
         throw std::logic_error( "Not finished yet" );
     }
 }
 
 
+/********************************************************
+ *  Check if two arrays are equal                        *
+ ********************************************************/
+template<class TYPE, class FUN>
+inline typename std::enable_if<!std::is_floating_point<TYPE>::value, bool>::type
+FunctionTableCompare( const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, TYPE )
+{
+    bool pass = true;
+    if ( a.size() != b.size() )
+        throw std::logic_error( "Sizes of x and y do not match" );
+    for ( size_t i = 0; i < a.length(); i++ )
+        pass = pass && a( i ) == b( i );
+    return pass;
+}
+template<class TYPE, class FUN>
+inline typename std::enable_if<std::is_floating_point<TYPE>::value, bool>::type
+FunctionTableCompare( const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, TYPE tol )
+{
+    bool pass = true;
+    if ( a.size() != b.size() )
+        throw std::logic_error( "Sizes of x and y do not match" );
+    for ( size_t i = 0; i < a.length(); i++ )
+        pass = pass && ( std::abs( a( i ) - b( i ) ) < tol );
+    return pass;
+}
+template<class TYPE, class FUN>
+bool FunctionTable::equals( const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, TYPE tol )
+{
+    return FunctionTableCompare( a, b, tol );
+}
+
+
 #endif
diff --git a/tests/test_dcel_minkowski.cpp b/tests/test_dcel_minkowski.cpp
index 3f543c9f..0d6cbca9 100644
--- a/tests/test_dcel_minkowski.cpp
+++ b/tests/test_dcel_minkowski.cpp
@@ -4,13 +4,13 @@
 #include "common/Domain.h"
 #include "common/SpherePack.h"
 
-using namespace std;
+#include "ProfilerApp.h"
+
 
 /*
  *  Compare the measured and analytical curvature for a sphere
  *
  */
-
 std::shared_ptr<Database> loadInputs( )
 {
   //auto db = std::make_shared<Database>( "Domain.in" );
@@ -38,7 +38,7 @@ int main(int argc, char **argv)
 		int Nx = db->getVector<int>( "n" )[0];
 		int Ny = db->getVector<int>( "n" )[1];
 		int Nz = db->getVector<int>( "n" )[2];
-		std::shared_ptr<Domain> Dm = std::shared_ptr<Domain>(new Domain(db,comm));
+		auto Dm = std::make_shared<Domain>( db, comm );
 		
 		Nx+=2; Ny+=2; Nz+=2;
 		DoubleArray Phase(Nx,Ny,Nz);
@@ -98,6 +98,7 @@ int main(int argc, char **argv)
 		printf("   Euler characteristic  = %f (analytical = 2.0) \n",sphere.Xi);
 		
 	}
+    PROFILE_SAVE("test_dcel_minkowski");
 	MPI_Barrier(comm);
 	MPI_Finalize();
 	return toReturn;