Optimizing performance and updating Array class

2018-09-17 13:03:00 -04:00
parent c3aa841f26
commit f955162e20
15 changed files with 1459 additions and 1104 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -35,10 +35,6 @@ IF ( NOT CMAKE_CXX_STANDARD )
 ENDIF()


-# Disable Link Time Optimization until we have a chance to test
-SET( DISABLE_LTO )
-
-
 # Set source/install paths
 SET( ${PROJ}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
 SET( ${PROJ}_BUILD_DIR  "${CMAKE_CURRENT_BINARY_DIR}" )
--- a/analysis/Minkowski.cpp
+++ b/analysis/Minkowski.cpp
@@ -11,6 +11,9 @@
 #include "IO/Reader.h"
 #include "IO/Writer.h"

+#include "ProfilerApp.h"
+
+
 #define PI 3.14159265359

 // Constructor
@@ -55,11 +58,11 @@ double Minkowski::X(){
 	return Xi_global;
 }

-void Minkowski::ComputeScalar(const DoubleArray Field, const double isovalue)
+void Minkowski::ComputeScalar(const DoubleArray& Field, const double isovalue)
 {
+    PROFILE_START("ComputeScalar");
 	Xi = Ji = Ai = 0.0;
 	DECL object;
-	Point P1,P2,P3;
 	int e1,e2,e3;
 	double s,s1,s2,s3;
 	double a1,a2,a3;
@@ -75,13 +78,13 @@ void Minkowski::ComputeScalar(const DoubleArray Field, const double isovalue)
 					e1 = object.Face(idx); 
 					e2 = object.halfedge.next(e1);
 					e3 = object.halfedge.next(e2);
-					P1 = object.vertex.coords(object.halfedge.v1(e1));
-					P2 = object.vertex.coords(object.halfedge.v1(e2));
-					P3 = object.vertex.coords(object.halfedge.v1(e3));
+					auto P1 = object.vertex.coords(object.halfedge.v1(e1));
+					auto P2 = object.vertex.coords(object.halfedge.v1(e2));
+					auto P3 = object.vertex.coords(object.halfedge.v1(e3));
 					// Surface area
-					s1 = sqrt((P1.x-P2.x)*(P1.x-P2.x)+(P1.y-P2.y)*(P1.y-P2.y)+(P1.z-P2.z)*(P1.z-P2.z));
-					s2 = sqrt((P2.x-P3.x)*(P2.x-P3.x)+(P2.y-P3.y)*(P2.y-P3.y)+(P2.z-P3.z)*(P2.z-P3.z));
-					s3 = sqrt((P1.x-P3.x)*(P1.x-P3.x)+(P1.y-P3.y)*(P1.y-P3.y)+(P1.z-P3.z)*(P1.z-P3.z));
+					s1 = Distance( P1, P2 );
+					s2 = Distance( P2, P3 );
+					s3 = Distance( P1, P3 );
 					s = 0.5*(s1+s2+s3);
 					Ai += sqrt(s*(s-s1)*(s-s2)*(s-s3));
 					// Mean curvature based on half edge angle
@@ -122,7 +125,7 @@ void Minkowski::ComputeScalar(const DoubleArray Field, const double isovalue)
 	MPI_Allreduce(&Ai,&Ai_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
 	MPI_Allreduce(&Ji,&Ji_global,1,MPI_DOUBLE,MPI_SUM,Dm->Comm);
 	MPI_Barrier(Dm->Comm);
-
+    PROFILE_STOP("ComputeScalar");
 }

 void Minkowski::PrintAll()
--- a/analysis/Minkowski.h
+++ b/analysis/Minkowski.h
@@ -48,7 +48,7 @@ public:
 	//...........................................................................
 	Minkowski(std::shared_ptr <Domain> Dm);
 	~Minkowski();
-	void ComputeScalar(const DoubleArray Field, const double isovalue);
+	void ComputeScalar(const DoubleArray& Field, const double isovalue);
 	void PrintAll();

 };
--- a/analysis/analysis.cpp
+++ b/analysis/analysis.cpp
@@ -1,5 +1,7 @@
 #include "analysis/analysis.h"
 #include "ProfilerApp.h"
+
+#include <algorithm>
 #include <iostream>


--- a/analysis/dcel.cpp
+++ b/analysis/dcel.cpp
@@ -1,78 +1,5 @@
 #include "analysis/dcel.h"

-/* 
-Double connected edge list (DECL) 
-*/
-
-Vertex::Vertex(){
-	size_ = 0;
-	vertex_data.resize(36);
-}
-
-Vertex::~Vertex(){
-
-}
-
-void Vertex::add(Point P){
-	vertex_data.push_back(P.x);
-	vertex_data.push_back(P.y);
-	vertex_data.push_back(P.z);
-	size_++;
-}
-
-void Vertex::assign(int idx, Point P){
-	vertex_data[3*idx] = P.x;
-	vertex_data[3*idx+1] = P.y;
-	vertex_data[3*idx+2] = P.z;
-}
-
-int Vertex::size(){
-	return size_;
-}
-
-Point Vertex::coords(int idx){
-	Point P;
-	P.x = vertex_data[3*idx];
-	P.y = vertex_data[3*idx+1];
-	P.z = vertex_data[3*idx+2];
-	return P;
-}
-
-
-Halfedge::Halfedge(){
-	size_=0;
-}
-
-Halfedge::~Halfedge(){
-	
-}
-int Halfedge::v1(int edge){
-	return data(0,edge);
-}
-
-int Halfedge::v2(int edge){
-	return data(1,edge);
-}
-
-int Halfedge::face(int edge){
-	return data(2,edge);
-}
-
-int Halfedge::twin(int edge){
-	return data(3,edge);
-}
-
-int Halfedge::prev(int edge){
-	return data(4,edge);
-}
-
-int Halfedge::next(int edge){
-	return data(5,edge);
-}
-
-int Halfedge::size(){
-	return size_;
-}


 DECL::DECL(){
@@ -85,24 +12,20 @@ DECL::~DECL(){
 }

 int DECL::Face(int index){
-	return FaceData(index);
+	return FaceData[index];
 }

-void DECL::LocalIsosurface(const DoubleArray A, double value, const int i, const int j, const int k){
+void DECL::LocalIsosurface(const DoubleArray& A, double value, const int i, const int j, const int k){
 	Point P,Q;
 	Point PlaceHolder;
 	Point C0,C1,C2,C3,C4,C5,C6,C7;

-	int CubeIndex;
-	int nTris = 0;
-	int nVert =0;
-
 	Point VertexList[12];
 	Point NewVertexList[12];
 	int LocalRemap[12];

-	DTMutableList<Point> cellvertices = DTMutableList<Point>(20);
-	IntArray Triangles = IntArray(3,20);
+	Point cellvertices[20];
+    std::array<std::array<int,3>,20> Triangles;

 	// Values from array 'A' at the cube corners
 	double CubeValues[8];
@@ -129,7 +52,7 @@ void DECL::LocalIsosurface(const DoubleArray A, double value, const int i, const
 	
 	//Determine the index into the edge table which
 	//tells us which vertices are inside of the surface
-	CubeIndex = 0;
+	int CubeIndex = 0;
 	if (CubeValues[0] < 0.0f) CubeIndex |= 1;
 	if (CubeValues[1] < 0.0f) CubeIndex |= 2;
 	if (CubeValues[2] < 0.0f) CubeIndex |= 4;
@@ -222,31 +145,30 @@ void DECL::LocalIsosurface(const DoubleArray A, double value, const int i, const
 		//P.x  += i;
 		//P.y  += j;
 		//P.z  += k;
-		cellvertices(idx) = P;
+		cellvertices[idx] = P;
 	}
-	nVert = VertexCount;

 	TriangleCount = 0;
 	for (int idx=0;triTable[CubeIndex][idx]!=-1;idx+=3) {
-		Triangles(0,TriangleCount) = LocalRemap[triTable[CubeIndex][idx+0]];
-		Triangles(1,TriangleCount) = LocalRemap[triTable[CubeIndex][idx+1]];
-		Triangles(2,TriangleCount) = LocalRemap[triTable[CubeIndex][idx+2]];
+		Triangles[TriangleCount][0] = LocalRemap[triTable[CubeIndex][idx+0]];
+		Triangles[TriangleCount][1] = LocalRemap[triTable[CubeIndex][idx+1]];
+		Triangles[TriangleCount][2] = LocalRemap[triTable[CubeIndex][idx+2]];
 		TriangleCount++;
 	}
-	nTris = TriangleCount;
+	int nTris = TriangleCount;

 	// Now add the local values to the DECL data structure
 	if (nTris>0){
 		FaceData.resize(TriangleCount);
 	    //printf("Construct halfedge structure... \n");
 	    //printf("   Construct %i triangles \n",nTris);
-		halfedge.data.resize(6,nTris*3);
+		halfedge.resize(nTris*3);
 		int idx_edge=0;
 		for (int idx=0; idx<TriangleCount; idx++){
-			int V1 = Triangles(0,idx);
-			int V2 = Triangles(1,idx);
-			int V3 = Triangles(2,idx);
-			FaceData(idx) = idx_edge;
+			int V1 = Triangles[idx][0];
+			int V2 = Triangles[idx][1];
+			int V3 = Triangles[idx][2];
+			FaceData[idx] = idx_edge;
 			// first edge: V1->V2 
 			halfedge.data(0,idx_edge) = V1;  // first vertex
 			halfedge.data(1,idx_edge) = V2;  // second vertex
@@ -290,8 +212,8 @@ void DECL::LocalIsosurface(const DoubleArray A, double value, const int i, const
 				}
 			}	
 			// Use "ghost" twins if edge is on a cube face
-			P = cellvertices(V1);
-			Q = cellvertices(V2);
+			P = cellvertices[V1];
+			Q = cellvertices[V2];
 			if (P.x == 0.0 && Q.x == 0.0) halfedge.data(3,idx) = -1;  // ghost twin for x=0 face
 			if (P.x == 1.0 && Q.x == 1.0) halfedge.data(3,idx) = -4;  // ghost twin for x=1 face
 			if (P.y == 0.0 && Q.y == 0.0) halfedge.data(3,idx) = -2;  // ghost twin for y=0 face
@@ -303,7 +225,7 @@ void DECL::LocalIsosurface(const DoubleArray A, double value, const int i, const

 	// Map vertices to global coordinates
 	for (int idx=0;idx<VertexCount;idx++) {
-		P = cellvertices(idx);
+		P = cellvertices[idx];
 		P.x  += i;
 		P.y  += j;
 		P.z  += k;
@@ -358,8 +280,7 @@ Point DECL::TriNormal(int edge)
 double DECL::EdgeAngle(int edge)
 {
 	double angle;
-	double nx,ny,nz;
-	double dotprod,length,hypotenuse;
+	double dotprod;
 	Point P,Q,R; // triangle vertices
 	Point U,V,W; // normal vectors
 	int e2 = halfedge.next(edge);
@@ -372,14 +293,14 @@ double DECL::EdgeAngle(int edge)
 	if (halfedge.twin(edge) < 0 ){
 		// compute edge normal in plane of cube face
 		W = P - Q; // edge tangent vector
-		length = sqrt(W.x*W.x+W.y*W.y+W.z*W.z);
+		double length = sqrt(W.x*W.x+W.y*W.y+W.z*W.z);
 		W.x /= length;
 		W.y /= length;
 		W.z /= length;
 		// edge normal within the plane of the cube face
-		nx = W.y*V.z - W.z*V.y;
-		ny = W.z*V.x - W.x*V.z;
-		nz = W.x*V.y - W.y*V.x;
+		double nx = W.y*V.z - W.z*V.y;
+		double ny = W.z*V.x - W.x*V.z;
+		double nz = W.x*V.y - W.y*V.x;
 		length = sqrt(nx*nx+ny*ny+nz*nz);
 		// new value for V is this normal vector
 		V.x = nx/length; V.y = ny/length; V.z = nz/length;
@@ -431,20 +352,19 @@ void Isosurface(DoubleArray &A, const double &v)
 {
 	Point P,Q;
 	Point PlaceHolder;
-	double temp;
 	Point C0,C1,C2,C3,C4,C5,C6,C7;

 	int TriangleCount;
 	int VertexCount;
 	int CubeIndex;
-	int nTris, nVert;

 	Point VertexList[12];
 	Point NewVertexList[12];
 	int LocalRemap[12];

-	DTMutableList<Point> cellvertices = DTMutableList<Point>(20);
-	IntArray Triangles = IntArray(3,20);
+	Point cellvertices[20];
+    std::array<std::array<int,3>,20> Triangles;
+    Triangles.fill( { 0 } );

 	// Values from array 'A' at the cube corners
 	double CubeValues[8];
@@ -463,6 +383,7 @@ void Isosurface(DoubleArray &A, const double &v)
 	C6.x = 1.0; C6.y = 1.0; C6.z = 1.0;
 	C7.x = 0.0; C7.y = 1.0; C7.z = 1.0;							

+	std::vector<std::array<int,6>> HalfEdge;
 	for (int k=1; k<Nz-1; k++){		
 		for (int j=1; j<Ny-1; j++){
 			for (int i=1; i<Nx-1; i++){
@@ -569,83 +490,81 @@ void Isosurface(DoubleArray &A, const double &v)
 					//P.x  += i;
 					//P.y  += j;
 					//P.z  += k;
-					cellvertices(idx) = P;
+					cellvertices[idx] = P;
 				}
-				nVert = VertexCount;

 				TriangleCount = 0;
 				for (int idx=0;triTable[CubeIndex][idx]!=-1;idx+=3) {
-					Triangles(0,TriangleCount) = LocalRemap[triTable[CubeIndex][idx+0]];
-					Triangles(1,TriangleCount) = LocalRemap[triTable[CubeIndex][idx+1]];
-					Triangles(2,TriangleCount) = LocalRemap[triTable[CubeIndex][idx+2]];
+					Triangles[TriangleCount][0] = LocalRemap[triTable[CubeIndex][idx+0]];
+					Triangles[TriangleCount][1] = LocalRemap[triTable[CubeIndex][idx+1]];
+					Triangles[TriangleCount][2] = LocalRemap[triTable[CubeIndex][idx+2]];
 					TriangleCount++;
 				}
-				nTris = TriangleCount;
+				int nTris = TriangleCount;

 				// Now add the local values to the DECL data structure
-				IntArray HalfEdge(6,nTris*3);
-				DoubleArray EdgeAngles(nTris*3);
+				HalfEdge.resize(nTris*3);
 				int idx_edge=0;
 				for (int idx=0; idx<TriangleCount; idx++){
-					int V1 = Triangles(0,idx);
-					int V2 = Triangles(1,idx);
-					int V3 = Triangles(2,idx);
+					int V1 = Triangles[idx][0];
+					int V2 = Triangles[idx][1];
+					int V3 = Triangles[idx][2];
 					// first edge: V1->V2 
-					HalfEdge(0,idx_edge) = V1;  // first vertex
-					HalfEdge(1,idx_edge) = V2;  // second vertex
-					HalfEdge(2,idx_edge) = idx; // triangle
-					HalfEdge(3,idx_edge) = -1;  // twin
-					HalfEdge(4,idx_edge) = idx_edge+2;  // previous edge
-					HalfEdge(5,idx_edge) = idx_edge+1;  // next edge
+					HalfEdge[idx_edge][0] = V1;  // first vertex
+					HalfEdge[idx_edge][1] = V2;  // second vertex
+					HalfEdge[idx_edge][2] = idx; // triangle
+					HalfEdge[idx_edge][3] = -1;  // twin
+					HalfEdge[idx_edge][4] = idx_edge+2;  // previous edge
+					HalfEdge[idx_edge][5] = idx_edge+1;  // next edge
 					idx_edge++;
 					// second edge: V2->V3
-					HalfEdge(0,idx_edge) = V2;  // first vertex
-					HalfEdge(1,idx_edge) = V3;  // second vertex
-					HalfEdge(2,idx_edge) = idx; // triangle
-					HalfEdge(3,idx_edge) = -1;  // twin
-					HalfEdge(4,idx_edge) = idx_edge-1;  // previous edge
-					HalfEdge(5,idx_edge) = idx_edge+1;  // next edge
+					HalfEdge[idx_edge][0] = V2;  // first vertex
+					HalfEdge[idx_edge][1] = V3;  // second vertex
+					HalfEdge[idx_edge][2] = idx; // triangle
+					HalfEdge[idx_edge][3] = -1;  // twin
+					HalfEdge[idx_edge][4] = idx_edge-1;  // previous edge
+					HalfEdge[idx_edge][5] = idx_edge+1;  // next edge
 					idx_edge++;
 					// third edge: V3->V1
-					HalfEdge(0,idx_edge) = V3;  // first vertex
-					HalfEdge(1,idx_edge) = V1;  // second vertex
-					HalfEdge(2,idx_edge) = idx; // triangle
-					HalfEdge(3,idx_edge) = -1;  // twin
-					HalfEdge(4,idx_edge) = idx_edge-1;  // previous edge
-					HalfEdge(5,idx_edge) = idx_edge-2;  // next edge
+					HalfEdge[idx_edge][0] = V3;  // first vertex
+					HalfEdge[idx_edge][1] = V1;  // second vertex
+					HalfEdge[idx_edge][2] = idx; // triangle
+					HalfEdge[idx_edge][3] = -1;  // twin
+					HalfEdge[idx_edge][4] = idx_edge-1;  // previous edge
+					HalfEdge[idx_edge][5] = idx_edge-2;  // next edge
 					idx_edge++;
 				}
 				int EdgeCount=idx_edge;
 				for (int idx=0; idx<EdgeCount; idx++){
-					int V1=HalfEdge(0,idx);
-					int V2=HalfEdge(1,idx);
+					int V1=HalfEdge[idx][0];
+					int V2=HalfEdge[idx][1];
 					// Find all the twins within the cube
 					for (int jdx=0; idx<EdgeCount; jdx++){
-						if (HalfEdge(1,jdx) == V1 && HalfEdge(0,jdx) == V2){
+						if (HalfEdge[jdx][1] == V1 && HalfEdge[jdx][0] == V2){
 							// this is the pair
-							HalfEdge(3,idx) = jdx;
-							HalfEdge(3,jdx) = idx;
+							HalfEdge[idx][3] = jdx;
+							HalfEdge[jdx][3] = idx;
 						}
-						if (HalfEdge(1,jdx) == V2 && HalfEdge(0,jdx) == V1 && !(idx==jdx)){
+						if (HalfEdge[jdx][1] == V2 && HalfEdge[jdx][0] == V1 && !(idx==jdx)){
 							std::printf("WARNING: half edges with identical orientation! \n");
 						}
 					}	
 					// Use "ghost" twins if edge is on a cube face
-					P = cellvertices(V1);
-					Q = cellvertices(V2);
-					if (P.x == 0.0 && Q.x == 0.0) HalfEdge(3,idx_edge) = -1;  // ghost twin for x=0 face
-					if (P.x == 1.0 && Q.x == 1.0) HalfEdge(3,idx_edge) = -2;  // ghost twin for x=1 face
-					if (P.y == 0.0 && Q.y == 0.0) HalfEdge(3,idx_edge) = -3;  // ghost twin for y=0 face
-					if (P.y == 1.0 && Q.y == 1.0) HalfEdge(3,idx_edge) = -4;  // ghost twin for y=1 face
-					if (P.z == 0.0 && Q.z == 0.0) HalfEdge(3,idx_edge) = -5;  // ghost twin for z=0 face
-					if (P.z == 1.0 && Q.z == 1.0) HalfEdge(3,idx_edge) = -6;  // ghost twin for z=1 face
+					P = cellvertices[V1];
+					Q = cellvertices[V2];
+					if (P.x == 0.0 && Q.x == 0.0) HalfEdge[idx_edge][3] = -1;  // ghost twin for x=0 face
+					if (P.x == 1.0 && Q.x == 1.0) HalfEdge[idx_edge][3] = -2;  // ghost twin for x=1 face
+					if (P.y == 0.0 && Q.y == 0.0) HalfEdge[idx_edge][3] = -3;  // ghost twin for y=0 face
+					if (P.y == 1.0 && Q.y == 1.0) HalfEdge[idx_edge][3] = -4;  // ghost twin for y=1 face
+					if (P.z == 0.0 && Q.z == 0.0) HalfEdge[idx_edge][3] = -5;  // ghost twin for z=0 face
+					if (P.z == 1.0 && Q.z == 1.0) HalfEdge[idx_edge][3] = -6;  // ghost twin for z=1 face
 				}
 				// Find all the angles
 				for (int idx=0; idx<EdgeCount; idx++){
-					int V1=HalfEdge(0,idx);
-					int V2=HalfEdge(1,idx);
-					int T1= HalfEdge(2,idx_edge);
-					int twin=HalfEdge(3,idx_edge); 
+					int V1=HalfEdge[idx][0];
+					int V2=HalfEdge[idx][1];
+					int T1= HalfEdge[idx_edge][2];
+					int twin=HalfEdge[idx_edge][3];
 					if (twin == -1){
 						
 					}
@@ -653,11 +572,11 @@ void Isosurface(DoubleArray &A, const double &v)

 				// Map vertices to global coordinates
 				for (int idx=0;idx<VertexCount;idx++) {
-					P = cellvertices(idx);
+					P = cellvertices[idx];
 					P.x  += i;
 					P.y  += j;
 					P.z  += k;
-					cellvertices(idx) = P;
+					cellvertices[idx] = P;
 				}
 			}
 		}
--- a/analysis/dcel.h
+++ b/analysis/dcel.h
@@ -8,36 +8,53 @@ Doubly-connected edge list (DECL)
 // Vertex structure
 class Vertex{
 public:
-	Vertex();
-	~Vertex();
-	void add(Point P);
-	void assign( int idx, Point P);
-	int size();
-	Point coords(int idx);
+	Vertex() { d_data.resize(12); }
+	~Vertex() = default;
+    Vertex( const Vertex& ) = delete;
+    Vertex operator=( const Vertex& ) = delete;
+
+    // Add/assign a point
+	inline void add( const Point& P ) { d_data.push_back( P ); }
+	inline void assign( int idx, const Point& P ) { d_data[idx] = P; }
+
+    // Get a point
+	inline Point& coords( int idx ) { return d_data[idx]; }
+	inline const Point& coords( int idx ) const { return d_data[idx]; }
+
 	int IncidentEdge();
+
+    // Return the number of points
+	inline int size() const { return d_data.size(); }
+
 private:
-	std::vector<double> vertex_data;
-	int size_;
+	std::vector<Point> d_data;
 };

+
 // Halfedge structure
 // Face
 class Halfedge{
 public:
-	Halfedge();
-	~Halfedge();
+	Halfedge() = default;
+	~Halfedge() = default;
+    Halfedge( const Halfedge& ) = delete;
+    Halfedge operator=( const Halfedge& ) = delete;

-	int v1(int edge);
-	int v2(int edge);
-	int twin(int edge);
-	int face(int edge);
-	int next(int edge);
-	int prev(int edge);
-	int size();
+	inline int v1(int edge) const { return d_data[edge][0]; }
+	inline int v2(int edge) const { return d_data[edge][1]; }
+	inline int face(int edge) const { return d_data[edge][2]; }
+	inline int twin(int edge) const { return d_data[edge][3]; }
+	inline int prev(int edge) const { return d_data[edge][4]; }
+	inline int next(int edge) const { return d_data[edge][5]; }
+
+	inline int size() const { return d_data.size(); }
+    inline void resize( int N ) { d_data.resize( N ); }
+
+    inline int& data( int i, int j ) { return d_data[j][i]; }
+    inline const int& data( int i, int j ) const { return d_data[j][i]; }

-	Array<int> data;
 private:
-	int size_;
+	std::vector<std::array<int,6>> d_data;
 };

 // DECL
@@ -49,7 +66,7 @@ public:
 	int face();
 	Vertex vertex;
 	Halfedge halfedge;
-	void LocalIsosurface(const DoubleArray A, double value, int i, int j, int k);
+	void LocalIsosurface(const DoubleArray& A, double value, int i, int j, int k);
 	int Face(int index);
 	
 	double origin(int edge);
@@ -59,5 +76,5 @@ public:
 	int VertexCount;

 private:
-	Array <int> FaceData;
+	std::vector<int> FaceData;
 };
--- a/analysis/distance.cpp
+++ b/analysis/distance.cpp
@@ -1,6 +1,7 @@
 #include "analysis/distance.h"


+
 /******************************************************************
 * A fast distance calculation                                     *
 ******************************************************************/
--- a/analysis/distance.h
+++ b/analysis/distance.h
@@ -2,6 +2,7 @@
 #define Distance_H_INC

 #include "common/Domain.h"
+#include "common/Array.hpp"


 struct Vec {
--- a/common/Array.cpp
+++ b/common/Array.cpp
@@ -0,0 +1,117 @@
+#include "common/Array.h"
+#include "common/Array.hpp"
+
+#include <complex>
+
+
+/********************************************************
+ *  ArraySize                                            *
+ ********************************************************/
+ArraySize::ArraySize( const std::vector<size_t>& N )
+{
+    d_ndim = N.size();
+    d_N[0] = 0;
+    d_N[1] = 1;
+    d_N[2] = 1;
+    d_N[3] = 1;
+    d_N[4] = 1;
+    for ( size_t i = 0; i < d_ndim; i++ )
+        d_N[i] = N[i];
+    d_length = 1;
+    for ( unsigned long i : d_N )
+        d_length *= i;
+    if ( d_ndim == 0 )
+        d_length = 0;
+}
+
+
+/********************************************************
+ *  Explicit instantiations of Array                     *
+ ********************************************************/
+template class Array<char, FunctionTable>;
+template class Array<uint8_t, FunctionTable>;
+template class Array<uint16_t, FunctionTable>;
+template class Array<uint32_t, FunctionTable>;
+template class Array<uint64_t, FunctionTable>;
+template class Array<int8_t, FunctionTable>;
+template class Array<int16_t, FunctionTable>;
+template class Array<int32_t, FunctionTable>;
+template class Array<int64_t, FunctionTable>;
+template class Array<float, FunctionTable>;
+template class Array<double, FunctionTable>;
+
+
+/********************************************************
+ *  Explicit instantiations of Array<bool>               *
+ ********************************************************/
+// clang-format off
+template Array<bool, FunctionTable>::Array();
+template Array<bool, FunctionTable>::~Array();
+template Array<bool, FunctionTable>::Array( size_t );
+template Array<bool, FunctionTable>::Array( size_t, size_t );
+template Array<bool, FunctionTable>::Array( size_t, size_t, size_t );
+template Array<bool, FunctionTable>::Array( size_t, size_t, size_t, size_t );
+template Array<bool, FunctionTable>::Array( size_t, size_t, size_t, size_t, size_t );
+template Array<bool, FunctionTable>::Array( const std::vector<size_t>&, const bool* );
+template Array<bool, FunctionTable>::Array( std::string );
+template Array<bool, FunctionTable>::Array( std::initializer_list<bool> );
+template Array<bool, FunctionTable>::Array( const Array<bool, FunctionTable>& );
+template Array<bool, FunctionTable>::Array( Array<bool, FunctionTable>&& );
+template Array<bool, FunctionTable>& Array<bool, FunctionTable>::operator=( const Array<bool, FunctionTable>& );
+template Array<bool, FunctionTable>& Array<bool, FunctionTable>::operator=( Array<bool, FunctionTable>&& );
+template Array<bool, FunctionTable>& Array<bool, FunctionTable>::operator=( const std::vector<bool>& );
+template void Array<bool, FunctionTable>::fill(bool const&);
+template void Array<bool, FunctionTable>::clear();
+template bool Array<bool, FunctionTable>::operator==(Array<bool, FunctionTable> const&) const;
+template void Array<bool, FunctionTable>::resize( ArraySize const& );
+// clang-format on
+
+
+/********************************************************
+ *  Explicit instantiations of Array<std::complex>       *
+ ********************************************************/
+// clang-format off
+template Array<std::complex<double>, FunctionTable>::Array();
+template Array<std::complex<double>, FunctionTable>::~Array();
+template Array<std::complex<double>, FunctionTable>::Array( size_t );
+template Array<std::complex<double>, FunctionTable>::Array( size_t, size_t );
+template Array<std::complex<double>, FunctionTable>::Array( size_t, size_t, size_t );
+template Array<std::complex<double>, FunctionTable>::Array( size_t, size_t, size_t, size_t );
+template Array<std::complex<double>, FunctionTable>::Array( size_t, size_t, size_t, size_t, size_t );
+template Array<std::complex<double>, FunctionTable>::Array( const std::vector<size_t>&, const std::complex<double>* );
+template Array<std::complex<double>, FunctionTable>::Array( std::initializer_list<std::complex<double>> );
+template Array<std::complex<double>, FunctionTable>::Array( const Range<std::complex<double>>& range );
+template Array<std::complex<double>, FunctionTable>::Array( const Array<std::complex<double>, FunctionTable>& );
+template Array<std::complex<double>, FunctionTable>::Array( Array<std::complex<double>, FunctionTable>&& );
+template Array<std::complex<double>, FunctionTable>& Array<std::complex<double>, FunctionTable>::operator=( const Array<std::complex<double>, FunctionTable>& );
+template Array<std::complex<double>, FunctionTable>& Array<std::complex<double>, FunctionTable>::operator=( Array<std::complex<double>, FunctionTable>&& );
+template Array<std::complex<double>, FunctionTable>& Array<std::complex<double>, FunctionTable>::operator=( const std::vector<std::complex<double>>& );
+template void Array<std::complex<double>, FunctionTable>::resize( ArraySize const& );
+template void Array<std::complex<double>, FunctionTable>::viewRaw( ArraySize const&, std::complex<double>*, bool, bool );
+template void Array<std::complex<double>, FunctionTable>::fill(std::complex<double> const&);
+template void Array<std::complex<double>, FunctionTable>::clear();
+template bool Array<std::complex<double>, FunctionTable>::operator==(Array<std::complex<double>, FunctionTable> const&) const;
+template Array<std::complex<double>, FunctionTable> Array<std::complex<double>, FunctionTable>::repmat(std::vector<unsigned long, std::allocator<unsigned long> > const&) const;
+// clang-format on
+
+
+/********************************************************
+ *  Explicit instantiations of Array<std::string>        *
+ ********************************************************/
+// clang-format off
+template Array<std::string, FunctionTable>::Array();
+template Array<std::string, FunctionTable>::~Array();
+template Array<std::string, FunctionTable>::Array( size_t );
+template Array<std::string, FunctionTable>::Array( size_t, size_t );
+template Array<std::string, FunctionTable>::Array( size_t, size_t, size_t );
+template Array<std::string, FunctionTable>::Array( size_t, size_t, size_t, size_t );
+template Array<std::string, FunctionTable>::Array( size_t, size_t, size_t, size_t, size_t );
+template Array<std::string, FunctionTable>::Array( const std::vector<size_t>&, const std::string* );
+template Array<std::string, FunctionTable>::Array( std::initializer_list<std::string> );
+template Array<std::string, FunctionTable>::Array( const Array<std::string, FunctionTable>& );
+template Array<std::string, FunctionTable>::Array( Array<std::string, FunctionTable>&& );
+template Array<std::string, FunctionTable>& Array<std::string, FunctionTable>::operator=( const Array<std::string, FunctionTable>& );
+template Array<std::string, FunctionTable>& Array<std::string, FunctionTable>::operator=( Array<std::string, FunctionTable>&& );
+template Array<std::string, FunctionTable>& Array<std::string, FunctionTable>::operator=( const std::vector<std::string>& );
+template void Array<std::string, FunctionTable>::resize( ArraySize const& );
+// clang-format on
--- a/common/Array.h
+++ b/common/Array.h
@@ -1,247 +1,20 @@
 #ifndef included_ArrayClass
 #define included_ArrayClass

+#include "common/ArraySize.h"
+
 #include <array>
-#include <cstring>
 #include <functional>
 #include <initializer_list>
 #include <iostream>
 #include <memory>
 #include <vector>

-#include "Utilities.h"
-
-
-#if defined( __CUDA_ARCH__ )
-#include <cuda.h>
-#define HOST_DEVICE __host__ __device__
-#else
-#define HOST_DEVICE
-#endif
-#if defined( USING_GCC ) || defined( USING_CLANG )
-#define ATTRIBUTE_INLINE __attribute__( ( always_inline ) )
-#else
-#define ATTRIBUTE_INLINE
-#endif
-
-
-#if ( defined( DEBUG ) || defined( _DEBUG ) ) && !defined( NDEBUG )
-#define CHECK_ARRAY_LENGTH( i )                                      \
-    do {                                                             \
-        if ( i >= d_length )                                         \
-            throw std::length_error( "Index exceeds array bounds" ); \
-    } while ( 0 )
-#else
-#define CHECK_ARRAY_LENGTH( i ) \
-    do {                        \
-    } while ( 0 )
-#endif
-
-
-// Forward decleration
-class FunctionTable;
-
-
-//! Simple range class
-template<class TYPE = size_t>
-class Range final
-{
-public:
-    //! Empty constructor
-    Range() : i( 0 ), j( -1 ), k( 1 ) {}
-
-    /*!
-     * Create a range i:k:j (or i:j)
-     * @param i_            Starting value
-     * @param j_            Ending value
-     * @param k_            Increment value
-     */
-    Range( TYPE i_, TYPE j_, TYPE k_ = 1 ) : i( i_ ), j( j_ ), k( k_ ) {}
-
-    TYPE i, j, k;
-};
-
-
-//! Simple class to store the array dimensions
-class ArraySize final
-{
-public:
-    //! Empty constructor
-    inline ArraySize();
-
-    /*!
-     * Create the vector size
-     * @param N1            Number of elements in the first dimension
-     */
-    inline ArraySize( size_t N1 );
-
-    /*!
-     * Create the vector size
-     * @param N1            Number of elements in the first dimension
-     * @param N2            Number of elements in the second dimension
-     */
-    inline ArraySize( size_t N1, size_t N2 );
-
-    /*!
-     * Create the vector size
-     * @param N1            Number of elements in the first dimension
-     * @param N2            Number of elements in the second dimension
-     * @param N3            Number of elements in the third dimension
-     */
-    inline ArraySize( size_t N1, size_t N2, size_t N3 );
-
-    /*!
-     * Create the vector size
-     * @param N1            Number of elements in the first dimension
-     * @param N2            Number of elements in the second dimension
-     * @param N3            Number of elements in the third dimension
-     * @param N4            Number of elements in the fourth dimension
-     */
-    inline ArraySize( size_t N1, size_t N2, size_t N3, size_t N4 );
-
-    /*!
-     * Create the vector size
-     * @param N1            Number of elements in the first dimension
-     * @param N2            Number of elements in the second dimension
-     * @param N3            Number of elements in the third dimension
-     * @param N4            Number of elements in the fourth dimension
-     * @param N5            Number of elements in the fifth dimension
-     */
-    inline ArraySize( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 );
-
-    /*!
-     * Create from initializer list
-     * @param N             Size of the array
-     */
-    inline ArraySize( std::initializer_list<size_t> N );
-
-    /*!
-     * Create from raw pointer
-     * @param ndim          Number of dimensions
-     * @param ndim          Dimensions
-     */
-    inline ArraySize( size_t ndim, const size_t *dims );
-
-    /*!
-     * Create from std::vector
-     * @param N             Size of the array
-     */
-    inline ArraySize( const std::vector<size_t> &N );
-
-    /*!
-     * Copy constructor
-     * @param rhs           Array to copy
-     */
-    inline ArraySize( const ArraySize &rhs );
-
-    /*!
-     * Move constructor
-     * @param rhs           Array to copy
-     */
-    inline ArraySize( ArraySize &&rhs );
-
-    /*!
-     * Assignment operator
-     * @param rhs           Array to copy
-     */
-    inline ArraySize &operator=( const ArraySize &rhs );
-
-    /*!
-     * Move assignment operator
-     * @param rhs           Array to copy
-     */
-    inline ArraySize &operator=( ArraySize &&rhs );
-
-    /*!
-     * Access the ith dimension
-     * @param i             Index to access
-     */
-    inline size_t operator[]( size_t i ) const { return d_N[i]; }
-
-    //! Sum the elements
-    inline uint8_t ndim() const ATTRIBUTE_INLINE { return d_ndim; }
-
-    //! Sum the elements
-    inline size_t size() const ATTRIBUTE_INLINE { return d_ndim; }
-
-    //! Sum the elements
-    inline size_t length() const ATTRIBUTE_INLINE { return d_length; }
-
-    //! Sum the elements
-    inline void resize( uint8_t dim, size_t N );
-
-    //! Returns an iterator to the beginning
-    inline const size_t *begin() const ATTRIBUTE_INLINE { return d_N; }
-
-    //! Returns an iterator to the end
-    inline const size_t *end() const ATTRIBUTE_INLINE { return d_N + d_ndim; }
-
-    // Check if two matrices are equal
-    inline bool operator==( const ArraySize &rhs ) const ATTRIBUTE_INLINE
-    {
-        return d_ndim == rhs.d_ndim && memcmp( d_N, rhs.d_N, sizeof( d_N ) ) == 0;
-    }
-
-    //! Check if two matrices are not equal
-    inline bool operator!=( const ArraySize &rhs ) const ATTRIBUTE_INLINE
-    {
-        return d_ndim != rhs.d_ndim || memcmp( d_N, rhs.d_N, sizeof( d_N ) ) != 0;
-    }
-
-    //! Maximum supported dimension
-    constexpr static uint8_t maxDim() ATTRIBUTE_INLINE { return 5u; }
-
-    //! Get the index
-    inline size_t index( size_t i ) const ATTRIBUTE_INLINE
-    {
-        CHECK_ARRAY_LENGTH( i );
-        return i;
-    }
-
-    //! Get the index
-    inline size_t index( size_t i1, size_t i2 ) const ATTRIBUTE_INLINE
-    {
-        size_t index = i1 + i2 * d_N[0];
-        CHECK_ARRAY_LENGTH( index );
-        return index;
-    }
-
-    //! Get the index
-    inline size_t index( size_t i1, size_t i2, size_t i3 ) const ATTRIBUTE_INLINE
-    {
-        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * i3 );
-        CHECK_ARRAY_LENGTH( index );
-        return index;
-    }
-
-    //! Get the index
-    inline size_t index( size_t i1, size_t i2, size_t i3, size_t i4 ) const ATTRIBUTE_INLINE
-    {
-        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * i4 ) );
-        CHECK_ARRAY_LENGTH( index );
-        return index;
-    }
-
-    //! Get the index
-    inline size_t index(
-        size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const ATTRIBUTE_INLINE
-    {
-        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * ( i4 + d_N[3] * i5 ) ) );
-        CHECK_ARRAY_LENGTH( index );
-        return index;
-    }
-
-private:
-    uint8_t d_ndim;
-    size_t d_length;
-    size_t d_N[5];
-};
-

 /*!
 * Class Array is a multi-dimensional array class written by Mark Berrill
 */
-template<class TYPE, class FUN = FunctionTable>
+template<class TYPE, class FUN, class Allocator>
 class Array final
 {
 public: // Constructors / assignment operators
@@ -309,6 +82,12 @@ public: // Constructors / assignment operators
     */
    explicit Array( const Range<TYPE> &range );

+    /*!
+     * Create a 1D Array using a string that mimic's MATLAB
+     * @param range         Range of the data
+     */
+    explicit Array( std::string range );
+
    /*!
     * Create a 1D Array with the given initializer list
     * @param data          Input data
@@ -346,74 +125,34 @@ public: // Constructors / assignment operators
     */
    Array &operator=( const std::vector<TYPE> &rhs );

+    //! Is copyable?
+    inline bool isCopyable() const { return d_isCopyable; }
+
+    //! Set is copyable
+    inline void setCopyable( bool flag ) { d_isCopyable = flag; }
+
+    //! Is fixed size?
+    inline bool isFixedSize() const { return d_isFixedSize; }
+
+    //! Set is copyable
+    inline void setFixedSize( bool flag ) { d_isFixedSize = flag; }
+
+
 public: // Views/copies/subset
    /*!
-     * Create a 1D Array view to a raw block of data
-     * @param N             Number of elements in the array
+     * Create a multi-dimensional Array view to a raw block of data
+     * @param N             Number of elements in each dimension
     * @param data          Pointer to the data
     */
-    static std::shared_ptr<Array> view( size_t N, std::shared_ptr<TYPE> const &data );
+    static std::unique_ptr<Array> view( const ArraySize &N, std::shared_ptr<TYPE> &data );

-    /*!
-     * Create a new 2D Array with the given number of rows and columns
-     * @param N_rows        Number of rows
-     * @param N_columns     Number of columns
-     * @param data          Pointer to the data
-     */
-    static std::shared_ptr<Array> view(
-        size_t N_rows, size_t N_columns, std::shared_ptr<TYPE> const &data );
-
-    /*!
-     * Create a new 3D Array view to a raw block of data
-     * @param N1            Number of rows
-     * @param N2            Number of columns
-     * @param N3            Number of elements in the third dimension
-     * @param data          Pointer to the data
-     */
-    static std::shared_ptr<Array> view(
-        size_t N1, size_t N2, size_t N3, std::shared_ptr<TYPE> const &data );

    /*!
     * Create a multi-dimensional Array view to a raw block of data
     * @param N             Number of elements in each dimension
     * @param data          Pointer to the data
     */
-    static std::shared_ptr<Array> view( const ArraySize &N, std::shared_ptr<TYPE> const &data );
-
-
-    /*!
-     * Create a 1D Array view to a raw block of data
-     * @param N             Number of elements in the array
-     * @param data          Pointer to the data
-     */
-    static std::shared_ptr<const Array> constView(
-        size_t N, std::shared_ptr<const TYPE> const &data );
-
-    /*!
-     * Create a new 2D Array with the given number of rows and columns
-     * @param N_rows        Number of rows
-     * @param N_columns     Number of columns
-     * @param data          Pointer to the data
-     */
-    static std::shared_ptr<const Array> constView(
-        size_t N_rows, size_t N_columns, std::shared_ptr<const TYPE> const &data );
-
-    /*!
-     * Create a new 3D Array view to a raw block of data
-     * @param N1            Number of rows
-     * @param N2            Number of columns
-     * @param N3            Number of elements in the third dimension
-     * @param data          Pointer to the data
-     */
-    static std::shared_ptr<const Array> constView(
-        size_t N1, size_t N2, size_t N3, std::shared_ptr<const TYPE> const &data );
-
-    /*!
-     * Create a multi-dimensional Array view to a raw block of data
-     * @param N             Number of elements in each dimension
-     * @param data          Pointer to the data
-     */
-    static std::shared_ptr<const Array> constView(
+    static std::unique_ptr<const Array> constView(
        const ArraySize &N, std::shared_ptr<const TYPE> const &data );


@@ -432,7 +171,7 @@ public: // Views/copies/subset

    /*!
     * Make this object a view of the raw data (expert use only).
-     * Use view2( N, std::shared_ptr(data,[](TYPE*){}) ) instead.
+     * Use view2( N, shared_ptr(data,[](TYPE*){}) ) instead.
     *   Note: this interface is not recommended as it does not protect from
     *   the src data being deleted while still being used by the Array.
     *   Additionally for maximum performance it does not set the internal shared_ptr
@@ -440,27 +179,28 @@ public: // Views/copies/subset
     * @param ndim          Number of dimensions
     * @param dims          Number of elements in each dimension
     * @param data          Pointer to the data
+     * @param isCopyable    Once the view is created, can the array be copied
+     * @param isFixedSize   Once the view is created, is the array size fixed
     */
-    void viewRaw( int ndim, const size_t *dims, TYPE *data );
+    inline void viewRaw(
+        int ndim, const size_t *dims, TYPE *data, bool isCopyable = true, bool isFixedSize = true )
+    {
+        viewRaw( ArraySize( ndim, dims ), data, isCopyable, isFixedSize );
+    }

    /*!
     * Make this object a view of the raw data (expert use only).
-     * Use view2( N, std::shared_ptr(data,[](TYPE*){}) ) instead.
+     * Use view2( N, shared_ptr(data,[](TYPE*){}) ) instead.
     *   Note: this interface is not recommended as it does not protect from
     *   the src data being deleted while still being used by the Array.
     *   Additionally for maximum performance it does not set the internal shared_ptr
     *   so functions like getPtr and resize will not work correctly.
     * @param N             Number of elements in each dimension
     * @param data          Pointer to the data
+     * @param isCopyable    Once the view is created, can the array be copied
+     * @param isFixedSize   Once the view is created, is the array size fixed
     */
-    void viewRaw( const ArraySize &N, TYPE *data );
-
-    /*!
-     * Convert an array of one type to another.  This may or may not allocate new memory.
-     * @param array         Input array
-     */
-    template<class TYPE2>
-    static std::shared_ptr<Array<TYPE2>> convert( std::shared_ptr<Array<TYPE, FUN>> array );
+    void viewRaw( const ArraySize &N, TYPE *data, bool isCopyable = true, bool isFixedSize = true );


    /*!
@@ -468,8 +208,27 @@ public: // Views/copies/subset
     * @param array         Input array
     */
    template<class TYPE2>
-    static std::shared_ptr<const Array<TYPE2>> convert(
-        std::shared_ptr<const Array<TYPE, FUN>> array );
+    static inline std::unique_ptr<Array<TYPE2, FUN, Allocator>> convert(
+        std::shared_ptr<Array<TYPE, FUN, Allocator>> array )
+    {
+        auto array2 = std::make_unique<Array<TYPE2>>( array->size() );
+        array2.copy( *array );
+        return array2;
+    }
+
+
+    /*!
+     * Convert an array of one type to another.  This may or may not allocate new memory.
+     * @param array         Input array
+     */
+    template<class TYPE2>
+    static inline std::unique_ptr<const Array<TYPE2, FUN, Allocator>> convert(
+        std::shared_ptr<const Array<TYPE, FUN, Allocator>> array )
+    {
+        auto array2 = std::make_unique<Array<TYPE2>>( array->size() );
+        array2.copy( *array );
+        return array2;
+    }


    /*!
@@ -477,7 +236,11 @@ public: // Views/copies/subset
     * @param array         Source array
     */
    template<class TYPE2>
-    void copy( const Array<TYPE2> &array );
+    void inline copy( const Array<TYPE2, FUN, Allocator> &array )
+    {
+        resize( array.size() );
+        copy( array.data() );
+    }

    /*!
     * Copy and convert data from a raw vector to this array.
@@ -485,22 +248,36 @@ public: // Views/copies/subset
     * @param array         Source array
     */
    template<class TYPE2>
-    void copy( const TYPE2 *array );
+    void inline copy( const TYPE2 *data )
+    {
+        for ( size_t i = 0; i < d_size.length(); i++ )
+            d_data[i] = static_cast<TYPE>( data[i] );
+    }

    /*!
     * Copy and convert data from this array to a raw vector.
     * @param array         Source array
     */
    template<class TYPE2>
-    void copyTo( TYPE2 *array ) const;
+    void inline copyTo( TYPE2 *data ) const
+    {
+        for ( size_t i = 0; i < d_size.length(); i++ )
+            data[i] = static_cast<TYPE2>( d_data[i] );
+    }

    /*!
-     * Copy and convert data from this array to a raw vector.
-     * @param array         Source array
+     * Copy and convert data from this array to a new array
     */
    template<class TYPE2>
-    Array<TYPE2, FUN> cloneTo() const;
+    Array<TYPE2, FUN, Allocator> inline cloneTo() const
+    {
+        Array<TYPE2, FUN> dst( this->size() );
+        copyTo( dst.data() );
+        return dst;
+    }

+    /*! swap the raw data pointers for the Arrays after checking for compatibility */
+    void swap( Array &other );

    /*!
     * Fill the array with the given value
@@ -519,7 +296,7 @@ public: // Views/copies/subset
     * @param base        Base array
     * @param exp         Exponent value
     */
-    void pow( const Array<TYPE, FUN> &base, const TYPE &exp );
+    void pow( const Array &base, const TYPE &exp );

    //! Destructor
    ~Array();
@@ -534,11 +311,7 @@ public: // Views/copies/subset


    //! Return the size of the Array
-    inline ArraySize &size() { return d_size; }
-
-
-    //! Return the size of the Array
-    inline ArraySize size() const { return d_size; }
+    inline const ArraySize &size() const { return d_size; }


    //! Return the size of the Array
@@ -557,14 +330,15 @@ public: // Views/copies/subset
     * Resize the Array
     * @param N             NUmber of elements
     */
-    void resize( size_t N );
+    inline void resize( size_t N ) { resize( ArraySize( N ) ); }
+

    /*!
     * Resize the Array
-     * @param N_rows        Number of rows
-     * @param N_columns     Number of columns
+     * @param N_row         Number of rows
+     * @param N_col         Number of columns
     */
-    void resize( size_t N_rows, size_t N_columns );
+    inline void resize( size_t N_row, size_t N_col ) { resize( ArraySize( N_row, N_col ) ); }

    /*!
     * Resize the Array
@@ -572,7 +346,7 @@ public: // Views/copies/subset
     * @param N2            Number of columns
     * @param N3            Number of elements in the third dimension
     */
-    void resize( size_t N1, size_t N2, size_t N3 );
+    inline void resize( size_t N1, size_t N2, size_t N3 ) { resize( ArraySize( N1, N2, N3 ) ); }

    /*!
     * Resize the Array
@@ -598,19 +372,25 @@ public: // Views/copies/subset


    /*!
-     * Subset the Array (total size of array will not change)
+     * Reshape the Array so that the number of dimensions is the
+     *    max of ndim and the largest dim>1.
+     * @param ndim          Desired number of dimensions
+     */
+    inline void setNdim( int ndim ) { d_size.setNdim( ndim ); }
+
+
+    /*!
+     * Subset the Array
     * @param index         Index to subset (imin,imax,jmin,jmax,kmin,kmax,...)
     */
-    template<class TYPE2 = TYPE>
-    Array<TYPE2, FUN> subset( const std::vector<size_t> &index ) const;
+    Array subset( const std::vector<size_t> &index ) const;


    /*!
-     * Subset the Array (total size of array will not change)
+     * Subset the Array
     * @param index         Index to subset (ix:kx:jx,iy:ky:jy,...)
     */
-    template<class TYPE2 = TYPE>
-    Array<TYPE2, FUN> subset( const std::vector<Range<size_t>> &index ) const;
+    Array subset( const std::vector<Range<size_t>> &index ) const;


    /*!
@@ -618,30 +398,28 @@ public: // Views/copies/subset
     * @param index         Index of the subset (imin,imax,jmin,jmax,kmin,kmax,...)
     * @param subset        The subset array to copy from
     */
-    template<class TYPE2>
-    void copySubset( const std::vector<size_t> &index, const Array<TYPE2, FUN> &subset );
+    void copySubset( const std::vector<size_t> &index, const Array &subset );

    /*!
     * Copy data from an array into a subset of this array
     * @param index         Index of the subset
     * @param subset        The subset array to copy from
     */
-    template<class TYPE2>
-    void copySubset( const std::vector<Range<size_t>> &index, const Array<TYPE2, FUN> &subset );
+    void copySubset( const std::vector<Range<size_t>> &index, const Array &subset );

    /*!
     * Add data from an array into a subset of this array
     * @param index         Index of the subset (imin,imax,jmin,jmax,kmin,kmax,...)
     * @param subset        The subset array to add from
     */
-    void addSubset( const std::vector<size_t> &index, const Array<TYPE, FUN> &subset );
+    void addSubset( const std::vector<size_t> &index, const Array &subset );

    /*!
     * Add data from an array into a subset of this array
     * @param index         Index of the subset
     * @param subset        The subset array to add from
     */
-    void addSubset( const std::vector<Range<size_t>> &index, const Array<TYPE, FUN> &subset );
+    void addSubset( const std::vector<Range<size_t>> &index, const Array &subset );


 public: // Accessors
@@ -649,16 +427,13 @@ public: // Accessors
     * Access the desired element
     * @param i             The row index
     */
-    HOST_DEVICE inline TYPE &operator()( size_t i ) ATTRIBUTE_INLINE
-    {
-        return d_data[d_size.index( i )];
-    }
+    ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i ) { return d_data[d_size.index( i )]; }

    /*!
     * Access the desired element
     * @param i             The row index
     */
-    HOST_DEVICE inline const TYPE &operator()( size_t i ) const ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline const TYPE &operator()( size_t i ) const
    {
        return d_data[d_size.index( i )];
    }
@@ -668,7 +443,7 @@ public: // Accessors
     * @param i             The row index
     * @param j             The column index
     */
-    HOST_DEVICE inline TYPE &operator()( size_t i, size_t j ) ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i, size_t j )
    {
        return d_data[d_size.index( i, j )];
    }
@@ -678,7 +453,7 @@ public: // Accessors
     * @param i             The row index
     * @param j             The column index
     */
-    HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j ) const ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline const TYPE &operator()( size_t i, size_t j ) const
    {
        return d_data[d_size.index( i, j )];
    }
@@ -689,7 +464,7 @@ public: // Accessors
     * @param j             The column index
     * @param k             The third index
     */
-    HOST_DEVICE inline TYPE &operator()( size_t i, size_t j, size_t k ) ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i, size_t j, size_t k )
    {
        return d_data[d_size.index( i, j, k )];
    }
@@ -700,7 +475,7 @@ public: // Accessors
     * @param j             The column index
     * @param k             The third index
     */
-    HOST_DEVICE inline const TYPE &operator()( size_t i, size_t j, size_t k ) const ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline const TYPE &operator()( size_t i, size_t j, size_t k ) const
    {
        return d_data[d_size.index( i, j, k )];
    }
@@ -712,8 +487,7 @@ public: // Accessors
     * @param i3            The third index
     * @param i4            The fourth index
     */
-    HOST_DEVICE inline TYPE &operator()(
-        size_t i1, size_t i2, size_t i3, size_t i4 ) ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i1, size_t i2, size_t i3, size_t i4 )
    {
        return d_data[d_size.index( i1, i2, i3, i4 )];
    }
@@ -725,8 +499,8 @@ public: // Accessors
     * @param i3            The third index
     * @param i4            The fourth index
     */
-    HOST_DEVICE inline const TYPE &operator()(
-        size_t i1, size_t i2, size_t i3, size_t i4 ) const ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline const TYPE &operator()(
+        size_t i1, size_t i2, size_t i3, size_t i4 ) const
    {
        return d_data[d_size.index( i1, i2, i3, i4 )];
    }
@@ -739,8 +513,7 @@ public: // Accessors
     * @param i4            The fourth index
     * @param i5            The fifth index
     */
-    HOST_DEVICE inline TYPE &operator()(
-        size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline TYPE &operator()( size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 )
    {
        return d_data[d_size.index( i1, i2, i3, i4, i5 )];
    }
@@ -753,8 +526,8 @@ public: // Accessors
     * @param i4            The fourth index
     * @param i5            The fifth index
     */
-    HOST_DEVICE inline const TYPE &operator()(
-        size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline const TYPE &operator()(
+        size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const
    {
        return d_data[d_size.index( i1, i2, i3, i4, i5 )];
    }
@@ -763,7 +536,7 @@ public: // Accessors
     * Access the desired element as a raw pointer
     * @param i             The global index
     */
-    HOST_DEVICE inline TYPE *ptr( size_t i ) ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline TYPE *ptr( size_t i )
    {
        return i >= d_size.length() ? nullptr : &d_data[i];
    }
@@ -772,34 +545,34 @@ public: // Accessors
     * Access the desired element as a raw pointer
     * @param i             The global index
     */
-    HOST_DEVICE inline const TYPE *ptr( size_t i ) const ATTRIBUTE_INLINE
+    ARRAY_ATTRIBUTE inline const TYPE *ptr( size_t i ) const
    {
        return i >= d_size.length() ? nullptr : &d_data[i];
    }

    //! Get iterator to beginning of data
-    inline TYPE *begin() ATTRIBUTE_INLINE { return d_data; }
+    inline TYPE *begin() { return d_data; }

    //! Get iterator to beginning of data
-    inline const TYPE *begin() const ATTRIBUTE_INLINE { return d_data; }
+    inline const TYPE *begin() const { return d_data; }

    //! Get iterator to beginning of data
-    inline TYPE *end() ATTRIBUTE_INLINE { return d_data + d_size.length(); }
+    inline TYPE *end() { return d_data + d_size.length(); }

    //! Get iterator to beginning of data
-    inline const TYPE *end() const ATTRIBUTE_INLINE { return d_data + d_size.length(); }
+    inline const TYPE *end() const { return d_data + d_size.length(); }

    //! Return the pointer to the raw data
-    inline std::shared_ptr<TYPE> getPtr() ATTRIBUTE_INLINE { return d_ptr; }
+    inline std::shared_ptr<TYPE> getPtr() { return d_ptr; }

    //! Return the pointer to the raw data
-    inline std::shared_ptr<const TYPE> getPtr() const ATTRIBUTE_INLINE { return d_ptr; }
+    inline std::shared_ptr<const TYPE> getPtr() const { return d_ptr; }

    //! Return the pointer to the raw data
-    HOST_DEVICE inline TYPE *data() ATTRIBUTE_INLINE { return d_data; }
+    ARRAY_ATTRIBUTE inline TYPE *data() { return d_data; }

    //! Return the pointer to the raw data
-    HOST_DEVICE inline const TYPE *data() const ATTRIBUTE_INLINE { return d_data; }
+    ARRAY_ATTRIBUTE inline const TYPE *data() const { return d_data; }


 public: // Operator overloading
@@ -834,52 +607,52 @@ public: // Math operations
    void rand();

    //! Return true if NaNs are present
-    inline bool NaNs() const;
+    bool NaNs() const;

    //! Return the smallest value
-    inline TYPE min() const;
+    TYPE min() const;

    //! Return the largest value
-    inline TYPE max() const;
+    TYPE max() const;

    //! Return the sum of all elements
-    inline TYPE sum() const;
+    TYPE sum() const;

    //! Return the mean of all elements
-    inline TYPE mean() const;
+    TYPE mean() const;

    //! Return the min of all elements in a given direction
-    Array<TYPE, FUN> min( int dir ) const;
+    Array min( int dir ) const;

    //! Return the max of all elements in a given direction
-    Array<TYPE, FUN> max( int dir ) const;
+    Array max( int dir ) const;

    //! Return the sum of all elements in a given direction
-    Array<TYPE, FUN> sum( int dir ) const;
+    Array sum( int dir ) const;

    //! Return the smallest value
-    inline TYPE min( const std::vector<size_t> &index ) const;
+    TYPE min( const std::vector<size_t> &index ) const;

    //! Return the largest value
-    inline TYPE max( const std::vector<size_t> &index ) const;
+    TYPE max( const std::vector<size_t> &index ) const;

    //! Return the sum of all elements
-    inline TYPE sum( const std::vector<size_t> &index ) const;
+    TYPE sum( const std::vector<size_t> &index ) const;

    //! Return the mean of all elements
-    inline TYPE mean( const std::vector<size_t> &index ) const;
+    TYPE mean( const std::vector<size_t> &index ) const;

    //! Return the smallest value
-    inline TYPE min( const std::vector<Range<size_t>> &index ) const;
+    TYPE min( const std::vector<Range<size_t>> &index ) const;

    //! Return the largest value
-    inline TYPE max( const std::vector<Range<size_t>> &index ) const;
+    TYPE max( const std::vector<Range<size_t>> &index ) const;

    //! Return the sum of all elements
-    inline TYPE sum( const std::vector<Range<size_t>> &index ) const;
+    TYPE sum( const std::vector<Range<size_t>> &index ) const;

    //! Return the mean of all elements
-    inline TYPE mean( const std::vector<Range<size_t>> &index ) const;
+    TYPE mean( const std::vector<Range<size_t>> &index ) const;

    //! Find all elements that match the operator
    std::vector<size_t> find(
@@ -894,17 +667,17 @@ public: // Math operations
    static Array multiply( const Array &a, const Array &b );

    //! Transpose an array
-    Array<TYPE, FUN> reverseDim() const;
+    Array reverseDim() const;

    //! Replicate an array a given number of times in each direction
-    Array<TYPE, FUN> repmat( const std::vector<size_t> &N ) const;
+    Array repmat( const std::vector<size_t> &N ) const;

    //! Coarsen an array using the given filter
-    Array<TYPE, FUN> coarsen( const Array<TYPE, FUN> &filter ) const;
+    Array coarsen( const Array &filter ) const;

    //! Coarsen an array using the given filter
-    Array<TYPE, FUN> coarsen( const std::vector<size_t> &ratio,
-        std::function<TYPE( const Array<TYPE, FUN> & )> filter ) const;
+    Array coarsen(
+        const std::vector<size_t> &ratio, std::function<TYPE( const Array & )> filter ) const;

    /*!
     * Perform a element-wise operation y = f(x)
@@ -928,21 +701,31 @@ public: // Math operations
     * @param[in] x             x
     * @param[in] beta          beta
     */
-    void axpby( const TYPE &alpha, const Array<TYPE, FUN> &x, const TYPE &beta );
+    void axpby( const TYPE &alpha, const Array &x, const TYPE &beta );
+
+    /*!
+     * Linear interpolation
+     * @param[in] x             Position as a decimal index
+     */
+    TYPE interp( const std::vector<double> &x ) const;
+
+    /**
+     * \fn equals (Array & const rhs, TYPE tol )
+     * \brief  Determine if two Arrays are equal using an absolute tolerance
+     * \param[in] rhs Vector to compare to
+     * \param[in] tol Tolerance of comparison
+     * \return  True iff \f$||\mathit{rhs} - x||_\infty < \mathit{tol}\f$
+     */
+    bool equals( const Array &rhs, TYPE tol = 0.000001 ) const;

 private:
+    bool d_isCopyable;           // Can the array be copied
+    bool d_isFixedSize;          // Can the array be resized
    ArraySize d_size;            // Size of each dimension
    TYPE *d_data;                // Raw pointer to data in array
    std::shared_ptr<TYPE> d_ptr; // Shared pointer to data in array
    void allocate( const ArraySize &N );

-public:
-    template<class TYPE2, class FUN2>
-    inline bool sizeMatch( const Array<TYPE2, FUN2> &rhs ) const
-    {
-        return d_size == rhs.d_size;
-    }
-
 private:
    inline void checkSubsetIndex( const std::vector<Range<size_t>> &range ) const;
    inline std::vector<Range<size_t>> convert( const std::vector<size_t> &index ) const;
@@ -952,11 +735,60 @@ private:
 };


+/********************************************************
+ *  ostream operator                                     *
+ ********************************************************/
+inline std::ostream &operator<<( std::ostream &out, const ArraySize &s )
+{
+    out << "[" << s[0];
+    for ( size_t i = 1; i < s.ndim(); i++ )
+        out << "," << s[i];
+    out << "]";
+    return out;
+}
+
+
+/********************************************************
+ *  Math operations                                      *
+ ********************************************************/
+template<class TYPE, class FUN, class Allocator>
+inline Array<TYPE, FUN, Allocator> operator+(
+    const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
+{
+    Array<TYPE, FUN, Allocator> c;
+    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a + b; };
+    FUN::transform( fun, a, b, c );
+    return c;
+}
+template<class TYPE, class FUN, class Allocator>
+inline Array<TYPE, FUN, Allocator> operator-(
+    const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
+{
+    Array<TYPE, FUN, Allocator> c;
+    const auto &fun = []( const TYPE &a, const TYPE &b ) { return a - b; };
+    FUN::transform( fun, a, b, c );
+    return c;
+}
+template<class TYPE, class FUN, class Allocator>
+inline Array<TYPE, FUN, Allocator> operator*(
+    const Array<TYPE, FUN, Allocator> &a, const Array<TYPE, FUN, Allocator> &b )
+{
+    return Array<TYPE, FUN, Allocator>::multiply( a, b );
+}
+template<class TYPE, class FUN, class Allocator>
+inline Array<TYPE, FUN, Allocator> operator*(
+    const Array<TYPE, FUN, Allocator> &a, const std::vector<TYPE> &b )
+{
+    Array<TYPE, FUN, Allocator> b2;
+    b2.viewRaw( { b.size() }, const_cast<TYPE *>( b.data() ) );
+    return Array<TYPE, FUN, Allocator>::multiply( a, b2 );
+}
+
+
+/********************************************************
+ *  Convience typedefs                                   *
+ ********************************************************/
 typedef Array<double> DoubleArray;
-typedef Array<float> FloatArray;
 typedef Array<int> IntArray;

-
-#include "common/Array.hpp"
-
 #endif
--- a/common/Array.hpp
+++ b/common/Array.hpp
--- a/common/ArraySize.h
+++ b/common/ArraySize.h
@@ -0,0 +1,294 @@
+#ifndef included_ArraySizeClass
+#define included_ArraySizeClass
+
+
+#include <array>
+#include <cstring>
+#include <initializer_list>
+#include <vector>
+
+
+#if defined( __CUDA_ARCH__ )
+#include <cuda.h>
+#define HOST_DEVICE __host__ __device__
+#else
+#define HOST_DEVICE
+#endif
+#if defined( USING_GCC ) || defined( USING_CLANG )
+#define ARRAY_ATTRIBUTE HOST_DEVICE __attribute__( ( always_inline ) )
+#else
+#define ARRAY_ATTRIBUTE HOST_DEVICE
+#endif
+
+
+#if ( defined( DEBUG ) || defined( _DEBUG ) ) && !defined( NDEBUG )
+#define CHECK_ARRAY_LENGTH( i )                                      \
+    do {                                                             \
+        if ( i >= d_length )                                         \
+            throw std::out_of_range( "Index exceeds array bounds" ); \
+    } while ( 0 )
+#else
+#define CHECK_ARRAY_LENGTH( i ) \
+    do {                        \
+    } while ( 0 )
+#endif
+
+
+// Forward declerations
+class FunctionTable;
+template<class TYPE, class FUN = FunctionTable, class Allocator = std::nullptr_t>
+class Array;
+
+
+//! Simple range class
+template<class TYPE = size_t>
+class Range final
+{
+public:
+    //! Empty constructor
+    constexpr Range() : i( 0 ), j( -1 ), k( 1 ) {}
+
+    /*!
+     * Create a range i:k:j (or i:j)
+     * @param i_            Starting value
+     * @param j_            Ending value
+     * @param k_            Increment value
+     */
+    constexpr Range( TYPE i_, TYPE j_, TYPE k_ = 1 ) : i( i_ ), j( j_ ), k( k_ ) {}
+
+    TYPE i, j, k;
+};
+
+
+//! Simple class to store the array dimensions
+class ArraySize final
+{
+public:
+    //! Empty constructor
+    constexpr ArraySize() : d_ndim( 1 ), d_length( 0 ), d_N{ 0, 1, 1, 1, 1 } {}
+
+    /*!
+     * Create the vector size
+     * @param N1            Number of elements in the first dimension
+     */
+    constexpr ArraySize( size_t N1 ) : d_ndim( 1 ), d_length( N1 ), d_N{ N1, 1, 1, 1, 1 } {}
+
+    /*!
+     * Create the vector size
+     * @param N1            Number of elements in the first dimension
+     * @param N2            Number of elements in the second dimension
+     */
+    constexpr ArraySize( size_t N1, size_t N2 )
+        : d_ndim( 2 ), d_length( N1 * N2 ), d_N{ N1, N2, 1, 1, 1 }
+    {
+    }
+
+    /*!
+     * Create the vector size
+     * @param N1            Number of elements in the first dimension
+     * @param N2            Number of elements in the second dimension
+     * @param N3            Number of elements in the third dimension
+     */
+    constexpr ArraySize( size_t N1, size_t N2, size_t N3 )
+        : d_ndim( 3 ), d_length( N1 * N2 * N3 ), d_N{ N1, N2, N3, 1, 1 }
+    {
+    }
+
+    /*!
+     * Create the vector size
+     * @param N1            Number of elements in the first dimension
+     * @param N2            Number of elements in the second dimension
+     * @param N3            Number of elements in the third dimension
+     * @param N4            Number of elements in the fourth dimension
+     */
+    constexpr ArraySize( size_t N1, size_t N2, size_t N3, size_t N4 )
+        : d_ndim( 4 ), d_length( N1 * N2 * N3 * N4 ), d_N{ N1, N2, N3, N4, 1 }
+    {
+    }
+
+    /*!
+     * Create the vector size
+     * @param N1            Number of elements in the first dimension
+     * @param N2            Number of elements in the second dimension
+     * @param N3            Number of elements in the third dimension
+     * @param N4            Number of elements in the fourth dimension
+     * @param N5            Number of elements in the fifth dimension
+     */
+    constexpr ArraySize( size_t N1, size_t N2, size_t N3, size_t N4, size_t N5 )
+        : d_ndim( 5 ), d_length( N1 * N2 * N3 * N4 * N5 ), d_N{ N1, N2, N3, N4, N5 }
+    {
+    }
+
+    /*!
+     * Create from initializer list
+     * @param N             Size of the array
+     */
+    constexpr ArraySize( std::initializer_list<size_t> N )
+        : d_ndim( N.size() ), d_length( 0 ), d_N{ 0, 1, 1, 1, 1 }
+    {
+        if ( d_ndim > maxDim() )
+            throw std::out_of_range( "Maximum number of dimensions exceeded" );
+        auto it = N.begin();
+        for ( size_t i = 0; i < d_ndim; i++, ++it )
+            d_N[i] = *it;
+        d_length = 1;
+        for ( unsigned long i : d_N )
+            d_length *= i;
+        if ( d_ndim == 0 )
+            d_length = 0;
+    }
+
+
+    /*!
+     * Create from raw pointer
+     * @param ndim          Number of dimensions
+     * @param dims          Dimensions
+     */
+    constexpr ArraySize( size_t ndim, const size_t *dims )
+        : d_ndim( ndim ), d_length( 0 ), d_N{ 0, 1, 1, 1, 1 }
+    {
+        if ( d_ndim > maxDim() )
+            throw std::out_of_range( "Maximum number of dimensions exceeded" );
+        for ( size_t i = 0; i < ndim; i++ )
+            d_N[i] = dims[i];
+        d_length = 1;
+        for ( unsigned long i : d_N )
+            d_length *= i;
+        if ( d_ndim == 0 )
+            d_length = 0;
+    }
+
+    /*!
+     * Create from std::vector
+     * @param N             Size of the array
+     */
+    ArraySize( const std::vector<size_t> &N );
+
+    // Copy/assignment constructors
+    constexpr ArraySize( ArraySize &&rhs )      = default;
+    constexpr ArraySize( const ArraySize &rhs ) = default;
+    constexpr ArraySize &operator=( ArraySize &&rhs ) = default;
+    constexpr ArraySize &operator=( const ArraySize &rhs ) = default;
+
+    /*!
+     * Access the ith dimension
+     * @param i             Index to access
+     */
+    constexpr ARRAY_ATTRIBUTE size_t operator[]( size_t i ) const { return d_N[i]; }
+
+    //! Return the number of dimensions
+    constexpr ARRAY_ATTRIBUTE uint8_t ndim() const { return d_ndim; }
+
+    //! Return the number of dimensions
+    constexpr ARRAY_ATTRIBUTE size_t size() const { return d_ndim; }
+
+    //! Return the total number of elements in the array
+    constexpr ARRAY_ATTRIBUTE size_t length() const { return d_length; }
+
+    //! Resize the dimension
+    constexpr void resize( uint8_t dim, size_t N )
+    {
+        if ( dim >= d_ndim )
+            throw std::out_of_range( "Invalid dimension" );
+        d_N[dim] = N;
+        d_length = 1;
+        for ( unsigned long i : d_N )
+            d_length *= i;
+    }
+
+    /*!
+     * Reshape the Array so that the number of dimensions is the
+     *    max of ndim and the largest dim>1.
+     * @param ndim          Desired number of dimensions
+     */
+    constexpr void setNdim( uint8_t ndim ) { d_ndim = std::max( ndim, d_ndim ); }
+
+    //! Returns an iterator to the beginning
+    constexpr const size_t *begin() const { return d_N; }
+
+    //! Returns an iterator to the end
+    constexpr const size_t *end() const { return d_N + d_ndim; }
+
+    // Check if two array sizes are equal
+    constexpr ARRAY_ATTRIBUTE bool operator==( const ArraySize &rhs ) const
+    {
+        return d_ndim == rhs.d_ndim && memcmp( d_N, rhs.d_N, sizeof( d_N ) ) == 0;
+    }
+
+    // Check if two array sizes are equal (ignoring the dimension)
+    constexpr ARRAY_ATTRIBUTE bool approxEqual( const ArraySize &rhs ) const
+    {
+        return ( length() == 0 && rhs.length() == 0 ) || memcmp( d_N, rhs.d_N, sizeof( d_N ) ) == 0;
+    }
+
+    //! Check if two matrices are not equal
+    constexpr ARRAY_ATTRIBUTE bool operator!=( const ArraySize &rhs ) const
+    {
+        return d_ndim != rhs.d_ndim || memcmp( d_N, rhs.d_N, sizeof( d_N ) ) != 0;
+    }
+
+    //! Maximum supported dimension
+    constexpr ARRAY_ATTRIBUTE static uint8_t maxDim() { return 5u; }
+
+    //! Get the index
+    constexpr ARRAY_ATTRIBUTE size_t index( size_t i ) const
+    {
+        CHECK_ARRAY_LENGTH( i );
+        return i;
+    }
+
+    //! Get the index
+    constexpr ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2 ) const
+    {
+        size_t index = i1 + i2 * d_N[0];
+        CHECK_ARRAY_LENGTH( index );
+        return index;
+    }
+
+    //! Get the index
+    constexpr ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2, size_t i3 ) const
+    {
+        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * i3 );
+        CHECK_ARRAY_LENGTH( index );
+        return index;
+    }
+
+    //! Get the index
+    constexpr ARRAY_ATTRIBUTE size_t index( size_t i1, size_t i2, size_t i3, size_t i4 ) const
+    {
+        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * i4 ) );
+        CHECK_ARRAY_LENGTH( index );
+        return index;
+    }
+
+    //! Get the index
+    constexpr ARRAY_ATTRIBUTE size_t index(
+        size_t i1, size_t i2, size_t i3, size_t i4, size_t i5 ) const
+    {
+        size_t index = i1 + d_N[0] * ( i2 + d_N[1] * ( i3 + d_N[2] * ( i4 + d_N[3] * i5 ) ) );
+        CHECK_ARRAY_LENGTH( index );
+        return index;
+    }
+
+private:
+    uint8_t d_ndim;
+    size_t d_length;
+    size_t d_N[5];
+};
+
+
+// Function to concatenate dimensions of two array sizes
+constexpr ArraySize cat( const ArraySize &x, const ArraySize &y )
+{
+    if ( x.ndim() + y.ndim() > ArraySize::maxDim() )
+        throw std::out_of_range( "Maximum number of dimensions exceeded" );
+    size_t N[ArraySize::maxDim()] = { 0 };
+    for ( int i = 0; i < x.ndim(); i++ )
+        N[i] = x[i];
+    for ( int i = 0; i < y.ndim(); i++ )
+        N[i + x.ndim()] = y[i];
+    return ArraySize( x.ndim() + y.ndim(), N );
+}
+
+
+#endif
--- a/common/FunctionTable.h
+++ b/common/FunctionTable.h
@@ -2,7 +2,7 @@
 #define included_FunctionTable


-#include "common/Array.h"
+#include "common/ArraySize.h"

 #include <functional>

@@ -65,17 +65,43 @@ public:
     * @param[out] c            The output array
     */
    template<class TYPE, class FUN>
-    static void multiply(
+    static inline void multiply(
        const Array<TYPE, FUN> &a, const Array<TYPE, FUN> &b, Array<TYPE, FUN> &c );

+    /*!
+     * Perform dgemv/dgemm equavalent operation ( C = alpha*A*B + beta*C )
+     * @param[in] alpha         The scalar value alpha
+     * @param[in] A             The first array
+     * @param[in] B             The second array
+     * @param[in] beta          The scalar value alpha
+     * @param[in,out] c         The output array C
+     */
+    template<class TYPE, class FUN>
+    static void gemm( const TYPE alpha, const Array<TYPE, FUN> &A, const Array<TYPE, FUN> &B,
+        const TYPE beta, Array<TYPE, FUN> &C );
+
+    /*!
+     * Perform axpy equavalent operation ( y = alpha*x + y )
+     * @param[in] alpha         The scalar value alpha
+     * @param[in] x             The input array x
+     * @param[in,out] y         The output array y
+     */
+    template<class TYPE, class FUN>
+    static void axpy( const TYPE alpha, const Array<TYPE, FUN> &x, Array<TYPE, FUN> &y );
+
+    /*!
+     * Check if two arrays are approximately equal
+     * @param[in] A             The first array
+     * @param[in] B             The second array
+     * @param[in] tol           The tolerance
+     */
+    template<class TYPE, class FUN>
+    static bool equals( const Array<TYPE, FUN> &A, const Array<TYPE, FUN> &B, TYPE tol );
+

 private:
    FunctionTable();
-
-    template<class T>
-    static inline void rand( size_t N, T *x );
 };

-#include "common/FunctionTable.hpp"

 #endif
--- a/common/FunctionTable.hpp
+++ b/common/FunctionTable.hpp
@@ -13,37 +13,30 @@
 /********************************************************
 *  Random number initialization                         *
 ********************************************************/
+template<class TYPE>
+static inline typename std::enable_if<std::is_integral<TYPE>::value>::type genRand(
+    size_t N, TYPE* x )
+{
+    std::random_device rd;
+    std::mt19937 gen( rd() );
+    std::uniform_int_distribution<TYPE> dis;
+    for ( size_t i = 0; i < N; i++ )
+        x[i] = dis( gen );
+}
+template<class TYPE>
+static inline typename std::enable_if<std::is_floating_point<TYPE>::value>::type genRand(
+    size_t N, TYPE* x )
+{
+    std::random_device rd;
+    std::mt19937 gen( rd() );
+    std::uniform_real_distribution<TYPE> dis( 0, 1 );
+    for ( size_t i = 0; i < N; i++ )
+        x[i] = dis( gen );
+}
 template<class TYPE, class FUN>
-void FunctionTable::rand( Array<TYPE, FUN> &x )
+inline void FunctionTable::rand( Array<TYPE, FUN>& x )
 {
-    FunctionTable::rand<TYPE>( x.length(), x.data() );
-}
-template<>
-inline void FunctionTable::rand<double>( size_t N, double *x )
-{
-    std::random_device rd;
-    std::mt19937 gen( rd() );
-    std::uniform_real_distribution<> dis( 0, 1 );
-    for ( size_t i = 0; i < N; i++ )
-        x[i] = dis( gen );
-}
-template<>
-inline void FunctionTable::rand<float>( size_t N, float *x )
-{
-    std::random_device rd;
-    std::mt19937 gen( rd() );
-    std::uniform_real_distribution<> dis( 0, 1 );
-    for ( size_t i = 0; i < N; i++ )
-        x[i] = dis( gen );
-}
-template<>
-inline void FunctionTable::rand<int>( size_t N, int *x )
-{
-    std::random_device rd;
-    std::mt19937 gen( rd() );
-    std::uniform_int_distribution<> dis;
-    for ( size_t i = 0; i < N; i++ )
-        x[i] = dis( gen );
+    genRand<TYPE>( x.length(), x.data() );
 }


@@ -79,7 +72,7 @@ template<class TYPE, class FUN, typename LAMBDA>
 inline void FunctionTable::transform(
    LAMBDA& fun, const Array<TYPE, FUN>& x, const Array<TYPE, FUN>& y, Array<TYPE, FUN>& z )
 {
-    if ( !x.sizeMatch( y ) )
+    if ( x.size() != y.size() )
        throw std::logic_error( "Sizes of x and y do not match" );
    z.resize( x.size() );
    const size_t N = x.length();
@@ -88,29 +81,157 @@ inline void FunctionTable::transform(
 }


+/********************************************************
+ *  axpy                                                 *
+ ********************************************************/
+template<class TYPE>
+inline void call_axpy( size_t N, const TYPE alpha, const TYPE* x, TYPE* y );
+template<>
+inline void call_axpy<float>( size_t, const float, const float*, float* )
+{
+    throw std::logic_error( "LapackWrappers not configured" );
+}
+template<>
+inline void call_axpy<double>( size_t, const double, const double*, double* )
+{
+    throw std::logic_error( "LapackWrappers not configured" );
+}
+template<class TYPE>
+inline void call_axpy( size_t N, const TYPE alpha, const TYPE* x, TYPE* y )
+{
+    for ( size_t i = 0; i < N; i++ )
+        y[i] += alpha * x[i];
+}
+template<class TYPE, class FUN>
+void FunctionTable::axpy( const TYPE alpha, const Array<TYPE, FUN>& x, Array<TYPE, FUN>& y )
+{
+    if ( x.size() != y.size() )
+        throw std::logic_error( "Array sizes do not match" );
+    call_axpy( x.length(), alpha, x.data(), y.data() );
+}
+
+
 /********************************************************
 *  Multiply two arrays                                  *
 ********************************************************/
+template<class TYPE>
+inline void call_gemv( size_t M, size_t N, TYPE alpha, TYPE beta, const TYPE* A, const TYPE* x, TYPE* y );
+template<>
+inline void call_gemv<double>(
+    size_t, size_t, double, double, const double*, const double*, double* )
+{
+    throw std::logic_error( "LapackWrappers not configured" );
+}
+template<>
+inline void call_gemv<float>( size_t, size_t, float, float, const float*, const float*, float* )
+{
+    throw std::logic_error( "LapackWrappers not configured" );
+}
+template<class TYPE>
+inline void call_gemv(
+    size_t M, size_t N, TYPE alpha, TYPE beta, const TYPE* A, const TYPE* x, TYPE* y )
+{
+    for ( size_t i = 0; i < M; i++ )
+        y[i] = beta * y[i];
+    for ( size_t j = 0; j < N; j++ ) {
+        for ( size_t i = 0; i < M; i++ )
+            y[i] += alpha * A[i + j * M] * x[j];
+    }
+}
+template<class TYPE>
+inline void call_gemm(
+    size_t M, size_t N, size_t K, TYPE alpha, TYPE beta, const TYPE* A, const TYPE* B, TYPE* C );
+template<>
+inline void call_gemm<double>( size_t, size_t, size_t, double, double, const double*, const double*, double* )
+{
+    throw std::logic_error( "LapackWrappers not configured" );
+}
+template<>
+inline void call_gemm<float>( size_t, size_t, size_t, float, float, const float*, const float*, float* )
+{
+    throw std::logic_error( "LapackWrappers not configured" );
+}
+template<class TYPE>
+inline void call_gemm(
+    size_t M, size_t N, size_t K, TYPE alpha, TYPE beta, const TYPE* A, const TYPE* B, TYPE* C )
+{
+    for ( size_t i = 0; i < K * M; i++ )
+        C[i] = beta * C[i];
+    for ( size_t k = 0; k < K; k++ ) {
+        for ( size_t j = 0; j < N; j++ ) {
+            for ( size_t i = 0; i < M; i++ )
+                C[i + k * M] += alpha * A[i + j * M] * B[j + k * N];
+        }
+    }
+}
+template<class TYPE, class FUN>
+void FunctionTable::gemm( const TYPE alpha, const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b,
+    const TYPE beta, Array<TYPE, FUN>& c )
+{
+    if ( a.ndim() == 2 && b.ndim() == 1 ) {
+        if ( a.size( 1 ) != b.size( 0 ) )
+            throw std::logic_error( "Inner dimensions must match" );
+        call_gemv<TYPE>( a.size( 0 ), a.size( 1 ), alpha, beta, a.data(), b.data(), c.data() );
+    } else if ( a.ndim() <= 2 && b.ndim() <= 2 ) {
+        if ( a.size( 1 ) != b.size( 0 ) )
+            throw std::logic_error( "Inner dimensions must match" );
+        call_gemm<TYPE>(
+            a.size( 0 ), a.size( 1 ), b.size( 1 ), alpha, beta, a.data(), b.data(), c.data() );
+    } else {
+        throw std::logic_error( "Not finished yet" );
+    }
+}
 template<class TYPE, class FUN>
 void FunctionTable::multiply(
    const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, Array<TYPE, FUN>& c )
 {
-    if ( a.ndim() <= 2 && b.ndim() <= 2 ) {
+    if ( a.ndim() == 2 && b.ndim() == 1 ) {
+        if ( a.size( 1 ) != b.size( 0 ) )
+            throw std::logic_error( "Inner dimensions must match" );
+        c.resize( a.size( 0 ) );
+        call_gemv<TYPE>( a.size( 0 ), a.size( 1 ), 1, 0, a.data(), b.data(), c.data() );
+    } else if ( a.ndim() <= 2 && b.ndim() <= 2 ) {
        if ( a.size( 1 ) != b.size( 0 ) )
            throw std::logic_error( "Inner dimensions must match" );
        c.resize( a.size( 0 ), b.size( 1 ) );
-        c.fill( 0 );
-        for ( size_t k = 0; k < b.size( 1 ); k++ ) {
-            for ( size_t j = 0; j < a.size( 1 ); j++ ) {
-                for ( size_t i = 0; i < a.size( 0 ); i++ ) {
-                    c( i, k ) += a( i, j ) * b( j, k );
-                }
-            }
-        }
+        call_gemm<TYPE>(
+            a.size( 0 ), a.size( 1 ), b.size( 1 ), 1, 0, a.data(), b.data(), c.data() );
    } else {
        throw std::logic_error( "Not finished yet" );
    }
 }


+/********************************************************
+ *  Check if two arrays are equal                        *
+ ********************************************************/
+template<class TYPE, class FUN>
+inline typename std::enable_if<!std::is_floating_point<TYPE>::value, bool>::type
+FunctionTableCompare( const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, TYPE )
+{
+    bool pass = true;
+    if ( a.size() != b.size() )
+        throw std::logic_error( "Sizes of x and y do not match" );
+    for ( size_t i = 0; i < a.length(); i++ )
+        pass = pass && a( i ) == b( i );
+    return pass;
+}
+template<class TYPE, class FUN>
+inline typename std::enable_if<std::is_floating_point<TYPE>::value, bool>::type
+FunctionTableCompare( const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, TYPE tol )
+{
+    bool pass = true;
+    if ( a.size() != b.size() )
+        throw std::logic_error( "Sizes of x and y do not match" );
+    for ( size_t i = 0; i < a.length(); i++ )
+        pass = pass && ( std::abs( a( i ) - b( i ) ) < tol );
+    return pass;
+}
+template<class TYPE, class FUN>
+bool FunctionTable::equals( const Array<TYPE, FUN>& a, const Array<TYPE, FUN>& b, TYPE tol )
+{
+    return FunctionTableCompare( a, b, tol );
+}
+
+
 #endif
--- a/tests/test_dcel_minkowski.cpp
+++ b/tests/test_dcel_minkowski.cpp
@@ -4,13 +4,13 @@
 #include "common/Domain.h"
 #include "common/SpherePack.h"

-using namespace std;
+#include "ProfilerApp.h"
+

 /*
 *  Compare the measured and analytical curvature for a sphere
 *
 */
-
 std::shared_ptr<Database> loadInputs( )
 {
  //auto db = std::make_shared<Database>( "Domain.in" );
@@ -38,7 +38,7 @@ int main(int argc, char **argv)
 		int Nx = db->getVector<int>( "n" )[0];
 		int Ny = db->getVector<int>( "n" )[1];
 		int Nz = db->getVector<int>( "n" )[2];
-		std::shared_ptr<Domain> Dm = std::shared_ptr<Domain>(new Domain(db,comm));
+		auto Dm = std::make_shared<Domain>( db, comm );
 		
 		Nx+=2; Ny+=2; Nz+=2;
 		DoubleArray Phase(Nx,Ny,Nz);
@@ -98,6 +98,7 @@ int main(int argc, char **argv)
 		printf("   Euler characteristic  = %f (analytical = 2.0) \n",sphere.Xi);
 		
 	}
+    PROFILE_SAVE("test_dcel_minkowski");
 	MPI_Barrier(comm);
 	MPI_Finalize();
 	return toReturn;