mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Added block_size templates to BlockedMatrix and Reorder functions
This commit is contained in:
parent
8f9fa15ecd
commit
b692b66d3a
@ -65,6 +65,7 @@ namespace bda
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::init(BlockedMatrix *mat)
|
||||
{
|
||||
const unsigned int bs = block_size;
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
BlockedMatrix *CSCmat = nullptr;
|
||||
|
||||
@ -79,13 +80,13 @@ namespace bda
|
||||
CSCmat = new BlockedMatrix();
|
||||
CSCmat->Nb = Nb;
|
||||
CSCmat->nnzbs = nnzbs;
|
||||
CSCmat->nnzValues = new Block[nnzbs];
|
||||
CSCmat->nnzValues = new double[nnzbs * bs * bs];
|
||||
CSCmat->colIndices = new int[nnzbs];
|
||||
CSCmat->rowPointers = new int[Nb + 1];
|
||||
if(verbosity >= 3){
|
||||
t1 = second();
|
||||
}
|
||||
bcsr_to_bcsc(mat->nnzValues, mat->colIndices, mat->rowPointers, CSCmat->nnzValues, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb);
|
||||
bcsr_to_bcsc<block_size>(mat->nnzValues, mat->colIndices, mat->rowPointers, CSCmat->nnzValues, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb);
|
||||
if(verbosity >= 3){
|
||||
t2 = second();
|
||||
std::ostringstream out;
|
||||
@ -97,7 +98,7 @@ namespace bda
|
||||
if(verbosity >= 3){
|
||||
t1 = second();
|
||||
}
|
||||
rMat = allocateBlockedMatrix(mat->Nb, mat->nnzbs);
|
||||
rMat = allocateBlockedMatrix<block_size>(mat->Nb, mat->nnzbs);
|
||||
LUMat = soft_copyBlockedMatrix(rMat);
|
||||
if (level_scheduling) {
|
||||
rowsPerColor = findLevelScheduling(mat->colIndices, mat->rowPointers, CSCmat->colIndices, CSCmat->rowPointers, mat->Nb, &numColors, toOrder, fromOrder);
|
||||
@ -115,12 +116,12 @@ namespace bda
|
||||
}
|
||||
|
||||
diagIndex = new int[mat->Nb];
|
||||
invDiagVals = new Block[mat->Nb];
|
||||
invDiagVals = new double[mat->Nb * bs * bs];
|
||||
|
||||
LMat = allocateBlockedMatrix(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||
UMat = allocateBlockedMatrix(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||
LMat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||
UMat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||
|
||||
LUMat->nnzValues = new Block[mat->nnzbs];
|
||||
LUMat->nnzValues = new double[mat->nnzbs * bs * bs];
|
||||
|
||||
if (level_scheduling) {
|
||||
delete[] CSCmat->nnzValues;
|
||||
@ -130,22 +131,22 @@ namespace bda
|
||||
}
|
||||
|
||||
|
||||
s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(Block) * LMat->nnzbs);
|
||||
s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(Block) * UMat->nnzbs);
|
||||
s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * LMat->nnzbs);
|
||||
s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * UMat->nnzbs);
|
||||
s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * LMat->nnzbs);
|
||||
s.Ucols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * UMat->nnzbs);
|
||||
s.Lrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (LMat->Nb + 1));
|
||||
s.Urows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (UMat->Nb + 1));
|
||||
s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(Block) * mat->Nb);
|
||||
s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb);
|
||||
s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1));
|
||||
|
||||
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, LMat->nnzbs * sizeof(Block), LMat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(Block), UMat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, LMat->nnzbs * sizeof(double) * bs * bs, LMat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, LMat->nnzbs * sizeof(int), LMat->colIndices);
|
||||
queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, UMat->nnzbs * sizeof(int), UMat->colIndices);
|
||||
queue->enqueueWriteBuffer(s.Lrows, CL_TRUE, 0, (LMat->Nb + 1) * sizeof(int), LMat->rowPointers);
|
||||
queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (UMat->Nb + 1) * sizeof(int), UMat->rowPointers);
|
||||
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, mat->Nb * sizeof(Block), invDiagVals);
|
||||
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals);
|
||||
|
||||
int *rowsPerColorPrefix = new int[numColors + 1];
|
||||
int prefix = 0;
|
||||
@ -163,11 +164,12 @@ namespace bda
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
|
||||
{
|
||||
const unsigned int bs = block_size;
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
if (verbosity >= 3){
|
||||
t1 = second();
|
||||
}
|
||||
blocked_reorder_matrix_by_pattern(mat, toOrder, fromOrder, rMat);
|
||||
blocked_reorder_matrix_by_pattern<block_size>(mat, toOrder, fromOrder, rMat);
|
||||
if (verbosity >= 3){
|
||||
t2 = second();
|
||||
std::ostringstream out;
|
||||
@ -179,7 +181,7 @@ namespace bda
|
||||
if (verbosity >= 3){
|
||||
t1 = second();
|
||||
}
|
||||
memcpy(LUMat->nnzValues, rMat->nnzValues, sizeof(Block) * rMat->nnzbs);
|
||||
memcpy(LUMat->nnzValues, rMat->nnzValues, sizeof(double) * bs * bs * rMat->nnzbs);
|
||||
if (verbosity >= 3){
|
||||
t2 = second();
|
||||
std::ostringstream out;
|
||||
@ -189,10 +191,9 @@ namespace bda
|
||||
|
||||
int i, j, ij, ik, jk;
|
||||
int iRowStart, iRowEnd, jRowEnd;
|
||||
Block pivot;
|
||||
double pivot[bs * bs];
|
||||
|
||||
int LSize = 0;
|
||||
const int blockSquare = block_size * block_size;
|
||||
|
||||
if (verbosity >= 3){
|
||||
t1 = second();
|
||||
@ -221,9 +222,9 @@ namespace bda
|
||||
|
||||
LSize++;
|
||||
// calculate the pivot of this row
|
||||
blockMult(LUMat->nnzValues[ij], invDiagVals[j], pivot);
|
||||
blockMult<bs>(LUMat->nnzValues + ij * bs * bs, invDiagVals + j * bs * bs, &pivot[0]);
|
||||
|
||||
memcpy(LUMat->nnzValues[ij], &pivot, sizeof(double)*block_size * block_size);
|
||||
memcpy(LUMat->nnzValues + ij * bs * bs, &pivot[0], sizeof(double) * block_size * block_size);
|
||||
|
||||
jRowEnd = LUMat->rowPointers[j + 1];
|
||||
jk = diagIndex[j] + 1;
|
||||
@ -231,7 +232,7 @@ namespace bda
|
||||
// substract that row scaled by the pivot from this row.
|
||||
while (ik < iRowEnd && jk < jRowEnd) {
|
||||
if (LUMat->colIndices[ik] == LUMat->colIndices[jk]) {
|
||||
blockMultSub(LUMat->nnzValues[ik], pivot, LUMat->nnzValues[jk]);
|
||||
blockMultSub<bs>(LUMat->nnzValues + ik * bs * bs, pivot, LUMat->nnzValues + jk * bs * bs);
|
||||
ik++;
|
||||
jk++;
|
||||
} else {
|
||||
@ -243,9 +244,9 @@ namespace bda
|
||||
}
|
||||
}
|
||||
// store the inverse in the diagonal!
|
||||
blockInvert3x3(LUMat->nnzValues[ij], invDiagVals[i]);
|
||||
blockInvert3x3(LUMat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs);
|
||||
|
||||
memcpy(LUMat->nnzValues[ij], invDiagVals[i], sizeof(double)*block_size * block_size);
|
||||
memcpy(LUMat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs, sizeof(double) * bs * bs);
|
||||
}
|
||||
|
||||
LMat->rowPointers[0] = 0;
|
||||
@ -256,7 +257,7 @@ namespace bda
|
||||
int offsetL = LMat->rowPointers[i];
|
||||
int rowSize = diagIndex[i] - LUMat->rowPointers[i];
|
||||
int offsetLU = LUMat->rowPointers[i];
|
||||
memcpy(LMat->nnzValues[offsetL], LUMat->nnzValues[offsetLU], sizeof(double) * blockSquare * rowSize);
|
||||
memcpy(LMat->nnzValues + offsetL * bs * bs, LUMat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize);
|
||||
memcpy(LMat->colIndices + offsetL, LUMat->colIndices + offsetLU, sizeof(int) * rowSize);
|
||||
offsetL += rowSize;
|
||||
LMat->rowPointers[i + 1] = offsetL;
|
||||
@ -268,7 +269,7 @@ namespace bda
|
||||
int offsetU = UMat->rowPointers[URowIndex];
|
||||
int rowSize = LUMat->rowPointers[i + 1] - diagIndex[i] - 1;
|
||||
int offsetLU = diagIndex[i] + 1;
|
||||
memcpy(UMat->nnzValues[offsetU], LUMat->nnzValues[offsetLU], sizeof(double) * blockSquare * rowSize);
|
||||
memcpy(UMat->nnzValues + offsetU * bs * bs, LUMat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize);
|
||||
memcpy(UMat->colIndices + offsetU, LUMat->colIndices + offsetLU, sizeof(int) * rowSize);
|
||||
offsetU += rowSize;
|
||||
UMat->rowPointers[URowIndex + 1] = offsetU;
|
||||
@ -291,9 +292,9 @@ namespace bda
|
||||
queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (UMat->Nb + 1) * sizeof(int), UMat->rowPointers);
|
||||
pattern_uploaded = true;
|
||||
}
|
||||
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, LMat->nnzbs * sizeof(Block), LMat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(Block), UMat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(Block), invDiagVals);
|
||||
queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, LMat->nnzbs * sizeof(double) * bs * bs, LMat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, UMat->nnzbs * sizeof(double) * bs * bs, UMat->nnzValues);
|
||||
queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(double) * bs * bs, invDiagVals);
|
||||
if (verbosity >= 3) {
|
||||
t2 = second();
|
||||
std::ostringstream out;
|
||||
|
@ -47,7 +47,7 @@ namespace bda
|
||||
int nnzbs; // number of blocks of the matrix
|
||||
BlockedMatrix *LMat, *UMat, *LUMat;
|
||||
BlockedMatrix *rMat = nullptr; // only used with PAR_SIM
|
||||
Block *invDiagVals;
|
||||
double *invDiagVals;
|
||||
int *diagIndex, *rowsPerColor;
|
||||
int *toOrder, *fromOrder;
|
||||
int numColors;
|
||||
|
@ -30,10 +30,11 @@ using bda::BlockedMatrix;
|
||||
namespace bda
|
||||
{
|
||||
|
||||
template <unsigned int block_size>
|
||||
BlockedMatrix *allocateBlockedMatrix(int Nb, int nnzbs) {
|
||||
BlockedMatrix *mat = new BlockedMatrix();
|
||||
|
||||
mat->nnzValues = new Block[nnzbs];
|
||||
mat->nnzValues = new double[nnzbs * block_size * block_size];
|
||||
mat->colIndices = new int[nnzbs];
|
||||
mat->rowPointers = new int[Nb + 1];
|
||||
mat->Nb = Nb;
|
||||
@ -65,11 +66,13 @@ BlockedMatrix *soft_copyBlockedMatrix(BlockedMatrix *mat) {
|
||||
|
||||
/*Sort a row of matrix elements from a blocked CSR-format.*/
|
||||
|
||||
void sortBlockedRow(int *colIndices, Block *data, int left, int right) {
|
||||
template <unsigned int block_size>
|
||||
void sortBlockedRow(int *colIndices, double *data, int left, int right) {
|
||||
const unsigned int bs = block_size;
|
||||
int l = left;
|
||||
int r = right;
|
||||
int middle = colIndices[(l + r) >> 1];
|
||||
double lDatum[BLOCK_SIZE * BLOCK_SIZE];
|
||||
double lDatum[bs * bs];
|
||||
do {
|
||||
while (colIndices[l] < middle)
|
||||
l++;
|
||||
@ -79,9 +82,9 @@ void sortBlockedRow(int *colIndices, Block *data, int left, int right) {
|
||||
int lColIndex = colIndices[l];
|
||||
colIndices[l] = colIndices[r];
|
||||
colIndices[r] = lColIndex;
|
||||
memcpy(lDatum, data + l, sizeof(double) * BLOCK_SIZE * BLOCK_SIZE);
|
||||
memcpy(data + l, data + r, sizeof(double) * BLOCK_SIZE * BLOCK_SIZE);
|
||||
memcpy(data + r, lDatum, sizeof(double) * BLOCK_SIZE * BLOCK_SIZE);
|
||||
memcpy(lDatum, data + l * bs * bs, sizeof(double) * bs * bs);
|
||||
memcpy(data + l * bs * bs, data + r * bs * bs, sizeof(double) * bs * bs);
|
||||
memcpy(data + r * bs * bs, lDatum, sizeof(double) * bs * bs);
|
||||
|
||||
l++;
|
||||
r--;
|
||||
@ -89,38 +92,40 @@ void sortBlockedRow(int *colIndices, Block *data, int left, int right) {
|
||||
} while (l < r);
|
||||
|
||||
if (left < r)
|
||||
sortBlockedRow(colIndices, data, left, r);
|
||||
sortBlockedRow<bs>(colIndices, data, left, r);
|
||||
|
||||
if (right > l)
|
||||
sortBlockedRow(colIndices, data, l, right);
|
||||
sortBlockedRow<bs>(colIndices, data, l, right);
|
||||
}
|
||||
|
||||
|
||||
// LUMat->nnzValues[ik] = LUMat->nnzValues[ik] - (pivot * LUMat->nnzValues[jk]) in ilu decomposition
|
||||
// a = a - (b * c)
|
||||
void blockMultSub(Block a, Block b, Block c)
|
||||
template <unsigned int block_size>
|
||||
void blockMultSub(double *a, double *b, double *c)
|
||||
{
|
||||
for (int row = 0; row < BLOCK_SIZE; row++) {
|
||||
for (int col = 0; col < BLOCK_SIZE; col++) {
|
||||
for (int row = 0; row < block_size; row++) {
|
||||
for (int col = 0; col < block_size; col++) {
|
||||
double temp = 0.0;
|
||||
for (int k = 0; k < BLOCK_SIZE; k++) {
|
||||
temp += b[BLOCK_SIZE * row + k] * c[BLOCK_SIZE * k + col];
|
||||
for (int k = 0; k < block_size; k++) {
|
||||
temp += b[block_size * row + k] * c[block_size * k + col];
|
||||
}
|
||||
a[BLOCK_SIZE * row + col] -= temp;
|
||||
a[block_size * row + col] -= temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*Perform a 3x3 matrix-matrix multiplicationj on two blocks*/
|
||||
|
||||
void blockMult(Block mat1, Block mat2, Block resMat) {
|
||||
for (int row = 0; row < BLOCK_SIZE; row++) {
|
||||
for (int col = 0; col < BLOCK_SIZE; col++) {
|
||||
template <unsigned int block_size>
|
||||
void blockMult(double *mat1, double *mat2, double *resMat) {
|
||||
for (int row = 0; row < block_size; row++) {
|
||||
for (int col = 0; col < block_size; col++) {
|
||||
double temp = 0;
|
||||
for (int k = 0; k < BLOCK_SIZE; k++) {
|
||||
temp += mat1[BLOCK_SIZE * row + k] * mat2[BLOCK_SIZE * k + col];
|
||||
for (int k = 0; k < block_size; k++) {
|
||||
temp += mat1[block_size * row + k] * mat2[block_size * k + col];
|
||||
}
|
||||
resMat[BLOCK_SIZE * row + col] = temp;
|
||||
resMat[block_size * row + col] = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -128,7 +133,7 @@ void blockMult(Block mat1, Block mat2, Block resMat) {
|
||||
|
||||
/* Calculate the inverse of a block. This function is specific for only 3x3 block size.*/
|
||||
|
||||
void blockInvert3x3(Block mat, Block res) {
|
||||
void blockInvert3x3(double *mat, double *res) {
|
||||
// code generated by maple, copied from DUNE
|
||||
double t4 = mat[0] * mat[4];
|
||||
double t6 = mat[0] * mat[5];
|
||||
@ -153,4 +158,17 @@ void blockInvert3x3(Block mat, Block res) {
|
||||
}
|
||||
|
||||
|
||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||
template BlockedMatrix *allocateBlockedMatrix<n>(int Nb, int nnzbs); \
|
||||
template void sortBlockedRow<n>(int *, double *, int, int); \
|
||||
template void blockMultSub<n>(double *, double *, double *); \
|
||||
template void blockMult<n>(double *, double *, double *); \
|
||||
|
||||
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||
INSTANTIATE_BDA_FUNCTIONS(3);
|
||||
INSTANTIATE_BDA_FUNCTIONS(4);
|
||||
|
||||
#undef INSTANTIATE_BDA_FUNCTIONS
|
||||
|
||||
} // end namespace bda
|
||||
|
@ -20,15 +20,11 @@
|
||||
#ifndef BLOCKED_MATRIX_HPP
|
||||
#define BLOCKED_MATRIX_HPP
|
||||
|
||||
#define BLOCK_SIZE 3
|
||||
|
||||
typedef double Block[9];
|
||||
|
||||
namespace bda
|
||||
{
|
||||
|
||||
typedef struct {
|
||||
Block *nnzValues;
|
||||
double *nnzValues;
|
||||
int *colIndices;
|
||||
int *rowPointers;
|
||||
int Nb;
|
||||
@ -39,6 +35,7 @@ typedef struct {
|
||||
/// \param[in] Nb number of blockrows
|
||||
/// \param[in] nnzbs number of nonzero blocks
|
||||
/// \return pointer to BlockedMatrix
|
||||
template <unsigned int block_size>
|
||||
BlockedMatrix *allocateBlockedMatrix(int Nb, int nnzbs);
|
||||
|
||||
/// Allocate BlockedMatrix, but copy data pointers instead of allocating new memory
|
||||
@ -55,25 +52,28 @@ void freeBlockedMatrix(BlockedMatrix **mat);
|
||||
/// \param[inout] data
|
||||
/// \param[in] left lower index of data of row
|
||||
/// \param[in] right upper index of data of row
|
||||
void sortBlockedRow(int *colIndices, Block *data, int left, int right);
|
||||
template <unsigned int block_size>
|
||||
void sortBlockedRow(int *colIndices, double *data, int left, int right);
|
||||
|
||||
/// Multiply and subtract blocks
|
||||
/// a = a - (b * c)
|
||||
/// \param[inout] a block to be subtracted from
|
||||
/// \param[in] b input block
|
||||
/// \param[in] c input block
|
||||
void blockMultSub(Block a, Block b, Block c);
|
||||
template <unsigned int block_size>
|
||||
void blockMultSub(double *a, double *b, double *c);
|
||||
|
||||
/// Perform a 3x3 matrix-matrix multiplication on two blocks
|
||||
/// \param[in] mat1 input block 1
|
||||
/// \param[in] mat2 input block 2
|
||||
/// \param[inout] resMat output block
|
||||
void blockMult(Block mat1, Block mat2, Block resMat);
|
||||
template <unsigned int block_size>
|
||||
void blockMult(double *mat1, double *mat2, double *resMat);
|
||||
|
||||
/// Calculate the inverse of a block. This function is specific for only 3x3 block size.
|
||||
/// \param[in] mat input block
|
||||
/// \param[inout] res output block
|
||||
void blockInvert3x3(Block mat, Block res);
|
||||
void blockInvert3x3(double *mat, double *res);
|
||||
|
||||
} // end namespace bda
|
||||
|
||||
|
@ -131,9 +131,12 @@ int colorBlockedNodes(int rows, const int *rowPointers, const int *colIndices, s
|
||||
* Both a to order array, which contains for every node from the old matrix where it will move in the new matrix,
|
||||
* and the from order, which contains for every node in the new matrix where it came from in the old matrix.*/
|
||||
|
||||
template <unsigned int block_size>
|
||||
void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat) {
|
||||
const unsigned int bs = block_size;
|
||||
int rIndex = 0;
|
||||
int i, j, k;
|
||||
int i, k;
|
||||
unsigned int j;
|
||||
|
||||
rMat->rowPointers[0] = 0;
|
||||
for (i = 0; i < mat->Nb; i++) {
|
||||
@ -141,8 +144,9 @@ void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fr
|
||||
// put thisRow from the old matrix into row i of the new matrix
|
||||
rMat->rowPointers[i + 1] = rMat->rowPointers[i] + mat->rowPointers[thisRow + 1] - mat->rowPointers[thisRow];
|
||||
for (k = mat->rowPointers[thisRow]; k < mat->rowPointers[thisRow + 1]; k++) {
|
||||
for (j = 0; j < BLOCK_SIZE * BLOCK_SIZE; j++)
|
||||
rMat->nnzValues[rIndex][j] = mat->nnzValues[k][j];
|
||||
for (j = 0; j < bs * bs; j++){
|
||||
rMat->nnzValues[rIndex * bs * bs + j] = mat->nnzValues[k * bs * bs + j];
|
||||
}
|
||||
rMat->colIndices[rIndex] = mat->colIndices[k];
|
||||
rIndex++;
|
||||
}
|
||||
@ -153,7 +157,7 @@ void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fr
|
||||
}
|
||||
// re-sort the column indices of every row.
|
||||
for (i = 0; i < mat->Nb; i++) {
|
||||
sortBlockedRow(rMat->colIndices, rMat->nnzValues, rMat->rowPointers[i], rMat->rowPointers[i + 1] - 1);
|
||||
sortBlockedRow<bs>(rMat->colIndices, rMat->nnzValues, rMat->rowPointers[i], rMat->rowPointers[i + 1] - 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -182,10 +186,11 @@ void colorsToReordering(int Nb, std::vector<int>& colors, int *toOrder, int *fro
|
||||
|
||||
// Reorder a matrix according to a reordering pattern
|
||||
|
||||
template <unsigned int block_size>
|
||||
void blocked_reorder_vector_by_pattern(int Nb, double *vector, int *fromOrder, double *rVector) {
|
||||
for (int i = 0; i < Nb; i++) {
|
||||
for (int j = 0; j < BLOCK_SIZE; j++) {
|
||||
rVector[BLOCK_SIZE * i + j] = vector[BLOCK_SIZE * fromOrder[i] + j];
|
||||
for (unsigned int j = 0; j < block_size; j++) {
|
||||
rVector[block_size * i + j] = vector[block_size * fromOrder[i] + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -304,7 +309,8 @@ int* findGraphColoring(int *colIndices, int *rowPointers, int Nb, int maxRowsPer
|
||||
// input : matrix A via Avals, Acols, Arows, Nb
|
||||
// output: matrix B via Bvals, Bcols, Brows
|
||||
// arrays for B must be preallocated
|
||||
void bcsr_to_bcsc(Block *Avals, int *Acols, int *Arows, Block *Bvals, int *Bcols, int *Brows, int Nb) {
|
||||
template <unsigned int block_size>
|
||||
void bcsr_to_bcsc(double *Avals, int *Acols, int *Arows, double *Bvals, int *Bcols, int *Brows, int Nb) {
|
||||
|
||||
int nnz = Arows[Nb];
|
||||
|
||||
@ -328,7 +334,7 @@ void bcsr_to_bcsc(Block *Avals, int *Acols, int *Arows, Block *Bvals, int *Bcols
|
||||
int col = Acols[j];
|
||||
int dest = Brows[col];
|
||||
Bcols[dest] = row;
|
||||
memcpy(Bvals + dest, Avals + dest, sizeof(Block));
|
||||
memcpy(Bvals + dest, Avals + dest, sizeof(double) * block_size * block_size);
|
||||
Brows[col]++;
|
||||
}
|
||||
}
|
||||
@ -340,4 +346,17 @@ void bcsr_to_bcsc(Block *Avals, int *Acols, int *Arows, Block *Bvals, int *Bcols
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||
template void blocked_reorder_matrix_by_pattern<n>(BlockedMatrix *, int *, int *, BlockedMatrix *); \
|
||||
template void blocked_reorder_vector_by_pattern<n>(int, double*, int*, double*); \
|
||||
template void bcsr_to_bcsc<n>(double *, int *, int *, double *, int *, int *, int ); \
|
||||
|
||||
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||
INSTANTIATE_BDA_FUNCTIONS(3);
|
||||
INSTANTIATE_BDA_FUNCTIONS(4);
|
||||
|
||||
#undef INSTANTIATE_BDA_FUNCTIONS
|
||||
|
||||
} //namespace bda
|
@ -45,6 +45,7 @@ int colorBlockedNodes(int rows, const int *rowPointers, const int *colIndices, s
|
||||
/// \param[in] toOrder reorder pattern that lists for each index in the original order, to which index in the new order it should be moved
|
||||
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
|
||||
/// \param[inout] rMat reordered Matrix
|
||||
template <unsigned int block_size>
|
||||
void blocked_reorder_matrix_by_pattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat);
|
||||
|
||||
/// Compute reorder mapping from the color that each node has received
|
||||
@ -63,6 +64,7 @@ void colorsToReordering(int Nb, std::vector<int>& colors, int *toOrder, int *fro
|
||||
/// \param[in] toOrder reorder pattern that lists for each index in the original order, to which index in the new order it should be moved
|
||||
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
|
||||
/// \param[inout] rVector reordered vector
|
||||
template <unsigned int block_size>
|
||||
void blocked_reorder_vector_by_pattern(int Nb, double *vector, int *fromOrder, double *rVector);
|
||||
|
||||
/// Determine whether all rows that a certain row depends on are done already
|
||||
@ -109,7 +111,8 @@ int* findGraphColoring(int *colIndices, int *rowPointers, int Nb, int maxRowsPer
|
||||
/// \param[inout] Bcols row indices of the result BCSC matrix
|
||||
/// \param[inout] Brows column pointers of the result BCSC matrix
|
||||
/// \param[in] Nb number of blockrows in the matrix
|
||||
void bcsr_to_bcsc(Block *Avals, int *Acols, int *Arows, Block *Bvals, int *Bcols, int *Brows, int Nb);
|
||||
template <unsigned int block_size>
|
||||
void bcsr_to_bcsc(double *Avals, int *Acols, int *Arows, double *Bvals, int *Bcols, int *Brows, int Nb);
|
||||
|
||||
}
|
||||
|
||||
|
@ -497,10 +497,10 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
||||
vals_contiguous = new double[N];
|
||||
#endif
|
||||
|
||||
mat = (BlockedMatrix *)malloc(sizeof(BlockedMatrix));
|
||||
mat = new BlockedMatrix();
|
||||
mat->Nb = Nb;
|
||||
mat->nnzbs = nnzb;
|
||||
mat->nnzValues = (Block*)vals;
|
||||
mat->nnzValues = vals;
|
||||
mat->colIndices = cols;
|
||||
mat->rowPointers = rows;
|
||||
|
||||
@ -547,6 +547,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
||||
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::finalize() {
|
||||
delete mat;
|
||||
delete[] rb;
|
||||
delete[] tmp;
|
||||
#if COPY_ROW_BY_ROW
|
||||
@ -668,9 +669,8 @@ void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
|
||||
t1 = second();
|
||||
}
|
||||
|
||||
mat->nnzValues = (Block*)vals;
|
||||
//mat->nnzValues = static_cast<Block*>(vals);
|
||||
blocked_reorder_vector_by_pattern(mat->Nb, b, fromOrder, rb);
|
||||
mat->nnzValues = vals;
|
||||
blocked_reorder_vector_by_pattern<block_size>(mat->Nb, b, fromOrder, rb);
|
||||
|
||||
if (verbosity > 2) {
|
||||
t2 = second();
|
||||
@ -732,7 +732,7 @@ void openclSolverBackend<block_size>::get_result(double *x) {
|
||||
}
|
||||
|
||||
queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, rb);
|
||||
blocked_reorder_vector_by_pattern(mat->Nb, rb, toOrder, x);
|
||||
blocked_reorder_vector_by_pattern<block_size>(mat->Nb, rb, toOrder, x);
|
||||
|
||||
if (verbosity > 2) {
|
||||
t2 = second();
|
||||
|
Loading…
Reference in New Issue
Block a user