Changed block_size template to parameter for block functions

This commit is contained in:
Tong Dong Qiu 2021-11-15 11:36:50 +01:00
parent 288e548b89
commit adb4649277
4 changed files with 34 additions and 53 deletions

View File

@ -73,8 +73,7 @@ void sortBlockedRow(int *colIndices, double *data, int left, int right, unsigned
// LUMat->nnzValues[ik] = LUMat->nnzValues[ik] - (pivot * LUMat->nnzValues[jk]) in ilu decomposition
// a = a - (b * c)
template <unsigned int block_size>
void blockMultSub(double *a, double *b, double *c)
void blockMultSub(double *a, double *b, double *c, unsigned int block_size)
{
for (unsigned int row = 0; row < block_size; row++) {
for (unsigned int col = 0; col < block_size; col++) {
@ -89,8 +88,7 @@ void blockMultSub(double *a, double *b, double *c)
/*Perform a 3x3 matrix-matrix multiplicationj on two blocks*/
template <unsigned int block_size>
void blockMult(double *mat1, double *mat2, double *resMat) {
void blockMult(double *mat1, double *mat2, double *resMat, unsigned int block_size) {
for (unsigned int row = 0; row < block_size; row++) {
for (unsigned int col = 0; col < block_size; col++) {
double temp = 0;
@ -105,8 +103,7 @@ void blockMult(double *mat1, double *mat2, double *resMat) {
#if HAVE_FPGA
/*Subtract two blocks from one another element by element*/
template <unsigned int block_size>
void blockSub(double *mat1, double *mat2, double *resMat) {
void blockSub(double *mat1, double *mat2, double *resMat, unsigned int block_size) {
for (unsigned int row = 0; row < block_size; row++) {
for (unsigned int col = 0; col < block_size; col++) {
resMat[row * block_size + col] = mat1[row * block_size + col] - mat2[row * block_size + col];
@ -115,8 +112,7 @@ void blockSub(double *mat1, double *mat2, double *resMat) {
}
/*Multiply a block with a vector block, and add the result, scaled by a constant, to the result vector*/
template <unsigned int block_size>
void blockVectMult(double *mat, double *vect, double scale, double *resVect, bool resetRes) {
void blockVectMult(double *mat, double *vect, double scale, double *resVect, bool resetRes, unsigned int block_size) {
for (unsigned int row = 0; row < block_size; row++) {
if (resetRes) {
resVect[row] = 0.0;
@ -467,34 +463,5 @@ void blockedDiagtoRDF(double *blockedDiagVals, int rowSize, int numColors, std::
#endif // HAVE_FPGA
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template void blockMultSub<n>(double *, double *, double *); \
template void blockMult<n>(double *, double *, double *); \
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);
INSTANTIATE_BDA_FUNCTIONS(3);
INSTANTIATE_BDA_FUNCTIONS(4);
INSTANTIATE_BDA_FUNCTIONS(5);
INSTANTIATE_BDA_FUNCTIONS(6);
#undef INSTANTIATE_BDA_FUNCTIONS
#if HAVE_FPGA
#define INSTANTIATE_BDA_FPGA_FUNCTIONS(n) \
template void blockSub<n>(double *, double *, double *); \
template void blockVectMult<n>(double *, double *, double, double *, bool);
INSTANTIATE_BDA_FPGA_FUNCTIONS(1);
INSTANTIATE_BDA_FPGA_FUNCTIONS(2);
INSTANTIATE_BDA_FPGA_FUNCTIONS(3);
INSTANTIATE_BDA_FPGA_FUNCTIONS(4);
INSTANTIATE_BDA_FPGA_FUNCTIONS(5);
INSTANTIATE_BDA_FPGA_FUNCTIONS(6);
#undef INSTANTIATE_BDA_FPGA_FUNCTIONS
#endif // HAVE_FPGA
} // namespace Accelerator
} // namespace Opm

View File

@ -146,23 +146,37 @@ void sortBlockedRow(int *colIndices, double *data, int left, int right, unsigned
/// \param[inout] a block to be subtracted from
/// \param[in] b input block
/// \param[in] c input block
template <unsigned int block_size>
void blockMultSub(double *a, double *b, double *c);
/// \param[in] block_size size of block
void blockMultSub(double *a, double *b, double *c, unsigned int block_size);
/// Perform a 3x3 matrix-matrix multiplication on two blocks
/// Perform a matrix-matrix multiplication on two blocks
/// resMat = mat1 * mat2
/// \param[in] mat1 input block 1
/// \param[in] mat2 input block 2
/// \param[inout] resMat output block
template <unsigned int block_size>
void blockMult(double *mat1, double *mat2, double *resMat);
/// \param[out] resMat output block
/// \param[in] block_size size of block
void blockMult(double *mat1, double *mat2, double *resMat, unsigned int block_size);
#if HAVE_FPGA
template <unsigned int block_size>
void blockSub(double *mat1, double *mat2, double *resMat);
/// Perform a matrix-matrix subtraction on two blocks, element-wise
/// resMat = mat1 - mat2
/// \param[in] mat1 input block 1
/// \param[in] mat2 input block 2
/// \param[out] resMat output block
/// \param[in] block_size size of block
void blockSub(double *mat1, double *mat2, double *resMat, unsigned int block_size);
template <unsigned int block_size>
void blockVectMult(double *mat, double *vect, double scale, double *resVect, bool resetRes);
/// Perform a matrix-vector multiplication
/// resVect = mat * vect
/// resVect += mat * vect
/// \param[in] mat input matrix
/// \param[in] vect input vector
/// \param[in] scale multiply output with this factor
/// \param[inout] resVect output vector
/// \param[in] resetRes if true, overwrite resVect, otherwise add to it
/// \param[in] block_size size of block
void blockVectMult(double *mat, double *vect, double scale, double *resVect, bool resetRes, unsigned int block_size);
/// Convert a blocked inverse diagonal to the FPGA format.
/// This is the only blocked structure on the FPGA, since it needs blocked matrix-vector multiplication after the backwards substitution of U.

View File

@ -569,7 +569,7 @@ void ChowPatelIlu<block_size>::decomposition(
} else {
Lmat->colIndices[num_blocks_L] = j;
// multiply block of L with corresponding diag block
blockMult<bs>(LUmat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs, Lmat->nnzValues + num_blocks_L * bs * bs);
blockMult(LUmat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs, Lmat->nnzValues + num_blocks_L * bs * bs, bs);
num_blocks_L++;
}
}
@ -671,7 +671,7 @@ void ChowPatelIlu<block_size>::decomposition(
}
if (colL == rowU2) {
// Aij -= (Lik * Ukj)
blockMultSub<bs>(&aij[0], Lmat->nnzValues + jk * bs * bs, Ut->nnzValues + k * bs * bs);
blockMultSub(&aij[0], Lmat->nnzValues + jk * bs * bs, Ut->nnzValues + k * bs * bs, bs);
}
}
}
@ -713,7 +713,7 @@ void ChowPatelIlu<block_size>::decomposition(
if (rowU == colL) {
// Aij -= (Lik * Ukj)
blockMultSub<bs>(&aij[0], Lmat->nnzValues + k * bs * bs , Ut->nnzValues + jk * bs * bs);
blockMultSub(&aij[0], Lmat->nnzValues + k * bs * bs , Ut->nnzValues + jk * bs * bs, bs);
}
}
@ -721,7 +721,7 @@ void ChowPatelIlu<block_size>::decomposition(
double ujj[bs*bs];
inverter(Ut->nnzValues + (Ut->rowPointers[j+1] - 1) * bs * bs, &ujj[0]);
// Lij_new = (Aij - sum) / Ujj
blockMult<bs>(&aij[0], &ujj[0], Ltmp + ij * bs * bs);
blockMult(&aij[0], &ujj[0], Ltmp + ij * bs * bs, bs);
}
}
// 1st sweep writes to Ltmp

View File

@ -286,7 +286,7 @@ bool FPGABILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
LSize++;
// calculate the pivot of this row
blockMult<bs>(LUMat->nnzValues + ij * bs * bs, invDiagVals + j * bs * bs, &pivot[0]);
blockMult(LUMat->nnzValues + ij * bs * bs, invDiagVals + j * bs * bs, &pivot[0], bs);
memcpy(LUMat->nnzValues + ij * bs * bs, &pivot[0], sizeof(double) * bs * bs);
@ -296,7 +296,7 @@ bool FPGABILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
// subtract that row scaled by the pivot from this row.
while (ik < iRowEnd && jk < jRowEnd) {
if (LUMat->colIndices[ik] == LUMat->colIndices[jk]) {
blockMultSub<bs>(LUMat->nnzValues + ik * bs * bs, pivot, LUMat->nnzValues + jk * bs * bs);
blockMultSub(LUMat->nnzValues + ik * bs * bs, pivot, LUMat->nnzValues + jk * bs * bs, bs);
ik++;
jk++;
} else {