mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Changed block_size template to variable for BlockedMatrix
This commit is contained in:
parent
a6b6a62b27
commit
9acffab47e
@ -54,7 +54,7 @@ BILU0<block_size>::~BILU0()
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
bool BILU0<block_size>::init(BlockedMatrix<block_size> *mat)
|
bool BILU0<block_size>::init(BlockedMatrix *mat)
|
||||||
{
|
{
|
||||||
const unsigned int bs = block_size;
|
const unsigned int bs = block_size;
|
||||||
|
|
||||||
@ -67,14 +67,14 @@ BILU0<block_size>::~BILU0()
|
|||||||
int *CSCColPointers = nullptr;
|
int *CSCColPointers = nullptr;
|
||||||
|
|
||||||
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
if (opencl_ilu_reorder == ILUReorder::NONE) {
|
||||||
LUmat = std::make_unique<BlockedMatrix<block_size> >(*mat);
|
LUmat = std::make_unique<BlockedMatrix>(*mat);
|
||||||
} else {
|
} else {
|
||||||
toOrder.resize(Nb);
|
toOrder.resize(Nb);
|
||||||
fromOrder.resize(Nb);
|
fromOrder.resize(Nb);
|
||||||
CSCRowIndices = new int[nnzbs];
|
CSCRowIndices = new int[nnzbs];
|
||||||
CSCColPointers = new int[Nb + 1];
|
CSCColPointers = new int[Nb + 1];
|
||||||
rmat = std::make_shared<BlockedMatrix<block_size> >(mat->Nb, mat->nnzbs);
|
rmat = std::make_shared<BlockedMatrix>(mat->Nb, mat->nnzbs, block_size);
|
||||||
LUmat = std::make_unique<BlockedMatrix<block_size> >(*rmat);
|
LUmat = std::make_unique<BlockedMatrix>(*rmat);
|
||||||
|
|
||||||
Timer t_convert;
|
Timer t_convert;
|
||||||
csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb);
|
csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb);
|
||||||
@ -122,8 +122,8 @@ BILU0<block_size>::~BILU0()
|
|||||||
invDiagVals = new double[mat->Nb * bs * bs];
|
invDiagVals = new double[mat->Nb * bs * bs];
|
||||||
|
|
||||||
#if CHOW_PATEL
|
#if CHOW_PATEL
|
||||||
Lmat = std::make_unique<BlockedMatrix<block_size> >(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
Lmat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||||
Umat = std::make_unique<BlockedMatrix<block_size> >(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
Umat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
LUmat->nnzValues = new double[mat->nnzbs * bs * bs];
|
LUmat->nnzValues = new double[mat->nnzbs * bs * bs];
|
||||||
@ -166,7 +166,7 @@ BILU0<block_size>::~BILU0()
|
|||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
bool BILU0<block_size>::create_preconditioner(BlockedMatrix<block_size> *mat)
|
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
|
||||||
{
|
{
|
||||||
const unsigned int bs = block_size;
|
const unsigned int bs = block_size;
|
||||||
auto *m = mat;
|
auto *m = mat;
|
||||||
@ -174,7 +174,7 @@ BILU0<block_size>::~BILU0()
|
|||||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||||
m = rmat.get();
|
m = rmat.get();
|
||||||
Timer t_reorder;
|
Timer t_reorder;
|
||||||
reorderBlockedMatrixByPattern<block_size>(mat, toOrder.data(), fromOrder.data(), rmat.get());
|
reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rmat.get());
|
||||||
|
|
||||||
if (verbosity >= 3){
|
if (verbosity >= 3){
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
@ -308,8 +308,8 @@ void BILU0<block_size>::setOpenCLQueue(cl::CommandQueue *queue_) {
|
|||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template BILU0<n>::BILU0(ILUReorder, int); \
|
template BILU0<n>::BILU0(ILUReorder, int); \
|
||||||
template BILU0<n>::~BILU0(); \
|
template BILU0<n>::~BILU0(); \
|
||||||
template bool BILU0<n>::init(BlockedMatrix<n>*); \
|
template bool BILU0<n>::init(BlockedMatrix*); \
|
||||||
template bool BILU0<n>::create_preconditioner(BlockedMatrix<n>*); \
|
template bool BILU0<n>::create_preconditioner(BlockedMatrix*); \
|
||||||
template void BILU0<n>::apply(const cl::Buffer&, cl::Buffer&); \
|
template void BILU0<n>::apply(const cl::Buffer&, cl::Buffer&); \
|
||||||
template void BILU0<n>::setOpenCLContext(cl::Context*); \
|
template void BILU0<n>::setOpenCLContext(cl::Context*); \
|
||||||
template void BILU0<n>::setOpenCLQueue(cl::CommandQueue*);
|
template void BILU0<n>::setOpenCLQueue(cl::CommandQueue*);
|
||||||
|
@ -46,10 +46,10 @@ namespace Accelerator
|
|||||||
int Nb; // number of blockrows of the matrix
|
int Nb; // number of blockrows of the matrix
|
||||||
int nnz; // number of nonzeroes of the matrix (scalar)
|
int nnz; // number of nonzeroes of the matrix (scalar)
|
||||||
int nnzbs; // number of blocks of the matrix
|
int nnzbs; // number of blocks of the matrix
|
||||||
std::unique_ptr<BlockedMatrix<block_size> > LUmat = nullptr;
|
std::unique_ptr<BlockedMatrix> LUmat = nullptr;
|
||||||
std::shared_ptr<BlockedMatrix<block_size> > rmat = nullptr; // only used with PAR_SIM
|
std::shared_ptr<BlockedMatrix> rmat = nullptr; // only used with PAR_SIM
|
||||||
#if CHOW_PATEL
|
#if CHOW_PATEL
|
||||||
std::unique_ptr<BlockedMatrix<block_size> > Lmat = nullptr, Umat = nullptr;
|
std::unique_ptr<BlockedMatrix> Lmat = nullptr, Umat = nullptr;
|
||||||
#endif
|
#endif
|
||||||
double *invDiagVals = nullptr;
|
double *invDiagVals = nullptr;
|
||||||
std::vector<int> diagIndex;
|
std::vector<int> diagIndex;
|
||||||
@ -91,10 +91,10 @@ namespace Accelerator
|
|||||||
~BILU0();
|
~BILU0();
|
||||||
|
|
||||||
// analysis
|
// analysis
|
||||||
bool init(BlockedMatrix<block_size> *mat);
|
bool init(BlockedMatrix *mat);
|
||||||
|
|
||||||
// ilu_decomposition
|
// ilu_decomposition
|
||||||
bool create_preconditioner(BlockedMatrix<block_size> *mat);
|
bool create_preconditioner(BlockedMatrix *mat);
|
||||||
|
|
||||||
// apply preconditioner, x = prec(y)
|
// apply preconditioner, x = prec(y)
|
||||||
void apply(const cl::Buffer& y, cl::Buffer& x);
|
void apply(const cl::Buffer& y, cl::Buffer& x);
|
||||||
@ -112,7 +112,7 @@ namespace Accelerator
|
|||||||
return fromOrder.data();
|
return fromOrder.data();
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockedMatrix<block_size>* getRMat()
|
BlockedMatrix* getRMat()
|
||||||
{
|
{
|
||||||
return rmat.get();
|
return rmat.get();
|
||||||
}
|
}
|
||||||
|
@ -39,8 +39,7 @@ using Opm::OpmLog;
|
|||||||
|
|
||||||
/*Sort a row of matrix elements from a blocked CSR-format.*/
|
/*Sort a row of matrix elements from a blocked CSR-format.*/
|
||||||
|
|
||||||
template <unsigned int block_size>
|
void sortBlockedRow(int *colIndices, double *data, int left, int right, unsigned block_size) {
|
||||||
void sortBlockedRow(int *colIndices, double *data, int left, int right) {
|
|
||||||
const unsigned int bs = block_size;
|
const unsigned int bs = block_size;
|
||||||
int l = left;
|
int l = left;
|
||||||
int r = right;
|
int r = right;
|
||||||
@ -65,10 +64,10 @@ void sortBlockedRow(int *colIndices, double *data, int left, int right) {
|
|||||||
} while (l < r);
|
} while (l < r);
|
||||||
|
|
||||||
if (left < r)
|
if (left < r)
|
||||||
sortBlockedRow<bs>(colIndices, data, left, r);
|
sortBlockedRow(colIndices, data, left, r, bs);
|
||||||
|
|
||||||
if (right > l)
|
if (right > l)
|
||||||
sortBlockedRow<bs>(colIndices, data, l, right);
|
sortBlockedRow(colIndices, data, l, right, bs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -130,8 +129,7 @@ void blockVectMult(double *mat, double *vect, double scale, double *resVect, boo
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
int BlockedMatrix::countUnblockedNnzs() {
|
||||||
int BlockedMatrix<block_size>::countUnblockedNnzs() {
|
|
||||||
int numNnzsOverThreshold = 0;
|
int numNnzsOverThreshold = 0;
|
||||||
int totalNnzs = rowPointers[Nb];
|
int totalNnzs = rowPointers[Nb];
|
||||||
for (unsigned int idx = 0; idx < totalNnzs * block_size * block_size; idx++) {
|
for (unsigned int idx = 0; idx < totalNnzs * block_size * block_size; idx++) {
|
||||||
@ -146,8 +144,7 @@ int BlockedMatrix<block_size>::countUnblockedNnzs() {
|
|||||||
* Unblock the blocked matrix. Input the blocked matrix and output a CSR matrix without blocks.
|
* Unblock the blocked matrix. Input the blocked matrix and output a CSR matrix without blocks.
|
||||||
* If unblocking the U matrix, the rows in all blocks need to be written to the new matrix in reverse order.
|
* If unblocking the U matrix, the rows in all blocks need to be written to the new matrix in reverse order.
|
||||||
*/
|
*/
|
||||||
template <unsigned int block_size>
|
void BlockedMatrix::unblock(Matrix *mat, bool isUMatrix) {
|
||||||
void BlockedMatrix<block_size>::unblock(Matrix *mat, bool isUMatrix) {
|
|
||||||
const unsigned int bs = block_size;
|
const unsigned int bs = block_size;
|
||||||
int valIndex = 0, nnzsPerRow;
|
int valIndex = 0, nnzsPerRow;
|
||||||
|
|
||||||
@ -185,8 +182,7 @@ void BlockedMatrix<block_size>::unblock(Matrix *mat, bool isUMatrix) {
|
|||||||
|
|
||||||
/*Optimized version*/
|
/*Optimized version*/
|
||||||
// ub* prefixes indicate unblocked data
|
// ub* prefixes indicate unblocked data
|
||||||
template <unsigned int block_size>
|
int BlockedMatrix::toRDF(int numColors, int *nodesPerColor, bool isUMatrix,
|
||||||
int BlockedMatrix<block_size>::toRDF(int numColors, int *nodesPerColor, bool isUMatrix,
|
|
||||||
std::vector<std::vector<int> >& colIndicesInColor, int nnzsPerRowLimit, int *nnzValsSizes,
|
std::vector<std::vector<int> >& colIndicesInColor, int nnzsPerRowLimit, int *nnzValsSizes,
|
||||||
std::vector<std::vector<double> >& ubNnzValues, short int *ubColIndices, unsigned char *NROffsets, int *colorSizes, int *valSize)
|
std::vector<std::vector<double> >& ubNnzValues, short int *ubColIndices, unsigned char *NROffsets, int *colorSizes, int *valSize)
|
||||||
{
|
{
|
||||||
@ -225,8 +221,7 @@ int BlockedMatrix<block_size>::toRDF(int numColors, int *nodesPerColor, bool isU
|
|||||||
// PIndicesAddr: contiguously for each color: indices of x in global x vector, unblocked
|
// PIndicesAddr: contiguously for each color: indices of x in global x vector, unblocked
|
||||||
// if color 0 has A unique colAccesses, PIndicesAddr[0 - A] are for color 0
|
// if color 0 has A unique colAccesses, PIndicesAddr[0 - A] are for color 0
|
||||||
// then PIndicesAddr[A - A+B] are for color 1. Directly copied to FPGA
|
// then PIndicesAddr[A - A+B] are for color 1. Directly copied to FPGA
|
||||||
template <unsigned int block_size>
|
int BlockedMatrix::findPartitionColumns(int numColors, int *nodesPerColor,
|
||||||
int BlockedMatrix<block_size>::findPartitionColumns(int numColors, int *nodesPerColor,
|
|
||||||
int rowsPerColorLimit, int columnsPerColorLimit,
|
int rowsPerColorLimit, int columnsPerColorLimit,
|
||||||
std::vector<std::vector<int> >& colIndicesInColor, int *PIndicesAddr, int *colorSizes,
|
std::vector<std::vector<int> >& colIndicesInColor, int *PIndicesAddr, int *colorSizes,
|
||||||
std::vector<std::vector<int> >& LColIndicesInColor, int *LPIndicesAddr, int *LColorSizes,
|
std::vector<std::vector<int> >& LColIndicesInColor, int *LPIndicesAddr, int *LColorSizes,
|
||||||
@ -474,7 +469,6 @@ void blockedDiagtoRDF(double *blockedDiagVals, int rowSize, int numColors, std::
|
|||||||
|
|
||||||
|
|
||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template void sortBlockedRow<n>(int *, double *, int, int); \
|
|
||||||
template void blockMultSub<n>(double *, double *, double *); \
|
template void blockMultSub<n>(double *, double *, double *); \
|
||||||
template void blockMult<n>(double *, double *, double *); \
|
template void blockMult<n>(double *, double *, double *); \
|
||||||
|
|
||||||
@ -490,15 +484,7 @@ INSTANTIATE_BDA_FUNCTIONS(6);
|
|||||||
#if HAVE_FPGA
|
#if HAVE_FPGA
|
||||||
#define INSTANTIATE_BDA_FPGA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FPGA_FUNCTIONS(n) \
|
||||||
template void blockSub<n>(double *, double *, double *); \
|
template void blockSub<n>(double *, double *, double *); \
|
||||||
template void blockVectMult<n>(double *, double *, double, double *, bool); \
|
template void blockVectMult<n>(double *, double *, double, double *, bool);
|
||||||
template int BlockedMatrix<n>::toRDF(int, int *, bool, \
|
|
||||||
std::vector<std::vector<int> >& , int, int *, \
|
|
||||||
std::vector<std::vector<double> >&, short int *, unsigned char *, int *, int *); \
|
|
||||||
template int BlockedMatrix<n>::findPartitionColumns(int, int *, \
|
|
||||||
int, int, \
|
|
||||||
std::vector<std::vector<int> >& , int *, int *, \
|
|
||||||
std::vector<std::vector<int> >& , int *, int *, \
|
|
||||||
std::vector<std::vector<int> >& , int *, int *);
|
|
||||||
|
|
||||||
INSTANTIATE_BDA_FPGA_FUNCTIONS(1);
|
INSTANTIATE_BDA_FPGA_FUNCTIONS(1);
|
||||||
INSTANTIATE_BDA_FPGA_FUNCTIONS(2);
|
INSTANTIATE_BDA_FPGA_FUNCTIONS(2);
|
||||||
|
@ -39,7 +39,6 @@ namespace Accelerator
|
|||||||
|
|
||||||
/// This struct resembles a blocked csr matrix, like Dune::BCRSMatrix.
|
/// This struct resembles a blocked csr matrix, like Dune::BCRSMatrix.
|
||||||
/// The data is stored in contiguous memory, such that they can be copied to a device in one transfer.
|
/// The data is stored in contiguous memory, such that they can be copied to a device in one transfer.
|
||||||
template<unsigned int block_size>
|
|
||||||
class BlockedMatrix
|
class BlockedMatrix
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -48,12 +47,14 @@ public:
|
|||||||
/// Allocate BlockedMatrix and data arrays with given sizes
|
/// Allocate BlockedMatrix and data arrays with given sizes
|
||||||
/// \param[in] Nb number of blockrows
|
/// \param[in] Nb number of blockrows
|
||||||
/// \param[in] nnzbs number of nonzero blocks
|
/// \param[in] nnzbs number of nonzero blocks
|
||||||
BlockedMatrix(int Nb_, int nnzbs_)
|
/// \param[in] block_size the number of rows and columns for each block
|
||||||
: nnzValues(new double[nnzbs_*block_size*block_size]),
|
BlockedMatrix(int Nb_, int nnzbs_, unsigned int block_size_)
|
||||||
colIndices(new int[nnzbs_*block_size*block_size]),
|
: nnzValues(new double[nnzbs_*block_size_*block_size_]),
|
||||||
|
colIndices(new int[nnzbs_*block_size_*block_size_]),
|
||||||
rowPointers(new int[Nb_+1]),
|
rowPointers(new int[Nb_+1]),
|
||||||
Nb(Nb_),
|
Nb(Nb_),
|
||||||
nnzbs(nnzbs_),
|
nnzbs(nnzbs_),
|
||||||
|
block_size(block_size_),
|
||||||
deleteNnzs(true),
|
deleteNnzs(true),
|
||||||
deleteSparsity(true)
|
deleteSparsity(true)
|
||||||
{}
|
{}
|
||||||
@ -61,11 +62,12 @@ public:
|
|||||||
/// Allocate BlockedMatrix, but copy sparsity pattern instead of allocating new memory
|
/// Allocate BlockedMatrix, but copy sparsity pattern instead of allocating new memory
|
||||||
/// \param[in] M matrix to be copied
|
/// \param[in] M matrix to be copied
|
||||||
BlockedMatrix(const BlockedMatrix& M)
|
BlockedMatrix(const BlockedMatrix& M)
|
||||||
: nnzValues(new double[M.nnzbs*block_size*block_size]),
|
: nnzValues(new double[M.nnzbs*M.block_size*M.block_size]),
|
||||||
colIndices(M.colIndices),
|
colIndices(M.colIndices),
|
||||||
rowPointers(M.rowPointers),
|
rowPointers(M.rowPointers),
|
||||||
Nb(M.Nb),
|
Nb(M.Nb),
|
||||||
nnzbs(M.nnzbs),
|
nnzbs(M.nnzbs),
|
||||||
|
block_size(M.block_size),
|
||||||
deleteNnzs(true),
|
deleteNnzs(true),
|
||||||
deleteSparsity(false)
|
deleteSparsity(false)
|
||||||
{}
|
{}
|
||||||
@ -73,15 +75,17 @@ public:
|
|||||||
/// Allocate BlockedMatrix, but let data arrays point to existing arrays
|
/// Allocate BlockedMatrix, but let data arrays point to existing arrays
|
||||||
/// \param[in] Nb number of blockrows
|
/// \param[in] Nb number of blockrows
|
||||||
/// \param[in] nnzbs number of nonzero blocks
|
/// \param[in] nnzbs number of nonzero blocks
|
||||||
|
/// \param[in] block_size the number of rows and columns for each block
|
||||||
/// \param[in] nnzValues array of nonzero values, contains nnzb*block_size*block_size scalars
|
/// \param[in] nnzValues array of nonzero values, contains nnzb*block_size*block_size scalars
|
||||||
/// \param[in] colIndices array of column indices, contains nnzb entries
|
/// \param[in] colIndices array of column indices, contains nnzb entries
|
||||||
/// \param[in] rowPointers array of row pointers, contains Nb+1 entries
|
/// \param[in] rowPointers array of row pointers, contains Nb+1 entries
|
||||||
BlockedMatrix(int Nb_, int nnzbs_, double *nnzValues_, int *colIndices_, int *rowPointers_)
|
BlockedMatrix(int Nb_, int nnzbs_, unsigned int block_size_, double *nnzValues_, int *colIndices_, int *rowPointers_)
|
||||||
: nnzValues(nnzValues_),
|
: nnzValues(nnzValues_),
|
||||||
colIndices(colIndices_),
|
colIndices(colIndices_),
|
||||||
rowPointers(rowPointers_),
|
rowPointers(rowPointers_),
|
||||||
Nb(Nb_),
|
Nb(Nb_),
|
||||||
nnzbs(nnzbs_),
|
nnzbs(nnzbs_),
|
||||||
|
block_size(block_size_),
|
||||||
deleteNnzs(false),
|
deleteNnzs(false),
|
||||||
deleteSparsity(false)
|
deleteSparsity(false)
|
||||||
{}
|
{}
|
||||||
@ -123,6 +127,7 @@ public:
|
|||||||
int *rowPointers;
|
int *rowPointers;
|
||||||
int Nb;
|
int Nb;
|
||||||
int nnzbs;
|
int nnzbs;
|
||||||
|
unsigned int block_size;
|
||||||
bool deleteNnzs;
|
bool deleteNnzs;
|
||||||
bool deleteSparsity;
|
bool deleteSparsity;
|
||||||
};
|
};
|
||||||
@ -133,8 +138,8 @@ public:
|
|||||||
/// \param[inout] data
|
/// \param[inout] data
|
||||||
/// \param[in] left lower index of data of row
|
/// \param[in] left lower index of data of row
|
||||||
/// \param[in] right upper index of data of row
|
/// \param[in] right upper index of data of row
|
||||||
template <unsigned int block_size>
|
/// \param[in] block_size size of blocks in the row
|
||||||
void sortBlockedRow(int *colIndices, double *data, int left, int right);
|
void sortBlockedRow(int *colIndices, double *data, int left, int right, unsigned block_size);
|
||||||
|
|
||||||
/// Multiply and subtract blocks
|
/// Multiply and subtract blocks
|
||||||
/// a = a - (b * c)
|
/// a = a - (b * c)
|
||||||
|
@ -106,7 +106,7 @@ void solve_transposed_3x3(const double *A, const double *b, double *x) {
|
|||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void CPR<block_size>::create_preconditioner(BlockedMatrix<block_size> *mat_) {
|
void CPR<block_size>::create_preconditioner(BlockedMatrix *mat_) {
|
||||||
this->mat = mat_;
|
this->mat = mat_;
|
||||||
|
|
||||||
try{
|
try{
|
||||||
@ -498,11 +498,8 @@ void CPR<block_size>::apply(const cl::Buffer& y, cl::Buffer& x) {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template CPR<n>::CPR(int, ILUReorder); \
|
template class CPR<n>;
|
||||||
template void CPR<n>::init(int, int, std::shared_ptr<cl::Context>&, std::shared_ptr<cl::CommandQueue>&); \
|
|
||||||
template void CPR<n>::apply(const cl::Buffer&, cl::Buffer&); \
|
|
||||||
template void CPR<n>::create_preconditioner(BlockedMatrix<n> *mat);
|
|
||||||
|
|
||||||
INSTANTIATE_BDA_FUNCTIONS(1);
|
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||||
INSTANTIATE_BDA_FUNCTIONS(2);
|
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||||
|
@ -66,7 +66,7 @@ private:
|
|||||||
std::unique_ptr<cl::Buffer> d_coarse_y, d_coarse_x; // stores the scalar vectors
|
std::unique_ptr<cl::Buffer> d_coarse_y, d_coarse_x; // stores the scalar vectors
|
||||||
std::once_flag opencl_buffers_allocated; // only allocate OpenCL Buffers once
|
std::once_flag opencl_buffers_allocated; // only allocate OpenCL Buffers once
|
||||||
|
|
||||||
BlockedMatrix<block_size> *mat = nullptr; // input matrix, blocked
|
BlockedMatrix *mat = nullptr; // input matrix, blocked
|
||||||
using DuneMat = Dune::BCRSMatrix<Dune::FieldMatrix<double, 1, 1> >;
|
using DuneMat = Dune::BCRSMatrix<Dune::FieldMatrix<double, 1, 1> >;
|
||||||
using DuneVec = Dune::BlockVector<Dune::FieldVector<double, 1> >;
|
using DuneVec = Dune::BlockVector<Dune::FieldVector<double, 1> >;
|
||||||
using MatrixOperator = Dune::MatrixAdapter<DuneMat, DuneVec, DuneVec>;
|
using MatrixOperator = Dune::MatrixAdapter<DuneMat, DuneVec, DuneVec>;
|
||||||
@ -106,7 +106,7 @@ public:
|
|||||||
// apply preconditioner, x = prec(y)
|
// apply preconditioner, x = prec(y)
|
||||||
void apply(const cl::Buffer& y, cl::Buffer& x);
|
void apply(const cl::Buffer& y, cl::Buffer& x);
|
||||||
|
|
||||||
void create_preconditioner(BlockedMatrix<block_size> *mat);
|
void create_preconditioner(BlockedMatrix *mat);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -482,7 +482,7 @@ __kernel void chow_patel_ilu_sweep(
|
|||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void ChowPatelIlu<block_size>::decomposition(
|
void ChowPatelIlu<block_size>::decomposition(
|
||||||
cl::CommandQueue *queue, [[maybe_unused]] cl::Context *context,
|
cl::CommandQueue *queue, [[maybe_unused]] cl::Context *context,
|
||||||
BlockedMatrix<block_size> *LUmat, BlockedMatrix<block_size> *Lmat, BlockedMatrix<block_size> *Umat,
|
BlockedMatrix *LUmat, BlockedMatrix *Lmat, BlockedMatrix *Umat,
|
||||||
double *invDiagVals, std::vector<int>& diagIndex,
|
double *invDiagVals, std::vector<int>& diagIndex,
|
||||||
cl::Buffer& d_diagIndex, cl::Buffer& d_invDiagVals,
|
cl::Buffer& d_diagIndex, cl::Buffer& d_invDiagVals,
|
||||||
cl::Buffer& d_Lvals, cl::Buffer& d_Lcols, cl::Buffer& d_Lrows,
|
cl::Buffer& d_Lvals, cl::Buffer& d_Lcols, cl::Buffer& d_Lrows,
|
||||||
|
@ -82,7 +82,7 @@ public:
|
|||||||
/// This function calls gpu_decomposition() if CHOW_PATEL_GPU is set
|
/// This function calls gpu_decomposition() if CHOW_PATEL_GPU is set
|
||||||
void decomposition(
|
void decomposition(
|
||||||
cl::CommandQueue *queue, cl::Context *context,
|
cl::CommandQueue *queue, cl::Context *context,
|
||||||
BlockedMatrix<block_size> *LUmat, BlockedMatrix<block_size> *Lmat, BlockedMatrix<block_size> *Umat,
|
BlockedMatrix *LUmat, BlockedMatrix *Lmat, BlockedMatrix *Umat,
|
||||||
double *invDiagVals, std::vector<int>& diagIndex,
|
double *invDiagVals, std::vector<int>& diagIndex,
|
||||||
cl::Buffer& d_diagIndex, cl::Buffer& d_invDiagVals,
|
cl::Buffer& d_diagIndex, cl::Buffer& d_invDiagVals,
|
||||||
cl::Buffer& d_Lvals, cl::Buffer& d_Lcols, cl::Buffer& d_Lrows,
|
cl::Buffer& d_Lvals, cl::Buffer& d_Lcols, cl::Buffer& d_Lrows,
|
||||||
|
@ -59,7 +59,7 @@ FPGABILU0<block_size>::~FPGABILU0()
|
|||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
bool FPGABILU0<block_size>::init(BlockedMatrix<block_size> *mat)
|
bool FPGABILU0<block_size>::init(BlockedMatrix *mat)
|
||||||
{
|
{
|
||||||
const unsigned int bs = block_size;
|
const unsigned int bs = block_size;
|
||||||
|
|
||||||
@ -91,8 +91,8 @@ bool FPGABILU0<block_size>::init(BlockedMatrix<block_size> *mat)
|
|||||||
}
|
}
|
||||||
|
|
||||||
Timer t_analysis;
|
Timer t_analysis;
|
||||||
rMat = std::make_shared<BlockedMatrix<block_size> >(mat->Nb, mat->nnzbs);
|
rMat = std::make_shared<BlockedMatrix>(mat->Nb, mat->nnzbs, block_size);
|
||||||
LUMat = std::make_unique<BlockedMatrix<block_size> >(*rMat);
|
LUMat = std::make_unique<BlockedMatrix>(*rMat);
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
if (level_scheduling) {
|
if (level_scheduling) {
|
||||||
out << "FPGABILU0 reordering strategy: " << "level_scheduling\n";
|
out << "FPGABILU0 reordering strategy: " << "level_scheduling\n";
|
||||||
@ -117,7 +117,7 @@ bool FPGABILU0<block_size>::init(BlockedMatrix<block_size> *mat)
|
|||||||
int NROffsetSize = 0, LNROffsetSize = 0, UNROffsetSize = 0;
|
int NROffsetSize = 0, LNROffsetSize = 0, UNROffsetSize = 0;
|
||||||
int blockDiagSize = 0;
|
int blockDiagSize = 0;
|
||||||
// This reordering is needed here only so that the result can be used to calculate worst-case scenario array sizes
|
// This reordering is needed here only so that the result can be used to calculate worst-case scenario array sizes
|
||||||
reorderBlockedMatrixByPattern<bs>(mat, toOrder.data(), fromOrder.data(), rMat.get());
|
reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rMat.get());
|
||||||
int doneRows = 0;
|
int doneRows = 0;
|
||||||
for (int c = 0; c < numColors; c++) {
|
for (int c = 0; c < numColors; c++) {
|
||||||
for (int i = doneRows; i < doneRows + rowsPerColor[c]; i++) {
|
for (int i = doneRows; i < doneRows + rowsPerColor[c]; i++) {
|
||||||
@ -187,8 +187,8 @@ bool FPGABILU0<block_size>::init(BlockedMatrix<block_size> *mat)
|
|||||||
|
|
||||||
diagIndex.resize(mat->Nb, 0);
|
diagIndex.resize(mat->Nb, 0);
|
||||||
invDiagVals = new double[mat->Nb * bs * bs];
|
invDiagVals = new double[mat->Nb * bs * bs];
|
||||||
LMat = std::make_unique<BlockedMatrix<block_size> >(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
LMat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2, block_size);
|
||||||
UMat = std::make_unique<BlockedMatrix<block_size> >(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
UMat = std::make_unique<BlockedMatrix>(mat->Nb, (mat->nnzbs - mat->Nb) / 2, block_size);
|
||||||
resultPointers[0] = (void *) colorSizes.data();
|
resultPointers[0] = (void *) colorSizes.data();
|
||||||
resultPointers[1] = (void *) PIndicesAddr.data();
|
resultPointers[1] = (void *) PIndicesAddr.data();
|
||||||
resultPointers[2] = (void *) nnzValues.data();
|
resultPointers[2] = (void *) nnzValues.data();
|
||||||
@ -232,11 +232,11 @@ bool FPGABILU0<block_size>::init(BlockedMatrix<block_size> *mat)
|
|||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
bool FPGABILU0<block_size>::create_preconditioner(BlockedMatrix<block_size> *mat)
|
bool FPGABILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
|
||||||
{
|
{
|
||||||
const unsigned int bs = block_size;
|
const unsigned int bs = block_size;
|
||||||
Timer t_reorder;
|
Timer t_reorder;
|
||||||
reorderBlockedMatrixByPattern<bs>(mat, toOrder.data(), fromOrder.data(), rMat.get());
|
reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rMat.get());
|
||||||
|
|
||||||
if (verbosity >= 3) {
|
if (verbosity >= 3) {
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
@ -402,8 +402,8 @@ bool FPGABILU0<block_size>::create_preconditioner(BlockedMatrix<block_size> *mat
|
|||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template FPGABILU0<n>::FPGABILU0(ILUReorder, int, int, int, int, int); \
|
template FPGABILU0<n>::FPGABILU0(ILUReorder, int, int, int, int, int); \
|
||||||
template FPGABILU0<n>::~FPGABILU0(); \
|
template FPGABILU0<n>::~FPGABILU0(); \
|
||||||
template bool FPGABILU0<n>::init(BlockedMatrix<n> *); \
|
template bool FPGABILU0<n>::init(BlockedMatrix*); \
|
||||||
template bool FPGABILU0<n>::create_preconditioner(BlockedMatrix<n> *); \
|
template bool FPGABILU0<n>::create_preconditioner(BlockedMatrix *);
|
||||||
|
|
||||||
INSTANTIATE_BDA_FUNCTIONS(1);
|
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||||
INSTANTIATE_BDA_FUNCTIONS(2);
|
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||||
|
@ -45,8 +45,8 @@ private:
|
|||||||
int Nb; // number of blockrows of the matrix
|
int Nb; // number of blockrows of the matrix
|
||||||
int nnz; // number of nonzeroes of the matrix (scalar)
|
int nnz; // number of nonzeroes of the matrix (scalar)
|
||||||
int nnzbs; // number of blocks of the matrix
|
int nnzbs; // number of blocks of the matrix
|
||||||
std::unique_ptr<BlockedMatrix<block_size> > LMat = nullptr, UMat = nullptr, LUMat = nullptr;
|
std::unique_ptr<BlockedMatrix> LMat = nullptr, UMat = nullptr, LUMat = nullptr;
|
||||||
std::shared_ptr<BlockedMatrix<block_size> > rMat = nullptr; // reordered mat
|
std::shared_ptr<BlockedMatrix> rMat = nullptr; // reordered mat
|
||||||
double *invDiagVals = nullptr;
|
double *invDiagVals = nullptr;
|
||||||
std::vector<int> diagIndex;
|
std::vector<int> diagIndex;
|
||||||
std::vector<int> toOrder, fromOrder;
|
std::vector<int> toOrder, fromOrder;
|
||||||
@ -82,10 +82,10 @@ public:
|
|||||||
~FPGABILU0();
|
~FPGABILU0();
|
||||||
|
|
||||||
// analysis (optional)
|
// analysis (optional)
|
||||||
bool init(BlockedMatrix<block_size> *mat);
|
bool init(BlockedMatrix *mat);
|
||||||
|
|
||||||
// ilu_decomposition
|
// ilu_decomposition
|
||||||
bool create_preconditioner(BlockedMatrix<block_size> *mat);
|
bool create_preconditioner(BlockedMatrix *mat);
|
||||||
|
|
||||||
int* getToOrder()
|
int* getToOrder()
|
||||||
{
|
{
|
||||||
@ -97,7 +97,7 @@ public:
|
|||||||
return fromOrder.data();
|
return fromOrder.data();
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockedMatrix<block_size>* getRMat()
|
BlockedMatrix* getRMat()
|
||||||
{
|
{
|
||||||
return rMat.get();
|
return rMat.get();
|
||||||
}
|
}
|
||||||
|
@ -262,7 +262,7 @@ void FpgaSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, double
|
|||||||
|
|
||||||
// allocate host memory for matrices and vectors
|
// allocate host memory for matrices and vectors
|
||||||
// actual data for mat points to std::vector.data() in ISTLSolverEbos, so no alloc/free here
|
// actual data for mat points to std::vector.data() in ISTLSolverEbos, so no alloc/free here
|
||||||
mat.reset(new BlockedMatrix<block_size>(N_ / block_size, nnz_ / block_size / block_size, vals, cols, rows));
|
mat.reset(new BlockedMatrix(N_ / block_size, nnz_ / block_size / block_size, block_size, vals, cols, rows));
|
||||||
|
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "Initializing FPGA data, matrix size: " << this->N << " blocks, nnz: " << this->nnzb << " blocks, " << \
|
oss << "Initializing FPGA data, matrix size: " << this->N << " blocks, nnz: " << this->nnzb << " blocks, " << \
|
||||||
|
@ -56,8 +56,8 @@ private:
|
|||||||
bool level_scheduling = false;
|
bool level_scheduling = false;
|
||||||
|
|
||||||
// LUMat will shallow copy rowPointers and colIndices of mat/rMat
|
// LUMat will shallow copy rowPointers and colIndices of mat/rMat
|
||||||
std::unique_ptr<BlockedMatrix<block_size> > mat = nullptr;
|
std::unique_ptr<BlockedMatrix> mat = nullptr;
|
||||||
BlockedMatrix<block_size> *rMat = nullptr;
|
BlockedMatrix *rMat = nullptr;
|
||||||
std::unique_ptr<Preconditioner> prec = nullptr;
|
std::unique_ptr<Preconditioner> prec = nullptr;
|
||||||
|
|
||||||
// vectors with data processed by the preconditioner (input to the kernel)
|
// vectors with data processed by the preconditioner (input to the kernel)
|
||||||
|
@ -53,7 +53,7 @@ void OpenclMatrix<block_size>::upload(cl::CommandQueue *queue, Matrix *matrix) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void OpenclMatrix<block_size>::upload(cl::CommandQueue *queue, BlockedMatrix<block_size> *matrix) {
|
void OpenclMatrix<block_size>::upload(cl::CommandQueue *queue, BlockedMatrix *matrix) {
|
||||||
upload(queue, matrix->nnzValues, matrix->colIndices, matrix->rowPointers);
|
upload(queue, matrix->nnzValues, matrix->colIndices, matrix->rowPointers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ public:
|
|||||||
|
|
||||||
void upload(cl::CommandQueue *queue, double *vals, int *cols, int *rows);
|
void upload(cl::CommandQueue *queue, double *vals, int *cols, int *rows);
|
||||||
void upload(cl::CommandQueue *queue, Matrix *matrix);
|
void upload(cl::CommandQueue *queue, Matrix *matrix);
|
||||||
void upload(cl::CommandQueue *queue, BlockedMatrix<block_size> *matrix);
|
void upload(cl::CommandQueue *queue, BlockedMatrix *matrix);
|
||||||
|
|
||||||
cl::Buffer nnzValues;
|
cl::Buffer nnzValues;
|
||||||
cl::Buffer colIndices;
|
cl::Buffer colIndices;
|
||||||
|
@ -177,10 +177,10 @@ int colorBlockedNodes(int rows, const int *CSRRowPointers, const int *CSRColIndi
|
|||||||
/* Reorder a matrix by a specified input order.
|
/* Reorder a matrix by a specified input order.
|
||||||
* Both a to order array, which contains for every node from the old matrix where it will move in the new matrix,
|
* Both a to order array, which contains for every node from the old matrix where it will move in the new matrix,
|
||||||
* and the from order, which contains for every node in the new matrix where it came from in the old matrix.*/
|
* and the from order, which contains for every node in the new matrix where it came from in the old matrix.*/
|
||||||
|
void reorderBlockedMatrixByPattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rmat) {
|
||||||
|
assert(mat->block_size == rmat->block_size);
|
||||||
|
|
||||||
template <unsigned int block_size>
|
const unsigned int bs = mat->block_size;
|
||||||
void reorderBlockedMatrixByPattern(BlockedMatrix<block_size> *mat, int *toOrder, int *fromOrder, BlockedMatrix<block_size> *rmat) {
|
|
||||||
const unsigned int bs = block_size;
|
|
||||||
int rIndex = 0;
|
int rIndex = 0;
|
||||||
int i, k;
|
int i, k;
|
||||||
unsigned int j;
|
unsigned int j;
|
||||||
@ -204,7 +204,7 @@ void reorderBlockedMatrixByPattern(BlockedMatrix<block_size> *mat, int *toOrder,
|
|||||||
}
|
}
|
||||||
// re-sort the column indices of every row.
|
// re-sort the column indices of every row.
|
||||||
for (i = 0; i < mat->Nb; i++) {
|
for (i = 0; i < mat->Nb; i++) {
|
||||||
sortBlockedRow<bs>(rmat->colIndices, rmat->nnzValues, rmat->rowPointers[i], rmat->rowPointers[i + 1] - 1);
|
sortBlockedRow(rmat->colIndices, rmat->nnzValues, rmat->rowPointers[i], rmat->rowPointers[i + 1] - 1, bs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -370,7 +370,6 @@ void csrPatternToCsc(int *CSRColIndices, int *CSRRowPointers, int *CSCRowIndices
|
|||||||
|
|
||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template int colorBlockedNodes<n>(int, const int *, const int *, const int *, const int *, std::vector<int>&, int, int); \
|
template int colorBlockedNodes<n>(int, const int *, const int *, const int *, const int *, std::vector<int>&, int, int); \
|
||||||
template void reorderBlockedMatrixByPattern<n>(BlockedMatrix<n> *, int *, int *, BlockedMatrix<n> *); \
|
|
||||||
template void reorderBlockedVectorByPattern<n>(int, double*, int*, double*); \
|
template void reorderBlockedVectorByPattern<n>(int, double*, int*, double*); \
|
||||||
template void findGraphColoring<n>(const int *, const int *, const int *, const int *, int, int, int, int *, int *, int *, std::vector<int>&); \
|
template void findGraphColoring<n>(const int *, const int *, const int *, const int *, int, int, int, int *, int *, int *, std::vector<int>&); \
|
||||||
|
|
||||||
|
@ -53,8 +53,7 @@ int colorBlockedNodes(int rows, const int *CSRRowPointers, const int *CSRColIndi
|
|||||||
/// \param[in] toOrder reorder pattern that lists for each index in the original order, to which index in the new order it should be moved
|
/// \param[in] toOrder reorder pattern that lists for each index in the original order, to which index in the new order it should be moved
|
||||||
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
|
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
|
||||||
/// \param[inout] rMat reordered Matrix
|
/// \param[inout] rMat reordered Matrix
|
||||||
template <unsigned int block_size>
|
void reorderBlockedMatrixByPattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rmat);
|
||||||
void reorderBlockedMatrixByPattern(BlockedMatrix<block_size> *mat, int *toOrder, int *fromOrder, BlockedMatrix<block_size> *rmat);
|
|
||||||
|
|
||||||
/// Compute reorder mapping from the color that each node has received
|
/// Compute reorder mapping from the color that each node has received
|
||||||
/// The toOrder, fromOrder and iters arrays must be allocated already
|
/// The toOrder, fromOrder and iters arrays must be allocated already
|
||||||
|
@ -407,7 +407,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
|||||||
#if COPY_ROW_BY_ROW
|
#if COPY_ROW_BY_ROW
|
||||||
vals_contiguous = new double[N];
|
vals_contiguous = new double[N];
|
||||||
#endif
|
#endif
|
||||||
mat.reset(new BlockedMatrix<block_size>(Nb, nnzb, vals, cols, rows));
|
mat.reset(new BlockedMatrix(Nb, nnzb, block_size, vals, cols, rows));
|
||||||
|
|
||||||
d_x = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
d_x = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
d_b = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
d_b = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
|
@ -75,8 +75,8 @@ private:
|
|||||||
bool use_cpr; // allow to enable CPR
|
bool use_cpr; // allow to enable CPR
|
||||||
int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings
|
int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings
|
||||||
bool analysis_done = false;
|
bool analysis_done = false;
|
||||||
std::unique_ptr<BlockedMatrix<block_size> > mat = nullptr; // original matrix
|
std::unique_ptr<BlockedMatrix> mat = nullptr; // original matrix
|
||||||
BlockedMatrix<block_size> *rmat = nullptr; // reordered matrix (or original if no reordering), used for spmv
|
BlockedMatrix *rmat = nullptr; // reordered matrix (or original if no reordering), used for spmv
|
||||||
ILUReorder opencl_ilu_reorder; // reordering strategy
|
ILUReorder opencl_ilu_reorder; // reordering strategy
|
||||||
std::vector<cl::Event> events;
|
std::vector<cl::Event> events;
|
||||||
cl_int err;
|
cl_int err;
|
||||||
|
Loading…
Reference in New Issue
Block a user