mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Added memory management to BlockedMatrix
This commit is contained in:
parent
779a713330
commit
a164a57220
@ -49,20 +49,14 @@ namespace bda
|
|||||||
delete[] invDiagVals;
|
delete[] invDiagVals;
|
||||||
delete[] diagIndex;
|
delete[] diagIndex;
|
||||||
delete[] rowsPerColor;
|
delete[] rowsPerColor;
|
||||||
freeBlockedMatrix(&LUmat);
|
|
||||||
freeBlockedMatrix(&Lmat);
|
|
||||||
freeBlockedMatrix(&Umat);
|
|
||||||
delete[] toOrder;
|
delete[] toOrder;
|
||||||
delete[] fromOrder;
|
delete[] fromOrder;
|
||||||
freeBlockedMatrix(&rmat);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
bool BILU0<block_size>::init(BlockedMatrix *mat)
|
bool BILU0<block_size>::init(BlockedMatrix<block_size> *mat)
|
||||||
{
|
{
|
||||||
const unsigned int bs = block_size;
|
const unsigned int bs = block_size;
|
||||||
int *CSCRowIndices = nullptr;
|
|
||||||
int *CSCColPointers = nullptr;
|
|
||||||
|
|
||||||
this->N = mat->Nb * block_size;
|
this->N = mat->Nb * block_size;
|
||||||
this->Nb = mat->Nb;
|
this->Nb = mat->Nb;
|
||||||
@ -72,23 +66,20 @@ namespace bda
|
|||||||
toOrder = new int[N];
|
toOrder = new int[N];
|
||||||
fromOrder = new int[N];
|
fromOrder = new int[N];
|
||||||
|
|
||||||
if (level_scheduling) {
|
int *CSCRowIndices = new int[nnzbs];
|
||||||
CSCRowIndices = new int[nnzbs];
|
int *CSCColPointers = new int[Nb + 1];
|
||||||
CSCColPointers = new int[Nb + 1];
|
|
||||||
|
|
||||||
Timer t_convert;
|
Timer t_convert;
|
||||||
csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb);
|
csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb);
|
||||||
if(verbosity >= 3){
|
if(verbosity >= 3){
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s\n";
|
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s";
|
||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
Timer t_analysis;
|
Timer t_analysis;
|
||||||
rmat = allocateBlockedMatrix<block_size>(mat->Nb, mat->nnzbs);
|
rmat = std::make_shared<BlockedMatrix<block_size> >(mat->Nb, mat->nnzbs);
|
||||||
LUmat = softCopyBlockedMatrix(rmat);
|
LUmat = std::make_unique<BlockedMatrix<block_size> >(*rmat);
|
||||||
|
|
||||||
if (level_scheduling) {
|
if (level_scheduling) {
|
||||||
rowsPerColor = findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, &numColors, toOrder, fromOrder);
|
rowsPerColor = findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, &numColors, toOrder, fromOrder);
|
||||||
} else if (graph_coloring) {
|
} else if (graph_coloring) {
|
||||||
@ -103,17 +94,17 @@ namespace bda
|
|||||||
OpmLog::info(out.str());
|
OpmLog::info(out.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
delete[] CSCRowIndices;
|
||||||
|
delete[] CSCColPointers;
|
||||||
|
|
||||||
diagIndex = new int[mat->Nb];
|
diagIndex = new int[mat->Nb];
|
||||||
invDiagVals = new double[mat->Nb * bs * bs];
|
invDiagVals = new double[mat->Nb * bs * bs];
|
||||||
|
|
||||||
Lmat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
Lmat = std::make_unique<BlockedMatrix<block_size> >(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||||
Umat = allocateBlockedMatrix<block_size>(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
Umat = std::make_unique<BlockedMatrix<block_size> >(mat->Nb, (mat->nnzbs - mat->Nb) / 2);
|
||||||
|
|
||||||
LUmat->nnzValues = new double[mat->nnzbs * bs * bs];
|
LUmat->nnzValues = new double[mat->nnzbs * bs * bs];
|
||||||
|
|
||||||
delete[] CSCRowIndices;
|
|
||||||
delete[] CSCColPointers;
|
|
||||||
|
|
||||||
s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs);
|
s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs);
|
||||||
s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Umat->nnzbs);
|
s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Umat->nnzbs);
|
||||||
s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs);
|
s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs);
|
||||||
@ -145,12 +136,12 @@ namespace bda
|
|||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
|
bool BILU0<block_size>::create_preconditioner(BlockedMatrix<block_size> *mat)
|
||||||
{
|
{
|
||||||
const unsigned int bs = block_size;
|
const unsigned int bs = block_size;
|
||||||
|
|
||||||
Timer t_reorder;
|
Timer t_reorder;
|
||||||
reorderBlockedMatrixByPattern<block_size>(mat, toOrder, fromOrder, rmat);
|
reorderBlockedMatrixByPattern<block_size>(mat, toOrder, fromOrder, rmat.get());
|
||||||
|
|
||||||
if (verbosity >= 3){
|
if (verbosity >= 3){
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "BILU0 reorder matrix: " << t_reorder.stop() << " s";
|
out << "BILU0 reorder matrix: " << t_reorder.stop() << " s";
|
||||||
@ -160,6 +151,7 @@ namespace bda
|
|||||||
// TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
|
// TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
|
||||||
Timer t_copy;
|
Timer t_copy;
|
||||||
memcpy(LUmat->nnzValues, rmat->nnzValues, sizeof(double) * bs * bs * rmat->nnzbs);
|
memcpy(LUmat->nnzValues, rmat->nnzValues, sizeof(double) * bs * bs * rmat->nnzbs);
|
||||||
|
|
||||||
if (verbosity >= 3){
|
if (verbosity >= 3){
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "BILU0 memcpy: " << t_copy.stop() << " s";
|
out << "BILU0 memcpy: " << t_copy.stop() << " s";
|
||||||
@ -332,8 +324,8 @@ namespace bda
|
|||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template BILU0<n>::BILU0(bool, bool, int); \
|
template BILU0<n>::BILU0(bool, bool, int); \
|
||||||
template BILU0<n>::~BILU0(); \
|
template BILU0<n>::~BILU0(); \
|
||||||
template bool BILU0<n>::init(BlockedMatrix*); \
|
template bool BILU0<n>::init(BlockedMatrix<n>*); \
|
||||||
template bool BILU0<n>::create_preconditioner(BlockedMatrix*); \
|
template bool BILU0<n>::create_preconditioner(BlockedMatrix<n>*); \
|
||||||
template void BILU0<n>::apply(cl::Buffer& x, cl::Buffer& y); \
|
template void BILU0<n>::apply(cl::Buffer& x, cl::Buffer& y); \
|
||||||
template void BILU0<n>::setOpenCLContext(cl::Context*); \
|
template void BILU0<n>::setOpenCLContext(cl::Context*); \
|
||||||
template void BILU0<n>::setOpenCLQueue(cl::CommandQueue*); \
|
template void BILU0<n>::setOpenCLQueue(cl::CommandQueue*); \
|
||||||
|
@ -39,8 +39,8 @@ namespace bda
|
|||||||
int Nb; // number of blockrows of the matrix
|
int Nb; // number of blockrows of the matrix
|
||||||
int nnz; // number of nonzeroes of the matrix (scalar)
|
int nnz; // number of nonzeroes of the matrix (scalar)
|
||||||
int nnzbs; // number of blocks of the matrix
|
int nnzbs; // number of blocks of the matrix
|
||||||
BlockedMatrix *Lmat = nullptr, *Umat = nullptr, *LUmat = nullptr;
|
std::unique_ptr<BlockedMatrix<block_size> > Lmat = nullptr, Umat = nullptr, LUmat = nullptr;
|
||||||
BlockedMatrix *rmat = nullptr; // only used with PAR_SIM
|
std::shared_ptr<BlockedMatrix<block_size> > rmat = nullptr; // only used with PAR_SIM
|
||||||
double *invDiagVals = nullptr;
|
double *invDiagVals = nullptr;
|
||||||
int *diagIndex = nullptr, *rowsPerColor = nullptr;
|
int *diagIndex = nullptr, *rowsPerColor = nullptr;
|
||||||
int *toOrder = nullptr, *fromOrder = nullptr;
|
int *toOrder = nullptr, *fromOrder = nullptr;
|
||||||
@ -73,10 +73,10 @@ namespace bda
|
|||||||
~BILU0();
|
~BILU0();
|
||||||
|
|
||||||
// analysis
|
// analysis
|
||||||
bool init(BlockedMatrix *mat);
|
bool init(BlockedMatrix<block_size> *mat);
|
||||||
|
|
||||||
// ilu_decomposition
|
// ilu_decomposition
|
||||||
bool create_preconditioner(BlockedMatrix *mat);
|
bool create_preconditioner(BlockedMatrix<block_size> *mat);
|
||||||
|
|
||||||
// apply preconditioner, y = prec(x)
|
// apply preconditioner, y = prec(x)
|
||||||
void apply(cl::Buffer& x, cl::Buffer& y);
|
void apply(cl::Buffer& x, cl::Buffer& y);
|
||||||
@ -99,9 +99,9 @@ namespace bda
|
|||||||
return fromOrder;
|
return fromOrder;
|
||||||
}
|
}
|
||||||
|
|
||||||
BlockedMatrix* getRMat()
|
BlockedMatrix<block_size>* getRMat()
|
||||||
{
|
{
|
||||||
return rmat;
|
return rmat.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -27,40 +27,6 @@ using bda::BlockedMatrix;
|
|||||||
namespace bda
|
namespace bda
|
||||||
{
|
{
|
||||||
|
|
||||||
template <unsigned int block_size>
|
|
||||||
BlockedMatrix *allocateBlockedMatrix(int Nb, int nnzbs) {
|
|
||||||
BlockedMatrix *mat = new BlockedMatrix();
|
|
||||||
|
|
||||||
mat->nnzValues = new double[nnzbs * block_size * block_size];
|
|
||||||
mat->colIndices = new int[nnzbs];
|
|
||||||
mat->rowPointers = new int[Nb + 1];
|
|
||||||
mat->Nb = Nb;
|
|
||||||
mat->nnzbs = nnzbs;
|
|
||||||
|
|
||||||
return mat;
|
|
||||||
}
|
|
||||||
|
|
||||||
void freeBlockedMatrix(BlockedMatrix **mat) {
|
|
||||||
if (*mat) {
|
|
||||||
delete[] (*mat)->nnzValues;
|
|
||||||
delete[] (*mat)->colIndices;
|
|
||||||
delete[] (*mat)->rowPointers;
|
|
||||||
delete (*mat);
|
|
||||||
*mat = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
BlockedMatrix *softCopyBlockedMatrix(BlockedMatrix *mat) {
|
|
||||||
BlockedMatrix *res = new BlockedMatrix();
|
|
||||||
res->nnzValues = mat->nnzValues;
|
|
||||||
res->colIndices = mat->colIndices;
|
|
||||||
res->rowPointers = mat->rowPointers;
|
|
||||||
res->Nb = mat->Nb;
|
|
||||||
res->nnzbs = mat->nnzbs;
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*Sort a row of matrix elements from a blocked CSR-format.*/
|
/*Sort a row of matrix elements from a blocked CSR-format.*/
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
@ -129,7 +95,6 @@ void blockMult(double *mat1, double *mat2, double *resMat) {
|
|||||||
|
|
||||||
|
|
||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template BlockedMatrix *allocateBlockedMatrix<n>(int Nb, int nnzbs); \
|
|
||||||
template void sortBlockedRow<n>(int *, double *, int, int); \
|
template void sortBlockedRow<n>(int *, double *, int, int); \
|
||||||
template void blockMultSub<n>(double *, double *, double *); \
|
template void blockMultSub<n>(double *, double *, double *); \
|
||||||
template void blockMult<n>(double *, double *, double *); \
|
template void blockMult<n>(double *, double *, double *); \
|
||||||
|
@ -23,29 +23,68 @@
|
|||||||
namespace bda
|
namespace bda
|
||||||
{
|
{
|
||||||
|
|
||||||
typedef struct {
|
/// This struct resembles a blocked csr matrix, like Dune::BCRSMatrix.
|
||||||
|
/// The data is stored in contiguous memory, such that they can be copied to a device in one transfer.
|
||||||
|
template<int BS>
|
||||||
|
struct BlockedMatrix{
|
||||||
|
/// Allocate BlockedMatrix and data arrays with given sizes
|
||||||
|
/// \param[in] Nb number of blockrows
|
||||||
|
/// \param[in] nnzbs number of nonzero blocks
|
||||||
|
BlockedMatrix(int Nb_, int nnzbs_)
|
||||||
|
: nnzValues(new double[nnzbs_*BS*BS]),
|
||||||
|
colIndices(new int[nnzbs_*BS*BS]),
|
||||||
|
rowPointers(new int[Nb_+1]),
|
||||||
|
Nb(Nb_),
|
||||||
|
nnzbs(nnzbs_),
|
||||||
|
deleteNnzs(true),
|
||||||
|
deleteSparsity(true)
|
||||||
|
{}
|
||||||
|
/// Allocate BlockedMatrix, but copy sparsity pattern instead of allocating new memory
|
||||||
|
/// \param[in] M matrix to be copied
|
||||||
|
BlockedMatrix(const BlockedMatrix& M)
|
||||||
|
: nnzValues(new double[M.nnzbs*BS*BS]),
|
||||||
|
colIndices(M.colIndices),
|
||||||
|
rowPointers(M.rowPointers),
|
||||||
|
Nb(M.Nb),
|
||||||
|
nnzbs(M.nnzbs),
|
||||||
|
deleteNnzs(true),
|
||||||
|
deleteSparsity(false)
|
||||||
|
{}
|
||||||
|
/// Allocate BlockedMatrix, but let data arrays point to existing arrays
|
||||||
|
/// \param[in] Nb number of blockrows
|
||||||
|
/// \param[in] nnzbs number of nonzero blocks
|
||||||
|
/// \param[in] nnzValues array of nonzero values, contains nnzb*BS*BS scalars
|
||||||
|
/// \param[in] colIndices array of column indices, contains nnzb entries
|
||||||
|
/// \param[in] rowPointers array of row pointers, contains Nb+1 entries
|
||||||
|
BlockedMatrix(int Nb_, int nnzbs_, double *nnzValues_, int *colIndices_, int *rowPointers_)
|
||||||
|
: nnzValues(nnzValues_),
|
||||||
|
colIndices(colIndices_),
|
||||||
|
rowPointers(rowPointers_),
|
||||||
|
Nb(Nb_),
|
||||||
|
nnzbs(nnzbs_),
|
||||||
|
deleteNnzs(false),
|
||||||
|
deleteSparsity(false)
|
||||||
|
{}
|
||||||
|
|
||||||
|
~BlockedMatrix(){
|
||||||
|
if (deleteNnzs) {
|
||||||
|
delete[] nnzValues;
|
||||||
|
}
|
||||||
|
if (deleteSparsity) {
|
||||||
|
delete[] colIndices;
|
||||||
|
delete[] rowPointers;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
double *nnzValues;
|
double *nnzValues;
|
||||||
int *colIndices;
|
int *colIndices;
|
||||||
int *rowPointers;
|
int *rowPointers;
|
||||||
int Nb;
|
int Nb;
|
||||||
int nnzbs;
|
int nnzbs;
|
||||||
} BlockedMatrix;
|
bool deleteNnzs;
|
||||||
|
bool deleteSparsity;
|
||||||
|
};
|
||||||
|
|
||||||
/// Allocate BlockedMatrix and data arrays with given sizes
|
|
||||||
/// \param[in] Nb number of blockrows
|
|
||||||
/// \param[in] nnzbs number of nonzero blocks
|
|
||||||
/// \return pointer to BlockedMatrix
|
|
||||||
template <unsigned int block_size>
|
|
||||||
BlockedMatrix *allocateBlockedMatrix(int Nb, int nnzbs);
|
|
||||||
|
|
||||||
/// Allocate BlockedMatrix, but copy data pointers instead of allocating new memory
|
|
||||||
/// \param[in] mat matrix to be copied
|
|
||||||
/// \return pointer to BlockedMatrix
|
|
||||||
BlockedMatrix *softCopyBlockedMatrix(BlockedMatrix *mat);
|
|
||||||
|
|
||||||
/// Free BlockedMatrix and its data
|
|
||||||
/// \param[in] mat matrix to be free'd
|
|
||||||
void freeBlockedMatrix(BlockedMatrix **mat);
|
|
||||||
|
|
||||||
/// Sort a row of matrix elements from a blocked CSR-format
|
/// Sort a row of matrix elements from a blocked CSR-format
|
||||||
/// \param[inout] colIndices
|
/// \param[inout] colIndices
|
||||||
|
@ -168,32 +168,32 @@ int colorBlockedNodes(int rows, const int *CSRRowPointers, const int *CSRColIndi
|
|||||||
* and the from order, which contains for every node in the new matrix where it came from in the old matrix.*/
|
* and the from order, which contains for every node in the new matrix where it came from in the old matrix.*/
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void reorderBlockedMatrixByPattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat) {
|
void reorderBlockedMatrixByPattern(BlockedMatrix<block_size> *mat, int *toOrder, int *fromOrder, BlockedMatrix<block_size> *rmat) {
|
||||||
const unsigned int bs = block_size;
|
const unsigned int bs = block_size;
|
||||||
int rIndex = 0;
|
int rIndex = 0;
|
||||||
int i, k;
|
int i, k;
|
||||||
unsigned int j;
|
unsigned int j;
|
||||||
|
|
||||||
rMat->rowPointers[0] = 0;
|
rmat->rowPointers[0] = 0;
|
||||||
for (i = 0; i < mat->Nb; i++) {
|
for (i = 0; i < mat->Nb; i++) {
|
||||||
int thisRow = fromOrder[i];
|
int thisRow = fromOrder[i];
|
||||||
// put thisRow from the old matrix into row i of the new matrix
|
// put thisRow from the old matrix into row i of the new matrix
|
||||||
rMat->rowPointers[i + 1] = rMat->rowPointers[i] + mat->rowPointers[thisRow + 1] - mat->rowPointers[thisRow];
|
rmat->rowPointers[i + 1] = rmat->rowPointers[i] + mat->rowPointers[thisRow + 1] - mat->rowPointers[thisRow];
|
||||||
for (k = mat->rowPointers[thisRow]; k < mat->rowPointers[thisRow + 1]; k++) {
|
for (k = mat->rowPointers[thisRow]; k < mat->rowPointers[thisRow + 1]; k++) {
|
||||||
for (j = 0; j < bs * bs; j++){
|
for (j = 0; j < bs * bs; j++){
|
||||||
rMat->nnzValues[rIndex * bs * bs + j] = mat->nnzValues[k * bs * bs + j];
|
rmat->nnzValues[rIndex * bs * bs + j] = mat->nnzValues[k * bs * bs + j];
|
||||||
}
|
}
|
||||||
rMat->colIndices[rIndex] = mat->colIndices[k];
|
rmat->colIndices[rIndex] = mat->colIndices[k];
|
||||||
rIndex++;
|
rIndex++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// re-assign column indices according to the new positions of the nodes referenced by the column indices
|
// re-assign column indices according to the new positions of the nodes referenced by the column indices
|
||||||
for (i = 0; i < mat->nnzbs; i++) {
|
for (i = 0; i < mat->nnzbs; i++) {
|
||||||
rMat->colIndices[i] = toOrder[rMat->colIndices[i]];
|
rmat->colIndices[i] = toOrder[rmat->colIndices[i]];
|
||||||
}
|
}
|
||||||
// re-sort the column indices of every row.
|
// re-sort the column indices of every row.
|
||||||
for (i = 0; i < mat->Nb; i++) {
|
for (i = 0; i < mat->Nb; i++) {
|
||||||
sortBlockedRow<bs>(rMat->colIndices, rMat->nnzValues, rMat->rowPointers[i], rMat->rowPointers[i + 1] - 1);
|
sortBlockedRow<bs>(rmat->colIndices, rmat->nnzValues, rmat->rowPointers[i], rmat->rowPointers[i + 1] - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -377,7 +377,7 @@ void csrPatternToCsc(int *CSRColIndices, int *CSRRowPointers, int *CSCRowIndices
|
|||||||
|
|
||||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
template int colorBlockedNodes<n>(int, const int *, const int *, const int *, const int *, std::vector<int>&, int, int); \
|
template int colorBlockedNodes<n>(int, const int *, const int *, const int *, const int *, std::vector<int>&, int, int); \
|
||||||
template void reorderBlockedMatrixByPattern<n>(BlockedMatrix *, int *, int *, BlockedMatrix *); \
|
template void reorderBlockedMatrixByPattern<n>(BlockedMatrix<n> *, int *, int *, BlockedMatrix<n> *); \
|
||||||
template void reorderBlockedVectorByPattern<n>(int, double*, int*, double*); \
|
template void reorderBlockedVectorByPattern<n>(int, double*, int*, double*); \
|
||||||
template int* findGraphColoring<n>(const int *, const int *, const int *, const int *, int, int, int, int *, int *, int *); \
|
template int* findGraphColoring<n>(const int *, const int *, const int *, const int *, int, int, int, int *, int *, int *); \
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ int colorBlockedNodes(int rows, const int *CSRRowPointers, const int *CSRColIndi
|
|||||||
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
|
/// \param[in] fromOrder reorder pattern that lists for each index in the new order, from which index in the original order it was moved
|
||||||
/// \param[inout] rMat reordered Matrix
|
/// \param[inout] rMat reordered Matrix
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void reorderBlockedMatrixByPattern(BlockedMatrix *mat, int *toOrder, int *fromOrder, BlockedMatrix *rMat);
|
void reorderBlockedMatrixByPattern(BlockedMatrix<block_size> *mat, int *toOrder, int *fromOrder, BlockedMatrix<block_size> *rmat);
|
||||||
|
|
||||||
/// Compute reorder mapping from the color that each node has received
|
/// Compute reorder mapping from the color that each node has received
|
||||||
/// The toOrder, fromOrder and iters arrays must be allocated already
|
/// The toOrder, fromOrder and iters arrays must be allocated already
|
||||||
|
@ -468,12 +468,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
|||||||
vals_contiguous = new double[N];
|
vals_contiguous = new double[N];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
mat = new BlockedMatrix();
|
mat.reset(new BlockedMatrix<block_size>(Nb, nnzb, vals, cols, rows));
|
||||||
mat->Nb = Nb;
|
|
||||||
mat->nnzbs = nnzb;
|
|
||||||
mat->nnzValues = vals;
|
|
||||||
mat->colIndices = cols;
|
|
||||||
mat->rowPointers = rows;
|
|
||||||
|
|
||||||
d_x = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
d_x = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
d_b = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
d_b = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * N);
|
||||||
@ -520,7 +515,6 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
|||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::finalize() {
|
void openclSolverBackend<block_size>::finalize() {
|
||||||
delete mat;
|
|
||||||
delete[] rb;
|
delete[] rb;
|
||||||
delete[] tmp;
|
delete[] tmp;
|
||||||
#if COPY_ROW_BY_ROW
|
#if COPY_ROW_BY_ROW
|
||||||
@ -595,7 +589,7 @@ template <unsigned int block_size>
|
|||||||
bool openclSolverBackend<block_size>::analyse_matrix() {
|
bool openclSolverBackend<block_size>::analyse_matrix() {
|
||||||
Timer t;
|
Timer t;
|
||||||
|
|
||||||
bool success = prec->init(mat);
|
bool success = prec->init(mat.get());
|
||||||
int work_group_size = 32;
|
int work_group_size = 32;
|
||||||
int num_work_groups = ceilDivision(N, work_group_size);
|
int num_work_groups = ceilDivision(N, work_group_size);
|
||||||
int total_work_items = num_work_groups * work_group_size;
|
int total_work_items = num_work_groups * work_group_size;
|
||||||
@ -637,7 +631,7 @@ template <unsigned int block_size>
|
|||||||
bool openclSolverBackend<block_size>::create_preconditioner() {
|
bool openclSolverBackend<block_size>::create_preconditioner() {
|
||||||
Timer t;
|
Timer t;
|
||||||
|
|
||||||
bool result = prec->create_preconditioner(mat);
|
bool result = prec->create_preconditioner(mat.get());
|
||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
|
@ -68,7 +68,7 @@ private:
|
|||||||
cl::Buffer d_tmp; // used as tmp GPU buffer for dot() and norm()
|
cl::Buffer d_tmp; // used as tmp GPU buffer for dot() and norm()
|
||||||
double *tmp = nullptr; // used as tmp CPU buffer for dot() and norm()
|
double *tmp = nullptr; // used as tmp CPU buffer for dot() and norm()
|
||||||
|
|
||||||
// shared pointers are also passed to BILU0
|
// shared pointers are also passed to other objects
|
||||||
std::shared_ptr<cl::Context> context;
|
std::shared_ptr<cl::Context> context;
|
||||||
std::shared_ptr<cl::CommandQueue> queue;
|
std::shared_ptr<cl::CommandQueue> queue;
|
||||||
std::unique_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg> > dot_k;
|
std::unique_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg> > dot_k;
|
||||||
@ -81,7 +81,9 @@ private:
|
|||||||
|
|
||||||
Preconditioner *prec = nullptr; // only supported preconditioner is BILU0
|
Preconditioner *prec = nullptr; // only supported preconditioner is BILU0
|
||||||
int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings
|
int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings
|
||||||
BlockedMatrix *mat = nullptr, *rmat = nullptr; // normal and reordered matrix
|
std::unique_ptr<BlockedMatrix<block_size> > mat = nullptr; // original matrix
|
||||||
|
BlockedMatrix<block_size> *rmat = nullptr; // reordered matrix, used for spmv
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// Divide A by B, and round up: return (int)ceil(A/B)
|
/// Divide A by B, and round up: return (int)ceil(A/B)
|
||||||
|
Loading…
Reference in New Issue
Block a user