Formatting changes

This commit is contained in:
Tong Dong Qiu 2021-11-30 16:05:58 +01:00
parent 0881089406
commit e0a4d271ea
2 changed files with 267 additions and 265 deletions

View File

@ -47,6 +47,7 @@ BILU0<block_size>::BILU0(ILUReorder opencl_ilu_reorder_, int verbosity_) :
#endif
}
template <unsigned int block_size>
BILU0<block_size>::~BILU0()
{
@ -61,9 +62,10 @@ void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& con
queue = queue_.get();
}
template <unsigned int block_size>
bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
{
template <unsigned int block_size>
bool BILU0<block_size>::analyze_matrix(BlockedMatrix *mat)
{
const unsigned int bs = block_size;
this->N = mat->Nb * block_size;
@ -86,7 +88,7 @@ void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& con
Timer t_convert;
csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb);
if(verbosity >= 3){
if (verbosity >= 3) {
std::ostringstream out;
out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s";
OpmLog::info(out.str());
@ -106,13 +108,13 @@ void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& con
// numColors = 1;
// rowsPerColor.emplace_back(Nb);
numColors = Nb;
for(int i = 0; i < Nb; ++i){
for (int i = 0; i < Nb; ++i) {
rowsPerColor.emplace_back(1);
}
} else {
OPM_THROW(std::logic_error, "Error ilu reordering strategy not set correctly\n");
}
if(verbosity >= 1){
if (verbosity >= 1) {
out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors\n";
}
#if CHOW_PATEL
@ -157,7 +159,7 @@ void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& con
rowsPerColorPrefix.resize(numColors + 1); // resize value-initializes newly added elements to 0
for (int i = 0; i < numColors; ++i) {
rowsPerColorPrefix[i+1] = rowsPerColorPrefix[i] + rowsPerColor[i];
rowsPerColorPrefix[i + 1] = rowsPerColorPrefix[i] + rowsPerColor[i];
}
err |= queue->enqueueWriteBuffer(s.rowsPerColor, CL_FALSE, 0, (numColors + 1) * sizeof(int), rowsPerColorPrefix.data(), nullptr, &events[1]);
@ -172,10 +174,9 @@ void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& con
} // end init()
template <unsigned int block_size>
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
{
template <unsigned int block_size>
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
{
const unsigned int bs = block_size;
auto *m = mat;
@ -184,7 +185,7 @@ void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& con
Timer t_reorder;
reorderBlockedMatrixByPattern(mat, toOrder.data(), fromOrder.data(), rmat.get());
if (verbosity >= 3){
if (verbosity >= 3) {
std::ostringstream out;
out << "BILU0 reorder matrix: " << t_reorder.stop() << " s";
OpmLog::info(out.str());
@ -196,7 +197,7 @@ void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& con
Timer t_copy;
memcpy(LUmat->nnzValues, m->nnzValues, sizeof(double) * bs * bs * m->nnzbs);
if (verbosity >= 3){
if (verbosity >= 3) {
std::ostringstream out;
out << "BILU0 memcpy: " << t_copy.stop() << " s";
OpmLog::info(out.str());
@ -215,12 +216,12 @@ void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& con
events.resize(1);
err = queue->enqueueWriteBuffer(s.LUvals, CL_FALSE, 0, LUmat->nnzbs * bs * bs * sizeof(double), LUmat->nnzValues, nullptr, &events[0]);
std::call_once(pattern_uploaded, [&](){
std::call_once(pattern_uploaded, [&]() {
// find the positions of each diagonal block
// must be done after reordering
for (int row = 0; row < Nb; ++row) {
int rowStart = LUmat->rowPointers[row];
int rowEnd = LUmat->rowPointers[row+1];
int rowEnd = LUmat->rowPointers[row + 1];
auto candidate = std::find(LUmat->colIndices + rowStart, LUmat->colIndices + rowEnd, row);
assert(candidate != LUmat->colIndices + rowEnd);
@ -250,7 +251,7 @@ void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& con
cl::Event event;
for (int color = 0; color < numColors; ++color) {
const unsigned int firstRow = rowsPerColorPrefix[color];
const unsigned int lastRow = rowsPerColorPrefix[color+1];
const unsigned int lastRow = rowsPerColorPrefix[color + 1];
if (verbosity >= 4) {
out << "color " << color << ": " << firstRow << " - " << lastRow << " = " << lastRow - firstRow << "\n";
}
@ -264,19 +265,20 @@ void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& con
#endif // CHOW_PATEL
return true;
} // end create_preconditioner()
} // end create_preconditioner()
// kernels are blocking on an NVIDIA GPU, so waiting for events is not needed
// however, if individual kernel calls are timed, waiting for events is needed
// behavior on other GPUs is untested
template <unsigned int block_size>
void BILU0<block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
{
// kernels are blocking on an NVIDIA GPU, so waiting for events is not needed
// however, if individual kernel calls are timed, waiting for events is needed
// behavior on other GPUs is untested
template <unsigned int block_size>
void BILU0<block_size>::apply(const cl::Buffer& y, cl::Buffer& x)
{
const double relaxation = 0.9;
cl::Event event;
Timer t_apply;
for(int color = 0; color < numColors; ++color){
for (int color = 0; color < numColors; ++color) {
#if CHOW_PATEL
OpenclKernels::ILU_apply1(s.Lvals, s.Lcols, s.Lrows, s.diagIndex, y, x, s.rowsPerColor, color, Nb, block_size);
#else
@ -284,7 +286,7 @@ void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& con
#endif
}
for(int color = numColors-1; color >= 0; --color){
for (int color = numColors - 1; color >= 0; --color) {
#if CHOW_PATEL
OpenclKernels::ILU_apply2(s.Uvals, s.Ucols, s.Urows, s.diagIndex, s.invDiagVals, x, s.rowsPerColor, color, Nb, block_size);
#else
@ -300,7 +302,7 @@ void BILU0<block_size>::init(int Nb, int nnzb, std::shared_ptr<cl::Context>& con
out << "BILU0 apply: " << t_apply.stop() << " s";
OpmLog::info(out.str());
}
}
}

View File

@ -36,11 +36,11 @@ namespace Opm
namespace Accelerator
{
/// This class implements a Blocked ILU0 preconditioner
/// The decomposition is done on CPU, and reorders the rows of the matrix
template <unsigned int block_size>
class BILU0 : public Preconditioner<block_size>
{
/// This class implements a Blocked ILU0 preconditioner
/// The decomposition is done on CPU, and reorders the rows of the matrix
template <unsigned int block_size>
class BILU0 : public Preconditioner<block_size>
{
typedef Preconditioner<block_size> Base;
using Base::N;
@ -49,7 +49,7 @@ namespace Accelerator
using Base::nnzb;
using Base::verbosity;
private:
private:
std::unique_ptr<BlockedMatrix> LUmat = nullptr;
std::shared_ptr<BlockedMatrix> rmat = nullptr; // only used with PAR_SIM
#if CHOW_PATEL
@ -87,7 +87,7 @@ namespace Accelerator
ChowPatelIlu<block_size> chowPatelIlu;
#endif
public:
public:
BILU0(ILUReorder opencl_ilu_reorder, int verbosity);
@ -119,7 +119,7 @@ namespace Accelerator
return rmat.get();
}
};
};
} // namespace Accelerator
} // namespace Opm