| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | /*
 | 
					
						
							|  |  |  |   Copyright 2019 Equinor ASA | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   This file is part of the Open Porous Media project (OPM). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   OPM is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  |   it under the terms of the GNU General Public License as published by | 
					
						
							|  |  |  |   the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  |   (at your option) any later version. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   OPM is distributed in the hope that it will be useful, | 
					
						
							|  |  |  |   but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  |   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  |   GNU General Public License for more details. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   You should have received a copy of the GNU General Public License | 
					
						
							|  |  |  |   along with OPM.  If not, see <http://www.gnu.org/licenses/>.
 | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-25 18:44:49 +02:00
										 |  |  | #include <config.h>
 | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | #include <opm/common/OpmLog/OpmLog.hpp>
 | 
					
						
							|  |  |  | #include <opm/common/ErrorMacros.hpp>
 | 
					
						
							| 
									
										
										
										
											2020-07-02 11:08:18 +02:00
										 |  |  | #include <opm/simulators/linalg/MatrixBlock.hpp>
 | 
					
						
							| 
									
										
										
										
											2020-07-01 14:50:34 +02:00
										 |  |  | #include <dune/common/timer.hh>
 | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | #include <opm/simulators/linalg/bda/BdaSolver.hpp>
 | 
					
						
							|  |  |  | #include <opm/simulators/linalg/bda/BILU0.hpp>
 | 
					
						
							| 
									
										
										
										
											2020-12-17 14:49:59 +01:00
										 |  |  | #include <opm/simulators/linalg/bda/fgpilu.hpp>
 | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | #include <opm/simulators/linalg/bda/Reorder.hpp>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace bda | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-17 14:49:59 +01:00
										 |  |  | // if CHOW_PATEL is 0, exact ILU decomposition is performed on CPU
 | 
					
						
							|  |  |  | // if CHOW_PATEL is 1, iterative ILU decomposition (FGPILU) is done, as described in:
 | 
					
						
							|  |  |  | //    FINE-GRAINED PARALLEL INCOMPLETE LU FACTORIZATION, E. Chow and A. Patel, SIAM 2015, https://doi.org/10.1137/140968896
 | 
					
						
							|  |  |  | // if CHOW_PATEL_GPU is 0, the decomposition is done on CPU
 | 
					
						
							|  |  |  | // if CHOW_PATEL_GPU is 1, the decomposition is done by bda::FGPILU::decomposition() on GPU
 | 
					
						
							| 
									
										
										
										
											2020-12-17 15:50:53 +01:00
										 |  |  | #define CHOW_PATEL     0
 | 
					
						
							| 
									
										
										
										
											2020-12-17 14:49:59 +01:00
										 |  |  | #define CHOW_PATEL_GPU 1
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |     using Opm::OpmLog; | 
					
						
							| 
									
										
										
										
											2020-07-01 14:50:34 +02:00
										 |  |  |     using Dune::Timer; | 
					
						
							| 
									
										
										
										
											2020-06-24 19:48:50 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-24 20:09:14 +02:00
										 |  |  |     template <unsigned int block_size> | 
					
						
							| 
									
										
										
										
											2020-10-16 15:05:02 +02:00
										 |  |  |     BILU0<block_size>::BILU0(ILUReorder opencl_ilu_reorder_, int verbosity_) : | 
					
						
							|  |  |  |         verbosity(verbosity_), opencl_ilu_reorder(opencl_ilu_reorder_) | 
					
						
							|  |  |  |     {} | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-24 20:09:14 +02:00
										 |  |  |     template <unsigned int block_size> | 
					
						
							|  |  |  |     BILU0<block_size>::~BILU0() | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |     { | 
					
						
							|  |  |  |         delete[] invDiagVals; | 
					
						
							|  |  |  |         delete[] diagIndex; | 
					
						
							| 
									
										
										
										
											2021-01-11 16:51:05 +01:00
										 |  |  |         if (opencl_ilu_reorder != ILUReorder::NONE) { | 
					
						
							|  |  |  |             delete[] toOrder; | 
					
						
							|  |  |  |             delete[] fromOrder; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-24 20:09:14 +02:00
										 |  |  |     template <unsigned int block_size> | 
					
						
							| 
									
										
										
										
											2020-07-03 14:10:31 +02:00
										 |  |  |     bool BILU0<block_size>::init(BlockedMatrix<block_size> *mat) | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2020-06-25 11:51:41 +02:00
										 |  |  |         const unsigned int bs = block_size; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         this->N = mat->Nb * block_size; | 
					
						
							|  |  |  |         this->Nb = mat->Nb; | 
					
						
							|  |  |  |         this->nnz = mat->nnzbs * block_size * block_size; | 
					
						
							|  |  |  |         this->nnzbs = mat->nnzbs; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-11 16:51:05 +01:00
										 |  |  |         int *CSCRowIndices = nullptr; | 
					
						
							|  |  |  |         int *CSCColPointers = nullptr; | 
					
						
							| 
									
										
										
										
											2020-07-03 14:10:31 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-11 16:51:05 +01:00
										 |  |  |         if (opencl_ilu_reorder == ILUReorder::NONE) { | 
					
						
							|  |  |  |             LUmat = std::make_unique<BlockedMatrix<block_size> >(*mat); | 
					
						
							|  |  |  |         } else { | 
					
						
							|  |  |  |             toOrder = new int[Nb]; | 
					
						
							|  |  |  |             fromOrder = new int[Nb]; | 
					
						
							|  |  |  |             CSCRowIndices = new int[nnzbs]; | 
					
						
							|  |  |  |             CSCColPointers = new int[Nb + 1]; | 
					
						
							|  |  |  |             rmat = std::make_shared<BlockedMatrix<block_size> >(mat->Nb, mat->nnzbs); | 
					
						
							|  |  |  |             LUmat = std::make_unique<BlockedMatrix<block_size> >(*rmat); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             Timer t_convert; | 
					
						
							|  |  |  |             csrPatternToCsc(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb); | 
					
						
							|  |  |  |             if(verbosity >= 3){ | 
					
						
							|  |  |  |                 std::ostringstream out; | 
					
						
							|  |  |  |                 out << "BILU0 convert CSR to CSC: " << t_convert.stop() << " s"; | 
					
						
							|  |  |  |                 OpmLog::info(out.str()); | 
					
						
							|  |  |  |             } | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-07-01 14:50:34 +02:00
										 |  |  |         Timer t_analysis; | 
					
						
							| 
									
										
										
										
											2020-10-15 10:52:10 +02:00
										 |  |  |         std::ostringstream out; | 
					
						
							| 
									
										
										
										
											2020-10-16 15:05:02 +02:00
										 |  |  |         if (opencl_ilu_reorder == ILUReorder::LEVEL_SCHEDULING) { | 
					
						
							| 
									
										
										
										
											2020-10-15 10:52:10 +02:00
										 |  |  |             out << "BILU0 reordering strategy: " << "level_scheduling\n"; | 
					
						
							| 
									
										
										
										
											2020-07-07 11:00:38 +02:00
										 |  |  |             findLevelScheduling(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, &numColors, toOrder, fromOrder, rowsPerColor); | 
					
						
							| 
									
										
										
										
											2020-10-16 15:05:02 +02:00
										 |  |  |         } else if (opencl_ilu_reorder == ILUReorder::GRAPH_COLORING) { | 
					
						
							| 
									
										
										
										
											2020-10-15 10:52:10 +02:00
										 |  |  |             out << "BILU0 reordering strategy: " << "graph_coloring\n"; | 
					
						
							| 
									
										
										
										
											2020-07-07 11:00:38 +02:00
										 |  |  |             findGraphColoring<block_size>(mat->colIndices, mat->rowPointers, CSCRowIndices, CSCColPointers, mat->Nb, mat->Nb, mat->Nb, &numColors, toOrder, fromOrder, rowsPerColor); | 
					
						
							| 
									
										
										
										
											2020-12-17 14:49:59 +01:00
										 |  |  |         } else if (opencl_ilu_reorder == ILUReorder::NONE) { | 
					
						
							|  |  |  |             out << "BILU0 reordering strategy: none\n"; | 
					
						
							| 
									
										
										
										
											2021-01-11 16:51:05 +01:00
										 |  |  |             // numColors = 1;
 | 
					
						
							|  |  |  |             // rowsPerColor.emplace_back(Nb);
 | 
					
						
							|  |  |  |             numColors = Nb; | 
					
						
							| 
									
										
										
										
											2020-12-17 14:49:59 +01:00
										 |  |  |             for(int i = 0; i < Nb; ++i){ | 
					
						
							| 
									
										
										
										
											2021-01-11 16:51:05 +01:00
										 |  |  |                 rowsPerColor.emplace_back(1); | 
					
						
							| 
									
										
										
										
											2020-12-17 14:49:59 +01:00
										 |  |  |             } | 
					
						
							| 
									
										
										
										
											2020-10-15 10:52:10 +02:00
										 |  |  |         } else { | 
					
						
							|  |  |  |             OPM_THROW(std::logic_error, "Error ilu reordering strategy not set correctly\n"); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         } | 
					
						
							|  |  |  |         if(verbosity >= 3){ | 
					
						
							| 
									
										
										
										
											2020-12-17 14:49:59 +01:00
										 |  |  |             out << "BILU0 analysis took: " << t_analysis.stop() << " s, " << numColors << " colors\n"; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         if(verbosity >= 2){ | 
					
						
							|  |  |  |             out << "BILU0 CHOW_PATEL: " << CHOW_PATEL << ", CHOW_PATEL_GPU: " << CHOW_PATEL_GPU; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2020-10-15 10:52:10 +02:00
										 |  |  |         OpmLog::info(out.str()); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-11 16:51:05 +01:00
										 |  |  |         if (opencl_ilu_reorder != ILUReorder::NONE) { | 
					
						
							|  |  |  |             delete[] CSCRowIndices; | 
					
						
							|  |  |  |             delete[] CSCColPointers; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2020-07-03 14:10:31 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         diagIndex = new int[mat->Nb]; | 
					
						
							| 
									
										
										
										
											2020-06-25 11:51:41 +02:00
										 |  |  |         invDiagVals = new double[mat->Nb * bs * bs]; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-07-03 14:10:31 +02:00
										 |  |  |         Lmat = std::make_unique<BlockedMatrix<block_size> >(mat->Nb, (mat->nnzbs - mat->Nb) / 2); | 
					
						
							|  |  |  |         Umat = std::make_unique<BlockedMatrix<block_size> >(mat->Nb, (mat->nnzbs - mat->Nb) / 2); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |         s.Lvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Lmat->nnzbs); | 
					
						
							|  |  |  |         s.Uvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * Umat->nnzbs); | 
					
						
							|  |  |  |         s.Lcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Lmat->nnzbs); | 
					
						
							|  |  |  |         s.Ucols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Umat->nnzbs); | 
					
						
							|  |  |  |         s.Lrows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Lmat->Nb + 1)); | 
					
						
							|  |  |  |         s.Urows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Umat->Nb + 1)); | 
					
						
							| 
									
										
										
										
											2020-06-25 11:51:41 +02:00
										 |  |  |         s.invDiagVals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * bs * bs * mat->Nb); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         s.rowsPerColor = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (numColors + 1)); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |         queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, Lmat->nnzbs * sizeof(double) * bs * bs, Lmat->nnzValues); | 
					
						
							|  |  |  |         queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, Umat->nnzbs * sizeof(double) * bs * bs, Umat->nnzValues); | 
					
						
							|  |  |  |         queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, Lmat->nnzbs * sizeof(int), Lmat->colIndices); | 
					
						
							|  |  |  |         queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, Umat->nnzbs * sizeof(int), Umat->colIndices); | 
					
						
							|  |  |  |         queue->enqueueWriteBuffer(s.Lrows, CL_TRUE, 0, (Lmat->Nb + 1) * sizeof(int), Lmat->rowPointers); | 
					
						
							|  |  |  |         queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (Umat->Nb + 1) * sizeof(int), Umat->rowPointers); | 
					
						
							| 
									
										
										
										
											2020-06-25 11:51:41 +02:00
										 |  |  |         queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, mat->Nb * sizeof(double) * bs * bs, invDiagVals); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         int *rowsPerColorPrefix = new int[numColors + 1]; | 
					
						
							| 
									
										
										
										
											2020-08-05 21:17:57 +02:00
										 |  |  |         rowsPerColorPrefix[0] = 0; | 
					
						
							|  |  |  |         for (int i = 0; i < numColors; ++i) { | 
					
						
							| 
									
										
										
										
											2020-08-07 09:38:10 +02:00
										 |  |  |             rowsPerColorPrefix[i+1] = rowsPerColorPrefix[i] + rowsPerColor[i]; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         } | 
					
						
							|  |  |  |         queue->enqueueWriteBuffer(s.rowsPerColor, CL_TRUE, 0, (numColors + 1) * sizeof(int), rowsPerColorPrefix); | 
					
						
							|  |  |  |         delete[] rowsPerColorPrefix; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return true; | 
					
						
							|  |  |  |     } // end init()
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-17 14:49:59 +01:00
										 |  |  |     // implements Fine-Grained Parallel ILU algorithm (FGPILU), Chow and Patel 2015
 | 
					
						
							|  |  |  |     template <unsigned int block_size> | 
					
						
							|  |  |  |     void BILU0<block_size>::chow_patel_decomposition() | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         const unsigned int bs = block_size; | 
					
						
							|  |  |  |         int num_sweeps = 6; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // split matrix into L and U
 | 
					
						
							|  |  |  |         // also convert U into BSC format (Ut)
 | 
					
						
							|  |  |  |         // Ut stores diagonal for now
 | 
					
						
							|  |  |  |         int num_blocks_L = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Ut is actually BSC format
 | 
					
						
							|  |  |  |         std::unique_ptr<BlockedMatrix<bs> > Ut = std::make_unique<BlockedMatrix<bs> >(Umat->Nb, Umat->nnzbs + Umat->Nb); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Lmat->rowPointers[0] = 0; | 
					
						
							|  |  |  |         for (int i = 0; i < Nb+1; i++) { | 
					
						
							|  |  |  |             Ut->rowPointers[i] = 0; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // for every row
 | 
					
						
							|  |  |  |         for (int i = 0; i < Nb; i++) { | 
					
						
							|  |  |  |             int iRowStart = LUmat->rowPointers[i]; | 
					
						
							|  |  |  |             int iRowEnd = LUmat->rowPointers[i + 1]; | 
					
						
							|  |  |  |             // for every block in this row
 | 
					
						
							|  |  |  |             for (int ij = iRowStart; ij < iRowEnd; ij++) { | 
					
						
							|  |  |  |                 int j = LUmat->colIndices[ij]; | 
					
						
							|  |  |  |                 if (i <= j) { | 
					
						
							|  |  |  |                     Ut->rowPointers[j+1]++;   // actually colPointers
 | 
					
						
							|  |  |  |                 } else { | 
					
						
							|  |  |  |                     Lmat->colIndices[num_blocks_L] = j; | 
					
						
							|  |  |  |                     memcpy(Lmat->nnzValues + num_blocks_L * bs * bs, LUmat->nnzValues + ij * bs * bs, sizeof(double) * bs * bs); | 
					
						
							|  |  |  |                     num_blocks_L++; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             Lmat->rowPointers[i+1] = num_blocks_L; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // prefix sum
 | 
					
						
							|  |  |  |         int sum = 0; | 
					
						
							|  |  |  |         for (int i = 1; i < Nb+1; i++) { | 
					
						
							|  |  |  |             sum += Ut->rowPointers[i]; | 
					
						
							|  |  |  |             Ut->rowPointers[i] = sum; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // for every row
 | 
					
						
							|  |  |  |         for (int i = 0; i < Nb; i++) { | 
					
						
							|  |  |  |             int iRowStart = LUmat->rowPointers[i]; | 
					
						
							|  |  |  |             int iRowEnd = LUmat->rowPointers[i + 1]; | 
					
						
							|  |  |  |             // for every block in this row
 | 
					
						
							|  |  |  |             for (int ij = iRowStart; ij < iRowEnd; ij++) { | 
					
						
							|  |  |  |                 int j = LUmat->colIndices[ij]; | 
					
						
							|  |  |  |                 if (i <= j){ | 
					
						
							|  |  |  |                     int idx = Ut->rowPointers[j]++; | 
					
						
							|  |  |  |                     Ut->colIndices[idx] = i;     // actually rowIndices
 | 
					
						
							|  |  |  |                     memcpy(Ut->nnzValues + idx * bs * bs, LUmat->nnzValues + ij * bs * bs, sizeof(double) * bs * bs); | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // rotate
 | 
					
						
							|  |  |  |         // the Ut->rowPointers were increased in the last loop
 | 
					
						
							|  |  |  |         for (int i = Nb; i > 0; --i) { | 
					
						
							|  |  |  |             Ut->rowPointers[i] = Ut->rowPointers[i-1]; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         Ut->rowPointers[0] = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Opm::Detail::Inverter<bs> inverter; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Utmp is needed for CPU and GPU decomposition, because U is transposed, and reversed at the end
 | 
					
						
							|  |  |  |         // Ltmp is only needed for CPU decomposition, GPU creates GPU buffer for Ltmp
 | 
					
						
							|  |  |  |         double *Utmp = new double[Ut->nnzbs * block_size * block_size]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // actual ILU decomposition
 | 
					
						
							|  |  |  | #if CHOW_PATEL_GPU
 | 
					
						
							|  |  |  |         fgpilu.decomposition(queue, context, | 
					
						
							|  |  |  |                     Ut->rowPointers, Ut->colIndices, Ut->nnzValues, Ut->nnzbs, | 
					
						
							|  |  |  |                     Lmat->rowPointers, Lmat->colIndices, Lmat->nnzValues, Lmat->nnzbs, | 
					
						
							|  |  |  |                     LUmat->rowPointers, LUmat->colIndices, LUmat->nnzValues, LUmat->nnzbs, | 
					
						
							|  |  |  |                     Nb, num_sweeps, verbosity); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |         double *Ltmp = new double[Lmat->nnzbs * block_size * block_size]; | 
					
						
							|  |  |  |         for (int sweep = 0; sweep < num_sweeps; ++sweep) { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             // for every row
 | 
					
						
							|  |  |  |             for (int row = 0; row < Nb; row++) { | 
					
						
							|  |  |  |                 // update U
 | 
					
						
							|  |  |  |                 int jColStart = Ut->rowPointers[row]; | 
					
						
							|  |  |  |                 int jColEnd = Ut->rowPointers[row + 1]; | 
					
						
							|  |  |  |                 // for every block in this row
 | 
					
						
							|  |  |  |                 for (int ij = jColStart; ij < jColEnd; ij++) { | 
					
						
							|  |  |  |                     int col = Ut->colIndices[ij]; | 
					
						
							|  |  |  |                     // refine Uij element (or diagonal)
 | 
					
						
							|  |  |  |                     int i1 = LUmat->rowPointers[col]; | 
					
						
							|  |  |  |                     int i2 = LUmat->rowPointers[col+1]; | 
					
						
							|  |  |  |                     int kk = 0; | 
					
						
							|  |  |  |                     for(kk = i1; kk < i2; ++kk) { | 
					
						
							|  |  |  |                         ptrdiff_t c = LUmat->colIndices[kk]; | 
					
						
							|  |  |  |                         if (c >= row) { | 
					
						
							|  |  |  |                             break; | 
					
						
							|  |  |  |                         } | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                     double aij[bs*bs]; | 
					
						
							|  |  |  |                     memcpy(&aij[0], LUmat->nnzValues + kk * bs * bs, sizeof(double) * bs * bs); | 
					
						
							|  |  |  |                     int jk = Lmat->rowPointers[col]; | 
					
						
							|  |  |  |                     int ik = (jk < Lmat->rowPointers[col+1]) ? Lmat->colIndices[jk] : Nb; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                     for (int k = jColStart; k < ij; ++k) { | 
					
						
							|  |  |  |                         int ki = Ut->colIndices[k]; | 
					
						
							|  |  |  |                         while (ik < ki) { | 
					
						
							|  |  |  |                             ++jk; | 
					
						
							|  |  |  |                             ik = Lmat->colIndices[jk]; | 
					
						
							|  |  |  |                         } | 
					
						
							|  |  |  |                         if (ik == ki) { | 
					
						
							|  |  |  |                             blockMultSub<bs>(&aij[0], Lmat->nnzValues + jk * bs * bs, Ut->nnzValues + k * bs * bs); | 
					
						
							|  |  |  |                         } | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                     memcpy(Utmp + ij * bs * bs, &aij[0], sizeof(double) * bs * bs); | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 // update L
 | 
					
						
							|  |  |  |                 int iRowStart = Lmat->rowPointers[row]; | 
					
						
							|  |  |  |                 int iRowEnd = Lmat->rowPointers[row + 1]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 for (int ij = iRowStart; ij < iRowEnd; ij++) { | 
					
						
							|  |  |  |                     int j = Lmat->colIndices[ij]; | 
					
						
							|  |  |  |                     // refine Lij element
 | 
					
						
							|  |  |  |                     int i1 = LUmat->rowPointers[row]; | 
					
						
							|  |  |  |                     int i2 = LUmat->rowPointers[row+1]; | 
					
						
							|  |  |  |                     int kk = 0; | 
					
						
							|  |  |  |                     for(kk = i1; kk < i2; ++kk) { | 
					
						
							|  |  |  |                         ptrdiff_t c = LUmat->colIndices[kk]; | 
					
						
							|  |  |  |                         if (c >= j) { | 
					
						
							|  |  |  |                             break; | 
					
						
							|  |  |  |                         } | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                     double aij[bs*bs]; | 
					
						
							|  |  |  |                     memcpy(&aij[0], LUmat->nnzValues + kk * bs * bs, sizeof(double) * bs * bs); | 
					
						
							|  |  |  |                     int jk = Ut->rowPointers[j]; | 
					
						
							|  |  |  |                     int ik = Ut->colIndices[jk]; | 
					
						
							|  |  |  |                     for (int k = iRowStart; k < ij; ++k) { | 
					
						
							|  |  |  |                         int ki = Lmat->colIndices[k]; | 
					
						
							|  |  |  |                         while(ik < ki) { | 
					
						
							|  |  |  |                             ++jk; | 
					
						
							|  |  |  |                             ik = Ut->colIndices[jk]; | 
					
						
							|  |  |  |                         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                         if(ik == ki) { | 
					
						
							|  |  |  |                             blockMultSub<bs>(&aij[0], Lmat->nnzValues + k * bs * bs , Ut->nnzValues + jk * bs * bs); | 
					
						
							|  |  |  |                         } | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                     // calculate aij / ujj
 | 
					
						
							|  |  |  |                     double ujj[bs*bs]; | 
					
						
							|  |  |  |                     inverter(Ut->nnzValues + (Ut->rowPointers[j+1] - 1) * bs * bs, &ujj[0]); | 
					
						
							|  |  |  |                     // lij = aij / ujj
 | 
					
						
							|  |  |  |                     blockMult<bs>(&aij[0], &ujj[0], Ltmp + ij * bs * bs); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             double *t = Lmat->nnzValues; | 
					
						
							|  |  |  |             Lmat->nnzValues = Ltmp; | 
					
						
							|  |  |  |             Ltmp = t; | 
					
						
							|  |  |  |             t = Ut->nnzValues; | 
					
						
							|  |  |  |             Ut->nnzValues = Utmp; | 
					
						
							|  |  |  |             Utmp = t; | 
					
						
							|  |  |  |         } // end sweep
 | 
					
						
							|  |  |  |         delete[] Ltmp; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // convert Ut to BSR
 | 
					
						
							|  |  |  |         // diagonal stored separately
 | 
					
						
							|  |  |  |         std::vector<int> ptr(Nb+1, 0); | 
					
						
							|  |  |  |         std::vector<int> col(Ut->rowPointers[Nb]); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // count blocks per row for U (BSR)
 | 
					
						
							|  |  |  |         // store diagonal in invDiagVals
 | 
					
						
							|  |  |  |         for(int i = 0; i < Nb; ++i) { | 
					
						
							|  |  |  |             for(int k = Ut->rowPointers[i]; k < Ut->rowPointers[i+1]; ++k) { | 
					
						
							|  |  |  |                 int j = Ut->colIndices[k]; | 
					
						
							|  |  |  |                 if (j == i) { | 
					
						
							|  |  |  |                     inverter(Ut->nnzValues + k * bs * bs, invDiagVals + i * bs * bs); | 
					
						
							|  |  |  |                 } else { | 
					
						
							|  |  |  |                     ++ptr[j+1]; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // prefix sum
 | 
					
						
							|  |  |  |         int sumU = 0; | 
					
						
							|  |  |  |         for (int i = 1; i < Nb+1; i++) { | 
					
						
							|  |  |  |             sumU += ptr[i]; | 
					
						
							|  |  |  |             ptr[i] = sumU; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // actually copy nonzero values for U
 | 
					
						
							|  |  |  |         for(int i = 0; i < Nb; ++i) { | 
					
						
							|  |  |  |             for(int k = Ut->rowPointers[i]; k < Ut->rowPointers[i+1]; ++k) { | 
					
						
							|  |  |  |                 int j = Ut->colIndices[k]; | 
					
						
							|  |  |  |                 if (j != i) { | 
					
						
							|  |  |  |                     int head = ptr[j]++; | 
					
						
							|  |  |  |                     col[head]  = i; | 
					
						
							|  |  |  |                     memcpy(Utmp + head * bs * bs, Ut->nnzValues + k * bs * bs, sizeof(double) * bs * bs); | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // the ptr[] were increased in the last loop
 | 
					
						
							|  |  |  |         std::rotate(ptr.begin(), ptr.end() - 1, ptr.end()); | 
					
						
							|  |  |  |         ptr.front() = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // reversing the rows of U, because that is the order they are used in
 | 
					
						
							|  |  |  |         int URowIndex = 0; | 
					
						
							|  |  |  |         int offsetU = 0;   // number of nnz blocks that are already copied to Umat
 | 
					
						
							|  |  |  |         Umat->rowPointers[0] = 0; | 
					
						
							|  |  |  |         for (int i = LUmat->Nb - 1; i >= 0; i--) { | 
					
						
							|  |  |  |             int rowSize = ptr[i + 1] - ptr[i];   // number of blocks in this row
 | 
					
						
							|  |  |  |             memcpy(Umat->nnzValues + offsetU * bs * bs, Utmp + ptr[i] * bs * bs, sizeof(double) * bs * bs * rowSize); | 
					
						
							|  |  |  |             memcpy(Umat->colIndices + offsetU, col.data() + ptr[i], sizeof(int) * rowSize); | 
					
						
							|  |  |  |             offsetU += rowSize; | 
					
						
							|  |  |  |             Umat->rowPointers[URowIndex + 1] = offsetU; | 
					
						
							|  |  |  |             URowIndex++; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         delete[] Utmp; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-24 20:09:14 +02:00
										 |  |  |     template <unsigned int block_size> | 
					
						
							| 
									
										
										
										
											2020-07-03 14:10:31 +02:00
										 |  |  |     bool BILU0<block_size>::create_preconditioner(BlockedMatrix<block_size> *mat) | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |     { | 
					
						
							| 
									
										
										
										
											2020-06-25 11:51:41 +02:00
										 |  |  |         const unsigned int bs = block_size; | 
					
						
							| 
									
										
										
										
											2021-01-11 16:51:05 +01:00
										 |  |  |         auto *m = mat; | 
					
						
							| 
									
										
										
										
											2020-07-03 14:10:31 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-11 16:51:05 +01:00
										 |  |  |         if (opencl_ilu_reorder != ILUReorder::NONE) { | 
					
						
							|  |  |  |             m = rmat.get(); | 
					
						
							|  |  |  |             Timer t_reorder; | 
					
						
							|  |  |  |             reorderBlockedMatrixByPattern<block_size>(mat, toOrder, fromOrder, rmat.get()); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if (verbosity >= 3){ | 
					
						
							|  |  |  |                 std::ostringstream out; | 
					
						
							|  |  |  |                 out << "BILU0 reorder matrix: " << t_reorder.stop() << " s"; | 
					
						
							|  |  |  |                 OpmLog::info(out.str()); | 
					
						
							|  |  |  |             } | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
 | 
					
						
							| 
									
										
										
										
											2021-01-11 16:51:05 +01:00
										 |  |  |         // this copy can have mat or rmat ->nnzValues as origin, depending on the reorder strategy
 | 
					
						
							| 
									
										
										
										
											2020-07-01 14:50:34 +02:00
										 |  |  |         Timer t_copy; | 
					
						
							| 
									
										
										
										
											2021-01-11 16:51:05 +01:00
										 |  |  |         memcpy(LUmat->nnzValues, m->nnzValues, sizeof(double) * bs * bs * m->nnzbs); | 
					
						
							| 
									
										
										
										
											2020-07-03 14:10:31 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         if (verbosity >= 3){ | 
					
						
							|  |  |  |             std::ostringstream out; | 
					
						
							| 
									
										
										
										
											2020-07-01 14:50:34 +02:00
										 |  |  |             out << "BILU0 memcpy: " << t_copy.stop() << " s"; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |             OpmLog::info(out.str()); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-17 14:49:59 +01:00
										 |  |  |         Timer t_decomposition; | 
					
						
							|  |  |  | #if CHOW_PATEL
 | 
					
						
							|  |  |  |         chow_patel_decomposition(); | 
					
						
							|  |  |  | #else
 | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         int i, j, ij, ik, jk; | 
					
						
							|  |  |  |         int iRowStart, iRowEnd, jRowEnd; | 
					
						
							| 
									
										
										
										
											2020-06-25 11:51:41 +02:00
										 |  |  |         double pivot[bs * bs]; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         int LSize = 0; | 
					
						
							| 
									
										
										
										
											2020-07-02 11:08:18 +02:00
										 |  |  |         Opm::Detail::Inverter<bs> inverter;   // reuse inverter to invert blocks
 | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         // go through all rows
 | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |         for (i = 0; i < LUmat->Nb; i++) { | 
					
						
							|  |  |  |             iRowStart = LUmat->rowPointers[i]; | 
					
						
							|  |  |  |             iRowEnd = LUmat->rowPointers[i + 1]; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |             // go through all elements of the row
 | 
					
						
							|  |  |  |             for (ij = iRowStart; ij < iRowEnd; ij++) { | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |                 j = LUmat->colIndices[ij]; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |                 // if the element is the diagonal, store the index and go to next row
 | 
					
						
							|  |  |  |                 if (j == i) { | 
					
						
							|  |  |  |                     diagIndex[i] = ij; | 
					
						
							|  |  |  |                     break; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 // if an element beyond the diagonal is reach, no diagonal was found
 | 
					
						
							|  |  |  |                 // throw an error now. TODO: perform reordering earlier to prevent this
 | 
					
						
							|  |  |  |                 if (j > i) { | 
					
						
							|  |  |  |                     std::ostringstream out; | 
					
						
							|  |  |  |                     out << "BILU0 Error could not find diagonal value in row: " << i; | 
					
						
							|  |  |  |                     OpmLog::error(out.str()); | 
					
						
							|  |  |  |                     return false; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 LSize++; | 
					
						
							|  |  |  |                 // calculate the pivot of this row
 | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |                 blockMult<bs>(LUmat->nnzValues + ij * bs * bs, invDiagVals + j * bs * bs, &pivot[0]); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |                 memcpy(LUmat->nnzValues + ij * bs * bs, &pivot[0], sizeof(double) * block_size * block_size); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |                 jRowEnd = LUmat->rowPointers[j + 1]; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |                 jk = diagIndex[j] + 1; | 
					
						
							|  |  |  |                 ik = ij + 1; | 
					
						
							|  |  |  |                 // substract that row scaled by the pivot from this row.
 | 
					
						
							|  |  |  |                 while (ik < iRowEnd && jk < jRowEnd) { | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |                     if (LUmat->colIndices[ik] == LUmat->colIndices[jk]) { | 
					
						
							|  |  |  |                         blockMultSub<bs>(LUmat->nnzValues + ik * bs * bs, pivot, LUmat->nnzValues + jk * bs * bs); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |                         ik++; | 
					
						
							|  |  |  |                         jk++; | 
					
						
							|  |  |  |                     } else { | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |                         if (LUmat->colIndices[ik] < LUmat->colIndices[jk]) | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |                         { ik++; } | 
					
						
							|  |  |  |                         else | 
					
						
							|  |  |  |                         { jk++; } | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             // store the inverse in the diagonal!
 | 
					
						
							| 
									
										
										
										
											2020-07-02 11:08:18 +02:00
										 |  |  |             inverter(LUmat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |             memcpy(LUmat->nnzValues + ij * bs * bs, invDiagVals + i * bs * bs, sizeof(double) * bs * bs); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |         Lmat->rowPointers[0] = 0; | 
					
						
							|  |  |  |         Umat->rowPointers[0] = 0; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         // Split the LU matrix into two by comparing column indices to diagonal indices
 | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |         for (i = 0; i < LUmat->Nb; i++) { | 
					
						
							|  |  |  |             int offsetL = Lmat->rowPointers[i]; | 
					
						
							|  |  |  |             int rowSize = diagIndex[i] - LUmat->rowPointers[i]; | 
					
						
							|  |  |  |             int offsetLU = LUmat->rowPointers[i]; | 
					
						
							|  |  |  |             memcpy(Lmat->nnzValues + offsetL * bs * bs, LUmat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize); | 
					
						
							|  |  |  |             memcpy(Lmat->colIndices + offsetL, LUmat->colIndices + offsetLU, sizeof(int) * rowSize); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |             offsetL += rowSize; | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |             Lmat->rowPointers[i + 1] = offsetL; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         } | 
					
						
							|  |  |  |         // Reverse the order or the (blocked) rows for the U matrix,
 | 
					
						
							|  |  |  |         // because the rows are accessed in reverse order when applying the ILU0
 | 
					
						
							|  |  |  |         int URowIndex = 0; | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |         for (i = LUmat->Nb - 1; i >= 0; i--) { | 
					
						
							|  |  |  |             int offsetU = Umat->rowPointers[URowIndex]; | 
					
						
							|  |  |  |             int rowSize = LUmat->rowPointers[i + 1] - diagIndex[i] - 1; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |             int offsetLU = diagIndex[i] + 1; | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |             memcpy(Umat->nnzValues + offsetU * bs * bs, LUmat->nnzValues + offsetLU * bs * bs, sizeof(double) * bs * bs * rowSize); | 
					
						
							|  |  |  |             memcpy(Umat->colIndices + offsetU, LUmat->colIndices + offsetLU, sizeof(int) * rowSize); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |             offsetU += rowSize; | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |             Umat->rowPointers[URowIndex + 1] = offsetU; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |             URowIndex++; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2020-12-17 14:49:59 +01:00
										 |  |  | #endif
 | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         if (verbosity >= 3) { | 
					
						
							|  |  |  |             std::ostringstream out; | 
					
						
							| 
									
										
										
										
											2020-07-01 14:50:34 +02:00
										 |  |  |             out << "BILU0 decomposition: " << t_decomposition.stop() << " s"; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |             OpmLog::info(out.str()); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-07-01 14:50:34 +02:00
										 |  |  |         Timer t_copyToGpu; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         if (pattern_uploaded == false) { | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |             queue->enqueueWriteBuffer(s.Lcols, CL_TRUE, 0, Lmat->nnzbs * sizeof(int), Lmat->colIndices); | 
					
						
							|  |  |  |             queue->enqueueWriteBuffer(s.Ucols, CL_TRUE, 0, Umat->nnzbs * sizeof(int), Umat->colIndices); | 
					
						
							|  |  |  |             queue->enqueueWriteBuffer(s.Lrows, CL_TRUE, 0, (Lmat->Nb + 1) * sizeof(int), Lmat->rowPointers); | 
					
						
							|  |  |  |             queue->enqueueWriteBuffer(s.Urows, CL_TRUE, 0, (Umat->Nb + 1) * sizeof(int), Umat->rowPointers); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |             pattern_uploaded = true; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2020-07-01 15:16:14 +02:00
										 |  |  |         queue->enqueueWriteBuffer(s.Lvals, CL_TRUE, 0, Lmat->nnzbs * sizeof(double) * bs * bs, Lmat->nnzValues); | 
					
						
							|  |  |  |         queue->enqueueWriteBuffer(s.Uvals, CL_TRUE, 0, Umat->nnzbs * sizeof(double) * bs * bs, Umat->nnzValues); | 
					
						
							| 
									
										
										
										
											2020-06-25 11:51:41 +02:00
										 |  |  |         queue->enqueueWriteBuffer(s.invDiagVals, CL_TRUE, 0, Nb * sizeof(double) * bs * bs, invDiagVals); | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         if (verbosity >= 3) { | 
					
						
							|  |  |  |             std::ostringstream out; | 
					
						
							| 
									
										
										
										
											2020-07-01 14:50:34 +02:00
										 |  |  |             out << "BILU0 copy to GPU: " << t_copyToGpu.stop() << " s"; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |             OpmLog::info(out.str()); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return true; | 
					
						
							|  |  |  |     } // end create_preconditioner()
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // kernels are blocking on an NVIDIA GPU, so waiting for events is not needed
 | 
					
						
							|  |  |  |     // however, if individual kernel calls are timed, waiting for events is needed
 | 
					
						
							|  |  |  |     // behavior on other GPUs is untested
 | 
					
						
							| 
									
										
										
										
											2020-06-24 20:09:14 +02:00
										 |  |  |     template <unsigned int block_size> | 
					
						
							|  |  |  |     void BILU0<block_size>::apply(cl::Buffer& x, cl::Buffer& y) | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |     { | 
					
						
							|  |  |  |         cl::Event event; | 
					
						
							| 
									
										
										
										
											2020-07-01 14:50:34 +02:00
										 |  |  |         Timer t_apply; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-25 09:40:20 +02:00
										 |  |  |         for(int color = 0; color < numColors; ++color){ | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |             event = (*ILU_apply1)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), s.Lvals, s.Lcols, s.Lrows, (unsigned int)Nb, x, y, s.rowsPerColor, color, block_size, cl::Local(lmem_per_work_group)); | 
					
						
							|  |  |  |             // event.wait();
 | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2020-12-17 14:49:59 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |         for(int color = numColors-1; color >= 0; --color){ | 
					
						
							|  |  |  |             event = (*ILU_apply2)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), s.Uvals, s.Ucols, s.Urows, (unsigned int)Nb, s.invDiagVals, y, s.rowsPerColor, color, block_size, cl::Local(lmem_per_work_group)); | 
					
						
							|  |  |  |             // event.wait();
 | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (verbosity >= 3) { | 
					
						
							|  |  |  |             event.wait(); | 
					
						
							|  |  |  |             std::ostringstream out; | 
					
						
							| 
									
										
										
										
											2020-07-01 14:50:34 +02:00
										 |  |  |             out << "BILU0 apply: " << t_apply.stop() << " s"; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |             OpmLog::info(out.str()); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-24 20:09:14 +02:00
										 |  |  |     template <unsigned int block_size> | 
					
						
							| 
									
										
										
										
											2020-06-25 09:40:20 +02:00
										 |  |  |     void BILU0<block_size>::setOpenCLContext(cl::Context *context_){ | 
					
						
							|  |  |  |         this->context = context_; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2020-06-24 20:09:14 +02:00
										 |  |  |     template <unsigned int block_size> | 
					
						
							| 
									
										
										
										
											2020-06-25 09:40:20 +02:00
										 |  |  |     void BILU0<block_size>::setOpenCLQueue(cl::CommandQueue *queue_){ | 
					
						
							|  |  |  |         this->queue = queue_; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2020-06-24 20:09:14 +02:00
										 |  |  |     template <unsigned int block_size> | 
					
						
							| 
									
										
										
										
											2020-06-25 09:40:20 +02:00
										 |  |  |     void BILU0<block_size>::setKernelParameters(const unsigned int work_group_size_, const unsigned int total_work_items_, const unsigned int lmem_per_work_group_){ | 
					
						
							|  |  |  |         this->work_group_size = work_group_size_; | 
					
						
							|  |  |  |         this->total_work_items = total_work_items_; | 
					
						
							|  |  |  |         this->lmem_per_work_group = lmem_per_work_group_; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2020-06-24 20:09:14 +02:00
										 |  |  |     template <unsigned int block_size> | 
					
						
							|  |  |  |     void BILU0<block_size>::setKernels( | 
					
						
							| 
									
										
										
										
											2020-06-23 11:30:15 +02:00
										 |  |  |         cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *ILU_apply1_, | 
					
						
							|  |  |  |         cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *ILU_apply2_ | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |     ){ | 
					
						
							| 
									
										
										
										
											2020-06-23 11:30:15 +02:00
										 |  |  |         this->ILU_apply1 = ILU_apply1_; | 
					
						
							|  |  |  |         this->ILU_apply2 = ILU_apply2_; | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-24 20:09:14 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | #define INSTANTIATE_BDA_FUNCTIONS(n)                                                     \
 | 
					
						
							| 
									
										
										
										
											2020-10-16 15:05:02 +02:00
										 |  |  | template BILU0<n>::BILU0(ILUReorder, int);                                               \ | 
					
						
							| 
									
										
										
										
											2020-07-01 19:43:22 +02:00
										 |  |  | template BILU0<n>::~BILU0();                                                             \ | 
					
						
							| 
									
										
										
										
											2020-07-03 14:10:31 +02:00
										 |  |  | template bool BILU0<n>::init(BlockedMatrix<n>*);                                         \ | 
					
						
							| 
									
										
										
										
											2020-12-17 14:49:59 +01:00
										 |  |  | template void BILU0<n>::chow_patel_decomposition();                                      \ | 
					
						
							| 
									
										
										
										
											2020-07-03 14:10:31 +02:00
										 |  |  | template bool BILU0<n>::create_preconditioner(BlockedMatrix<n>*);                        \ | 
					
						
							| 
									
										
										
										
											2020-06-24 20:09:14 +02:00
										 |  |  | template void BILU0<n>::apply(cl::Buffer& x, cl::Buffer& y);                             \ | 
					
						
							|  |  |  | template void BILU0<n>::setOpenCLContext(cl::Context*);                                  \ | 
					
						
							|  |  |  | template void BILU0<n>::setOpenCLQueue(cl::CommandQueue*);                               \ | 
					
						
							|  |  |  | template void BILU0<n>::setKernelParameters(unsigned int, unsigned int, unsigned int);   \ | 
					
						
							|  |  |  | template void BILU0<n>::setKernels(                                                      \ | 
					
						
							|  |  |  |     cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *, \ | 
					
						
							|  |  |  |     cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *  \ | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | INSTANTIATE_BDA_FUNCTIONS(1); | 
					
						
							|  |  |  | INSTANTIATE_BDA_FUNCTIONS(2); | 
					
						
							|  |  |  | INSTANTIATE_BDA_FUNCTIONS(3); | 
					
						
							|  |  |  | INSTANTIATE_BDA_FUNCTIONS(4); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #undef INSTANTIATE_BDA_FUNCTIONS
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-22 18:26:49 +02:00
										 |  |  | } // end namespace bda
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 |