From 1df9d2e51f6d3596143898e2ba0171f162066bb6 Mon Sep 17 00:00:00 2001 From: Eduardo Bueno Date: Thu, 6 Jan 2022 16:31:40 -0300 Subject: [PATCH] Adds BISAI preconditioner to openclSolver --- CMakeLists_files.cmake | 5 + opm/simulators/linalg/bda/BILU0.cpp | 1 - opm/simulators/linalg/bda/BILU0.hpp | 10 + opm/simulators/linalg/bda/BISAI.cpp | 275 ++++++ opm/simulators/linalg/bda/BISAI.hpp | 156 ++++ .../linalg/bda/opencl/Preconditioner.cpp | 3 + .../linalg/bda/opencl/Preconditioner.hpp | 3 +- .../linalg/bda/opencl/kernels/isaiL.cl | 80 ++ .../linalg/bda/opencl/kernels/isaiU.cl | 84 ++ .../bda/opencl/kernels/spmv_blocked_add.cl | 68 ++ opm/simulators/linalg/bda/openclKernels.cpp | 57 +- opm/simulators/linalg/bda/openclKernels.hpp | 23 +- .../linalg/bda/openclSolverBackend.cpp | 10 +- opm/simulators/linalg/setupPropertyTree.cpp | 22 +- opm/simulators/linalg/setupPropertyTree.hpp | 1 + tests/offset_map_matrix.txt | 850 ++++++++++++++++++ tests/offset_map_matrix_transposed.txt | 850 ++++++++++++++++++ tests/test_csrToCscOffsetMap.cpp | 63 ++ 18 files changed, 2551 insertions(+), 10 deletions(-) create mode 100644 opm/simulators/linalg/bda/BISAI.cpp create mode 100644 opm/simulators/linalg/bda/BISAI.hpp create mode 100644 opm/simulators/linalg/bda/opencl/kernels/isaiL.cl create mode 100644 opm/simulators/linalg/bda/opencl/kernels/isaiU.cl create mode 100644 opm/simulators/linalg/bda/opencl/kernels/spmv_blocked_add.cl create mode 100644 tests/offset_map_matrix.txt create mode 100644 tests/offset_map_matrix_transposed.txt create mode 100644 tests/test_csrToCscOffsetMap.cpp diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index a3d832a38..70b6f8481 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -100,6 +100,7 @@ if(OPENCL_FOUND) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BILU0.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/Reorder.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/ChowPatelIlu.cpp) + 
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BISAI.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/CPR.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/openclKernels.cpp) @@ -137,6 +138,7 @@ list (APPEND TEST_SOURCE_FILES tests/test_ecl_output.cc tests/test_blackoil_amg.cpp tests/test_convergencereport.cpp + tests/test_csrToCscOffsetMap.cpp tests/test_flexiblesolver.cpp tests/test_preconditionerfactory.cpp tests/test_graphcoloring.cpp @@ -203,6 +205,8 @@ list (APPEND TEST_DATA_FILES tests/norne_pvt.data tests/wells_no_perforation.data tests/matr33.txt + tests/offset_map_matrix.txt + tests/offset_map_matrix_transposed.txt tests/rhs3.txt tests/matr33rep.txt tests/rhs3rep.txt @@ -255,6 +259,7 @@ list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/bda/cuda_header.hpp opm/simulators/linalg/bda/cusparseSolverBackend.hpp opm/simulators/linalg/bda/ChowPatelIlu.hpp + opm/simulators/linalg/bda/BISAI.hpp opm/simulators/linalg/bda/FPGABILU0.hpp opm/simulators/linalg/bda/FPGASolverBackend.hpp opm/simulators/linalg/bda/FPGAUtils.hpp diff --git a/opm/simulators/linalg/bda/BILU0.cpp b/opm/simulators/linalg/bda/BILU0.cpp index 73a169d68..877b6d676 100644 --- a/opm/simulators/linalg/bda/BILU0.cpp +++ b/opm/simulators/linalg/bda/BILU0.cpp @@ -244,7 +244,6 @@ bool BILU0::create_preconditioner(BlockedMatrix *mat) return true; } // end create_preconditioner() - // kernels are blocking on an NVIDIA GPU, so waiting for events is not needed // however, if individual kernel calls are timed, waiting for events is needed // behavior on other GPUs is untested diff --git a/opm/simulators/linalg/bda/BILU0.hpp b/opm/simulators/linalg/bda/BILU0.hpp index aee2edc3e..48f26db9c 100644 --- a/opm/simulators/linalg/bda/BILU0.hpp +++ b/opm/simulators/linalg/bda/BILU0.hpp @@ -115,6 +115,16 @@ public: return rmat.get(); } + std::tuple, std::vector, std::vector> get_preconditioner_structure() + { + 
return {{LUmat->rowPointers, LUmat->rowPointers + (Nb + 1)}, {LUmat->colIndices, LUmat->colIndices + nnzb}, diagIndex}; + } + + std::pair get_preconditioner_data() + { + return std::make_pair(s.LUvals, s.invDiagVals); + } + }; } // namespace Accelerator diff --git a/opm/simulators/linalg/bda/BISAI.cpp b/opm/simulators/linalg/bda/BISAI.cpp new file mode 100644 index 000000000..4a77abfa7 --- /dev/null +++ b/opm/simulators/linalg/bda/BISAI.cpp @@ -0,0 +1,275 @@ +/* + Copyright 2022 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . 
+*/ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +namespace Opm +{ +namespace Accelerator +{ + +using Opm::OpmLog; +using Dune::Timer; + +template +BISAI::BISAI(ILUReorder opencl_ilu_reorder_, int verbosity_) : + Preconditioner(verbosity_) +{ + bilu0 = std::make_unique >(opencl_ilu_reorder_, verbosity_); +} + +template +void BISAI::setOpencl(std::shared_ptr& context_, std::shared_ptr& queue_) +{ + context = context_; + queue = queue_; + + bilu0->setOpencl(context, queue); +} + +std::vector buildCsrToCscOffsetMap(std::vector colPointers, std::vector rowIndices){ + std::vector aux(colPointers); // colPointers must be copied to this vector + std::vector csrToCscOffsetMap(rowIndices.size()); // map must have the same size as the indices vector + + for(unsigned int row = 0; row < colPointers.size() - 1; row++){ + for(int jj = colPointers[row]; jj < colPointers[row+1]; jj++){ + int col = rowIndices[jj]; + int dest = aux[col]; + csrToCscOffsetMap[dest] = jj; + aux[col]++; + } + } + + return csrToCscOffsetMap; +} + +template +bool BISAI::analyze_matrix(BlockedMatrix *mat) +{ + const unsigned int bs = block_size; + + this->N = mat->Nb * bs; + this->Nb = mat->Nb; + this->nnz = mat->nnzbs * bs * bs; + this->nnzb = mat->nnzbs; + + bilu0->analyze_matrix(mat); + + return true; +} + +template +void BISAI::buildLowerSubsystemsStructures(){ + lower.subsystemPointers.assign(Nb + 1, 0); + + Dune::Timer t_buildLowerSubsystemsStructures; + + for(int tcol = 0; tcol < Nb; tcol++){ + int frow = diagIndex[tcol] + 1; + int lrow = colPointers[tcol + 1]; + int nx = lrow - frow; + int nv = 0; + + for(int sweep = 0; sweep < nx - 1; sweep++){ + for(int xid = sweep + 1; xid < nx; xid++){ + for(int ptr = diagIndex[rowIndices[frow + sweep]] + 1; ptr < colPointers[rowIndices[frow + sweep + 1]]; ptr++){ + if(rowIndices[ptr] == rowIndices[frow + xid]){ + lower.nzIndices.push_back(csrToCscOffsetMap[ptr]); + 
lower.knownRhsIndices.push_back(csrToCscOffsetMap[frow + sweep]); + lower.unknownRhsIndices.push_back(csrToCscOffsetMap[frow + xid]); + nv++; + } + } + } + } + + lower.subsystemPointers[tcol + 1] = lower.subsystemPointers[tcol] + nv; + } + + if(verbosity >= 4){ + std::ostringstream out; + out << "BISAI buildLowerSubsystemsStructures time: " << t_buildLowerSubsystemsStructures.stop() << " s"; + OpmLog::info(out.str()); + } +} + +template +void BISAI::buildUpperSubsystemsStructures(){ + upper.subsystemPointers.assign(Nb + 1, 0); + + Dune::Timer t_buildUpperSubsystemsStructures; + + for(int tcol = 0; tcol < Nb; tcol++){ + int frow = colPointers[tcol]; + int lrow = diagIndex[tcol]; + int nx = lrow - frow + 1; + int nv = 0; + + for(int sweep = 0; sweep < nx - 1; sweep++){ + for(int xid = 0; xid < nx; xid++){ + for(int ptr = colPointers[rowIndices[lrow - sweep]]; ptr < diagIndex[rowIndices[lrow - sweep]]; ptr++){ + if(rowIndices[ptr] == rowIndices[lrow - xid]){ + upper.nzIndices.push_back(csrToCscOffsetMap[ptr]); + upper.knownRhsIndices.push_back(csrToCscOffsetMap[lrow - sweep]); + upper.unknownRhsIndices.push_back(csrToCscOffsetMap[lrow - xid]); + nv++; + } + } + } + } + + upper.subsystemPointers[tcol + 1] = upper.subsystemPointers[tcol] + nv; + } + + if(verbosity >= 4){ + std::ostringstream out; + out << "BISAI buildUpperSubsystemsStructures time: " << t_buildUpperSubsystemsStructures.stop() << " s"; + OpmLog::info(out.str()); + } +} + +template +bool BISAI::create_preconditioner(BlockedMatrix *mat) +{ + const unsigned int bs = block_size; + + if (bs != 3) { + OPM_THROW(std::logic_error, "Creation of ISAI preconditioner on GPU only supports block_size = 3"); + } + + Dune::Timer t_preconditioner; + + bilu0->create_preconditioner(mat); + + std::call_once(initialize, [&]() { + std::tie(colPointers, rowIndices, diagIndex) = bilu0->get_preconditioner_structure(); + + csrToCscOffsetMap = buildCsrToCscOffsetMap(colPointers, rowIndices); + buildLowerSubsystemsStructures(); + 
buildUpperSubsystemsStructures(); + + d_colPointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * colPointers.size()); + d_rowIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * rowIndices.size()); + d_csrToCscOffsetMap = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * csrToCscOffsetMap.size()); + d_diagIndex = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * diagIndex.size()); + d_invLvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * nnzb * bs * bs); + d_invUvals = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * nnzb * bs * bs); + d_invL_x = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * Nb * bs); + d_lower.subsystemPointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * lower.subsystemPointers.size()); + d_upper.subsystemPointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * upper.subsystemPointers.size()); + + if(!lower.nzIndices.empty()){ // knownRhsIndices and unknownRhsIndices will also be empty if nzIndices is empty + d_lower.nzIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * lower.nzIndices.size()); + d_lower.knownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * lower.knownRhsIndices.size()); + d_lower.unknownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * lower.unknownRhsIndices.size()); + } + + if(!upper.nzIndices.empty()){ // knownRhsIndices and unknownRhsIndices will also be empty if nzIndices is empty + d_upper.nzIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * upper.nzIndices.size()); + d_upper.knownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * upper.knownRhsIndices.size()); + d_upper.unknownRhsIndices = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * upper.unknownRhsIndices.size()); + } + + events.resize(6); + err = queue->enqueueWriteBuffer(d_colPointers, CL_FALSE, 0, colPointers.size() * sizeof(int), colPointers.data(), nullptr, &events[0]); + err |= 
queue->enqueueWriteBuffer(d_rowIndices, CL_FALSE, 0, rowIndices.size() * sizeof(int), rowIndices.data(), nullptr, &events[1]); + err |= queue->enqueueWriteBuffer(d_csrToCscOffsetMap, CL_FALSE, 0, csrToCscOffsetMap.size() * sizeof(int), csrToCscOffsetMap.data(), nullptr, &events[2]); + err |= queue->enqueueWriteBuffer(d_diagIndex, CL_FALSE, 0, diagIndex.size() * sizeof(int), diagIndex.data(), nullptr, &events[3]); + err |= queue->enqueueWriteBuffer(d_lower.subsystemPointers, CL_FALSE, 0, sizeof(int) * lower.subsystemPointers.size(), lower.subsystemPointers.data(), nullptr, &events[4]); + err |= queue->enqueueWriteBuffer(d_upper.subsystemPointers, CL_FALSE, 0, sizeof(int) * upper.subsystemPointers.size(), upper.subsystemPointers.data(), nullptr, &events[5]); + + if(!lower.nzIndices.empty()){ + events.resize(events.size() + 3); + err |= queue->enqueueWriteBuffer(d_lower.nzIndices, CL_FALSE, 0, sizeof(int) * lower.nzIndices.size(), lower.nzIndices.data(), nullptr, &events[events.size() - 3]); + err |= queue->enqueueWriteBuffer(d_lower.knownRhsIndices, CL_FALSE, 0, sizeof(int) * lower.knownRhsIndices.size(), lower.knownRhsIndices.data(), nullptr, &events[events.size() - 2]); + err |= queue->enqueueWriteBuffer(d_lower.unknownRhsIndices, CL_FALSE, 0, sizeof(int) * lower.unknownRhsIndices.size(), lower.unknownRhsIndices.data(), nullptr, &events[events.size() - 1]); + } + + if(!upper.nzIndices.empty()){ + events.resize(events.size() + 3); + err |= queue->enqueueWriteBuffer(d_upper.nzIndices, CL_FALSE, 0, sizeof(int) * upper.nzIndices.size(), upper.nzIndices.data(), nullptr, &events[events.size() - 3]); + err |= queue->enqueueWriteBuffer(d_upper.knownRhsIndices, CL_FALSE, 0, sizeof(int) * upper.knownRhsIndices.size(), upper.knownRhsIndices.data(), nullptr, &events[events.size() - 2]); + err |= queue->enqueueWriteBuffer(d_upper.unknownRhsIndices, CL_FALSE, 0, sizeof(int) * upper.unknownRhsIndices.size(), upper.unknownRhsIndices.data(), nullptr, &events[events.size() - 1]); + 
} + + cl::WaitForEvents(events); + events.clear(); + + if (err != CL_SUCCESS) { + // enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL + OPM_THROW(std::logic_error, "BISAI OpenCL enqueueWriteBuffer error"); + } + }); + + std::tie(d_LUvals, d_invDiagVals) = bilu0->get_preconditioner_data(); + + events.resize(2); + err = queue->enqueueFillBuffer(d_invLvals, 0, 0, sizeof(double) * nnzb * bs * bs, nullptr, &events[0]); + err |= queue->enqueueFillBuffer(d_invUvals, 0, 0, sizeof(double) * nnzb * bs * bs, nullptr, &events[1]); + cl::WaitForEvents(events); + events.clear(); + + OpenclKernels::isaiL(d_diagIndex, d_colPointers, d_csrToCscOffsetMap, d_lower.subsystemPointers, d_lower.nzIndices, d_lower.unknownRhsIndices, d_lower.knownRhsIndices, d_LUvals, d_invLvals, Nb); + OpenclKernels::isaiU(d_diagIndex, d_colPointers, d_rowIndices, d_csrToCscOffsetMap, d_upper.subsystemPointers, d_upper.nzIndices, d_upper.unknownRhsIndices, d_upper.knownRhsIndices, d_LUvals, + d_invDiagVals, d_invUvals, Nb); + + if(verbosity >= 4){ + std::ostringstream out; + out << "BISAI createPreconditioner time: " << t_preconditioner.stop() << " s"; + OpmLog::info(out.str()); + } + + return true; +} + +template +void BISAI::apply(const cl::Buffer& x, cl::Buffer& y){ + const unsigned int bs = block_size; + + OpenclKernels::spmv(d_invLvals, d_rowIndices, d_colPointers, x, d_invL_x, Nb, bs, true, true); // application of isaiL is a simple spmv with addition + // (to compensate for the unitary diagonal that is not + // included in isaiL, for simplicity) + OpenclKernels::spmv(d_invUvals, d_rowIndices, d_colPointers, d_invL_x, y, Nb, bs); // application of isaiU is a simple spmv +} + +#define INSTANTIATE_BDA_FUNCTIONS(n) \ +template class BISAI; + +INSTANTIATE_BDA_FUNCTIONS(1); +INSTANTIATE_BDA_FUNCTIONS(2); +INSTANTIATE_BDA_FUNCTIONS(3); +INSTANTIATE_BDA_FUNCTIONS(4); +INSTANTIATE_BDA_FUNCTIONS(5); +INSTANTIATE_BDA_FUNCTIONS(6); + +#undef INSTANTIATE_BDA_FUNCTIONS + +} +} diff --git 
a/opm/simulators/linalg/bda/BISAI.hpp b/opm/simulators/linalg/bda/BISAI.hpp new file mode 100644 index 000000000..e3d95e8c3 --- /dev/null +++ b/opm/simulators/linalg/bda/BISAI.hpp @@ -0,0 +1,156 @@ +/* + Copyright 2022 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#ifndef BISAI_HPP +#define BISAI_HPP + +#include + +#include +#include +#include +#include +#include + +namespace Opm +{ +namespace Accelerator +{ + +template +class openclSolverBackend; + +class BlockedMatrix; + +/// This class implements a Blocked version of the Incomplete Sparse Approximate Inverse (ISAI) preconditioner. +/// Inspired by the paper "Incomplete Sparse Approximate Inverses for Parallel Preconditioning" by Anzt et. al. 
+template +class BISAI : public Preconditioner +{ + typedef Preconditioner Base; + + using Base::N; + using Base::Nb; + using Base::nnz; + using Base::nnzb; + using Base::verbosity; + using Base::context; + using Base::queue; + using Base::events; + using Base::err; + +private: + std::once_flag initialize; + + std::vector colPointers; + std::vector rowIndices; + std::vector diagIndex; + std::vector csrToCscOffsetMap; + std::vector invLvals; + std::vector invUvals; + + cl::Buffer d_colPointers; + cl::Buffer d_rowIndices; + cl::Buffer d_csrToCscOffsetMap; + cl::Buffer d_diagIndex; + cl::Buffer d_LUvals; + cl::Buffer d_invDiagVals; + cl::Buffer d_invLvals; + cl::Buffer d_invUvals; + cl::Buffer d_invL_x; + + ILUReorder opencl_ilu_reorder; + std::unique_ptr > bilu0; + + /// Struct that holds the structure of the small subsystems for each column + typedef struct{ + /// This vector holds the cumulative sum for the number of non-zero blocks for each subsystem. + /// Works similarly to row and column pointers for the CSR and CSC matrix representations. + std::vector subsystemPointers; + /// This vector holds the indices of the non-zero blocks for the target subsystem. These blocks are + /// the ones that are present in the shadow set of the non-zero blocks of column j of the main matrix, + /// as described in section 2.3 of the paper. The amount of non-zero blocks for j-th subsystem is + /// given by subsystemPointers[j+1] - subsystemPointers[j]. + std::vector nzIndices; + /// This vector holds the indices of the already known values of the right hand sides of the subsystems. + /// Its purpose is to aid in the parallel solution of the subsystems. + std::vector knownRhsIndices; + /// This vector holds the indices of the unknown values of the right hand sides of the subsystems. 
+ std::vector unknownRhsIndices; + } subsystemStructure; + + /// GPU version of subsystemStructure + typedef struct{ + cl::Buffer subsystemPointers; + cl::Buffer nzIndices; + cl::Buffer knownRhsIndices; + cl::Buffer unknownRhsIndices; + } subsystemStructureGPU; + + subsystemStructure lower, upper; + subsystemStructureGPU d_lower, d_upper; + + /// An approximate inverse for L is computed by solving a small lower triangular system for each column of the main matrix. + /// This function finds the structure of each of these subsystems and fills the 'lower' struct. + void buildLowerSubsystemsStructures(); + + /// An approximate inverse for U is computed by solving a small upper triangular system for each column of the main matrix. + /// This function finds the structure of each of theses subsystems and fills the 'upper' struct. + void buildUpperSubsystemsStructures(); + +public: + BISAI(ILUReorder opencl_ilu_reorder, int verbosity); + + // set own Opencl variables, but also that of the bilu0 preconditioner + void setOpencl(std::shared_ptr& context, std::shared_ptr& queue) override; + + // analysis, find reordering if specified + bool analyze_matrix(BlockedMatrix *mat) override; + + // ilu_decomposition + bool create_preconditioner(BlockedMatrix *mat) override; + + // apply preconditioner, x = prec(y) + void apply(const cl::Buffer& y, cl::Buffer& x) override; + + int* getToOrder() override + { + return bilu0->getToOrder(); + } + + int* getFromOrder() override + { + return bilu0->getFromOrder(); + } + + BlockedMatrix* getRMat() override + { + return bilu0->getRMat(); + } +}; + +/// Similar function to csrPatternToCsc. It gives an offset map from CSR to CSC instead of the full CSR to CSC conversion. +/// The map works as follows: if an element 'e' of the matrix is in the i-th position in the CSR representation, it will be +/// in the csrToCscOffsetMap[i]-th position in the CSC representation. 
+std::vector buildCsrToCscOffsetMap(std::vector colPointers, std::vector rowIndices); + +} // namespace Accelerator +} // namespace Opm + +#endif diff --git a/opm/simulators/linalg/bda/opencl/Preconditioner.cpp b/opm/simulators/linalg/bda/opencl/Preconditioner.cpp index 8f3d5a687..95eee9be8 100644 --- a/opm/simulators/linalg/bda/opencl/Preconditioner.cpp +++ b/opm/simulators/linalg/bda/opencl/Preconditioner.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -44,6 +45,8 @@ std::unique_ptr > Preconditioner::create( return std::make_unique >(opencl_ilu_reorder, verbosity); } else if (type == PreconditionerType::CPR) { return std::make_unique >(verbosity, opencl_ilu_reorder); + } else if (type == PreconditionerType::BISAI) { + return std::make_unique >(opencl_ilu_reorder, verbosity); } else { OPM_THROW(std::logic_error, "Invalid PreconditionerType"); } diff --git a/opm/simulators/linalg/bda/opencl/Preconditioner.hpp b/opm/simulators/linalg/bda/opencl/Preconditioner.hpp index ce301d34d..53f45b4c2 100644 --- a/opm/simulators/linalg/bda/opencl/Preconditioner.hpp +++ b/opm/simulators/linalg/bda/opencl/Preconditioner.hpp @@ -54,7 +54,8 @@ protected: public: enum PreconditionerType { BILU0, - CPR + CPR, + BISAI }; static std::unique_ptr create(PreconditionerType type, int verbosity, ILUReorder opencl_ilu_reorder); diff --git a/opm/simulators/linalg/bda/opencl/kernels/isaiL.cl b/opm/simulators/linalg/bda/opencl/kernels/isaiL.cl new file mode 100644 index 000000000..d05d40e1f --- /dev/null +++ b/opm/simulators/linalg/bda/opencl/kernels/isaiL.cl @@ -0,0 +1,80 @@ +__kernel void block_sub(__global double *mat1, __global double *mat2, __global double *result) +{ + const unsigned int bs = 3; + const unsigned int warpsize = 32; + const unsigned int num_active_threads = (warpsize / bs / bs) * bs * bs; + const unsigned int idx_t = get_local_id(0); + const unsigned int lane = idx_t % warpsize; + + if(lane < num_active_threads){ + const unsigned int row = lane % bs; 
+ const unsigned int col = (lane / bs) % bs; + result[bs * row + col] = mat1[bs * row + col] - mat2[bs * row + col]; + } +} + +__kernel void block_mult_sub_isai(__global double *a, __global double *b, __global double *c) +{ + const unsigned int bs = 3; + const unsigned int warpsize = 32; + const unsigned int num_active_threads = (warpsize / bs / bs) * bs * bs; + const unsigned int idx_t = get_local_id(0); + const unsigned int lane = idx_t % warpsize; + + if(lane < num_active_threads){ + const unsigned int row = lane % bs; + const unsigned int col = (lane / bs) % bs; + double temp = 0.0; + + for (unsigned int k = 0; k < bs; k++) { + temp += b[bs * row + k] * c[bs * k + col]; + } + + a[bs * row + col] -= temp; + } +} + +__kernel void isaiL(__global const int *diagIndex, + __global const int *colPtr, + __global const int *mapping, + __global const int *nvc, + __global const int *luIdxs, + __global const int *xxIdxs, + __global const int *dxIdxs, + __global const double *LU, + __global double *invL, + const unsigned int Nb) +{ + const unsigned int warpsize = 32; + const unsigned int idx_b = get_group_id(0); + const unsigned int idx_t = get_local_id(0); + const unsigned int idx = get_global_id(0); + const unsigned int bs = 3; + const unsigned int num_threads = get_global_size(0); + const unsigned int num_warps_in_grid = num_threads / warpsize; + const unsigned int num_active_threads = (warpsize / bs / bs) * bs * bs; + const unsigned int num_blocks_per_warp = warpsize / bs / bs; + const unsigned int lane = idx_t % warpsize; + const unsigned int c = (lane / bs) % bs; + const unsigned int r = lane % bs; + unsigned int tcol = idx / warpsize; + + while(tcol < Nb){ + const unsigned int frow = diagIndex[tcol] + 1; + const unsigned int lrow = colPtr[tcol + 1]; + const unsigned int nx = lrow - frow; + + if(lane < num_active_threads){ + for(unsigned int xid = 0; xid < nx; xid++){ + unsigned int xpos = mapping[frow + xid]; + block_sub(invL + xpos * bs * bs, LU + xpos * bs * bs, 
invL + xpos * bs * bs); + } + + for(unsigned int v = nvc[tcol]; v < nvc[tcol + 1]; v++){ + block_mult_sub_isai(invL + xxIdxs[v] * bs * bs, LU + luIdxs[v] * bs * bs, invL + dxIdxs[v] * bs * bs); + } + } + + tcol += num_warps_in_grid; + } +} diff --git a/opm/simulators/linalg/bda/opencl/kernels/isaiU.cl b/opm/simulators/linalg/bda/opencl/kernels/isaiU.cl new file mode 100644 index 000000000..c2964586e --- /dev/null +++ b/opm/simulators/linalg/bda/opencl/kernels/isaiU.cl @@ -0,0 +1,84 @@ +__kernel void block_add(__global double *mat1, __global double *mat2, __global double *result) +{ + const unsigned int bs = 3; + const unsigned int warpsize = 32; + const unsigned int num_active_threads = (warpsize / bs / bs) * bs * bs; + const unsigned int idx_t = get_local_id(0); + const unsigned int lane = idx_t % warpsize; + + if(lane < num_active_threads){ + const unsigned int row = lane % bs; + const unsigned int col = (lane / bs) % bs; + result[bs * row + col] = mat1[bs * row + col] + mat2[bs * row + col]; + } +} + +__kernel void block_mult_isai(__global double *mat1, __global double *mat2, __global double *result) +{ + const unsigned int bs = 3; + const unsigned int warpsize = 32; + const unsigned int num_active_threads = (warpsize / bs / bs) * bs * bs; + const unsigned int idx_t = get_local_id(0); + const unsigned int lane = idx_t % warpsize; + + if(lane < num_active_threads){ + const unsigned int row = lane % bs; + const unsigned int col = (lane / bs) % bs; + double temp = 0.0; + + for (unsigned int k = 0; k < bs; k++) { + temp += mat1[bs * row + k] * mat2[bs * k + col]; + } + + result[bs * row + col] = temp; + } +} + +__kernel void isaiU(__global const int *diagIndex, + __global const int *colPtr, + __global const int *rowIndices, + __global const int *mapping, + __global const int *nvc, + __global const int *luIdxs, + __global const int *xxIdxs, + __global const int *dxIdxs, + __global const double *LU, + __global const double *invDiagVals, + __global double *invU, + 
/// out = mat * x + x
/// (blocked sparse matrix-vector product in CSR layout, with the input vector
/// added to the result; x and out must not alias)
///
/// algorithm based on:
/// Optimization of Block Sparse Matrix-Vector Multiplication on Shared-Memory Parallel Architectures,
/// Ryan Eberhardt, Mark Hoemmen, 2016, https://doi.org/10.1109/IPDPSW.2016.42
///
/// One group of 32 consecutive work-items ("warp") handles one block row at a time.
/// Inside a warp every active lane owns one (r, c) position of a bs x bs block and
/// walks over the blocks of the row with a stride of num_blocks_per_warp.
///
/// \param[in]  vals        nonzero values, bs*bs doubles per block, row-major inside a block
/// \param[in]  cols        block-column index of every block
/// \param[in]  rows        block-row pointers, Nb+1 entries
/// \param[in]  Nb          number of block rows
/// \param[in]  x           input vector, Nb*bs entries
/// \param[out] out         result vector, Nb*bs entries
/// \param[in]  block_size  bs; bs*bs must not exceed 32, otherwise no lane is active
/// \param      tmp         local scratch, indexed by lane, so at least 32 doubles
///
/// NOTE: tmp is indexed by lane and the barriers sit inside the block-row loop, so the
/// kernel is only well defined when the work-group size is 32 (what the host-side spmv()
/// wrapper in this patch uses): all work-items of a group then share one block row.
__kernel void spmv_blocked_add(
    __global const double *vals,
    __global const int *cols,
    __global const int *rows,
    const int Nb,
    __global const double *x,
    __global double *out,
    const unsigned int block_size,
    __local double *tmp)
{
    const unsigned int warpsize = 32;
    const unsigned int bs = block_size;
    const unsigned int bsize = get_local_size(0);
    const unsigned int idx_b = get_global_id(0) / bsize;
    const unsigned int idx_t = get_local_id(0);
    const unsigned int idx = idx_b * bsize + idx_t;
    const unsigned int num_blocks_per_warp = warpsize / bs / bs;
    const unsigned int num_active_threads = num_blocks_per_warp * bs * bs;
    const unsigned int num_warps_in_grid = get_global_size(0) / warpsize;
    const unsigned int num_block_rows = (unsigned int)Nb;
    const unsigned int lane = idx_t % warpsize;
    const unsigned int c = (lane / bs) % bs;  // column inside the block
    const unsigned int r = lane % bs;         // row inside the block
    unsigned int target_block_row = idx / warpsize;

    // for 3x3 blocks:
    // num_active_threads: 27
    // num_blocks_per_warp: 3

    while (target_block_row < num_block_rows) {
        const unsigned int first_block = rows[target_block_row];
        const unsigned int last_block = rows[target_block_row + 1];
        double local_out = 0.0;

        // each active lane accumulates its (r, c) contribution over "its" blocks
        if (lane < num_active_threads) {
            for (unsigned int block = first_block + lane / (bs * bs);
                 block < last_block;
                 block += num_blocks_per_warp)
            {
                const double x_elem = x[cols[block] * bs + c];
                const double A_elem = vals[block * bs * bs + c + r * bs];
                local_out += x_elem * A_elem;
            }
        }

        // reduction in local memory: all lanes with the same r (lane % bs) are
        // summed into tmp[r]; inactive lanes contribute 0
        tmp[lane] = local_out;
        barrier(CLK_LOCAL_MEM_FENCE);

        // strides bs, 2*bs, 4*bs, ... (3, 6, 12, 24 for bs == 3)
        for (unsigned int offset = bs; offset < warpsize; offset <<= 1) {
            // read the partner value first and only update after a barrier:
            // the partner lane modifies tmp[lane + offset] in the same step, so
            // an in-place "+=" is a data race unless all 32 lanes run in lock-step
            double partner = 0.0;
            if (lane + offset < warpsize) {
                partner = tmp[lane + offset];
            }
            barrier(CLK_LOCAL_MEM_FENCE);
            tmp[lane] += partner;
            barrier(CLK_LOCAL_MEM_FENCE);
        }

        // the first bs lanes hold the finished row sums; add x and store
        if (lane < bs) {
            const unsigned int row = target_block_row * bs + lane;
            out[row] = tmp[lane] + x[row];
        }

        target_block_row += num_warps_in_grid;
    }
}
std::unique_ptr > OpenclKernels::add_coarse_pressure_correction_k; std::unique_ptr > OpenclKernels::prolongate_vector_k; std::unique_ptr OpenclKernels::spmv_blocked_k; +std::unique_ptr OpenclKernels::spmv_blocked_add_k; std::unique_ptr OpenclKernels::spmv_k; std::unique_ptr OpenclKernels::spmv_noreset_k; std::unique_ptr OpenclKernels::residual_blocked_k; @@ -61,7 +62,8 @@ std::unique_ptr OpenclKernels::ILU_apply2_k; std::unique_ptr OpenclKernels::stdwell_apply_k; std::unique_ptr OpenclKernels::stdwell_apply_no_reorder_k; std::unique_ptr OpenclKernels::ilu_decomp_k; - +std::unique_ptr OpenclKernels::isaiL_k; +std::unique_ptr OpenclKernels::isaiU_k; // divide A by B, and round up: return (int)ceil(A/B) unsigned int ceilDivision(const unsigned int A, const unsigned int B) @@ -90,6 +92,7 @@ void OpenclKernels::init(cl::Context *context, cl::CommandQueue *queue_, std::ve sources.emplace_back(add_coarse_pressure_correction_str); sources.emplace_back(prolongate_vector_str); sources.emplace_back(spmv_blocked_str); + sources.emplace_back(spmv_blocked_add_str); sources.emplace_back(spmv_str); sources.emplace_back(spmv_noreset_str); sources.emplace_back(residual_blocked_str); @@ -104,6 +107,8 @@ void OpenclKernels::init(cl::Context *context, cl::CommandQueue *queue_, std::ve sources.emplace_back(stdwell_apply_str); sources.emplace_back(stdwell_apply_no_reorder_str); sources.emplace_back(ILU_decomp_str); + sources.emplace_back(isaiL_str); + sources.emplace_back(isaiU_str); cl::Program program = cl::Program(*context, sources); program.build(devices); @@ -122,6 +127,7 @@ void OpenclKernels::init(cl::Context *context, cl::CommandQueue *queue_, std::ve add_coarse_pressure_correction_k.reset(new cl::KernelFunctor(cl::Kernel(program, "add_coarse_pressure_correction"))); prolongate_vector_k.reset(new cl::KernelFunctor(cl::Kernel(program, "prolongate_vector"))); spmv_blocked_k.reset(new spmv_blocked_kernel_type(cl::Kernel(program, "spmv_blocked"))); + spmv_blocked_add_k.reset(new 
spmv_blocked_kernel_type(cl::Kernel(program, "spmv_blocked_add"))); spmv_k.reset(new spmv_kernel_type(cl::Kernel(program, "spmv"))); spmv_noreset_k.reset(new spmv_kernel_type(cl::Kernel(program, "spmv_noreset"))); residual_blocked_k.reset(new residual_blocked_kernel_type(cl::Kernel(program, "residual_blocked"))); @@ -131,6 +137,8 @@ void OpenclKernels::init(cl::Context *context, cl::CommandQueue *queue_, std::ve stdwell_apply_k.reset(new stdwell_apply_kernel_type(cl::Kernel(program, "stdwell_apply"))); stdwell_apply_no_reorder_k.reset(new stdwell_apply_no_reorder_kernel_type(cl::Kernel(program, "stdwell_apply_no_reorder"))); ilu_decomp_k.reset(new ilu_decomp_kernel_type(cl::Kernel(program, "ilu_decomp"))); + isaiL_k.reset(new isaiL_kernel_type(cl::Kernel(program, "isaiL"))); + isaiU_k.reset(new isaiU_kernel_type(cl::Kernel(program, "isaiU"))); initialized = true; } // end get_opencl_kernels() @@ -311,7 +319,7 @@ void OpenclKernels::prolongate_vector(const cl::Buffer& in, cl::Buffer& out, con } } -void OpenclKernels::spmv(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& x, cl::Buffer& b, int Nb, unsigned int block_size, bool reset) +void OpenclKernels::spmv(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, const cl::Buffer& x, cl::Buffer& b, int Nb, unsigned int block_size, bool reset, bool add) { const unsigned int work_group_size = 32; const unsigned int num_work_groups = ceilDivision(Nb, work_group_size); @@ -321,7 +329,11 @@ void OpenclKernels::spmv(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, c cl::Event event; if (block_size > 1) { - event = (*spmv_blocked_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), vals, cols, rows, Nb, x, b, block_size, cl::Local(lmem_per_work_group)); + if (add) { + event = (*spmv_blocked_add_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), vals, cols, rows, Nb, x, b, block_size, cl::Local(lmem_per_work_group)); + } else { + 
event = (*spmv_blocked_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), vals, cols, rows, Nb, x, b, block_size, cl::Local(lmem_per_work_group)); + } } else { if (reset) { event = (*spmv_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), vals, cols, rows, Nb, x, b, cl::Local(lmem_per_work_group)); @@ -460,5 +472,44 @@ void OpenclKernels::apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffe } } +void OpenclKernels::isaiL(cl::Buffer& diagIndex, cl::Buffer& colPointers, cl::Buffer& mapping, cl::Buffer& nvc, + cl::Buffer& luIdxs, cl::Buffer& xxIdxs, cl::Buffer& dxIdxs, cl::Buffer& LUvals, cl::Buffer& invLvals, unsigned int Nb) +{ + const unsigned int work_group_size = 256; + const unsigned int num_work_groups = ceilDivision(Nb, work_group_size); + const unsigned int total_work_items = num_work_groups * work_group_size; + + Timer t_isaiL; + cl::Event event = (*isaiL_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), + diagIndex, colPointers, mapping, nvc, luIdxs, xxIdxs, dxIdxs, LUvals, invLvals, Nb); + + if (verbosity >= 4) { + event.wait(); + std::ostringstream oss; + oss << std::scientific << "OpenclKernels isaiL() time: " << t_isaiL.stop() << " s"; + OpmLog::info(oss.str()); + } +} + +void OpenclKernels::isaiU(cl::Buffer& diagIndex, cl::Buffer& colPointers, cl::Buffer& rowIndices, cl::Buffer& mapping, + cl::Buffer& nvc, cl::Buffer& luIdxs, cl::Buffer& xxIdxs, cl::Buffer& dxIdxs, cl::Buffer& LUvals, + cl::Buffer& invDiagVals, cl::Buffer& invUvals, unsigned int Nb) +{ + const unsigned int work_group_size = 256; + const unsigned int num_work_groups = ceilDivision(Nb, work_group_size); + const unsigned int total_work_items = num_work_groups * work_group_size; + + Timer t_isaiU; + cl::Event event = (*isaiU_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)), + diagIndex, colPointers, rowIndices, mapping, nvc, 
luIdxs, xxIdxs, dxIdxs, LUvals, invDiagVals, invUvals, Nb); + + if (verbosity >= 4) { + event.wait(); + std::ostringstream oss; + oss << std::scientific << "OpenclKernels isaiU() time: " << t_isaiU.stop() << " s"; + OpmLog::info(oss.str()); + } +} + } // namespace Accelerator } // namespace Opm diff --git a/opm/simulators/linalg/bda/openclKernels.hpp b/opm/simulators/linalg/bda/openclKernels.hpp index d03a40c02..7f98e9a1d 100644 --- a/opm/simulators/linalg/bda/openclKernels.hpp +++ b/opm/simulators/linalg/bda/openclKernels.hpp @@ -31,9 +31,9 @@ namespace Accelerator { using spmv_blocked_kernel_type = cl::KernelFunctor; + const cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>; using spmv_kernel_type = cl::KernelFunctor; + const cl::Buffer&, cl::Buffer&, cl::LocalSpaceArg>; using residual_blocked_kernel_type = cl::KernelFunctor; using residual_kernel_type = cl::KernelFunctor; using ilu_decomp_kernel_type = cl::KernelFunctor; +using isaiL_kernel_type = cl::KernelFunctor; +using isaiU_kernel_type = cl::KernelFunctor; class OpenclKernels { @@ -71,6 +75,7 @@ private: static std::unique_ptr > add_coarse_pressure_correction_k; static std::unique_ptr > prolongate_vector_k; static std::unique_ptr spmv_blocked_k; + static std::unique_ptr spmv_blocked_add_k; static std::unique_ptr spmv_k; static std::unique_ptr spmv_noreset_k; static std::unique_ptr residual_blocked_k; @@ -80,6 +85,8 @@ private: static std::unique_ptr stdwell_apply_k; static std::unique_ptr stdwell_apply_no_reorder_k; static std::unique_ptr ilu_decomp_k; + static std::unique_ptr isaiL_k; + static std::unique_ptr isaiU_k; OpenclKernels(){}; // disable instantiation @@ -94,6 +101,7 @@ public: static const std::string add_coarse_pressure_correction_str; static const std::string prolongate_vector_str; static const std::string spmv_blocked_str; + static const std::string spmv_blocked_add_str; static const std::string spmv_str; static const std::string spmv_noreset_str; static const std::string 
residual_blocked_str; @@ -108,6 +116,8 @@ public: static const std::string stdwell_apply_str; static const std::string stdwell_apply_no_reorder_str; static const std::string ILU_decomp_str; + static const std::string isaiL_str; + static const std::string isaiU_str; static void init(cl::Context *context, cl::CommandQueue *queue, std::vector& devices, int verbosity); @@ -120,7 +130,7 @@ public: static void full_to_pressure_restriction(const cl::Buffer& fine_y, cl::Buffer& weights, cl::Buffer& coarse_y, int Nb); static void add_coarse_pressure_correction(cl::Buffer& coarse_x, cl::Buffer& fine_x, int pressure_idx, int Nb); static void prolongate_vector(const cl::Buffer& in, cl::Buffer& out, const cl::Buffer& cols, int N); - static void spmv(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& x, cl::Buffer& b, int Nb, unsigned int block_size, bool reset = true); + static void spmv(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, const cl::Buffer& x, cl::Buffer& b, int Nb, unsigned int block_size, bool reset = true, bool add = false); static void residual(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& x, const cl::Buffer& rhs, cl::Buffer& out, int Nb, unsigned int block_size); static void ILU_apply1(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& diagIndex, @@ -139,6 +149,13 @@ public: static void apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffer &d_Dnnzs_ocl, cl::Buffer &d_Bnnzs_ocl, cl::Buffer &d_Ccols_ocl, cl::Buffer &d_Bcols_ocl, cl::Buffer &d_x, cl::Buffer &d_y, int dim, int dim_wells, cl::Buffer &d_val_pointers_ocl, int num_std_wells); + + static void isaiL(cl::Buffer& diagIndex, cl::Buffer& colPointers, cl::Buffer& mapping, cl::Buffer& nvc, + cl::Buffer& luIdxs, cl::Buffer& xxIdxs, cl::Buffer& dxIdxs, cl::Buffer& LUvals, cl::Buffer& invLvals, unsigned int Nb); + + static void isaiU(cl::Buffer& diagIndex, cl::Buffer& colPointers, cl::Buffer& rowIndices, cl::Buffer& mapping, + cl::Buffer& nvc, 
cl::Buffer& luIdxs, cl::Buffer& xxIdxs, cl::Buffer& dxIdxs, cl::Buffer& LUvals, + cl::Buffer& invDiagVals, cl::Buffer& invUvals, unsigned int Nb); }; } // namespace Accelerator diff --git a/opm/simulators/linalg/bda/openclSolverBackend.cpp b/opm/simulators/linalg/bda/openclSolverBackend.cpp index 33cbef770..c91da75ad 100644 --- a/opm/simulators/linalg/bda/openclSolverBackend.cpp +++ b/opm/simulators/linalg/bda/openclSolverBackend.cpp @@ -47,11 +47,17 @@ using Dune::Timer; template openclSolverBackend::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_, ILUReorder opencl_ilu_reorder_, std::string linsolver) : BdaSolver(verbosity_, maxit_, tolerance_, platformID_, deviceID_), opencl_ilu_reorder(opencl_ilu_reorder_) { - bool use_cpr; + bool use_cpr, use_isai; + if (linsolver.compare("ilu0") == 0) { use_cpr = false; + use_isai = false; } else if (linsolver.compare("cpr_quasiimpes") == 0) { use_cpr = true; + use_isai = false; + } else if (linsolver.compare("isai") == 0) { + use_cpr = false; + use_isai = true; } else if (linsolver.compare("cpr_trueimpes") == 0) { OPM_THROW(std::logic_error, "Error openclSolver does not support --linsolver=cpr_trueimpes"); } else { @@ -61,6 +67,8 @@ openclSolverBackend::openclSolverBackend(int verbosity_, int maxit_, using PreconditionerType = typename Preconditioner::PreconditionerType; if (use_cpr) { prec = Preconditioner::create(PreconditionerType::CPR, verbosity, opencl_ilu_reorder); + } else if (use_isai) { + prec = Preconditioner::create(PreconditionerType::BISAI, verbosity, opencl_ilu_reorder); } else { prec = Preconditioner::create(PreconditionerType::BILU0, verbosity, opencl_ilu_reorder); } diff --git a/opm/simulators/linalg/setupPropertyTree.cpp b/opm/simulators/linalg/setupPropertyTree.cpp index 5b4b8e7da..e10b55b6e 100644 --- a/opm/simulators/linalg/setupPropertyTree.cpp +++ b/opm/simulators/linalg/setupPropertyTree.cpp @@ -84,10 +84,15 @@ 
setupPropertyTree(FlowLinearSolverParameters p, // Note: copying the parameters return setupILU(conf, p); } + // Same configuration as ILU0. + if (conf == "isai") { + return setupISAI(conf, p); + } + // No valid configuration option found. OPM_THROW(std::invalid_argument, conf << " is not a valid setting for --linear-solver-configuration." - << " Please use ilu0, cpr, cpr_trueimpes, or cpr_quasiimpes"); + << " Please use ilu0, cpr, cpr_trueimpes, cpr_quasiimpes or isai"); } PropertyTree @@ -187,5 +192,20 @@ setupILU([[maybe_unused]] const std::string& conf, const FlowLinearSolverParamet } +PropertyTree +setupISAI([[maybe_unused]] const std::string& conf, const FlowLinearSolverParameters& p) +{ + using namespace std::string_literals; + PropertyTree prm; + prm.put("tol", p.linear_solver_reduction_); + prm.put("maxiter", p.linear_solver_maxiter_); + prm.put("verbosity", p.linear_solver_verbosity_); + prm.put("solver", "bicgstab"s); + prm.put("preconditioner.type", "ParOverILU0"s); + prm.put("preconditioner.relaxation", p.ilu_relaxation_); + prm.put("preconditioner.ilulevel", p.ilu_fillin_level_); + return prm; +} + } // namespace Opm diff --git a/opm/simulators/linalg/setupPropertyTree.hpp b/opm/simulators/linalg/setupPropertyTree.hpp index c5c278d0e..15bb8e80d 100644 --- a/opm/simulators/linalg/setupPropertyTree.hpp +++ b/opm/simulators/linalg/setupPropertyTree.hpp @@ -35,6 +35,7 @@ PropertyTree setupPropertyTree(FlowLinearSolverParameters p, PropertyTree setupCPR(const std::string& conf, const FlowLinearSolverParameters& p); PropertyTree setupAMG(const std::string& conf, const FlowLinearSolverParameters& p); PropertyTree setupILU(const std::string& conf, const FlowLinearSolverParameters& p); +PropertyTree setupISAI(const std::string& conf, const FlowLinearSolverParameters& p); } // namespace Opm diff --git a/tests/offset_map_matrix.txt b/tests/offset_map_matrix.txt new file mode 100644 index 000000000..74d40940e --- /dev/null +++ b/tests/offset_map_matrix.txt @@ 
-0,0 +1,850 @@ +%%MatrixMarket matrix coordinate real general +50 50 848 +1 1 -1.1640162693857792 +1 2 -1.4182558603860957 +1 4 -0.6652783046746638 +1 7 -0.3810535906432545 +1 11 -0.6563172555901167 +1 13 2.4881286599083707 +1 17 -1.0345553063001411 +1 19 0.1279122570117053 +1 22 -1.4450482561517808 +1 33 -1.4973404272897985 +1 39 0.989494790153136 +1 43 1.4035610072308178 +1 46 -1.4634598848419837 +2 1 0.7937901820374852 +2 2 -1.3483019250309103 +2 3 -1.534536353394763 +2 5 -0.7819255095818141 +2 6 0.21912483161129548 +2 7 1.9185876920903795 +2 9 0.6283208544623571 +2 13 0.36352373656672116 +2 14 -1.471128765967237 +2 19 -0.6521277429033553 +2 23 0.7234427285193916 +2 24 -0.9401018026326134 +2 25 -0.6068777593411266 +2 26 0.19726805543992912 +2 29 -1.1776729668562218 +2 30 -0.32162365081842914 +2 31 -0.8363377726122794 +2 32 -0.29111795440353216 +2 34 1.0322677020266033 +2 38 0.23638934071376647 +2 39 0.23020966740491275 +2 40 -0.417421746191773 +2 48 -0.049356574469409484 +3 2 -0.03366827688954402 +3 3 -1.5301951784625547 +3 7 -1.7463778248751873 +3 8 0.7838767011031638 +3 13 -1.270926473120464 +3 15 -0.43354480832235565 +3 21 -0.6791679623513996 +3 24 0.3182001978470255 +3 26 1.481267255773548 +3 32 0.7678228268120352 +3 38 0.42052610249848366 +3 39 -1.5750879578362842 +3 41 1.260449664220387 +3 42 0.916298331782843 +3 46 0.6803299319321364 +3 48 -0.4732624627863532 +3 49 1.082460026604664 +3 50 -2.1654996525990207 +4 1 0.4450803054440506 +4 4 -0.31297245312801325 +4 9 -0.5620032379365633 +4 13 0.8811374859858175 +4 18 -0.8416050779420092 +4 26 1.3804749091718078 +4 30 0.16219779998776584 +4 31 0.14096119833458218 +4 33 -1.6625439465924297 +4 41 0.7887533576814337 +4 43 -0.6942950200964312 +4 46 -0.8351336538683832 +4 48 1.213172044194188 +5 2 -0.6080286294166338 +5 5 -2.0943444221144385 +5 6 1.384330977877084 +5 9 1.834623676906424 +5 10 -0.8761980873715278 +5 11 -1.3360478306222685 +5 16 -0.003415086818400862 +5 18 -0.15009657485305272 +5 19 1.5105685169571559 
+5 20 0.867385510476716 +5 21 -0.5457894297692334 +5 25 -0.2599301955884775 +5 28 -0.37742893332343647 +5 30 -2.0213225252568523 +5 34 -0.4448199086705089 +5 35 1.019035325272961 +5 38 1.0560318627318444 +5 40 -0.8667352840192679 +5 42 -0.2631232293157956 +5 46 1.4596572629236262 +5 48 -0.7950035029585614 +6 2 -0.5462785017897367 +6 5 0.9916250609520301 +6 6 -2.5024560757759584 +6 8 -0.40223557718951425 +6 9 -0.7256159324984878 +6 17 0.07000920053694806 +6 23 1.422082337992691 +6 28 -0.04405406333208112 +6 29 0.7296763419443075 +6 30 -0.7881979624241918 +6 33 1.1142960251799454 +6 34 -0.7936946409282006 +6 39 0.5765186765450424 +6 41 0.7659896684142042 +6 42 0.13757544444400835 +6 43 1.8468549087768105 +6 45 0.384422957040213 +6 46 0.03664962391465893 +6 48 -1.7539479827579822 +7 1 0.9257970515279664 +7 2 0.588230091950247 +7 3 0.1685561295248156 +7 7 1.0238461950417235 +7 8 -0.6875468016096212 +7 10 -1.8334088102150734 +7 11 0.02438319611417031 +7 12 -0.17819186052258176 +7 23 -2.6512848145758023 +7 24 0.3658007898009245 +7 26 -0.13756076948189752 +7 27 0.5444979337183926 +7 30 0.11144196646762107 +7 31 0.25394983326980825 +7 33 -0.5598435615074352 +7 34 -1.1639958660164635 +7 38 0.6617241341738084 +7 39 -0.9373040350814857 +7 45 1.1925387648834163 +7 47 -0.1252429583063194 +7 49 0.1863755738201403 +8 3 -2.164029237298608 +8 6 -0.10844113537970246 +8 7 -0.24450770802913588 +8 8 -0.8922198144344635 +8 11 -0.12260404864223766 +8 12 0.6450457464392393 +8 14 -0.6578139885661349 +8 15 0.039395765965406844 +8 17 -1.2765747158407104 +8 19 0.24614546177602523 +8 22 0.6267031103839719 +8 25 -1.1586051275107683 +8 26 -1.5703953625058857 +8 27 -1.2368616300459365 +8 28 0.5162274385614679 +8 29 0.5784419970119425 +8 30 1.1380623107633334 +8 31 0.46002560089012295 +8 33 0.3694717874042891 +8 40 0.923124753692947 +8 43 2.658342741659545 +8 45 -1.0437279202863807 +8 47 1.763510158441251 +8 50 -0.9841878306649064 +9 2 0.6698415452946738 +9 4 -0.22297071789884176 +9 5 
-0.6340402036919065 +9 6 -0.09008175722253114 +9 9 1.4180024237640354 +9 14 0.1704233226646137 +9 20 -0.6090500494978475 +9 21 1.5910224144131337 +9 23 1.275902068255959 +9 26 1.0824338823514525 +9 33 0.11288998150107547 +9 38 -0.31286012327974233 +9 39 -0.9616670972077335 +9 43 -0.1557761657700043 +9 44 -0.7780802453094313 +10 5 0.6904879131191686 +10 7 -1.2572953371397426 +10 10 -0.34305114851994023 +10 11 1.913802957055161 +10 13 1.2157369460234284 +10 15 -0.9042650592903851 +10 17 0.15803252082876973 +10 19 0.3063724756501122 +10 22 0.043737219469907065 +10 23 -1.1543198039610394 +10 31 -0.08120594221258237 +10 32 -1.1588785933228243 +10 33 -0.2113739458997056 +10 36 -0.3723874359167642 +10 37 -0.8574315946374444 +10 40 2.367858385708291 +10 43 -0.27965944361682565 +10 44 0.9621859844154922 +10 45 0.6901517908099928 +10 49 -0.2804225085524067 +10 50 0.20841210893226758 +11 1 0.9836055582782907 +11 5 0.8326285756360723 +11 7 0.24044476525093098 +11 8 -2.166532388842017 +11 10 -0.21031550096597298 +11 11 0.052994199785863 +11 15 1.9901151965695802 +11 21 0.45572509322845134 +11 24 -0.880593093625388 +11 27 0.7769946200047971 +11 29 -0.6814443521195398 +11 31 -0.7148953964722308 +11 32 0.2730781519022447 +11 37 -0.9290991191450806 +11 38 -0.9451980513352437 +11 44 -0.3437583521791611 +11 45 1.1747279568692308 +11 48 -0.11645253155382618 +12 7 -0.3034845262733902 +12 8 -0.08896191759061581 +12 12 0.5036978455262423 +12 15 0.3178607500557986 +12 19 1.3656156116578528 +12 20 1.1118854001888767 +12 22 -0.8978198546337864 +12 23 2.1532891981919398 +12 32 -1.9082499979596879 +12 40 -0.3943560805577487 +12 44 -0.3943010011405521 +12 49 0.007563048280009652 +12 50 -2.220558434696489 +13 1 -1.5365799932767636 +13 2 1.5314847817626112 +13 3 -0.9013718463799956 +13 4 1.0404662267385238 +13 10 -0.7817664262628053 +13 13 -0.5232082838283634 +13 16 0.3666746605756056 +13 22 -0.23479496561332408 +13 23 0.863733656290349 +13 30 -0.6061769949271227 +13 35 0.28382042440385924 +13 
36 -0.25931363965647 +13 38 0.8270056737690984 +13 39 -0.5786289190874652 +13 41 -1.2990853709128758 +13 42 -1.50712248377886 +13 44 0.5422591071477139 +13 45 -0.21580145602040487 +13 46 -1.2193548360559685 +13 48 -1.590979588417989 +14 2 -1.9451772588176117 +14 8 0.022673140961644918 +14 9 -0.5758308273936078 +14 14 0.36511143247178185 +14 19 0.3679269796019015 +14 21 -0.269563417198021 +14 25 1.2119432169046207 +14 30 -0.4883570034605246 +14 32 0.1929203286050791 +14 39 -0.665559643265664 +14 47 1.3035909325429085 +14 49 -0.05476379144623836 +15 3 -0.053377020810862416 +15 8 1.9302575725447662 +15 10 -0.24243408886509507 +15 11 -0.25808094168155327 +15 12 0.47022708237844313 +15 15 -0.21214613807025123 +15 18 0.5867216736722363 +15 24 0.7986489182639825 +15 25 1.0301100826999252 +15 28 -0.21495395125912922 +15 30 -0.22652664640648987 +15 36 0.8411036559911869 +15 37 -0.7333516526667732 +15 42 -0.6962738652430717 +15 45 0.3329558482408548 +15 48 0.7586961458850335 +16 5 -1.4187067485863714 +16 13 -1.1627075345265057 +16 16 -1.1700699602496287 +16 19 -1.297407153884415 +16 20 1.3380866088463814 +16 23 -0.14566124018497298 +16 25 -0.06062994018104683 +16 30 0.048962833975358604 +16 34 0.9787562410040349 +16 35 -0.20254479485946494 +16 37 0.9002267443027536 +16 38 0.30180264302299753 +16 42 1.4657321299948673 +16 44 0.7600082166858397 +16 45 0.6843029647870434 +16 50 0.25677106966826024 +17 1 -0.31889825918510856 +17 6 1.026637062372315 +17 8 1.4211714691297912 +17 10 -0.466633655454311 +17 17 2.057542153979448 +17 24 -0.5630353084417016 +17 25 -0.4488634934076392 +17 31 2.314826945590068 +17 32 0.5645189366269747 +17 33 0.430944809151469 +17 37 -1.5659164439886035 +17 38 1.3446543976449992 +17 39 0.16050636445850358 +17 42 -0.6812083686626337 +17 49 0.01691514384681582 +18 4 0.6554662920123833 +18 5 -1.45963372849715 +18 15 -1.2677974537647643 +18 18 1.0268909956413137 +18 19 1.670504151612372 +18 21 -1.3522607198306429 +18 22 0.9138575180392129 +18 26 
0.5151808617665289 +18 27 0.8872213986143038 +18 28 0.6632108591259984 +18 29 -0.016457428751376547 +18 31 0.2881445763087828 +18 35 1.7146098616900045 +18 37 -0.048677578009558006 +18 39 0.3902666562041006 +18 40 -0.23800731217447277 +18 41 1.2886892265696677 +18 42 0.2242644622375494 +18 43 0.47336574177226165 +18 50 1.1994117376842808 +19 1 -0.39253109359986643 +19 2 1.9075673368028128 +19 5 -1.5273902445105394 +19 8 -0.2200309217946991 +19 10 2.5056340285582945 +19 12 -0.46546860438513893 +19 14 0.5168055058171757 +19 16 -0.9320521921233156 +19 18 -1.7096973464241725 +19 19 0.178485414863175 +19 21 0.7304342632083394 +19 23 0.41065018231659844 +19 28 1.3069055601152304 +19 30 -0.07175822018383508 +19 32 -0.36736793027216463 +19 33 -1.8115664104912763 +19 36 -0.09067839720333831 +19 38 -0.14465110726083497 +19 39 -0.08636768503320821 +19 44 -0.3230570890696174 +19 48 -0.5569466316853989 +20 5 -0.4295126407181027 +20 9 0.8312798004651497 +20 12 0.03858849641785166 +20 16 0.545499564690656 +20 20 -1.0628386977026885 +20 24 -0.9315381939834686 +20 27 0.37698482117910265 +20 32 1.4819952940382584 +20 33 0.6236503638914098 +20 40 2.2486893766823304 +20 46 0.022516229850399704 +20 49 -0.03387523377214145 +20 50 -0.7257134336923491 +21 3 -0.5583769074807758 +21 5 0.12665741243159795 +21 9 -0.34467441842337554 +21 11 1.5416427795918037 +21 14 -3.0661434454048577 +21 18 -0.41134206177101656 +21 19 -0.3486759541002314 +21 21 1.4416618057022288 +21 23 1.7089719515418829 +21 24 -0.32785682960722545 +21 26 1.1373870730253055 +21 27 -0.10278776233517312 +21 28 0.20695410496947933 +21 30 -0.14874277299015853 +21 31 0.2285986238609225 +21 33 0.4141295701554122 +21 35 -0.16573464523789216 +21 37 0.22719250836895127 +21 38 -1.5098630892695195 +21 39 0.7822184044581182 +21 42 0.5995462787659518 +21 46 -0.2240540589407036 +21 47 -1.2962434723456115 +22 1 -0.5793323361097524 +22 8 -0.03505736410726527 +22 10 0.6307722669061757 +22 12 -1.185908358558203 +22 13 0.0029110234588312816 
+22 18 -1.590643355298001 +22 22 -0.38997602971752393 +22 30 0.4765514015457819 +22 31 0.4174992330508226 +22 38 1.5146545030151244 +22 43 0.8298425409842126 +22 47 -0.04228241863062719 +22 49 0.45457456578587424 +22 50 -1.1566862391524098 +23 2 -1.9698745149917238 +23 6 -1.1487758118131826 +23 7 -1.016279948636187 +23 9 -1.103938427411287 +23 10 -0.9648443208463784 +23 12 -0.07274655469813145 +23 13 0.8019872114836829 +23 16 -0.5209084907769845 +23 19 -0.6028095933201297 +23 21 2.1003051378001736 +23 23 0.1359246059433681 +23 29 0.40024946656281557 +23 41 -0.8641963668688665 +23 42 0.9674206084831138 +23 43 -0.7557517791534388 +23 49 -0.3424198383378352 +23 50 1.0600904923975039 +24 2 1.4429022378839527 +24 3 -0.7518382047822895 +24 7 -0.41292035365905505 +24 11 -0.4833724836233828 +24 15 -1.0279771629198782 +24 17 0.1463338699576612 +24 20 0.6456287790508382 +24 21 0.5058247103747011 +24 24 -2.0684513498355543 +24 25 -0.8512288810076797 +24 27 1.148884671597174 +24 35 0.25103415205289326 +24 39 2.0020873904272007 +24 43 -2.935996702414229 +24 47 -0.9248689802737877 +24 49 0.13113063788741536 +25 2 0.09621563484279039 +25 5 0.0599901831814009 +25 8 -1.2375607185923478 +25 14 0.6345381365357213 +25 15 -0.9448011825734395 +25 16 -0.08505780958159175 +25 17 -0.03564580799203364 +25 24 1.2374259922959447 +25 25 -0.1604328850133287 +25 30 -0.6068539082393232 +25 35 0.47485124108248405 +25 37 0.36723666950817846 +25 44 1.5025883220527734 +25 47 0.40883036401167866 +25 48 1.5585942382638724 +26 2 0.07883200076125381 +26 3 -0.40132102677902637 +26 4 -0.663062781125293 +26 7 -0.6684418642959067 +26 8 1.472744651290698 +26 9 -1.2383717266311935 +26 18 -1.2122742950461864 +26 21 -1.5510465651462715 +26 26 -0.34160273454920076 +26 31 0.7099253785523588 +26 33 -0.7003154285834834 +26 36 -0.08125040687196416 +26 41 0.6417134382950819 +26 43 -2.6081718756105245 +26 46 0.2339848674555718 +26 47 0.2759566556012328 +27 7 -2.0755994327594602 +27 8 0.8088274106104499 +27 11 
0.974406383611474 +27 18 -1.5022287075310796 +27 20 -0.6364908491221679 +27 21 1.2467485587865426 +27 24 -0.12989022011210294 +27 27 -1.049895181199333 +27 29 -0.5229534305586139 +27 30 0.7090837164422252 +27 32 -1.9321375284200226 +27 33 -1.2536990757033035 +27 34 1.0279419501258225 +27 37 1.8832189975305362 +27 41 0.18433794384070049 +27 45 -0.26575531783126416 +27 47 0.03090772796543303 +28 5 -0.7932803290713425 +28 6 -0.6630696833480801 +28 8 -0.32277425409292204 +28 15 -0.18755509721332722 +28 18 -0.0892853445948387 +28 19 -0.17696469216467683 +28 21 -1.4152081013682751 +28 28 -0.36468501804081466 +28 44 -1.1578571290730693 +28 47 -1.6730258025822042 +28 48 0.7735993049415383 +28 50 -0.47801841057184147 +29 2 0.595849447740606 +29 6 0.47431478072366634 +29 8 -0.7966386401804985 +29 11 1.4876647342674352 +29 18 -0.4243814194984253 +29 23 1.0248501073544416 +29 27 -0.5280636772373556 +29 29 -0.21903776444931183 +29 30 0.22364449113099255 +29 33 -0.3301008627573632 +29 35 -1.3338177825326964 +29 39 -0.06005851769202786 +29 40 0.8364493521479699 +29 45 -1.8301977405737044 +29 46 -1.425355178871837 +30 2 0.5269515297309378 +30 4 -0.49698443201116266 +30 5 -0.4874813084443682 +30 6 -0.8099105660941541 +30 7 -0.5904094047314329 +30 8 -2.087342692376935 +30 13 2.9077013947784587 +30 14 -0.27900645459565404 +30 15 -0.26807666520279977 +30 16 -0.358663473664849 +30 19 0.35836776689243804 +30 21 1.1998411897590597 +30 22 0.10453795825144374 +30 25 1.4630136922322297 +30 27 0.7399577153407773 +30 29 -1.2635446843612566 +30 30 1.6892674372722856 +30 33 -0.5108583741339386 +30 34 0.08286643194019476 +30 37 -0.09483020778890679 +30 41 0.6316357189016462 +30 42 1.6015330014980078 +30 44 -0.5541835784531992 +30 45 -0.8565116618557076 +31 2 -0.6960004626619509 +31 4 -0.30462056536131826 +31 7 -0.261807265179605 +31 8 -0.5730555371336071 +31 10 -0.37864535423765516 +31 11 -0.31418657759189195 +31 17 0.3240193677152476 +31 18 -0.18748929361220787 +31 21 1.0474631764961058 +31 22 
0.6373793929640664 +31 26 0.24701320916104821 +31 31 -0.27058576124155637 +31 32 -0.33896569150367495 +31 33 0.06853705824473785 +31 40 0.6475751809327814 +31 43 1.5682947228175141 +31 44 -1.1022795928271207 +31 47 0.32481440986337634 +31 48 0.11265499671793419 +32 2 0.37933950601952177 +32 3 -1.1568320678723263 +32 10 0.7869885185901785 +32 11 -0.8510365020968427 +32 12 0.12165151897775572 +32 14 0.25501130765100477 +32 17 0.8767042629399194 +32 19 -0.07455050405559993 +32 20 1.573227515097274 +32 27 0.6897679279300565 +32 31 -0.32793923284304544 +32 32 0.4006499331706247 +32 41 -1.0705576808070694 +32 50 -0.2775868659382469 +33 1 0.14744182493718497 +33 4 -0.6923215291493166 +33 6 -1.2798021914361069 +33 7 3.5764098949810883 +33 8 -0.4742672521150719 +33 9 -1.557238370045065 +33 10 0.41129790130516936 +33 17 1.7702467024724706 +33 19 0.19350233525289207 +33 20 -2.1389403308430635 +33 21 0.21433663624097388 +33 26 1.5166177239213605 +33 27 0.5326431766769445 +33 29 -0.0015272493892254833 +33 30 -1.4959887747650866 +33 31 0.5421061090700042 +33 33 0.32282155609409496 +33 38 -1.0915692958521774 +33 42 0.741477390687887 +33 47 -1.0688929411373385 +34 2 1.0496873840205287 +34 5 -1.512840225979526 +34 6 0.2755092972417648 +34 7 -1.5863653198432963 +34 16 -0.567903667447174 +34 27 0.6446890958109828 +34 30 0.33276085921545256 +34 34 -0.130430818781885 +34 40 0.24693187856634105 +34 44 2.9027745150583693 +34 45 -0.9090649708360288 +34 47 1.0079900569285618 +34 48 -1.1530778364717937 +34 49 0.1104832662714099 +35 5 -1.0098914072143839 +35 13 1.3838716362191885 +35 16 0.3221563239825315 +35 18 -0.5054762419605355 +35 21 0.518515588169861 +35 24 -0.7112789602243167 +35 25 0.22788676487242837 +35 29 0.26499196005706155 +35 35 -2.593701594115636 +35 44 2.2654742987627388 +35 45 -0.3544882549830476 +36 10 -0.033206487195276746 +36 13 -1.0217860718890381 +36 15 0.9230211574005858 +36 19 1.086711253627532 +36 26 -1.1630525991640832 +36 36 -0.8389692549293629 +36 41 
-0.7600220108083793 +36 45 0.3657993136657405 +36 46 -0.01931487091001962 +36 47 1.0396884112884546 +37 10 1.9193037801989004 +37 11 -1.772139618278466 +37 15 -0.9702697034498694 +37 16 2.1492285343913546 +37 17 -0.6673855868389723 +37 18 0.9698526505638108 +37 21 1.4361689811948404 +37 25 -1.581589583841258 +37 27 0.9902373597197129 +37 30 -0.42105324564550745 +37 37 -0.7329491440492724 +37 38 0.02721474230670799 +37 40 -0.8503811966999917 +37 45 1.0402448120111014 +37 46 0.1588238087213455 +37 48 2.137119953061256 +38 2 -0.0261420052473049 +38 3 -0.9096557927976113 +38 5 0.8122027346670994 +38 7 -0.8036437077016981 +38 9 -0.46305278042313824 +38 11 0.5396599499689051 +38 13 2.006746894112772 +38 16 0.08022060382631145 +38 17 0.4070580977842981 +38 19 0.49240634519099963 +38 21 -0.9403774991086832 +38 22 -1.2695126530163323 +38 33 0.9774258854688503 +38 37 1.318917532824395 +38 38 2.163080294336954 +38 39 -0.7447775781806444 +38 40 0.5060717711506773 +38 41 0.022702354808947074 +38 46 -0.43107669760441397 +38 48 0.6007713976459741 +39 1 0.9291558960850789 +39 2 -1.1719970627857899 +39 3 0.04978777007580999 +39 6 -0.32116383232701534 +39 7 0.8935606667152927 +39 9 1.975036253128496 +39 13 0.8903960128227447 +39 14 0.7872046128340136 +39 17 1.6727361991529073 +39 18 0.08849464191518713 +39 19 -2.3089508528627882 +39 21 1.6468084960466498 +39 24 -0.4931710357498156 +39 29 -1.5687180737479716 +39 38 0.07825609870963708 +39 39 2.2224441004709536 +40 2 -1.3914232143607397 +40 5 1.157507282921122 +40 8 -1.6907873131437854 +40 10 -0.8418229756603166 +40 12 0.8565275263655966 +40 18 -1.0276598863393047 +40 20 0.030943762917297835 +40 29 0.40867970763397743 +40 31 0.6705341320881398 +40 34 0.30175170992146455 +40 37 -0.08173706249071087 +40 38 0.4014594610686071 +40 40 -0.28579869989672485 +40 43 -1.366910671923716 +40 44 0.7442952361656232 +40 45 1.292449844798062 +40 49 -2.32598739075215 +41 3 -1.370435067347104 +41 4 -0.08428947343182698 +41 6 1.2760984847860903 +41 13 
-0.8454238115844133 +41 18 -0.3342748568430244 +41 23 0.6685783648127224 +41 26 -0.32810390167705866 +41 27 1.3598591649721137 +41 30 0.19054442732253277 +41 32 -0.19990888352018535 +41 36 -0.20867669896709384 +41 38 -1.455987297145165 +41 41 0.08534899462806295 +41 43 0.554963681447742 +41 44 0.32183822829816355 +42 3 0.704821379563854 +42 5 -0.9552588717698243 +42 6 1.0398553604918348 +42 13 -0.5798502029869829 +42 15 -0.652996450024776 +42 16 0.7710557811361994 +42 17 -1.5589504826566947 +42 18 -1.1150972482248656 +42 21 -0.3215901789307355 +42 23 0.5724341918932558 +42 30 -0.5099300232857825 +42 33 -1.0239327839252508 +42 42 -0.006691882232823868 +42 45 0.7720023165168481 +42 46 1.4550702877527384 +42 47 -0.5536180259277853 +42 48 -0.2779983042343706 +42 50 -1.1270496990990801 +43 1 1.0071224344289873 +43 4 0.5766183253652049 +43 6 0.4800663248825837 +43 8 0.36179553535727443 +43 9 0.9801810629985204 +43 10 -1.381580862200723 +43 18 1.442194175628304 +43 22 -2.3000089540811786 +43 23 -0.7149221036982287 +43 24 0.20347640658517296 +43 26 0.6091698926757204 +43 31 0.6891464324768787 +43 40 0.40796542678525016 +43 41 0.14958388826691624 +43 43 -0.8101419813262694 +43 47 0.3352540150335276 +44 9 0.26261346060858826 +44 10 0.5098474995146199 +44 11 1.1396985794453043 +44 12 0.4022838113175317 +44 13 -0.0733851080937527 +44 16 -0.6836626835367803 +44 19 0.1667988756665793 +44 25 0.3141544403775245 +44 28 1.1918366315921012 +44 30 -0.4215979334199488 +44 31 -0.6111960672745367 +44 34 1.5090732121656385 +44 35 2.0234982492522264 +44 40 -1.3108596956540004 +44 41 0.500700981837014 +44 44 1.3609293863079706 +44 45 -1.3728282273693 +44 46 -1.977359424991569 +44 50 0.6464864152970877 +45 6 1.0633168430605209 +45 7 -0.6795281544668453 +45 8 -0.8701771032307314 +45 10 -1.3482667030759583 +45 11 -0.3351696118613294 +45 13 -1.1231610375497214 +45 15 1.6871653118512346 +45 16 0.42097806219891837 +45 27 -0.14036879613366618 +45 29 -0.44190622421866926 +45 30 0.09731741880144082 
+45 34 1.2610484976670604 +45 35 -1.6757968834265067 +45 36 -0.884729340158471 +45 37 0.38483990282915864 +45 40 0.27245515619319055 +45 42 -1.044754118030573 +45 44 0.10404307915336418 +45 45 -0.4617864520844464 +46 1 1.5449845988734408 +46 3 1.8217250724522946 +46 4 1.0741632952223088 +46 5 0.14850787913879612 +46 6 2.081953039981725 +46 13 -0.5552565814710013 +46 20 -0.8845278982028552 +46 21 1.1566737069305788 +46 26 -1.2280652269275167 +46 29 -1.3485823306482367 +46 36 -1.2083681538002453 +46 37 -2.4487808896825056 +46 38 1.7442260273832304 +46 42 1.0001301999825352 +46 44 -0.49483745018569086 +46 46 -0.8250457475747013 +46 49 0.7942407188469035 +46 50 -0.013385331857797247 +47 7 -0.6841391502718691 +47 8 1.7881792083562311 +47 14 -0.695114695200466 +47 21 2.0399342834373835 +47 22 -0.17163913422192256 +47 24 1.567483215070286 +47 25 -0.19141914335132404 +47 26 -0.1466140522412783 +47 27 1.1187665932706545 +47 28 0.5889598589019907 +47 31 0.4797792026017961 +47 33 1.2247117314166511 +47 34 -1.8119903697827897 +47 36 1.1435982258383846 +47 42 -0.37480403318925 +47 43 0.43301190414407553 +47 47 1.405536675169851 +47 49 2.2629687753985697 +48 2 0.5810097127270123 +48 3 0.2976541603998468 +48 4 -0.2908484532174809 +48 5 1.018472177155085 +48 6 0.7041063156765373 +48 11 -1.0315108150054737 +48 13 0.4231153597815732 +48 15 2.734185344937642 +48 19 -0.5147513639446858 +48 25 -1.162844645443759 +48 28 0.5380904236896372 +48 31 -0.19142376390894825 +48 34 0.018412610358746574 +48 37 1.3889671175581886 +48 38 -0.3847295407791098 +48 42 0.47957725552200126 +48 48 -0.34290644782094615 +49 3 -0.2515274117735154 +49 7 -0.3481452345058658 +49 10 -0.8886304715042729 +49 12 -0.8731752572125361 +49 14 -0.01240852504064206 +49 17 0.29280094056297085 +49 20 1.3424764284588322 +49 22 0.9295043375407388 +49 23 -0.20518228894617255 +49 24 0.05822047131618444 +49 34 -0.44644317681565976 +49 40 -1.9668692061679034 +49 46 0.14697167280653758 +49 47 1.9229743378437136 +49 49 
-0.41278594756894593 +50 3 -0.9153983187060337 +50 8 -1.9622312834633189 +50 10 -0.2317525904783636 +50 12 0.19138779002200085 +50 16 0.9520739919390878 +50 18 -1.0643510066136 +50 20 0.5240343163814127 +50 22 -0.10490072331175097 +50 23 0.14330635689385154 +50 28 0.04164096979493717 +50 32 -0.4556687922526301 +50 42 -0.11291910077313887 +50 44 -1.2275511793995748 +50 46 1.3349507539799068 +50 50 0.2337292936885357 diff --git a/tests/offset_map_matrix_transposed.txt b/tests/offset_map_matrix_transposed.txt new file mode 100644 index 000000000..ca6a1791c --- /dev/null +++ b/tests/offset_map_matrix_transposed.txt @@ -0,0 +1,850 @@ +%%MatrixMarket matrix coordinate real general +50 50 848 +1 1 -1.1640162693857792 +2 1 -1.4182558603860957 +4 1 -0.6652783046746638 +7 1 -0.3810535906432545 +11 1 -0.6563172555901167 +13 1 2.4881286599083707 +17 1 -1.0345553063001411 +19 1 0.1279122570117053 +22 1 -1.4450482561517808 +33 1 -1.4973404272897985 +39 1 0.989494790153136 +43 1 1.4035610072308178 +46 1 -1.4634598848419837 +1 2 0.7937901820374852 +2 2 -1.3483019250309103 +3 2 -1.534536353394763 +5 2 -0.7819255095818141 +6 2 0.21912483161129548 +7 2 1.9185876920903795 +9 2 0.6283208544623571 +13 2 0.36352373656672116 +14 2 -1.471128765967237 +19 2 -0.6521277429033553 +23 2 0.7234427285193916 +24 2 -0.9401018026326134 +25 2 -0.6068777593411266 +26 2 0.19726805543992912 +29 2 -1.1776729668562218 +30 2 -0.32162365081842914 +31 2 -0.8363377726122794 +32 2 -0.29111795440353216 +34 2 1.0322677020266033 +38 2 0.23638934071376647 +39 2 0.23020966740491275 +40 2 -0.417421746191773 +48 2 -0.049356574469409484 +2 3 -0.03366827688954402 +3 3 -1.5301951784625547 +7 3 -1.7463778248751873 +8 3 0.7838767011031638 +13 3 -1.270926473120464 +15 3 -0.43354480832235565 +21 3 -0.6791679623513996 +24 3 0.3182001978470255 +26 3 1.481267255773548 +32 3 0.7678228268120352 +38 3 0.42052610249848366 +39 3 -1.5750879578362842 +41 3 1.260449664220387 +42 3 0.916298331782843 +46 3 0.6803299319321364 +48 3 
-0.4732624627863532 +49 3 1.082460026604664 +50 3 -2.1654996525990207 +1 4 0.4450803054440506 +4 4 -0.31297245312801325 +9 4 -0.5620032379365633 +13 4 0.8811374859858175 +18 4 -0.8416050779420092 +26 4 1.3804749091718078 +30 4 0.16219779998776584 +31 4 0.14096119833458218 +33 4 -1.6625439465924297 +41 4 0.7887533576814337 +43 4 -0.6942950200964312 +46 4 -0.8351336538683832 +48 4 1.213172044194188 +2 5 -0.6080286294166338 +5 5 -2.0943444221144385 +6 5 1.384330977877084 +9 5 1.834623676906424 +10 5 -0.8761980873715278 +11 5 -1.3360478306222685 +16 5 -0.003415086818400862 +18 5 -0.15009657485305272 +19 5 1.5105685169571559 +20 5 0.867385510476716 +21 5 -0.5457894297692334 +25 5 -0.2599301955884775 +28 5 -0.37742893332343647 +30 5 -2.0213225252568523 +34 5 -0.4448199086705089 +35 5 1.019035325272961 +38 5 1.0560318627318444 +40 5 -0.8667352840192679 +42 5 -0.2631232293157956 +46 5 1.4596572629236262 +48 5 -0.7950035029585614 +2 6 -0.5462785017897367 +5 6 0.9916250609520301 +6 6 -2.5024560757759584 +8 6 -0.40223557718951425 +9 6 -0.7256159324984878 +17 6 0.07000920053694806 +23 6 1.422082337992691 +28 6 -0.04405406333208112 +29 6 0.7296763419443075 +30 6 -0.7881979624241918 +33 6 1.1142960251799454 +34 6 -0.7936946409282006 +39 6 0.5765186765450424 +41 6 0.7659896684142042 +42 6 0.13757544444400835 +43 6 1.8468549087768105 +45 6 0.384422957040213 +46 6 0.03664962391465893 +48 6 -1.7539479827579822 +1 7 0.9257970515279664 +2 7 0.588230091950247 +3 7 0.1685561295248156 +7 7 1.0238461950417235 +8 7 -0.6875468016096212 +10 7 -1.8334088102150734 +11 7 0.02438319611417031 +12 7 -0.17819186052258176 +23 7 -2.6512848145758023 +24 7 0.3658007898009245 +26 7 -0.13756076948189752 +27 7 0.5444979337183926 +30 7 0.11144196646762107 +31 7 0.25394983326980825 +33 7 -0.5598435615074352 +34 7 -1.1639958660164635 +38 7 0.6617241341738084 +39 7 -0.9373040350814857 +45 7 1.1925387648834163 +47 7 -0.1252429583063194 +49 7 0.1863755738201403 +3 8 -2.164029237298608 +6 8 -0.10844113537970246 
+7 8 -0.24450770802913588 +8 8 -0.8922198144344635 +11 8 -0.12260404864223766 +12 8 0.6450457464392393 +14 8 -0.6578139885661349 +15 8 0.039395765965406844 +17 8 -1.2765747158407104 +19 8 0.24614546177602523 +22 8 0.6267031103839719 +25 8 -1.1586051275107683 +26 8 -1.5703953625058857 +27 8 -1.2368616300459365 +28 8 0.5162274385614679 +29 8 0.5784419970119425 +30 8 1.1380623107633334 +31 8 0.46002560089012295 +33 8 0.3694717874042891 +40 8 0.923124753692947 +43 8 2.658342741659545 +45 8 -1.0437279202863807 +47 8 1.763510158441251 +50 8 -0.9841878306649064 +2 9 0.6698415452946738 +4 9 -0.22297071789884176 +5 9 -0.6340402036919065 +6 9 -0.09008175722253114 +9 9 1.4180024237640354 +14 9 0.1704233226646137 +20 9 -0.6090500494978475 +21 9 1.5910224144131337 +23 9 1.275902068255959 +26 9 1.0824338823514525 +33 9 0.11288998150107547 +38 9 -0.31286012327974233 +39 9 -0.9616670972077335 +43 9 -0.1557761657700043 +44 9 -0.7780802453094313 +5 10 0.6904879131191686 +7 10 -1.2572953371397426 +10 10 -0.34305114851994023 +11 10 1.913802957055161 +13 10 1.2157369460234284 +15 10 -0.9042650592903851 +17 10 0.15803252082876973 +19 10 0.3063724756501122 +22 10 0.043737219469907065 +23 10 -1.1543198039610394 +31 10 -0.08120594221258237 +32 10 -1.1588785933228243 +33 10 -0.2113739458997056 +36 10 -0.3723874359167642 +37 10 -0.8574315946374444 +40 10 2.367858385708291 +43 10 -0.27965944361682565 +44 10 0.9621859844154922 +45 10 0.6901517908099928 +49 10 -0.2804225085524067 +50 10 0.20841210893226758 +1 11 0.9836055582782907 +5 11 0.8326285756360723 +7 11 0.24044476525093098 +8 11 -2.166532388842017 +10 11 -0.21031550096597298 +11 11 0.052994199785863 +15 11 1.9901151965695802 +21 11 0.45572509322845134 +24 11 -0.880593093625388 +27 11 0.7769946200047971 +29 11 -0.6814443521195398 +31 11 -0.7148953964722308 +32 11 0.2730781519022447 +37 11 -0.9290991191450806 +38 11 -0.9451980513352437 +44 11 -0.3437583521791611 +45 11 1.1747279568692308 +48 11 -0.11645253155382618 +7 12 
-0.3034845262733902 +8 12 -0.08896191759061581 +12 12 0.5036978455262423 +15 12 0.3178607500557986 +19 12 1.3656156116578528 +20 12 1.1118854001888767 +22 12 -0.8978198546337864 +23 12 2.1532891981919398 +32 12 -1.9082499979596879 +40 12 -0.3943560805577487 +44 12 -0.3943010011405521 +49 12 0.007563048280009652 +50 12 -2.220558434696489 +1 13 -1.5365799932767636 +2 13 1.5314847817626112 +3 13 -0.9013718463799956 +4 13 1.0404662267385238 +10 13 -0.7817664262628053 +13 13 -0.5232082838283634 +16 13 0.3666746605756056 +22 13 -0.23479496561332408 +23 13 0.863733656290349 +30 13 -0.6061769949271227 +35 13 0.28382042440385924 +36 13 -0.25931363965647 +38 13 0.8270056737690984 +39 13 -0.5786289190874652 +41 13 -1.2990853709128758 +42 13 -1.50712248377886 +44 13 0.5422591071477139 +45 13 -0.21580145602040487 +46 13 -1.2193548360559685 +48 13 -1.590979588417989 +2 14 -1.9451772588176117 +8 14 0.022673140961644918 +9 14 -0.5758308273936078 +14 14 0.36511143247178185 +19 14 0.3679269796019015 +21 14 -0.269563417198021 +25 14 1.2119432169046207 +30 14 -0.4883570034605246 +32 14 0.1929203286050791 +39 14 -0.665559643265664 +47 14 1.3035909325429085 +49 14 -0.05476379144623836 +3 15 -0.053377020810862416 +8 15 1.9302575725447662 +10 15 -0.24243408886509507 +11 15 -0.25808094168155327 +12 15 0.47022708237844313 +15 15 -0.21214613807025123 +18 15 0.5867216736722363 +24 15 0.7986489182639825 +25 15 1.0301100826999252 +28 15 -0.21495395125912922 +30 15 -0.22652664640648987 +36 15 0.8411036559911869 +37 15 -0.7333516526667732 +42 15 -0.6962738652430717 +45 15 0.3329558482408548 +48 15 0.7586961458850335 +5 16 -1.4187067485863714 +13 16 -1.1627075345265057 +16 16 -1.1700699602496287 +19 16 -1.297407153884415 +20 16 1.3380866088463814 +23 16 -0.14566124018497298 +25 16 -0.06062994018104683 +30 16 0.048962833975358604 +34 16 0.9787562410040349 +35 16 -0.20254479485946494 +37 16 0.9002267443027536 +38 16 0.30180264302299753 +42 16 1.4657321299948673 +44 16 0.7600082166858397 +45 16 
0.6843029647870434 +50 16 0.25677106966826024 +1 17 -0.31889825918510856 +6 17 1.026637062372315 +8 17 1.4211714691297912 +10 17 -0.466633655454311 +17 17 2.057542153979448 +24 17 -0.5630353084417016 +25 17 -0.4488634934076392 +31 17 2.314826945590068 +32 17 0.5645189366269747 +33 17 0.430944809151469 +37 17 -1.5659164439886035 +38 17 1.3446543976449992 +39 17 0.16050636445850358 +42 17 -0.6812083686626337 +49 17 0.01691514384681582 +4 18 0.6554662920123833 +5 18 -1.45963372849715 +15 18 -1.2677974537647643 +18 18 1.0268909956413137 +19 18 1.670504151612372 +21 18 -1.3522607198306429 +22 18 0.9138575180392129 +26 18 0.5151808617665289 +27 18 0.8872213986143038 +28 18 0.6632108591259984 +29 18 -0.016457428751376547 +31 18 0.2881445763087828 +35 18 1.7146098616900045 +37 18 -0.048677578009558006 +39 18 0.3902666562041006 +40 18 -0.23800731217447277 +41 18 1.2886892265696677 +42 18 0.2242644622375494 +43 18 0.47336574177226165 +50 18 1.1994117376842808 +1 19 -0.39253109359986643 +2 19 1.9075673368028128 +5 19 -1.5273902445105394 +8 19 -0.2200309217946991 +10 19 2.5056340285582945 +12 19 -0.46546860438513893 +14 19 0.5168055058171757 +16 19 -0.9320521921233156 +18 19 -1.7096973464241725 +19 19 0.178485414863175 +21 19 0.7304342632083394 +23 19 0.41065018231659844 +28 19 1.3069055601152304 +30 19 -0.07175822018383508 +32 19 -0.36736793027216463 +33 19 -1.8115664104912763 +36 19 -0.09067839720333831 +38 19 -0.14465110726083497 +39 19 -0.08636768503320821 +44 19 -0.3230570890696174 +48 19 -0.5569466316853989 +5 20 -0.4295126407181027 +9 20 0.8312798004651497 +12 20 0.03858849641785166 +16 20 0.545499564690656 +20 20 -1.0628386977026885 +24 20 -0.9315381939834686 +27 20 0.37698482117910265 +32 20 1.4819952940382584 +33 20 0.6236503638914098 +40 20 2.2486893766823304 +46 20 0.022516229850399704 +49 20 -0.03387523377214145 +50 20 -0.7257134336923491 +3 21 -0.5583769074807758 +5 21 0.12665741243159795 +9 21 -0.34467441842337554 +11 21 1.5416427795918037 +14 21 
-3.0661434454048577 +18 21 -0.41134206177101656 +19 21 -0.3486759541002314 +21 21 1.4416618057022288 +23 21 1.7089719515418829 +24 21 -0.32785682960722545 +26 21 1.1373870730253055 +27 21 -0.10278776233517312 +28 21 0.20695410496947933 +30 21 -0.14874277299015853 +31 21 0.2285986238609225 +33 21 0.4141295701554122 +35 21 -0.16573464523789216 +37 21 0.22719250836895127 +38 21 -1.5098630892695195 +39 21 0.7822184044581182 +42 21 0.5995462787659518 +46 21 -0.2240540589407036 +47 21 -1.2962434723456115 +1 22 -0.5793323361097524 +8 22 -0.03505736410726527 +10 22 0.6307722669061757 +12 22 -1.185908358558203 +13 22 0.0029110234588312816 +18 22 -1.590643355298001 +22 22 -0.38997602971752393 +30 22 0.4765514015457819 +31 22 0.4174992330508226 +38 22 1.5146545030151244 +43 22 0.8298425409842126 +47 22 -0.04228241863062719 +49 22 0.45457456578587424 +50 22 -1.1566862391524098 +2 23 -1.9698745149917238 +6 23 -1.1487758118131826 +7 23 -1.016279948636187 +9 23 -1.103938427411287 +10 23 -0.9648443208463784 +12 23 -0.07274655469813145 +13 23 0.8019872114836829 +16 23 -0.5209084907769845 +19 23 -0.6028095933201297 +21 23 2.1003051378001736 +23 23 0.1359246059433681 +29 23 0.40024946656281557 +41 23 -0.8641963668688665 +42 23 0.9674206084831138 +43 23 -0.7557517791534388 +49 23 -0.3424198383378352 +50 23 1.0600904923975039 +2 24 1.4429022378839527 +3 24 -0.7518382047822895 +7 24 -0.41292035365905505 +11 24 -0.4833724836233828 +15 24 -1.0279771629198782 +17 24 0.1463338699576612 +20 24 0.6456287790508382 +21 24 0.5058247103747011 +24 24 -2.0684513498355543 +25 24 -0.8512288810076797 +27 24 1.148884671597174 +35 24 0.25103415205289326 +39 24 2.0020873904272007 +43 24 -2.935996702414229 +47 24 -0.9248689802737877 +49 24 0.13113063788741536 +2 25 0.09621563484279039 +5 25 0.0599901831814009 +8 25 -1.2375607185923478 +14 25 0.6345381365357213 +15 25 -0.9448011825734395 +16 25 -0.08505780958159175 +17 25 -0.03564580799203364 +24 25 1.2374259922959447 +25 25 -0.1604328850133287 +30 25 
-0.6068539082393232 +35 25 0.47485124108248405 +37 25 0.36723666950817846 +44 25 1.5025883220527734 +47 25 0.40883036401167866 +48 25 1.5585942382638724 +2 26 0.07883200076125381 +3 26 -0.40132102677902637 +4 26 -0.663062781125293 +7 26 -0.6684418642959067 +8 26 1.472744651290698 +9 26 -1.2383717266311935 +18 26 -1.2122742950461864 +21 26 -1.5510465651462715 +26 26 -0.34160273454920076 +31 26 0.7099253785523588 +33 26 -0.7003154285834834 +36 26 -0.08125040687196416 +41 26 0.6417134382950819 +43 26 -2.6081718756105245 +46 26 0.2339848674555718 +47 26 0.2759566556012328 +7 27 -2.0755994327594602 +8 27 0.8088274106104499 +11 27 0.974406383611474 +18 27 -1.5022287075310796 +20 27 -0.6364908491221679 +21 27 1.2467485587865426 +24 27 -0.12989022011210294 +27 27 -1.049895181199333 +29 27 -0.5229534305586139 +30 27 0.7090837164422252 +32 27 -1.9321375284200226 +33 27 -1.2536990757033035 +34 27 1.0279419501258225 +37 27 1.8832189975305362 +41 27 0.18433794384070049 +45 27 -0.26575531783126416 +47 27 0.03090772796543303 +5 28 -0.7932803290713425 +6 28 -0.6630696833480801 +8 28 -0.32277425409292204 +15 28 -0.18755509721332722 +18 28 -0.0892853445948387 +19 28 -0.17696469216467683 +21 28 -1.4152081013682751 +28 28 -0.36468501804081466 +44 28 -1.1578571290730693 +47 28 -1.6730258025822042 +48 28 0.7735993049415383 +50 28 -0.47801841057184147 +2 29 0.595849447740606 +6 29 0.47431478072366634 +8 29 -0.7966386401804985 +11 29 1.4876647342674352 +18 29 -0.4243814194984253 +23 29 1.0248501073544416 +27 29 -0.5280636772373556 +29 29 -0.21903776444931183 +30 29 0.22364449113099255 +33 29 -0.3301008627573632 +35 29 -1.3338177825326964 +39 29 -0.06005851769202786 +40 29 0.8364493521479699 +45 29 -1.8301977405737044 +46 29 -1.425355178871837 +2 30 0.5269515297309378 +4 30 -0.49698443201116266 +5 30 -0.4874813084443682 +6 30 -0.8099105660941541 +7 30 -0.5904094047314329 +8 30 -2.087342692376935 +13 30 2.9077013947784587 +14 30 -0.27900645459565404 +15 30 -0.26807666520279977 +16 30 
-0.358663473664849 +19 30 0.35836776689243804 +21 30 1.1998411897590597 +22 30 0.10453795825144374 +25 30 1.4630136922322297 +27 30 0.7399577153407773 +29 30 -1.2635446843612566 +30 30 1.6892674372722856 +33 30 -0.5108583741339386 +34 30 0.08286643194019476 +37 30 -0.09483020778890679 +41 30 0.6316357189016462 +42 30 1.6015330014980078 +44 30 -0.5541835784531992 +45 30 -0.8565116618557076 +2 31 -0.6960004626619509 +4 31 -0.30462056536131826 +7 31 -0.261807265179605 +8 31 -0.5730555371336071 +10 31 -0.37864535423765516 +11 31 -0.31418657759189195 +17 31 0.3240193677152476 +18 31 -0.18748929361220787 +21 31 1.0474631764961058 +22 31 0.6373793929640664 +26 31 0.24701320916104821 +31 31 -0.27058576124155637 +32 31 -0.33896569150367495 +33 31 0.06853705824473785 +40 31 0.6475751809327814 +43 31 1.5682947228175141 +44 31 -1.1022795928271207 +47 31 0.32481440986337634 +48 31 0.11265499671793419 +2 32 0.37933950601952177 +3 32 -1.1568320678723263 +10 32 0.7869885185901785 +11 32 -0.8510365020968427 +12 32 0.12165151897775572 +14 32 0.25501130765100477 +17 32 0.8767042629399194 +19 32 -0.07455050405559993 +20 32 1.573227515097274 +27 32 0.6897679279300565 +31 32 -0.32793923284304544 +32 32 0.4006499331706247 +41 32 -1.0705576808070694 +50 32 -0.2775868659382469 +1 33 0.14744182493718497 +4 33 -0.6923215291493166 +6 33 -1.2798021914361069 +7 33 3.5764098949810883 +8 33 -0.4742672521150719 +9 33 -1.557238370045065 +10 33 0.41129790130516936 +17 33 1.7702467024724706 +19 33 0.19350233525289207 +20 33 -2.1389403308430635 +21 33 0.21433663624097388 +26 33 1.5166177239213605 +27 33 0.5326431766769445 +29 33 -0.0015272493892254833 +30 33 -1.4959887747650866 +31 33 0.5421061090700042 +33 33 0.32282155609409496 +38 33 -1.0915692958521774 +42 33 0.741477390687887 +47 33 -1.0688929411373385 +2 34 1.0496873840205287 +5 34 -1.512840225979526 +6 34 0.2755092972417648 +7 34 -1.5863653198432963 +16 34 -0.567903667447174 +27 34 0.6446890958109828 +30 34 0.33276085921545256 +34 34 
-0.130430818781885 +40 34 0.24693187856634105 +44 34 2.9027745150583693 +45 34 -0.9090649708360288 +47 34 1.0079900569285618 +48 34 -1.1530778364717937 +49 34 0.1104832662714099 +5 35 -1.0098914072143839 +13 35 1.3838716362191885 +16 35 0.3221563239825315 +18 35 -0.5054762419605355 +21 35 0.518515588169861 +24 35 -0.7112789602243167 +25 35 0.22788676487242837 +29 35 0.26499196005706155 +35 35 -2.593701594115636 +44 35 2.2654742987627388 +45 35 -0.3544882549830476 +10 36 -0.033206487195276746 +13 36 -1.0217860718890381 +15 36 0.9230211574005858 +19 36 1.086711253627532 +26 36 -1.1630525991640832 +36 36 -0.8389692549293629 +41 36 -0.7600220108083793 +45 36 0.3657993136657405 +46 36 -0.01931487091001962 +47 36 1.0396884112884546 +10 37 1.9193037801989004 +11 37 -1.772139618278466 +15 37 -0.9702697034498694 +16 37 2.1492285343913546 +17 37 -0.6673855868389723 +18 37 0.9698526505638108 +21 37 1.4361689811948404 +25 37 -1.581589583841258 +27 37 0.9902373597197129 +30 37 -0.42105324564550745 +37 37 -0.7329491440492724 +38 37 0.02721474230670799 +40 37 -0.8503811966999917 +45 37 1.0402448120111014 +46 37 0.1588238087213455 +48 37 2.137119953061256 +2 38 -0.0261420052473049 +3 38 -0.9096557927976113 +5 38 0.8122027346670994 +7 38 -0.8036437077016981 +9 38 -0.46305278042313824 +11 38 0.5396599499689051 +13 38 2.006746894112772 +16 38 0.08022060382631145 +17 38 0.4070580977842981 +19 38 0.49240634519099963 +21 38 -0.9403774991086832 +22 38 -1.2695126530163323 +33 38 0.9774258854688503 +37 38 1.318917532824395 +38 38 2.163080294336954 +39 38 -0.7447775781806444 +40 38 0.5060717711506773 +41 38 0.022702354808947074 +46 38 -0.43107669760441397 +48 38 0.6007713976459741 +1 39 0.9291558960850789 +2 39 -1.1719970627857899 +3 39 0.04978777007580999 +6 39 -0.32116383232701534 +7 39 0.8935606667152927 +9 39 1.975036253128496 +13 39 0.8903960128227447 +14 39 0.7872046128340136 +17 39 1.6727361991529073 +18 39 0.08849464191518713 +19 39 -2.3089508528627882 +21 39 1.6468084960466498 +24 
39 -0.4931710357498156 +29 39 -1.5687180737479716 +38 39 0.07825609870963708 +39 39 2.2224441004709536 +2 40 -1.3914232143607397 +5 40 1.157507282921122 +8 40 -1.6907873131437854 +10 40 -0.8418229756603166 +12 40 0.8565275263655966 +18 40 -1.0276598863393047 +20 40 0.030943762917297835 +29 40 0.40867970763397743 +31 40 0.6705341320881398 +34 40 0.30175170992146455 +37 40 -0.08173706249071087 +38 40 0.4014594610686071 +40 40 -0.28579869989672485 +43 40 -1.366910671923716 +44 40 0.7442952361656232 +45 40 1.292449844798062 +49 40 -2.32598739075215 +3 41 -1.370435067347104 +4 41 -0.08428947343182698 +6 41 1.2760984847860903 +13 41 -0.8454238115844133 +18 41 -0.3342748568430244 +23 41 0.6685783648127224 +26 41 -0.32810390167705866 +27 41 1.3598591649721137 +30 41 0.19054442732253277 +32 41 -0.19990888352018535 +36 41 -0.20867669896709384 +38 41 -1.455987297145165 +41 41 0.08534899462806295 +43 41 0.554963681447742 +44 41 0.32183822829816355 +3 42 0.704821379563854 +5 42 -0.9552588717698243 +6 42 1.0398553604918348 +13 42 -0.5798502029869829 +15 42 -0.652996450024776 +16 42 0.7710557811361994 +17 42 -1.5589504826566947 +18 42 -1.1150972482248656 +21 42 -0.3215901789307355 +23 42 0.5724341918932558 +30 42 -0.5099300232857825 +33 42 -1.0239327839252508 +42 42 -0.006691882232823868 +45 42 0.7720023165168481 +46 42 1.4550702877527384 +47 42 -0.5536180259277853 +48 42 -0.2779983042343706 +50 42 -1.1270496990990801 +1 43 1.0071224344289873 +4 43 0.5766183253652049 +6 43 0.4800663248825837 +8 43 0.36179553535727443 +9 43 0.9801810629985204 +10 43 -1.381580862200723 +18 43 1.442194175628304 +22 43 -2.3000089540811786 +23 43 -0.7149221036982287 +24 43 0.20347640658517296 +26 43 0.6091698926757204 +31 43 0.6891464324768787 +40 43 0.40796542678525016 +41 43 0.14958388826691624 +43 43 -0.8101419813262694 +47 43 0.3352540150335276 +9 44 0.26261346060858826 +10 44 0.5098474995146199 +11 44 1.1396985794453043 +12 44 0.4022838113175317 +13 44 -0.0733851080937527 +16 44 
-0.6836626835367803 +19 44 0.1667988756665793 +25 44 0.3141544403775245 +28 44 1.1918366315921012 +30 44 -0.4215979334199488 +31 44 -0.6111960672745367 +34 44 1.5090732121656385 +35 44 2.0234982492522264 +40 44 -1.3108596956540004 +41 44 0.500700981837014 +44 44 1.3609293863079706 +45 44 -1.3728282273693 +46 44 -1.977359424991569 +50 44 0.6464864152970877 +6 45 1.0633168430605209 +7 45 -0.6795281544668453 +8 45 -0.8701771032307314 +10 45 -1.3482667030759583 +11 45 -0.3351696118613294 +13 45 -1.1231610375497214 +15 45 1.6871653118512346 +16 45 0.42097806219891837 +27 45 -0.14036879613366618 +29 45 -0.44190622421866926 +30 45 0.09731741880144082 +34 45 1.2610484976670604 +35 45 -1.6757968834265067 +36 45 -0.884729340158471 +37 45 0.38483990282915864 +40 45 0.27245515619319055 +42 45 -1.044754118030573 +44 45 0.10404307915336418 +45 45 -0.4617864520844464 +1 46 1.5449845988734408 +3 46 1.8217250724522946 +4 46 1.0741632952223088 +5 46 0.14850787913879612 +6 46 2.081953039981725 +13 46 -0.5552565814710013 +20 46 -0.8845278982028552 +21 46 1.1566737069305788 +26 46 -1.2280652269275167 +29 46 -1.3485823306482367 +36 46 -1.2083681538002453 +37 46 -2.4487808896825056 +38 46 1.7442260273832304 +42 46 1.0001301999825352 +44 46 -0.49483745018569086 +46 46 -0.8250457475747013 +49 46 0.7942407188469035 +50 46 -0.013385331857797247 +7 47 -0.6841391502718691 +8 47 1.7881792083562311 +14 47 -0.695114695200466 +21 47 2.0399342834373835 +22 47 -0.17163913422192256 +24 47 1.567483215070286 +25 47 -0.19141914335132404 +26 47 -0.1466140522412783 +27 47 1.1187665932706545 +28 47 0.5889598589019907 +31 47 0.4797792026017961 +33 47 1.2247117314166511 +34 47 -1.8119903697827897 +36 47 1.1435982258383846 +42 47 -0.37480403318925 +43 47 0.43301190414407553 +47 47 1.405536675169851 +49 47 2.2629687753985697 +2 48 0.5810097127270123 +3 48 0.2976541603998468 +4 48 -0.2908484532174809 +5 48 1.018472177155085 +6 48 0.7041063156765373 +11 48 -1.0315108150054737 +13 48 0.4231153597815732 +15 48 
2.734185344937642 +19 48 -0.5147513639446858 +25 48 -1.162844645443759 +28 48 0.5380904236896372 +31 48 -0.19142376390894825 +34 48 0.018412610358746574 +37 48 1.3889671175581886 +38 48 -0.3847295407791098 +42 48 0.47957725552200126 +48 48 -0.34290644782094615 +3 49 -0.2515274117735154 +7 49 -0.3481452345058658 +10 49 -0.8886304715042729 +12 49 -0.8731752572125361 +14 49 -0.01240852504064206 +17 49 0.29280094056297085 +20 49 1.3424764284588322 +22 49 0.9295043375407388 +23 49 -0.20518228894617255 +24 49 0.05822047131618444 +34 49 -0.44644317681565976 +40 49 -1.9668692061679034 +46 49 0.14697167280653758 +47 49 1.9229743378437136 +49 49 -0.41278594756894593 +3 50 -0.9153983187060337 +8 50 -1.9622312834633189 +10 50 -0.2317525904783636 +12 50 0.19138779002200085 +16 50 0.9520739919390878 +18 50 -1.0643510066136 +20 50 0.5240343163814127 +22 50 -0.10490072331175097 +23 50 0.14330635689385154 +28 50 0.04164096979493717 +32 50 -0.4556687922526301 +42 50 -0.11291910077313887 +44 50 -1.2275511793995748 +46 50 1.3349507539799068 +50 50 0.2337292936885357 diff --git a/tests/test_csrToCscOffsetMap.cpp b/tests/test_csrToCscOffsetMap.cpp new file mode 100644 index 000000000..59134bbd5 --- /dev/null +++ b/tests/test_csrToCscOffsetMap.cpp @@ -0,0 +1,63 @@ +#include +#include + +#define BOOST_TEST_MODULE CsrToCscOffsetMap +#include +#include + +#include +#include +#include +#include + +#include + +BOOST_AUTO_TEST_CASE(testcsrtocscoffsetmap){ + using Matrix = Dune::BCRSMatrix; + + Matrix matrix; + { + std::ifstream mfile("offset_map_matrix.txt"); + if (!mfile) { + throw std::runtime_error("Could not read matrix file"); + } + readMatrixMarket(matrix, mfile); + } + + // a transposed version of the matrix is read because the transposed + // of a CSR representation is equivalente to CSC + Matrix matrix_transposed; + { + std::ifstream mfile("offset_map_matrix_transposed.txt"); + if (!mfile) { + throw std::runtime_error("Could not read matrix file"); + } + 
readMatrixMarket(matrix_transposed, mfile); + } + + // has to make copy because the output of readMatrixMarket does not + // have contiguous non-zero values + Matrix matrix_copy(matrix); + Matrix matrix_transposed_copy(matrix_transposed); + + std::vector rowPointers, colIndices, map; + double* nnzValues; + double* nnzValues_transposed; + + nnzValues = &matrix_copy[0][0]; + nnzValues_transposed = &matrix_transposed_copy[0][0]; + + rowPointers.emplace_back(0); + for (Matrix::Iterator r = matrix.begin(); r != matrix.end(); ++r) { + for (auto c = r->begin(); c != r->end(); ++c) { + colIndices.emplace_back(c.index()); + } + rowPointers.emplace_back(colIndices.size()); + } + + map = Opm::Accelerator::buildCsrToCscOffsetMap(rowPointers, colIndices); + + for (unsigned int i = 0; i < colIndices.size(); i++){ + BOOST_CHECK_EQUAL(nnzValues[i], nnzValues_transposed[map[i]]); + } +}