Adjust kernel-tuning log output and use the tuned lower-solve thread-block size when tuning the upper solve

2025-02-25 18:55:30 -06:00 · 2024-12-03 16:47:02 +01:00 · 2024-12-03 16:47:02 +01:00 · 7e4cb4856e
commit 7e4cb4856e
parent c8366e96ac
3 changed files with 11 additions and 11 deletions
--- a/opm/simulators/linalg/gpuistl/GpuDILU.cpp
+++ b/opm/simulators/linalg/gpuistl/GpuDILU.cpp
@ -270,12 +270,12 @@ GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
    auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){
        this->update(moveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
    };
-    m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix");
+    m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "(in DILU update) Move data to reordered matrix");

    auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize){
        this->update(m_moveThreadBlockSize, factorizationThreadBlockSize);
    };
-    m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing DILU factorization");
+    m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "(in DILU update) DILU factorization");

    // tune the thread-block size of the apply
    GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
@ -285,12 +285,12 @@ GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
    auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){
        this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
    };
-    m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set");
+    m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "(in DILU apply) Triangular lower solve");

    auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize){
-        this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize);
+        this->apply(tmpV, tmpD, m_lowerSolveThreadBlockSize, upperSolveThreadBlockSize);
    };
-    m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
+    m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "(in DILU apply) Triangular upper solve");
 }

 } // namespace Opm::gpuistl
--- a/opm/simulators/linalg/gpuistl/OpmGpuILU0.cpp
+++ b/opm/simulators/linalg/gpuistl/OpmGpuILU0.cpp
@ -331,13 +331,13 @@ OpmGpuILU0<M, X, Y, l>::tuneThreadBlockSizes()
    auto tuneMoveThreadBlockSizeInUpdate
        = [this](int moveThreadBlockSize) { this->update(moveThreadBlockSize, m_ILU0FactorizationThreadBlockSize); };
    m_moveThreadBlockSize
-        = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix");
+        = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "(in ILU update) Move data to reordered matrix");

    auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize) {
        this->update(m_moveThreadBlockSize, factorizationThreadBlockSize);
    };
    m_ILU0FactorizationThreadBlockSize
-        = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing ILU0 factorization");
+        = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "(in ILU update) ILU factorization");

    // tune the thread-block size of the apply
    GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
@ -348,13 +348,13 @@ OpmGpuILU0<M, X, Y, l>::tuneThreadBlockSizes()
        this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_ILU0FactorizationThreadBlockSize);
    };
    m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(
-        tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set");
+        tuneLowerSolveThreadBlockSizeInApply, "(in ILU apply) Triangular lower solve");

    auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize) {
-        this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize);
+        this->apply(tmpV, tmpD, m_lowerSolveThreadBlockSize, upperSolveThreadBlockSize);
    };
    m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(
-        tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
+        tuneUpperSolveThreadBlockSizeInApply, "(in ILU apply) Triangular upper solve");
 }

 } // namespace Opm::gpuistl
--- a/opm/simulators/linalg/gpuistl/detail/autotuner.hpp
+++ b/opm/simulators/linalg/gpuistl/detail/autotuner.hpp
@ -83,7 +83,7 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
    }

    OpmLog::info(
-        fmt::format("{}: Tuned Blocksize: {} (fastest runtime: {}).", descriptionOfFunction, bestBlockSize, bestTime));
+        fmt::format("[Kernel tuning completed] {}: Tuned Blocksize = {}, Fastest Runtime = {}ms.", descriptionOfFunction, bestBlockSize, bestTime));

    return bestBlockSize;
 }