make use of OpmLog for printing tuning result

2025-02-25 18:55:30 -06:00 · 2024-08-22 10:07:06 +02:00 · 2024-08-22 10:07:06 +02:00 · 8b50c90fd1
commit 8b50c90fd1
parent ae4e6a65fc
3 changed files with 15 additions and 11 deletions
--- a/opm/simulators/linalg/cuistl/CuDILU.cpp
+++ b/opm/simulators/linalg/cuistl/CuDILU.cpp
@ -36,6 +36,7 @@
 #include <tuple>
 #include <functional>
 #include <utility>
+#include <string>
 namespace Opm::cuistl
 {

@ -269,12 +270,12 @@ CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
    auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){
        this->update(moveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
    };
-    m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate);
+    m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix");

    auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize){
        this->update(m_moveThreadBlockSize, factorizationThreadBlockSize);
    };
-    m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate);
+    m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing DILU factorization");

    // tune the thread-block size of the apply
    CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
@ -284,12 +285,12 @@ CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
    auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){
        this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
    };
-    m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply);
+    m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set");

    auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize){
        this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize);
    };
-    m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply);
+    m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
 }

 } // namespace Opm::cuistl
--- a/opm/simulators/linalg/cuistl/OpmCuILU0.cpp
+++ b/opm/simulators/linalg/cuistl/OpmCuILU0.cpp
@ -36,6 +36,7 @@
 #include <tuple>
 #include <functional>
 #include <utility>
+#include <string>
 namespace Opm::cuistl
 {

@ -262,12 +263,12 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
    auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){
        this->update(moveThreadBlockSize, m_ILU0FactorizationThreadBlockSize);
    };
-    m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate);
+    m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix");

    auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize){
        this->update(m_moveThreadBlockSize, factorizationThreadBlockSize);
    };
-    m_ILU0FactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate);
+    m_ILU0FactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing ILU0 factorization");

    // tune the thread-block size of the apply
    CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
@ -277,12 +278,12 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
    auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){
        this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_ILU0FactorizationThreadBlockSize);
    };
-    m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply);
+    m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set");

    auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize){
        this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize);
    };
-    m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply);
+    m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
 }

 } // namespace Opm::cuistl
--- a/opm/simulators/linalg/cuistl/detail/autotuner.hpp
+++ b/opm/simulators/linalg/cuistl/detail/autotuner.hpp
@ -19,9 +19,11 @@
 #include <cuda_runtime.h>
 #include <opm/common/ErrorMacros.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
+#include <opm/common/OpmLog/OpmLog.hpp>
 #include <functional>
 #include <utility>
 #include <limits>
+#include <string>

 namespace Opm::cuistl::detail
 {
@ -31,7 +33,7 @@ namespace Opm::cuistl::detail
    /// @tparam ...Args types of the arguments needed to call the function
    /// @param f the function to tune, which takes the thread block size as the input
    template <typename func, typename... Args>
-    int tuneThreadBlockSize(func& f) {
+    int tuneThreadBlockSize(func& f, std::string descriptionOfFunction) {

        // TODO: figure out a more rigorous way of deciding how many runs will suffice?
        constexpr const int runs = 2;
@ -73,8 +75,8 @@ namespace Opm::cuistl::detail
                }
            }
        }
-        // TODO: have this only be printed if linear solver verbosity >0?
-        printf("best size: %d, best time %f\n", bestBlockSize, bestTime);
+
+        OpmLog::info(fmt::format("{}: Tuned Blocksize: {} (fastest runtime: {}).", descriptionOfFunction, bestBlockSize, bestTime));

        return bestBlockSize;
    }