make use of OpmLog for printing tuning result

This commit is contained in:
Tobias Meyer Andersen 2024-08-22 10:07:06 +02:00
parent ae4e6a65fc
commit 8b50c90fd1
3 changed files with 15 additions and 11 deletions

View File

@ -36,6 +36,7 @@
#include <tuple>
#include <functional>
#include <utility>
#include <string>
namespace Opm::cuistl
{
@ -269,12 +270,12 @@ CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){
this->update(moveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
};
m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate);
m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix");
auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize){
this->update(m_moveThreadBlockSize, factorizationThreadBlockSize);
};
m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate);
m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing DILU factorization");
// tune the thread-block size of the apply
CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
@ -284,12 +285,12 @@ CuDILU<M, X, Y, l>::tuneThreadBlockSizes()
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){
this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
};
m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply);
m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set");
auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize){
this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize);
};
m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply);
m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
}
} // namespace Opm::cuistl

View File

@ -36,6 +36,7 @@
#include <tuple>
#include <functional>
#include <utility>
#include <string>
namespace Opm::cuistl
{
@ -262,12 +263,12 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){
this->update(moveThreadBlockSize, m_ILU0FactorizationThreadBlockSize);
};
m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate);
m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix");
auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize){
this->update(m_moveThreadBlockSize, factorizationThreadBlockSize);
};
m_ILU0FactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate);
m_ILU0FactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing ILU0 factorization");
// tune the thread-block size of the apply
CuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
@ -277,12 +278,12 @@ OpmCuILU0<M, X, Y, l>::tuneThreadBlockSizes()
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){
this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_ILU0FactorizationThreadBlockSize);
};
m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply);
m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set");
auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize){
this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize);
};
m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply);
m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
}
} // namespace Opm::cuistl

View File

@ -19,9 +19,11 @@
#include <cuda_runtime.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <functional>
#include <utility>
#include <limits>
#include <string>
namespace Opm::cuistl::detail
{
@ -31,7 +33,7 @@ namespace Opm::cuistl::detail
/// @tparam ...Args types of the arguments needed to call the function
/// @param f the function to tune, which takes the thread block size as the input
template <typename func, typename... Args>
int tuneThreadBlockSize(func& f) {
int tuneThreadBlockSize(func& f, std::string descriptionOfFunction) {
// TODO: figure out a more rigorous way of deciding how many runs will suffice?
constexpr const int runs = 2;
@ -73,8 +75,8 @@ namespace Opm::cuistl::detail
}
}
}
// TODO: have this only be printed if linear solver verbosity >0?
printf("best size: %d, best time %f\n", bestBlockSize, bestTime);
OpmLog::info(fmt::format("{}: Tuned Blocksize: {} (fastest runtime: {}).", descriptionOfFunction, bestBlockSize, bestTime));
return bestBlockSize;
}