Merge pull request #5779 from multitalentloes/adjust_gpu_autotuning

adjust output and parameters in gpu autotuner
This commit is contained in:
Atgeirr Flø Rasmussen
2025-01-02 16:08:47 +01:00
committed by GitHub
3 changed files with 11 additions and 11 deletions

View File

@@ -270,12 +270,12 @@ GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){
this->update(moveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
};
- m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix");
+ m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "(in DILU update) Move data to reordered matrix");
auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize){
this->update(m_moveThreadBlockSize, factorizationThreadBlockSize);
};
- m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing DILU factorization");
+ m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "(in DILU update) DILU factorization");
// tune the thread-block size of the apply
GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
@@ -285,12 +285,12 @@ GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){
this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
};
- m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set");
+ m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "(in DILU apply) Triangular lower solve");
auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize){
- this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize);
+ this->apply(tmpV, tmpD, m_lowerSolveThreadBlockSize, upperSolveThreadBlockSize);
};
- m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
+ m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "(in DILU apply) Triangular upper solve");
}
} // namespace Opm::gpuistl

View File

@@ -331,13 +331,13 @@ OpmGpuILU0<M, X, Y, l>::tuneThreadBlockSizes()
auto tuneMoveThreadBlockSizeInUpdate
= [this](int moveThreadBlockSize) { this->update(moveThreadBlockSize, m_ILU0FactorizationThreadBlockSize); };
m_moveThreadBlockSize
- = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix");
+ = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "(in ILU update) Move data to reordered matrix");
auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize) {
this->update(m_moveThreadBlockSize, factorizationThreadBlockSize);
};
m_ILU0FactorizationThreadBlockSize
- = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing ILU0 factorization");
+ = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "(in ILU update) ILU factorization");
// tune the thread-block size of the apply
GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
@@ -348,13 +348,13 @@ OpmGpuILU0<M, X, Y, l>::tuneThreadBlockSizes()
this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_ILU0FactorizationThreadBlockSize);
};
m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(
- tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set");
+ tuneLowerSolveThreadBlockSizeInApply, "(in ILU apply) Triangular lower solve");
auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize) {
- this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize);
+ this->apply(tmpV, tmpD, m_lowerSolveThreadBlockSize, upperSolveThreadBlockSize);
};
m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(
- tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
+ tuneUpperSolveThreadBlockSizeInApply, "(in ILU apply) Triangular upper solve");
}
} // namespace Opm::gpuistl

View File

@@ -83,7 +83,7 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
}
OpmLog::info(
- fmt::format("{}: Tuned Blocksize: {} (fastest runtime: {}).", descriptionOfFunction, bestBlockSize, bestTime));
+ fmt::format("[Kernel tuning completed] {}: Tuned Blocksize = {}, Fastest Runtime = {}ms.", descriptionOfFunction, bestBlockSize, bestTime));
return bestBlockSize;
}