Adjust kernel-tuning log output and use the lower-solve thread-block size when tuning the upper solve

This commit is contained in:
Tobias Meyer Andersen 2024-12-03 16:47:02 +01:00
parent c8366e96ac
commit 7e4cb4856e
3 changed files with 11 additions and 11 deletions

View File

@ -270,12 +270,12 @@ GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){
this->update(moveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
};
m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix");
m_moveThreadBlockSize = detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "(in DILU update) Move data to reordered matrix");
auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize){
this->update(m_moveThreadBlockSize, factorizationThreadBlockSize);
};
m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing DILU factorization");
m_DILUFactorizationThreadBlockSize = detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "(in DILU update) DILU factorization");
// tune the thread-block size of the apply
GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
@ -285,12 +285,12 @@ GpuDILU<M, X, Y, l>::tuneThreadBlockSizes()
auto tuneLowerSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int lowerSolveThreadBlockSize){
this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_DILUFactorizationThreadBlockSize);
};
m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set");
m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneLowerSolveThreadBlockSizeInApply, "(in DILU apply) Triangular lower solve");
auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize){
this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize);
this->apply(tmpV, tmpD, m_lowerSolveThreadBlockSize, upperSolveThreadBlockSize);
};
m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(tuneUpperSolveThreadBlockSizeInApply, "(in DILU apply) Triangular upper solve");
}
} // namespace Opm::gpuistl

View File

@ -331,13 +331,13 @@ OpmGpuILU0<M, X, Y, l>::tuneThreadBlockSizes()
auto tuneMoveThreadBlockSizeInUpdate
= [this](int moveThreadBlockSize) { this->update(moveThreadBlockSize, m_ILU0FactorizationThreadBlockSize); };
m_moveThreadBlockSize
= detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "Kernel moving data to reordered matrix");
= detail::tuneThreadBlockSize(tuneMoveThreadBlockSizeInUpdate, "(in ILU update) Move data to reordered matrix");
auto tuneFactorizationThreadBlockSizeInUpdate = [this](int factorizationThreadBlockSize) {
this->update(m_moveThreadBlockSize, factorizationThreadBlockSize);
};
m_ILU0FactorizationThreadBlockSize
= detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "Kernel computing ILU0 factorization");
= detail::tuneThreadBlockSize(tuneFactorizationThreadBlockSizeInUpdate, "(in ILU update) ILU factorization");
// tune the thread-block size of the apply
GpuVector<field_type> tmpV(m_gpuMatrix.N() * m_gpuMatrix.blockSize());
@ -348,13 +348,13 @@ OpmGpuILU0<M, X, Y, l>::tuneThreadBlockSizes()
this->apply(tmpV, tmpD, lowerSolveThreadBlockSize, m_ILU0FactorizationThreadBlockSize);
};
m_lowerSolveThreadBlockSize = detail::tuneThreadBlockSize(
tuneLowerSolveThreadBlockSizeInApply, "Kernel computing a lower triangular solve for a level set");
tuneLowerSolveThreadBlockSizeInApply, "(in ILU apply) Triangular lower solve");
auto tuneUpperSolveThreadBlockSizeInApply = [this, &tmpV, &tmpD](int upperSolveThreadBlockSize) {
this->apply(tmpV, tmpD, m_moveThreadBlockSize, upperSolveThreadBlockSize);
this->apply(tmpV, tmpD, m_lowerSolveThreadBlockSize, upperSolveThreadBlockSize);
};
m_upperSolveThreadBlockSize = detail::tuneThreadBlockSize(
tuneUpperSolveThreadBlockSizeInApply, "Kernel computing an upper triangular solve for a level set");
tuneUpperSolveThreadBlockSizeInApply, "(in ILU apply) Triangular upper solve");
}
} // namespace Opm::gpuistl

View File

@ -83,7 +83,7 @@ tuneThreadBlockSize(func& f, std::string descriptionOfFunction)
}
OpmLog::info(
fmt::format("{}: Tuned Blocksize: {} (fastest runtime: {}).", descriptionOfFunction, bestBlockSize, bestTime));
fmt::format("[Kernel tuning completed] {}: Tuned Blocksize = {}, Fastest Runtime = {}ms.", descriptionOfFunction, bestBlockSize, bestTime));
return bestBlockSize;
}