[IE][VPU]: Fix buffer size calculation bug (#3825)

This commit is contained in:
Andrey Sokolov 2021-01-20 12:07:06 +03:00 committed by GitHub
parent 1aeb7fb089
commit e08ad2989e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 23 additions and 22 deletions

View File

@ -6,14 +6,14 @@ include_guard(GLOBAL)
set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x) set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x)
set(VPU_SUPPORTED_FIRMWARES_HASH set(VPU_SUPPORTED_FIRMWARES_HASH
"ed5a91aba906485b5159ba646a1b38d22ecc891d00912741a6ae9185ed4ddbfa" "39a35758b76463f633f377616057c7d2a24562c7c1cfc36744f28949619e57c9"
"ea1e10b93b8c49999d95add29b302b6a657e94190ffc8b9a6ec499de2bc99465") "798df21b5b3a8c4a6faab61f9220b2b216ba6c4a5acf75aaa17a8520bc639bfe")
# #
# Default packages # Default packages
# #
set(FIRMWARE_PACKAGE_VERSION 1574) set(FIRMWARE_PACKAGE_VERSION 1579)
set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2") set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2")
# #

View File

@ -1,4 +1,4 @@
// Copyright (C) 2018-2020 Intel Corporation // Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
@ -13,12 +13,6 @@ namespace vpu {
namespace { namespace {
// Tells whether the given number of boxes is small enough to use only one slice.
bool isOneSliceEnough(int maxBoxesNum) {
    // Highest box count (inclusive) for which only one slice is used.
    constexpr int singleSliceBoxLimit = 3400;
    const bool exceedsLimit = maxBoxesNum > singleSliceBoxLimit;
    return !exceedsLimit;
}
class StaticShapeNMS final : public StageNode { class StaticShapeNMS final : public StageNode {
private: private:
StagePtr cloneImpl() const override { StagePtr cloneImpl() const override {
@ -38,10 +32,9 @@ private:
} }
StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override { StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
const auto& inDesc = input(0)->desc(); const auto use_one_slice = attrs().get<bool>("use_one_slice");
const auto& maxBoxesNum = inDesc.dim(Dim::H);
return isOneSliceEnough(maxBoxesNum) ? StageSHAVEsRequirements::OnlyOne : StageSHAVEsRequirements::NeedMax; return use_one_slice ? StageSHAVEsRequirements::OnlyOne : StageSHAVEsRequirements::NeedMax;
} }
void initialCheckImpl() const override { void initialCheckImpl() const override {
@ -92,7 +85,7 @@ bool isCMXEnough(int cmxSize, int numSlices, std::vector<int> bufferSizes) {
const auto buffer_allocate = [&curOffset, &curSlice, &numSlices, cmxSize](int numBytes) { const auto buffer_allocate = [&curOffset, &curSlice, &numSlices, cmxSize](int numBytes) {
if (curOffset + numBytes < cmxSize) { if (curOffset + numBytes < cmxSize) {
curOffset += numBytes; curOffset += numBytes;
} else if (curSlice + 1 < numSlices && numBytes < cmxSize) { } else if ((curSlice + 1 < numSlices) && (numBytes < cmxSize)) {
curSlice++; curSlice++;
curOffset = numBytes; curOffset = numBytes;
} else { } else {
@ -102,7 +95,11 @@ bool isCMXEnough(int cmxSize, int numSlices, std::vector<int> bufferSizes) {
return true; return true;
}; };
return std::all_of(bufferSizes.begin(), bufferSizes.end(), buffer_allocate); return (numSlices > 0) && std::all_of(bufferSizes.begin(), bufferSizes.end(), buffer_allocate);
}
// Single-slice convenience check: forwards cmxSize and bufferSizes to
// isCMXEnough with the slice count fixed at 1 and returns that result.
bool isOneSliceEnough(int cmxSize, const std::vector<int>& bufferSizes) {
return isCMXEnough(cmxSize, 1, bufferSizes);
} }
} // namespace } // namespace
@ -153,18 +150,22 @@ void FrontEnd::parseStaticShapeNMS(const Model& model, const ie::CNNLayerPtr& la
const auto perm = DimsOrder::fromNumDims(inputDims0.size()).toPermutation(); const auto perm = DimsOrder::fromNumDims(inputDims0.size()).toPermutation();
const auto spatDim = inputDims0[perm[1]]; const auto spatDim = inputDims0[perm[1]];
const int ddrBufferSize0 = 2 * sizeof(int16_t) * 4 * spatDim; const int ddrBufferSize0 = sizeof(int16_t) * 4 * spatDim;
const int ddrBufferSize1 = 2 * sizeof(int16_t) * spatDim; const int ddrBufferSize1 = sizeof(int16_t) * spatDim;
const int ddrBufferSize2 = 2 * sizeof(int32_t) * spatDim; const int ddrBufferSize2 = sizeof(int32_t) * spatDim;
const int ddrBufferSize = (ddrBufferSize0 + ddrBufferSize1 + ddrBufferSize2) + 2 * vpu::DATA_ALIGNMENT; const int ddrBufferSize = 2 * (ddrBufferSize0 + ddrBufferSize1 + ddrBufferSize2) + 2 * vpu::DATA_ALIGNMENT;
const int cmxTempBufferSize = 4 * sizeof(int32_t) * 256;
const auto& env = CompileEnv::get(); const auto& env = CompileEnv::get();
const auto numSlices = isOneSliceEnough(spatDim) ? 1 : env.resources.numSHAVEs; const std::vector<int> bufferSizes = {ddrBufferSize0, ddrBufferSize1, ddrBufferSize2,
ddrBufferSize0, ddrBufferSize1, ddrBufferSize2, cmxTempBufferSize};
const int cmxTempBufferSize = 4 * sizeof(int32_t) * 256; const bool use_one_slice = isOneSliceEnough(CMX_SHAVE_BUFFER_SIZE, bufferSizes);
const auto numSlices = use_one_slice ? 1 : env.resources.numSHAVEs;
stage->attrs().set<bool>("use_one_slice", use_one_slice);
if (!isCMXEnough(CMX_SHAVE_BUFFER_SIZE, numSlices, {ddrBufferSize0, ddrBufferSize1, ddrBufferSize2, cmxTempBufferSize})) { if (!isCMXEnough(CMX_SHAVE_BUFFER_SIZE, numSlices, bufferSizes)) {
model->addTempBuffer(stage, DataDesc({ddrBufferSize})); model->addTempBuffer(stage, DataDesc({ddrBufferSize}));
} }
} }