[IE][VPU]: Fix buffer size calculation bug (#3825)

This commit is contained in:
Andrey Sokolov 2021-01-20 12:07:06 +03:00 committed by GitHub
parent 1aeb7fb089
commit e08ad2989e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 23 additions and 22 deletions

View File

@ -6,14 +6,14 @@ include_guard(GLOBAL)
set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x)
set(VPU_SUPPORTED_FIRMWARES_HASH
"ed5a91aba906485b5159ba646a1b38d22ecc891d00912741a6ae9185ed4ddbfa"
"ea1e10b93b8c49999d95add29b302b6a657e94190ffc8b9a6ec499de2bc99465")
"39a35758b76463f633f377616057c7d2a24562c7c1cfc36744f28949619e57c9"
"798df21b5b3a8c4a6faab61f9220b2b216ba6c4a5acf75aaa17a8520bc639bfe")
#
# Default packages
#
set(FIRMWARE_PACKAGE_VERSION 1574)
set(FIRMWARE_PACKAGE_VERSION 1579)
set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2")
#

View File

@ -1,4 +1,4 @@
// Copyright (C) 2018-2020 Intel Corporation
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@ -13,12 +13,6 @@ namespace vpu {
namespace {
// Decides whether a single slice is sufficient for the given number of boxes.
// Returns true when maxBoxesNum does not exceed the fixed limit below.
bool isOneSliceEnough(int maxBoxesNum) {
    // Largest box count (inclusive) for which only one slice is used.
    constexpr int singleSliceLimit = 3400;
    const bool exceedsLimit = maxBoxesNum > singleSliceLimit;
    return !exceedsLimit;
}
class StaticShapeNMS final : public StageNode {
private:
StagePtr cloneImpl() const override {
@ -38,10 +32,9 @@ private:
}
StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
const auto& inDesc = input(0)->desc();
const auto& maxBoxesNum = inDesc.dim(Dim::H);
const auto use_one_slice = attrs().get<bool>("use_one_slice");
return isOneSliceEnough(maxBoxesNum) ? StageSHAVEsRequirements::OnlyOne : StageSHAVEsRequirements::NeedMax;
return use_one_slice ? StageSHAVEsRequirements::OnlyOne : StageSHAVEsRequirements::NeedMax;
}
void initialCheckImpl() const override {
@ -92,7 +85,7 @@ bool isCMXEnough(int cmxSize, int numSlices, std::vector<int> bufferSizes) {
const auto buffer_allocate = [&curOffset, &curSlice, &numSlices, cmxSize](int numBytes) {
if (curOffset + numBytes < cmxSize) {
curOffset += numBytes;
} else if (curSlice + 1 < numSlices && numBytes < cmxSize) {
} else if ((curSlice + 1 < numSlices) && (numBytes < cmxSize)) {
curSlice++;
curOffset = numBytes;
} else {
@ -102,7 +95,11 @@ bool isCMXEnough(int cmxSize, int numSlices, std::vector<int> bufferSizes) {
return true;
};
return std::all_of(bufferSizes.begin(), bufferSizes.end(), buffer_allocate);
return (numSlices > 0) && std::all_of(bufferSizes.begin(), bufferSizes.end(), buffer_allocate);
}
// Convenience wrapper: forwards to isCMXEnough with the slice count fixed at
// one, and returns its result unchanged.
bool isOneSliceEnough(int cmxSize, const std::vector<int>& bufferSizes) {
    constexpr int singleSlice = 1;
    const bool fitsInOneSlice = isCMXEnough(cmxSize, singleSlice, bufferSizes);
    return fitsInOneSlice;
}
} // namespace
@ -153,18 +150,22 @@ void FrontEnd::parseStaticShapeNMS(const Model& model, const ie::CNNLayerPtr& la
const auto perm = DimsOrder::fromNumDims(inputDims0.size()).toPermutation();
const auto spatDim = inputDims0[perm[1]];
const int ddrBufferSize0 = 2 * sizeof(int16_t) * 4 * spatDim;
const int ddrBufferSize1 = 2 * sizeof(int16_t) * spatDim;
const int ddrBufferSize2 = 2 * sizeof(int32_t) * spatDim;
const int ddrBufferSize = (ddrBufferSize0 + ddrBufferSize1 + ddrBufferSize2) + 2 * vpu::DATA_ALIGNMENT;
const int ddrBufferSize0 = sizeof(int16_t) * 4 * spatDim;
const int ddrBufferSize1 = sizeof(int16_t) * spatDim;
const int ddrBufferSize2 = sizeof(int32_t) * spatDim;
const int ddrBufferSize = 2 * (ddrBufferSize0 + ddrBufferSize1 + ddrBufferSize2) + 2 * vpu::DATA_ALIGNMENT;
const int cmxTempBufferSize = 4 * sizeof(int32_t) * 256;
const auto& env = CompileEnv::get();
const auto numSlices = isOneSliceEnough(spatDim) ? 1 : env.resources.numSHAVEs;
const std::vector<int> bufferSizes = {ddrBufferSize0, ddrBufferSize1, ddrBufferSize2,
ddrBufferSize0, ddrBufferSize1, ddrBufferSize2, cmxTempBufferSize};
const int cmxTempBufferSize = 4 * sizeof(int32_t) * 256;
const bool use_one_slice = isOneSliceEnough(CMX_SHAVE_BUFFER_SIZE, bufferSizes);
const auto numSlices = use_one_slice ? 1 : env.resources.numSHAVEs;
stage->attrs().set<bool>("use_one_slice", use_one_slice);
if (!isCMXEnough(CMX_SHAVE_BUFFER_SIZE, numSlices, {ddrBufferSize0, ddrBufferSize1, ddrBufferSize2, cmxTempBufferSize})) {
if (!isCMXEnough(CMX_SHAVE_BUFFER_SIZE, numSlices, bufferSizes)) {
model->addTempBuffer(stage, DataDesc({ddrBufferSize}));
}
}