[IE][VPU]: Fix buffer size calculation bug (#3825)
This commit is contained in:
parent
1aeb7fb089
commit
e08ad2989e
@ -6,14 +6,14 @@ include_guard(GLOBAL)
|
||||
|
||||
set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x)
# NOTE(review): presumably one digest per entry of VPU_SUPPORTED_FIRMWARES, in the same order - confirm.
set(VPU_SUPPORTED_FIRMWARES_HASH
    "39a35758b76463f633f377616057c7d2a24562c7c1cfc36744f28949619e57c9"
    "798df21b5b3a8c4a6faab61f9220b2b216ba6c4a5acf75aaa17a8520bc639bfe")

#
# Default packages
#

set(FIRMWARE_PACKAGE_VERSION 1579)
set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2")
|
||||
|
||||
#
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
@ -13,12 +13,6 @@ namespace vpu {
|
||||
|
||||
namespace {
|
||||
|
||||
// Decides, from the number of boxes alone, whether a single slice is enough.
//
// maxBoxesNum - box count that is compared against the fixed threshold.
// Returns true when maxBoxesNum is less than or equal to the threshold (3400),
// false otherwise.
bool isOneSliceEnough(int maxBoxesNum) {
    // boxes threshold to use only one slice
    constexpr int boxesThreshold = 3400;
    return maxBoxesNum <= boxesThreshold;
}
|
||||
|
||||
class StaticShapeNMS final : public StageNode {
|
||||
private:
|
||||
StagePtr cloneImpl() const override {
|
||||
@ -38,10 +32,9 @@ private:
|
||||
}
|
||||
|
||||
// Reports the SHAVE requirement of this stage.
//
// The decision is not recomputed here: it is read from the boolean stage
// attribute "use_one_slice". When the attribute is true the stage asks for
// StageSHAVEsRequirements::OnlyOne, otherwise for StageSHAVEsRequirements::NeedMax.
StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override {
    const auto use_one_slice = attrs().get<bool>("use_one_slice");

    return use_one_slice ? StageSHAVEsRequirements::OnlyOne : StageSHAVEsRequirements::NeedMax;
}
|
||||
|
||||
void initialCheckImpl() const override {
|
||||
@ -92,7 +85,7 @@ bool isCMXEnough(int cmxSize, int numSlices, std::vector<int> bufferSizes) {
|
||||
const auto buffer_allocate = [&curOffset, &curSlice, &numSlices, cmxSize](int numBytes) {
|
||||
if (curOffset + numBytes < cmxSize) {
|
||||
curOffset += numBytes;
|
||||
} else if (curSlice + 1 < numSlices && numBytes < cmxSize) {
|
||||
} else if ((curSlice + 1 < numSlices) && (numBytes < cmxSize)) {
|
||||
curSlice++;
|
||||
curOffset = numBytes;
|
||||
} else {
|
||||
@ -102,7 +95,11 @@ bool isCMXEnough(int cmxSize, int numSlices, std::vector<int> bufferSizes) {
|
||||
return true;
|
||||
};
|
||||
|
||||
return std::all_of(bufferSizes.begin(), bufferSizes.end(), buffer_allocate);
|
||||
return (numSlices > 0) && std::all_of(bufferSizes.begin(), bufferSizes.end(), buffer_allocate);
|
||||
}
|
||||
|
||||
bool isOneSliceEnough(int cmxSize, const std::vector<int>& bufferSizes) {
|
||||
return isCMXEnough(cmxSize, 1, bufferSizes);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@ -153,18 +150,22 @@ void FrontEnd::parseStaticShapeNMS(const Model& model, const ie::CNNLayerPtr& la
|
||||
const auto perm = DimsOrder::fromNumDims(inputDims0.size()).toPermutation();
|
||||
const auto spatDim = inputDims0[perm[1]];
|
||||
|
||||
const int ddrBufferSize0 = 2 * sizeof(int16_t) * 4 * spatDim;
|
||||
const int ddrBufferSize1 = 2 * sizeof(int16_t) * spatDim;
|
||||
const int ddrBufferSize2 = 2 * sizeof(int32_t) * spatDim;
|
||||
const int ddrBufferSize = (ddrBufferSize0 + ddrBufferSize1 + ddrBufferSize2) + 2 * vpu::DATA_ALIGNMENT;
|
||||
const int ddrBufferSize0 = sizeof(int16_t) * 4 * spatDim;
|
||||
const int ddrBufferSize1 = sizeof(int16_t) * spatDim;
|
||||
const int ddrBufferSize2 = sizeof(int32_t) * spatDim;
|
||||
const int ddrBufferSize = 2 * (ddrBufferSize0 + ddrBufferSize1 + ddrBufferSize2) + 2 * vpu::DATA_ALIGNMENT;
|
||||
const int cmxTempBufferSize = 4 * sizeof(int32_t) * 256;
|
||||
|
||||
const auto& env = CompileEnv::get();
|
||||
|
||||
const auto numSlices = isOneSliceEnough(spatDim) ? 1 : env.resources.numSHAVEs;
|
||||
const std::vector<int> bufferSizes = {ddrBufferSize0, ddrBufferSize1, ddrBufferSize2,
|
||||
ddrBufferSize0, ddrBufferSize1, ddrBufferSize2, cmxTempBufferSize};
|
||||
|
||||
const int cmxTempBufferSize = 4 * sizeof(int32_t) * 256;
|
||||
const bool use_one_slice = isOneSliceEnough(CMX_SHAVE_BUFFER_SIZE, bufferSizes);
|
||||
const auto numSlices = use_one_slice ? 1 : env.resources.numSHAVEs;
|
||||
stage->attrs().set<bool>("use_one_slice", use_one_slice);
|
||||
|
||||
if (!isCMXEnough(CMX_SHAVE_BUFFER_SIZE, numSlices, {ddrBufferSize0, ddrBufferSize1, ddrBufferSize2, cmxTempBufferSize})) {
|
||||
if (!isCMXEnough(CMX_SHAVE_BUFFER_SIZE, numSlices, bufferSizes)) {
|
||||
model->addTempBuffer(stage, DataDesc({ddrBufferSize}));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user