diff --git a/inference-engine/cmake/vpu_dependencies.cmake b/inference-engine/cmake/vpu_dependencies.cmake index 23dd2e74cda..3c5fd59cb95 100644 --- a/inference-engine/cmake/vpu_dependencies.cmake +++ b/inference-engine/cmake/vpu_dependencies.cmake @@ -6,14 +6,14 @@ include_guard(GLOBAL) set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x) set(VPU_SUPPORTED_FIRMWARES_HASH - "ed5a91aba906485b5159ba646a1b38d22ecc891d00912741a6ae9185ed4ddbfa" - "ea1e10b93b8c49999d95add29b302b6a657e94190ffc8b9a6ec499de2bc99465") + "39a35758b76463f633f377616057c7d2a24562c7c1cfc36744f28949619e57c9" + "798df21b5b3a8c4a6faab61f9220b2b216ba6c4a5acf75aaa17a8520bc639bfe") # # Default packages # -set(FIRMWARE_PACKAGE_VERSION 1574) +set(FIRMWARE_PACKAGE_VERSION 1579) set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2") # diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/static_shape_nms.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/static_shape_nms.cpp index c4deabc0d27..4399a1487c9 100644 --- a/inference-engine/src/vpu/graph_transformer/src/stages/static_shape_nms.cpp +++ b/inference-engine/src/vpu/graph_transformer/src/stages/static_shape_nms.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -13,12 +13,6 @@ namespace vpu { namespace { -bool isOneSliceEnough(int maxBoxesNum) { - // boxes threshold to use only one slice - constexpr int boxesThreshold = 3400; - return (maxBoxesNum <= boxesThreshold); -} - class StaticShapeNMS final : public StageNode { private: StagePtr cloneImpl() const override { @@ -38,10 +32,9 @@ private: } StageSHAVEsRequirements getSHAVEsRequirementsImpl() const override { - const auto& inDesc = input(0)->desc(); - const auto& maxBoxesNum = inDesc.dim(Dim::H); + const auto use_one_slice = attrs().get("use_one_slice"); - return isOneSliceEnough(maxBoxesNum) ? StageSHAVEsRequirements::OnlyOne : StageSHAVEsRequirements::NeedMax; + return use_one_slice ? StageSHAVEsRequirements::OnlyOne : StageSHAVEsRequirements::NeedMax; } void initialCheckImpl() const override { @@ -92,7 +85,7 @@ bool isCMXEnough(int cmxSize, int numSlices, std::vector bufferSizes) { const auto buffer_allocate = [&curOffset, &curSlice, &numSlices, cmxSize](int numBytes) { if (curOffset + numBytes < cmxSize) { curOffset += numBytes; - } else if (curSlice + 1 < numSlices && numBytes < cmxSize) { + } else if ((curSlice + 1 < numSlices) && (numBytes < cmxSize)) { curSlice++; curOffset = numBytes; } else { @@ -102,7 +95,11 @@ bool isCMXEnough(int cmxSize, int numSlices, std::vector bufferSizes) { return true; }; - return std::all_of(bufferSizes.begin(), bufferSizes.end(), buffer_allocate); + return (numSlices > 0) && std::all_of(bufferSizes.begin(), bufferSizes.end(), buffer_allocate); +} + +bool isOneSliceEnough(int cmxSize, const std::vector& bufferSizes) { + return isCMXEnough(cmxSize, 1, bufferSizes); } } // namespace @@ -153,18 +150,22 @@ void FrontEnd::parseStaticShapeNMS(const Model& model, const ie::CNNLayerPtr& la const auto perm = DimsOrder::fromNumDims(inputDims0.size()).toPermutation(); const auto spatDim = inputDims0[perm[1]]; - const int ddrBufferSize0 = 2 * sizeof(int16_t) * 4 * spatDim; - const int ddrBufferSize1 = 2 * sizeof(int16_t) * spatDim; - const int ddrBufferSize2 = 2 * sizeof(int32_t) * spatDim; - const int ddrBufferSize = (ddrBufferSize0 + ddrBufferSize1 + ddrBufferSize2) + 2 * vpu::DATA_ALIGNMENT; + const int ddrBufferSize0 = sizeof(int16_t) * 4 * spatDim; + const int ddrBufferSize1 = sizeof(int16_t) * spatDim; + const int ddrBufferSize2 = sizeof(int32_t) * spatDim; + const int ddrBufferSize = 2 * (ddrBufferSize0 + ddrBufferSize1 + ddrBufferSize2) + 2 * vpu::DATA_ALIGNMENT; + const int cmxTempBufferSize = 4 * sizeof(int32_t) * 256; const auto& env = CompileEnv::get(); - const auto numSlices = isOneSliceEnough(spatDim) ? 1 : env.resources.numSHAVEs; + const std::vector bufferSizes = {ddrBufferSize0, ddrBufferSize1, ddrBufferSize2, + ddrBufferSize0, ddrBufferSize1, ddrBufferSize2, cmxTempBufferSize}; - const int cmxTempBufferSize = 4 * sizeof(int32_t) * 256; + const bool use_one_slice = isOneSliceEnough(CMX_SHAVE_BUFFER_SIZE, bufferSizes); + const auto numSlices = use_one_slice ? 1 : env.resources.numSHAVEs; + stage->attrs().set("use_one_slice", use_one_slice); - if (!isCMXEnough(CMX_SHAVE_BUFFER_SIZE, numSlices, {ddrBufferSize0, ddrBufferSize1, ddrBufferSize2, cmxTempBufferSize})) { + if (!isCMXEnough(CMX_SHAVE_BUFFER_SIZE, numSlices, bufferSizes)) { model->addTempBuffer(stage, DataDesc({ddrBufferSize})); } }