diff --git a/inference-engine/cmake/vpu_dependencies.cmake b/inference-engine/cmake/vpu_dependencies.cmake
index 75a0325c641..fa67cfafc1e 100644
--- a/inference-engine/cmake/vpu_dependencies.cmake
+++ b/inference-engine/cmake/vpu_dependencies.cmake
@@ -19,7 +19,7 @@ set(VPU_SUPPORTED_FIRMWARES usb-ma2450 usb-ma2x8x pcie-ma248x)
 # Default packages
 #
 
-set(FIRMWARE_PACKAGE_VERSION 1440)
+set(FIRMWARE_PACKAGE_VERSION 1445)
 set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.1")
 
 #
diff --git a/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/allocator/structs.hpp b/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/allocator/structs.hpp
index fc55128e7ac..cb82372d924 100644
--- a/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/allocator/structs.hpp
+++ b/inference-engine/src/vpu/graph_transformer/include/vpu/middleend/allocator/structs.hpp
@@ -21,6 +21,7 @@ namespace vpu {
 const int DDR_MAX_SIZE = 512 * 1024 * 1024;
 const int CMX_SLICE_SIZE = 128 * 1024;
 const int DATA_ALIGNMENT = 64;
+const int CMX_SHAVE_BUFFER_SIZE = 100 * 1024;
 
 //
 // Allocator Structs
diff --git a/inference-engine/src/vpu/graph_transformer/src/stages/static_shape_nms.cpp b/inference-engine/src/vpu/graph_transformer/src/stages/static_shape_nms.cpp
index f5cf1e4f6b5..06c2efc5940 100644
--- a/inference-engine/src/vpu/graph_transformer/src/stages/static_shape_nms.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/stages/static_shape_nms.cpp
@@ -3,7 +3,7 @@
 //
 
 #include
-
+#include
 #include
 #include
 
@@ -54,8 +54,10 @@ private:
 
     void serializeParamsImpl(BlobSerializer& serializer) const override {
         const auto center_point_box = attrs().get("center_point_box");
+        const auto use_ddr_buffer = !tempBuffers().empty();
 
         serializer.append(static_cast(center_point_box));
+        serializer.append(static_cast(use_ddr_buffer));
     }
 
     void serializeDataImpl(BlobSerializer& serializer) const override {
@@ -74,11 +76,40 @@ private:
         input5->serializeBuffer(serializer);
         outputData->serializeBuffer(serializer);
         outputDims->serializeBuffer(serializer);
+
+        if (!tempBuffers().empty())
+            tempBuffer(0)->serializeBuffer(serializer);
     }
 };
 
+// Checks whether the buffers in `bufferSizes` can be packed, in the given order, into CMX.
+// Buffers are placed back to back in the current slice; one that does not fit starts the
+// next slice. Returns false as soon as a buffer cannot be placed within `numSlices`
+// slices of `cmxSize` bytes each, true otherwise.
+bool isCMXEnough(int cmxSize, int numSlices, const std::vector<int>& bufferSizes) {
+    int curOffset = 0;
+    int curSlice = 0;
+
+    const auto buffer_allocate = [&curOffset, &curSlice, numSlices, cmxSize](int numBytes) {
+        if (curOffset + numBytes < cmxSize) {
+            curOffset += numBytes;
+        } else if (curSlice + 1 < numSlices && numBytes < cmxSize) {
+            // curSlice is zero-based: only advance while a further slice really exists.
+            curSlice++;
+            curOffset = numBytes;
+        } else {
+            return false;
+        }
+
+        return true;
+    };
+
+    return std::all_of(bufferSizes.begin(), bufferSizes.end(), buffer_allocate);
+}
+
 } // namespace
 
+
 void FrontEnd::parseStaticShapeNMS(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs) const {
     VPU_THROW_UNLESS(inputs.size() == 6,
         "StaticShapeNMS with name {} parsing failed, expected number of inputs: 6, but {} provided",
@@ -119,6 +150,23 @@ void FrontEnd::parseStaticShapeNMS(const Model& model, const ie::CNNLayerPtr& la
 
     auto stage = model->addNewStage(layer->name, StageType::StaticShapeNMS, layer, usedInputs, DataVector{outIndices, outShape});
     stage->attrs().set("center_point_box", centerPointBox);
+
+    const auto inputDims0 = inputs[0]->desc().dims();
+    const auto perm = DimsOrder::fromNumDims(inputDims0.size()).toPermutation();
+    const auto spatDim = inputDims0[perm[1]];
+
+    const int ddrBufferSize0 = 2 * sizeof(int16_t) * 4 * spatDim;
+    const int ddrBufferSize1 = 2 * sizeof(int16_t) * spatDim;
+    const int ddrBufferSize2 = 2 * sizeof(int32_t) * spatDim;
+    const int ddrBufferSize = ddrBufferSize0 + ddrBufferSize1 + ddrBufferSize2 + 2 * vpu::DATA_ALIGNMENT;
+
+    const auto& env = CompileEnv::get();
+    const auto numSlices = env.resources.numSHAVEs;
+
+    const int cmxTempBufferSize = 4 * sizeof(int32_t) * 256;
+    // Request a temp buffer of ddrBufferSize bytes only when the buffers do not fit the CMX budget.
+    if (!isCMXEnough(CMX_SHAVE_BUFFER_SIZE, numSlices, {ddrBufferSize0, ddrBufferSize1, ddrBufferSize2, cmxTempBufferSize}))
+        model->addTempBuffer(stage, DataDesc({ddrBufferSize}));
 }
 
 } // namespace vpu