[VPU][Benchmark app] Support trivial networks (#4060)
* [BENCHMARK APP] Support I64 input filling * [VPU] Support trivial networks * [VPU] Add test cases * [VPU][Samples] Review fixes * [Benchmark] Support scalar generating in python benchmark app * [VPU] Allocate shape for unused inputs robustly * [VPU] Make processing trivial cases more efficient * [VPU] Rename isUsed->isConsumed * [VPU][Tests][Samples] Review fixes * [VPU] Process trivial cases at the very beginning * [VPU] Review fixes
This commit is contained in:
@@ -275,6 +275,8 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
|
||||
fillBlobBinary<short>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
|
||||
} else if (item.second->getPrecision() == InferenceEngine::Precision::I32) {
|
||||
fillBlobBinary<int32_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
|
||||
} else if (item.second->getPrecision() == InferenceEngine::Precision::I64) {
|
||||
fillBlobBinary<int64_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
|
||||
} else if (item.second->getPrecision() == InferenceEngine::Precision::U8) {
|
||||
fillBlobBinary<uint8_t>(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount);
|
||||
} else {
|
||||
@@ -294,6 +296,8 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
|
||||
fillBlobImInfo<short>(inputBlob, batchSize, image_size);
|
||||
} else if (item.second->getPrecision() == InferenceEngine::Precision::I32) {
|
||||
fillBlobImInfo<int32_t>(inputBlob, batchSize, image_size);
|
||||
} else if (item.second->getPrecision() == InferenceEngine::Precision::I64) {
|
||||
fillBlobImInfo<int64_t>(inputBlob, batchSize, image_size);
|
||||
} else {
|
||||
THROW_IE_EXCEPTION << "Input precision is not supported for image info!";
|
||||
}
|
||||
@@ -310,6 +314,8 @@ void fillBlobs(const std::vector<std::string>& inputFiles,
|
||||
fillBlobRandom<short>(inputBlob);
|
||||
} else if (item.second->getPrecision() == InferenceEngine::Precision::I32) {
|
||||
fillBlobRandom<int32_t>(inputBlob);
|
||||
} else if (item.second->getPrecision() == InferenceEngine::Precision::I64) {
|
||||
fillBlobRandom<int64_t>(inputBlob);
|
||||
} else if (item.second->getPrecision() == InferenceEngine::Precision::U8) {
|
||||
fillBlobRandom<uint8_t>(inputBlob);
|
||||
} else if (item.second->getPrecision() == InferenceEngine::Precision::I8) {
|
||||
|
||||
@@ -209,6 +209,8 @@ private:
|
||||
void parseLayer(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs,
|
||||
const UnsupportedLayerCallback& onUnsupported, const SupportedLayerCallback& onSupported = nullptr);
|
||||
|
||||
void processTrivialCases(const Model& model);
|
||||
|
||||
private:
|
||||
StageBuilder::Ptr _stageBuilder;
|
||||
const ie::ICore* _core = nullptr;
|
||||
|
||||
@@ -215,6 +215,8 @@ public:
|
||||
|
||||
Data getTopParentData() const;
|
||||
|
||||
bool isConsumed() const;
|
||||
|
||||
//
|
||||
// DataDesc
|
||||
//
|
||||
|
||||
@@ -63,9 +63,6 @@ int BackEnd::serializeIOInfoSection(
|
||||
VPU_INTERNAL_CHECK(data->producerEdge() == nullptr,
|
||||
"serializeIOInfoSection failed on input data {}. Input must have no producer but actually it has: {} with type {}",
|
||||
data->name(), data->producerEdge()->producer()->name(), data->producerEdge()->producer()->type());
|
||||
VPU_INTERNAL_CHECK(data->numConsumers() != 0,
|
||||
"serializeIOInfoSection failed on input data {}. Input must have at least one consumer but it doesn't ",
|
||||
data->usage());
|
||||
}
|
||||
|
||||
if (dataUsage == DataUsage::Output) {
|
||||
|
||||
@@ -378,6 +378,39 @@ void FrontEnd::parseLayer(const Model& model, const ie::CNNLayerPtr& layer, cons
|
||||
}
|
||||
}
|
||||
|
||||
void FrontEnd::processTrivialCases(const Model& model) {
|
||||
std::unordered_map<ie::DataPtr, DataVector> ieToVpuDataVector;
|
||||
for (const auto& data : model->datas()) {
|
||||
const auto& origData = data->origData();
|
||||
if (origData != nullptr) {
|
||||
ieToVpuDataVector[origData].push_back(data);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<DataVector> trivialCases;
|
||||
for (const auto& dataObjectsWithTheSameOrigData : ieToVpuDataVector) {
|
||||
if (dataObjectsWithTheSameOrigData.second.size() > 1) {
|
||||
VPU_THROW_UNLESS(dataObjectsWithTheSameOrigData.second.size() == 2,
|
||||
"There can't be more than two data objects associated with the same original IE data object with name {}",
|
||||
dataObjectsWithTheSameOrigData.first->getName());
|
||||
trivialCases.push_back(dataObjectsWithTheSameOrigData.second);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& trivialCase : trivialCases) {
|
||||
const auto& unconnectedOutput = trivialCase.front()->usage() == DataUsage::Output ? trivialCase.front() : trivialCase.back();
|
||||
const auto& unconnectedInput = unconnectedOutput == trivialCase.front() ? trivialCase.back() : trivialCase.front();
|
||||
|
||||
_stageBuilder->addCopyStage(
|
||||
model,
|
||||
unconnectedInput->name() + "@copy",
|
||||
nullptr,
|
||||
{unconnectedInput},
|
||||
{unconnectedOutput},
|
||||
"processTrivialCase");
|
||||
}
|
||||
}
|
||||
|
||||
void FrontEnd::defaultOnUnsupportedLayerCallback(const Model& model, const ie::CNNLayerPtr& layer, const DataVector& inputs, const DataVector& outputs,
|
||||
const std::string& extraMessage) {
|
||||
const auto& env = CompileEnv::get();
|
||||
@@ -475,6 +508,12 @@ ModelPtr FrontEnd::runCommonPasses(ie::CNNNetwork network,
|
||||
|
||||
parseInputAndOutputData(model);
|
||||
|
||||
//
|
||||
// Process trivial cases like `input->output`, `const->output`
|
||||
//
|
||||
|
||||
processTrivialCases(model);
|
||||
|
||||
if (!CompileEnv::get().config.disableConvertStages) {
|
||||
addDataTypeConvertStages(model);
|
||||
}
|
||||
|
||||
@@ -80,6 +80,10 @@ void FrontEnd::addDataTypeConvertStages(const Model& model) {
|
||||
|
||||
bindData(inputFP16, input->origData());
|
||||
|
||||
for (const auto consumerEdge : input->consumerEdges()) {
|
||||
model->replaceStageInput(consumerEdge, inputFP16);
|
||||
}
|
||||
|
||||
_stageBuilder->createConvertStage(
|
||||
model,
|
||||
inputFP16->name(),
|
||||
@@ -126,6 +130,10 @@ void FrontEnd::addDataTypeConvertStages(const Model& model) {
|
||||
|
||||
bindData(outputFP16, output->origData());
|
||||
|
||||
if (const auto producerEdge = output->producerEdge()) {
|
||||
model->replaceStageOutput(producerEdge, outputFP16);
|
||||
}
|
||||
|
||||
const auto stage = _stageBuilder->createConvertStage(
|
||||
model,
|
||||
outputFP16->name(),
|
||||
|
||||
@@ -116,21 +116,6 @@ void FrontEnd::parseInputAndOutputData(const Model& model) {
|
||||
descriptor,
|
||||
ieBlobContent(ieBlob, descriptor.type()));
|
||||
|
||||
// User might ask to return the output from Const layer.
|
||||
if (const auto vpuOutData = getVpuData(ieData)) {
|
||||
env.log->trace("The constant %s is network output", vpuData);
|
||||
|
||||
IE_ASSERT(vpuOutData->usage() == DataUsage::Output);
|
||||
|
||||
_stageBuilder->addCopyStage(
|
||||
model,
|
||||
formatString("%s@return-const", vpuData->name()),
|
||||
nullptr,
|
||||
vpuData,
|
||||
vpuOutData,
|
||||
"parseInputAndOutputData::const");
|
||||
}
|
||||
|
||||
bindData(vpuData, ieData);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -206,14 +206,14 @@ AllocationResult runAllocator(const Model& model, EnableShapeAllocation enableSh
|
||||
//
|
||||
|
||||
if (enableShapeAllocation == EnableShapeAllocation::YES) {
|
||||
for (const auto& stage : model->getStages()) {
|
||||
const auto& allocateShape = [&allocator](const Data& data) {
|
||||
if (!data->isShapeAllocated()) {
|
||||
const auto shapeLocation = allocator.allocateShape(data);
|
||||
data->setShapeAllocationInfo(shapeLocation);
|
||||
}
|
||||
};
|
||||
const auto& allocateShape = [&allocator](const Data& data) {
|
||||
if (!data->isShapeAllocated()) {
|
||||
const auto shapeLocation = allocator.allocateShape(data);
|
||||
data->setShapeAllocationInfo(shapeLocation);
|
||||
}
|
||||
};
|
||||
|
||||
for (const auto& stage : model->getStages()) {
|
||||
for (const auto& input : stage->inputs()) {
|
||||
allocateShape(input);
|
||||
}
|
||||
@@ -224,6 +224,22 @@ AllocationResult runAllocator(const Model& model, EnableShapeAllocation enableSh
|
||||
allocateShape(tempBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate shape for unused inputs
|
||||
DataVector unusedInputs;
|
||||
const auto& dataObjects = model->datas();
|
||||
std::copy_if(dataObjects.begin(), dataObjects.end(), std::back_inserter(unusedInputs), [](const Data& data) {
|
||||
return data->usage() == DataUsage::Input && !data->isConsumed();
|
||||
});
|
||||
// There is no guarantee that model->datas() always contain data objects in the same order from run to run,
|
||||
// so to stabilize allocation, and as a result, the final blob, we need to sort them
|
||||
std::sort(unusedInputs.begin(), unusedInputs.end(), [](const Data& lhs, const Data& rhs) { return lhs->name() < rhs->name(); });
|
||||
std::for_each(unusedInputs.begin(), unusedInputs.end(), [&allocateShape](const Data& unusedInput) { allocateShape(unusedInput); });
|
||||
|
||||
for (const auto& data : model->datas()) {
|
||||
VPU_THROW_UNLESS(data->isShapeAllocated(), "Shape for data {} with usage {} is not allocated",
|
||||
data->name(), data->usage());
|
||||
}
|
||||
}
|
||||
|
||||
return AllocationResult();
|
||||
|
||||
@@ -29,6 +29,17 @@ void PassImpl::runForStage(const Model& model, const Stage& convert) {
|
||||
const auto input = convert->input(0);
|
||||
const auto output = convert->output(0);
|
||||
|
||||
//
|
||||
// Check and remove the convert that was added to unused input
|
||||
// In this case we will have the converted intermediate data object which is not consumed
|
||||
//
|
||||
|
||||
if (output->usage() == DataUsage::Intermediate && !output->isConsumed()) {
|
||||
model->removeStage(convert);
|
||||
model->removeUnusedData(output);
|
||||
return;
|
||||
}
|
||||
|
||||
//
|
||||
// We remove Convert stage if input and output data types are equal.
|
||||
// It could happen if there was a non-IO FP16 <-> FP32 conversion in
|
||||
|
||||
@@ -31,6 +31,10 @@ namespace vpu {
|
||||
// DataNode
|
||||
//
|
||||
|
||||
bool DataNode::isConsumed() const {
|
||||
return numConsumers() > 0 || !childDataToShapeEdges().empty();
|
||||
}
|
||||
|
||||
Data DataNode::getTopParentData() const {
|
||||
Data topParent = this;
|
||||
while (auto nextParent = topParent->parentData()) {
|
||||
|
||||
@@ -1952,8 +1952,6 @@ void ModelObj::removeStage(const Stage& stage) {
|
||||
void ModelObj::cleanUp() {
|
||||
for (const auto& data : datas()) {
|
||||
if (data->_usage == DataUsage::Input) {
|
||||
VPU_THROW_UNLESS(!data->_consumerEdges.empty() || !data->childDataToShapeEdges().empty(),
|
||||
"Input data {} must either have at least one consumer (but got zero) or be a shape data.", data->name());
|
||||
IE_ASSERT(data->_parentDataToDataEdge == nullptr);
|
||||
} else if (data->_usage == DataUsage::Output) {
|
||||
IE_ASSERT(data->_producerEdge != nullptr);
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "subgraph_tests/parameter_result.hpp"
|
||||
|
||||
using namespace SubgraphTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_Check, ParameterResultSubgraphTest,
|
||||
::testing::Values(CommonTestUtils::DEVICE_MYRIAD),
|
||||
ParameterResultSubgraphTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
@@ -0,0 +1,19 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "subgraph_tests/parameter_shapeof_result.hpp"
|
||||
|
||||
using namespace SubgraphTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_Check, ParameterShapeOfResultSubgraphTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(
|
||||
ngraph::element::f32,
|
||||
ngraph::element::f16),
|
||||
::testing::Values(CommonTestUtils::DEVICE_MYRIAD)),
|
||||
ParameterShapeOfResultSubgraphTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
@@ -0,0 +1,15 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared_test_classes/subgraph/parameter_shapeof_result.hpp"
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
|
||||
// Value-parameterized test body: calls Run() on the fixture for each parameter set
// (Run() is not defined here; presumably inherited from the fixture's base class).
TEST_P(ParameterShapeOfResultSubgraphTest, CompareWithRefs) {
    this->Run();
}
|
||||
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
@@ -0,0 +1,30 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
|
||||
typedef std::tuple<
|
||||
ngraph::element::Type, // Input type
|
||||
std::string // Device name
|
||||
> parameterShapeOfResultParams;
|
||||
|
||||
// Value-parameterized test fixture whose parameters are described by
// parameterShapeOfResultParams (input type and device name).
class ParameterShapeOfResultSubgraphTest
    : public testing::WithParamInterface<parameterShapeOfResultParams>
    , public virtual LayerTestsUtils::LayerTestsCommon {
public:
    // Produces the test case name for the given parameter set.
    static std::string getTestCaseName(testing::TestParamInfo<parameterShapeOfResultParams> paramInfo);

protected:
    // Prepares the fixture before each test.
    void SetUp() override;
};
|
||||
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
@@ -0,0 +1,34 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "shared_test_classes/subgraph/parameter_shapeof_result.hpp"
|
||||
|
||||
#include <ie_ngraph_utils.hpp>
|
||||
#include <ngraph/opsets/opset6.hpp>
|
||||
|
||||
namespace SubgraphTestsDefinitions {
|
||||
|
||||
// Builds the test case name from the parameter tuple in the form
// "InType=<input type>_TargetDevice=<device name>".
std::string ParameterShapeOfResultSubgraphTest::getTestCaseName(testing::TestParamInfo<parameterShapeOfResultParams> obj) {
    ngraph::element::Type inType;
    std::string targetDevice;
    std::tie(inType, targetDevice) = obj.param;

    std::ostringstream result;
    // The trailing '_' keeps the two fields apart; without it the type and the next
    // key run together (e.g. "InType=f32TargetDevice=...").
    result << "InType=" << inType << "_";
    result << "TargetDevice=" << targetDevice;
    return result.str();
}
|
||||
|
||||
void ParameterShapeOfResultSubgraphTest::SetUp() {
|
||||
ngraph::element::Type inType;
|
||||
std::tie(inType, targetDevice) = this->GetParam();
|
||||
inPrc = InferenceEngine::details::convertPrecision(inType);
|
||||
|
||||
const auto parameter = std::make_shared<ngraph::opset6::Parameter>(inType, ngraph::Shape{1, 3, 10, 10});
|
||||
const auto shapeOf = std::make_shared<ngraph::opset6::ShapeOf>(parameter);
|
||||
const ngraph::ResultVector results{std::make_shared<ngraph::opset6::Result>(shapeOf)};
|
||||
ngraph::ParameterVector params = {parameter};
|
||||
function = std::make_shared<ngraph::Function>(results, params, "ParameterShapeOfResult");
|
||||
}
|
||||
|
||||
} // namespace SubgraphTestsDefinitions
|
||||
@@ -178,6 +178,7 @@ def get_dtype(precision):
|
||||
format_map = {
|
||||
'FP32' : np.float32,
|
||||
'I32' : np.int32,
|
||||
'I64' : np.int64,
|
||||
'FP16' : np.float16,
|
||||
'I16' : np.int16,
|
||||
'U16' : np.uint16,
|
||||
@@ -219,4 +220,6 @@ def fill_blob_with_image_info(image_size, layer):
|
||||
return im_info
|
||||
|
||||
def fill_blob_with_random(layer):
    """Generate random data for ``layer`` with the dtype matching ``layer.precision``.

    For a layer with a non-empty shape an array of that shape is produced with
    ``np.random.rand`` and cast to the target dtype. For a layer with an empty
    shape (a scalar) a single random value of the target dtype is returned.
    """
    dtype = get_dtype(layer.precision)
    if layer.shape:
        return np.random.rand(*layer.shape).astype(dtype)
    # With an empty shape np.random.rand() yields a single value rather than an
    # array, so it is converted by calling the dtype directly.
    return dtype(np.random.rand())
|
||||
|
||||
Reference in New Issue
Block a user