Merge remote-tracking branch 'upstream/master' into debian-packages

Ilya Lavrenov
2021-12-08 18:06:01 +03:00
58 changed files with 1357 additions and 1123 deletions

View File

@@ -287,8 +287,8 @@ if(ENABLE_INTEL_GNA)
set(GNA_HASH "cc954e67525006bf8bd353a6682e38bf208f6d74e973e0fc292850e721f17452")
endif()
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
set(GNA_VERSION "03.00.00.1377")
set(GNA_HASH "d45fb48994d8c2803a16e88e29ae48851066325b97c1c6c4a5bf4f4573d55c65")
set(GNA_VERSION "03.00.00.1455")
set(GNA_HASH "8ac1af18eb32777b00193f4f8c252ee4f8bd64a9069138b4a5aaeebd82ead464")
endif()
set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include)

View File

@@ -63,10 +63,10 @@ Caffe*-specific parameters:
-k K Path to CustomLayersMapping.xml to register custom
layers
--mean_file MEAN_FILE, -mf MEAN_FILE
Mean image to be used for the input. Should be a
[DEPRECATED] Mean image to be used for the input. Should be a
binaryproto file
--mean_file_offsets MEAN_FILE_OFFSETS, -mo MEAN_FILE_OFFSETS
Mean image offsets to be used for the input
[DEPRECATED] Mean image offsets to be used for the input
binaryproto file. When the mean image is bigger than
the expected input, it is cropped. By default, centers
of the input image and the mean image are the same and

View File

@@ -42,7 +42,7 @@ To convert a Paddle\* model:
Parameters to convert your model:
* [Framework-agnostic parameters](Converting_Model_General.md): These parameters are used to convert a model trained with any supported framework.
> **NOTE:** `--scale`, `--scale_values`, `--mean_values`, `--mean_file` are not supported in the current version of mo_paddle.
> **NOTE:** `--scale`, `--scale_values`, `--mean_values` are not supported in the current version of mo_paddle.
### Example of Converting a Paddle* Model
Below is the example command to convert yolo v3 Paddle\* network to OpenVINO IR network with Model Optimizer.

View File

@@ -6,7 +6,7 @@ mo --input_model INPUT_MODEL --output_dir <OUTPUT_MODEL_DIR>
```
You need to have write permissions for the output directory.
> **NOTE**: Some models require using additional arguments to specify conversion parameters, such as `--input_shape`, `--scale`, `--scale_values`, `--mean_values`, `--mean_file`. To learn about when you need to use these parameters, refer to [Converting a Model Using General Conversion Parameters](Converting_Model_General.md).
> **NOTE**: Some models require using additional arguments to specify conversion parameters, such as `--input_shape`, `--scale`, `--scale_values`, `--mean_values`. To learn about when you need to use these parameters, refer to [Converting a Model Using General Conversion Parameters](Converting_Model_General.md).
To adjust the conversion process, you may use general parameters defined in the [Converting a Model Using General Conversion Parameters](Converting_Model_General.md) and
Framework-specific parameters for:

View File

@@ -151,7 +151,7 @@ Usually neural network models are trained with the normalized input data. This m
In the first case, the Model Optimizer generates the IR with required pre-processing layers and Inference Engine samples may be used to infer the model.
In the second case, information about mean/scale values should be provided to the Model Optimizer to embed it to the generated IR. Model Optimizer provides a number of command line parameters to specify them: `--scale`, `--scale_values`, `--mean_values`, `--mean_file`.
In the second case, information about mean/scale values should be provided to the Model Optimizer to embed it to the generated IR. Model Optimizer provides a number of command line parameters to specify them: `--scale`, `--scale_values`, `--mean_values`.
If both mean and scale values are specified, the mean is subtracted first and then scale is applied. Input values are *divided* by the scale value(s).
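To make that ordering concrete, below is a minimal standalone sketch (not part of this change set) that applies hypothetical per-channel values, such as those that might be passed via `--mean_values` and `--scale_values`, in the documented order: the mean is subtracted first, then the result is divided by the scale.
```cpp
#include <array>
#include <cstddef>
#include <cstdio>

int main() {
    // Hypothetical per-channel values, e.g. what --mean_values [123.68,116.78,103.94]
    // and --scale_values [58.395,57.12,57.375] could carry for an RGB input.
    const std::array<float, 3> mean{123.68f, 116.78f, 103.94f};
    const std::array<float, 3> scale{58.395f, 57.12f, 57.375f};
    const std::array<float, 3> pixel{255.0f, 128.0f, 0.0f};  // one example RGB value

    for (std::size_t c = 0; c < pixel.size(); ++c) {
        // Documented order: subtract the mean first, then divide by the scale.
        const float normalized = (pixel[c] - mean[c]) / scale[c];
        std::printf("channel %zu: %.3f\n", c, normalized);
    }
    return 0;
}
```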

View File

@@ -2,30 +2,23 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "shape_inference.hpp"
#include <ngraph/runtime/host_tensor.hpp>
#include <openvino/core/node.hpp>
#include <ngraph/runtime/host_tensor.hpp>
#include <openvino/opsets/opset1.hpp>
#include <openvino/opsets/opset2.hpp>
#include <openvino/opsets/opset4.hpp>
#include <openvino/opsets/opset5.hpp>
#include <openvino/opsets/opset6.hpp>
#include <openvino/opsets/opset8.hpp>
#include "assign_shape_inference.hpp"
#include "convolution_shape_inference.hpp"
#include "experimental_detectron_detection_output_shape_inference.hpp"
#include "experimental_detectron_prior_grid_generator_shape_inference.hpp"
#include "fake_quantize.hpp"
#include "lstm_cell_shape_inference.hpp"
#include "read_value_shape_inference.hpp"
#include "reduce_shape_inference.hpp"
#include "shape_inference.hpp"
#include "shape_nodes.hpp"
#include "static_shape.hpp"
#include "tile_shape_inference.hpp"
#include "utils.hpp"
#include "shape_inference.hpp"
#include "convolution_shape_inference.hpp"
#include "reduce_shape_inference.hpp"
#include "shape_nodes.hpp"
#include "fake_quantize.hpp"
#include "experimental_detectron_detection_output_shape_inference.hpp"
void shape_inference(ov::Node* op,
const std::vector<ov::StaticShape>& input_shapes,
@@ -34,53 +27,44 @@ void shape_inference(ov::Node* op,
if (auto node = ov::as_type<ov::opset8::Convolution>(op)) {
ov::CoordinateDiff pads_begin, pads_end;
bool status = resolve_auto_pad_for_shape(node, pads_begin, pads_end, input_shapes, 2, 2);
OPENVINO_ASSERT(status,
"Convolution shape inference doesn't have enough information to calculate static shapes");
OPENVINO_ASSERT(status, "Convolution shape inference doesn't have enough information to calculate static shapes");
shape_infer(node, pads_begin, pads_end, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset8::GroupConvolution>(op)) {
ov::CoordinateDiff pads_begin, pads_end;
bool status = resolve_auto_pad_for_shape(node, pads_begin, pads_end, input_shapes, 2, 3);
OPENVINO_ASSERT(status,
"GroupConvolution shape inference doesn't have enough information to calculate static shapes");
OPENVINO_ASSERT(status, "GroupConvolution shape inference doesn't have enough information to calculate static shapes");
shape_infer(node, pads_begin, pads_end, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset8::ConvolutionBackpropData>(op)) {
ov::CoordinateDiff pads_begin, pads_end;
ov::StaticShape output_shape_input;
if (node->get_input_size() == 3)
get_data_as_shape<ov::StaticShape>(2, op, output_shape_input, constant_data);
bool status =
resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 2);
OPENVINO_ASSERT(
status,
"ConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
bool status = resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 2);
OPENVINO_ASSERT(status, "ConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
shape_infer(node, pads_begin, pads_end, output_shape_input, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset8::GroupConvolutionBackpropData>(op)) {
ov::CoordinateDiff pads_begin, pads_end;
ov::StaticShape output_shape_input;
if (node->get_input_size() == 3)
get_data_as_shape<ov::StaticShape>(2, op, output_shape_input, constant_data);
bool status =
resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 3);
OPENVINO_ASSERT(
status,
"GroupConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
bool status = resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 3);
OPENVINO_ASSERT(status, "GroupConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
shape_infer(node, pads_begin, pads_end, output_shape_input, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::op::util::ArithmeticReductionKeepDims>(op)) {
shape_infer(node, input_shapes, output_shapes, constant_data);
} else if (auto node = ov::as_type<ov::op::util::LogicalReductionKeepDims>(op)) {
shape_infer(node, input_shapes, output_shapes, constant_data);
} else if (ov::is_type<ov::op::util::UnaryElementwiseArithmetic>(op) || ov::is_type<ov::opset1::Convert>(op) ||
ov::is_type<ov::opset1::Clamp>(op) || ov::is_type<ov::opset1::GRN>(op) ||
ov::is_type<ov::opset1::LRN>(op) || ov::is_type<ov::opset1::LogicalNot>(op) ||
ov::is_type<ov::opset4::Mish>(op) || ov::is_type<ov::opset2::MVN>(op) ||
ov::is_type<ov::opset6::MVN>(op) || ov::is_type<ov::opset1::PRelu>(op) ||
ov::is_type<ov::opset1::Relu>(op) || ov::is_type<ov::opset4::Swish>(op) ||
ov::is_type<ov::opset1::Softmax>(op) || ov::is_type<ov::opset1::Elu>(op) ||
ov::is_type<ov::opset5::Round>(op)) {
} else if (ov::is_type<ov::op::util::UnaryElementwiseArithmetic>(op) ||
ov::is_type<ov::opset1::Convert>(op) || ov::is_type<ov::opset1::Clamp>(op) ||
ov::is_type<ov::opset1::GRN>(op) || ov::is_type<ov::opset1::LRN>(op) ||
ov::is_type<ov::opset1::LogicalNot>(op) || ov::is_type<ov::opset4::Mish>(op) ||
ov::is_type<ov::opset2::MVN>(op) || ov::is_type<ov::opset6::MVN>(op) ||
ov::is_type<ov::opset1::PRelu>(op) || ov::is_type<ov::opset1::Relu>(op) ||
ov::is_type<ov::opset4::Swish>(op) || ov::is_type<ov::opset1::Softmax>(op) ||
ov::is_type<ov::opset1::Elu>(op) || ov::is_type<ov::opset5::Round>(op)) {
copy_shape_infer(node, input_shapes, output_shapes);
} else if (ov::is_type<ov::op::util::BinaryElementwiseArithmetic>(op) ||
ov::is_type<ov::op::util::BinaryElementwiseComparison>(op) ||
ov::is_type<ov::op::util::BinaryElementwiseLogical>(op)) {
ov::is_type<ov::op::util::BinaryElementwiseComparison>(op) || ov::is_type<ov::op::util::BinaryElementwiseLogical>(op)) {
eltwise_shape_infer(op, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset1::FakeQuantize>(op)) {
shape_infer(node, input_shapes, output_shapes);
@@ -96,30 +80,15 @@ void shape_inference(ov::Node* op,
shape_infer(node, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset6::ExperimentalDetectronDetectionOutput>(op)) {
shape_infer(node, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset3::Assign>(op)) {
shape_infer(node, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset6::Assign>(op)) {
shape_infer(node, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset6::ExperimentalDetectronPriorGridGenerator>(op)) {
shape_infer(node, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset1::LSTMCell>(op)) {
shape_infer(node, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset6::LSTMCell>(op)) {
shape_infer(node, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset3::ReadValue>(op)) {
shape_infer(node, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset6::ReadValue>(op)) {
shape_infer(node, input_shapes, output_shapes);
} else if (auto node = ov::as_type<ov::opset6::Tile>(op)) {
shape_infer(node, input_shapes, output_shapes, constant_data);
} else {
ngraph::OutputVector new_inputs;
for (size_t i = 0; i < op->get_input_size(); ++i) {
if (constant_data.count(i)) {
new_inputs.push_back(std::make_shared<ov::opset1::Constant>(constant_data.at(i)));
} else {
new_inputs.push_back(std::make_shared<ov::opset1::Parameter>(op->get_input_element_type(i),
input_shapes[i].to_partial_shape()));
new_inputs.push_back(
std::make_shared<ov::opset1::Parameter>(
op->get_input_element_type(i), input_shapes[i].to_partial_shape()));
}
}
const auto local_op = op->clone_with_new_inputs(new_inputs);
@@ -127,10 +96,8 @@ void shape_inference(ov::Node* op,
output_shapes.resize(op->get_output_size());
for (size_t i = 0; i < output_shapes.size(); ++i) {
const auto& partial_shape = local_op->get_output_partial_shape(i);
OPENVINO_ASSERT(
partial_shape.is_static(),
"On device shape infer shouldn't support default shape infer for nodes with internal dynamism");
const auto &partial_shape = local_op->get_output_partial_shape(i);
OPENVINO_ASSERT(partial_shape.is_static(), "On device shape infer shouldn't support default shape infer for nodes with internal dynamism");
output_shapes[i] = ov::StaticShape(partial_shape.to_shape());
}
}

View File

@@ -0,0 +1,149 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <memory>
#include <queue>
#include <ngraph/function.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp>
#include <transformations/op_conversions/einsum_decomposition.hpp>
#include <transformations/init_node_info.hpp>
#include <ngraph/pass/manager.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul) {
Shape data_shape_1{10, 2};
Shape data_shape_2{10, 2, 25};
{
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
auto const_transpose_before = opset1::Constant::create(element::i32, Shape{3}, {1, 2, 0});
auto transpose_before = std::make_shared<opset1::Transpose>(data_2, const_transpose_before);
auto const_reshape_before = opset1::Constant::create(element::i32, Shape{2}, {2, 250});
auto reshape_before = std::make_shared<opset1::Reshape>(transpose_before, const_reshape_before, false);
auto matmul = std::make_shared<opset1::MatMul>(data_1, reshape_before);
auto const_reshape_after = opset1::Constant::create(element::i32, Shape{3}, {10, 10, 25});
auto reshape_after = std::make_shared<opset1::Reshape>(matmul, const_reshape_after, false);
auto const_tranpose_after = opset1::Constant::create(element::i32, Shape{3}, {2, 0, 1});
auto tranpose_after = std::make_shared<opset1::Transpose>(reshape_after, const_tranpose_after);
function = std::make_shared<Function>(NodeVector{tranpose_after}, ParameterVector{data_1, data_2});
manager.register_pass<pass::InitNodeInfo>();
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
}
{
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
auto matmul = std::make_shared<opset1::MatMul>(data_1, data_2);
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
}
}
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul_TransposedA) {
Shape data_shape_1{2, 10};
Shape data_shape_2{10, 2, 25};
{
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
auto const_transpose_before = opset1::Constant::create(element::i32, Shape{3}, {1, 2, 0});
auto transpose_before = std::make_shared<opset1::Transpose>(data_2, const_transpose_before);
auto const_reshape_before = opset1::Constant::create(element::i32, Shape{2}, {2, 250});
auto reshape_before = std::make_shared<opset1::Reshape>(transpose_before, const_reshape_before, false);
auto matmul = std::make_shared<opset1::MatMul>(data_1, reshape_before, true, false);
auto const_reshape_after = opset1::Constant::create(element::i32, Shape{3}, {10, 10, 25});
auto reshape_after = std::make_shared<opset1::Reshape>(matmul, const_reshape_after, false);
auto const_tranpose_after = opset1::Constant::create(element::i32, Shape{3}, {2, 0, 1});
auto tranpose_after = std::make_shared<opset1::Transpose>(reshape_after, const_tranpose_after);
function = std::make_shared<Function>(NodeVector{tranpose_after}, ParameterVector{data_1, data_2});
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
}
{
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
auto matmul = std::make_shared<opset1::MatMul>(data_1, data_2, true, false);
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
}
}
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul_TransposedB) {
Shape data_shape_1{10, 2};
Shape data_shape_2{10, 2, 25};
{
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
auto const_transpose_before = opset1::Constant::create(element::i32, Shape{3}, {0, 2, 1});
auto transpose_before = std::make_shared<opset1::Transpose>(data_2, const_transpose_before);
auto const_reshape_before = opset1::Constant::create(element::i32, Shape{2}, {250, 2});
auto reshape_before = std::make_shared<opset1::Reshape>(transpose_before, const_reshape_before, false);
auto matmul = std::make_shared<opset1::MatMul>(data_1, reshape_before, false, true);
auto const_reshape_after = opset1::Constant::create(element::i32, Shape{3}, {10, 10, 25});
auto reshape_after = std::make_shared<opset1::Reshape>(matmul, const_reshape_after, false);
auto const_tranpose_after = opset1::Constant::create(element::i32, Shape{3}, {1, 0, 2});
auto tranpose_after = std::make_shared<opset1::Transpose>(reshape_after, const_tranpose_after);
function = std::make_shared<Function>(NodeVector{tranpose_after}, ParameterVector{data_1, data_2});
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
}
{
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
auto matmul = std::make_shared<opset1::MatMul>(data_1, data_2);
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
}
}
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul_TransposedAB) {
Shape data_shape_1{2, 10};
Shape data_shape_2{10, 2, 25};
{
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
auto const_transpose_before = opset1::Constant::create(element::i32, Shape{3}, {0, 2, 1});
auto transpose_before = std::make_shared<opset1::Transpose>(data_2, const_transpose_before);
auto const_reshape_before = opset1::Constant::create(element::i32, Shape{2}, {250, 2});
auto reshape_before = std::make_shared<opset1::Reshape>(transpose_before, const_reshape_before, false);
auto matmul = std::make_shared<opset1::MatMul>(data_1, reshape_before, true, true);
auto const_reshape_after = opset1::Constant::create(element::i32, Shape{3}, {10, 10, 25});
auto reshape_after = std::make_shared<opset1::Reshape>(matmul, const_reshape_after, false);
auto const_tranpose_after = opset1::Constant::create(element::i32, Shape{3}, {1, 0, 2});
auto tranpose_after = std::make_shared<opset1::Transpose>(reshape_after, const_tranpose_after);
function = std::make_shared<Function>(NodeVector{tranpose_after}, ParameterVector{data_1, data_2});
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
}
{
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
auto matmul = std::make_shared<opset1::MatMul>(data_1, data_2, true, false);
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
}
}
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul_Einsum) {
Shape data_shape_1{5, 2};
Shape data_shape_2{10, 2, 25};
{
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
auto einsum = std::make_shared<opset7::Einsum>(OutputVector{data_1, data_2}, "kl,mlj->mkj");
function = std::make_shared<Function>(NodeVector{einsum}, ParameterVector{data_1, data_2});
manager.register_pass<pass::EinsumDecomposition>();
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
}
{
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
// for some cases Reshape may be first input for Matmul
auto shape_constant = std::make_shared<opset1::Constant>(element::i64, Shape{data_shape_1.size()}, data_shape_1);
auto reshape = std::make_shared<opset1::Reshape>(data_1, shape_constant, false);
auto matmul = std::make_shared<opset1::MatMul>(reshape, data_2, false, false);
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
}
}

View File

@@ -74,6 +74,15 @@ const std::vector<std::vector<size_t >> kernels2D = {
{7, 1},
{3, 3},
};
const std::vector<std::vector<size_t >> kernels2D_big = {
{7, 2},
{2, 7},
{3, 7},
{6, 6},
{7, 7},
};
const std::vector<std::vector<size_t >> strides2D = {
{1, 1},
};
@@ -100,6 +109,16 @@ const auto conv2DParams_Kernels2D = ::testing::Combine(
::testing::ValuesIn(numOutCannels2D),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const auto conv2DParams_Kernels2D_big = ::testing::Combine(
::testing::ValuesIn(kernels2D_big),
::testing::ValuesIn(strides2D),
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutCannels2D),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const auto conv2DParams_ExplicitPadding_Height1 = ::testing::Combine(
::testing::ValuesIn(kernelsH1),
::testing::ValuesIn(stridesH1),
@@ -218,4 +237,16 @@ INSTANTIATE_TEST_SUITE_P(smoke_Convolution2D_Kernels2D, GnaConvolutionLayerTest,
::testing::Values(input2DNCHW),
::testing::Values(CommonTestUtils::DEVICE_GNA)),
GnaConvolutionLayerTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_Convolution2D_Kernels2D_big, GnaConvolutionLayerTest,
::testing::Combine(
conv2DParams_Kernels2D_big,
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(InferenceEngine::Layout::ANY),
::testing::Values(input2DNCHW),
::testing::Values(CommonTestUtils::DEVICE_GNA)),
GnaConvolutionLayerTest::getTestCaseName);
} // namespace

View File

@@ -22,21 +22,39 @@ const std::vector<std::vector<size_t >> kernels2D = {
{1, 3},
{7, 1},
{3, 3},
{7, 2},
{2, 7}
};
const std::vector<std::vector<size_t >> kernels2DInvalid = {
{1, 4},
{2, 3},
{3, 2},
{9, 3},
{1, 9},
{1, 8},
{8, 1},
{4, 4},
{8, 8},
};
const std::vector<std::vector<size_t >> kernels2DInvalidFor56InC = {
{1, 6},
{2, 6},
{7, 7},
{1, 7},
{4, 7},
};
const std::vector<std::vector<size_t >> kernels2DInvalidFor120InC = {
{1, 4},
{8, 3},
{7, 5},
{1, 6},
{4, 7},
};
const std::vector<std::vector<size_t >> strides2D = {
{1, 1},
};
const std::vector<std::vector<size_t >> strides2DInvalid = {
{4, 4}, {1, 4}
{8, 8}, {1, 8}
};
const std::vector<std::vector<ptrdiff_t>> padBegins2D = { {0, 0},
};
@@ -51,10 +69,13 @@ const std::vector<std::vector<size_t >> dilations2D = { {1, 1},
const std::vector<std::vector<size_t >> dilations2DInvalid = { {2, 2},
};
const std::vector<size_t> numOutChannels2D = { 32 };
const std::vector<size_t> numOutChannels2DInvalid = { 1, 7, 9, 400 };
const std::vector<size_t> numOutChannels2DInvalid = { 1, 7, 9, 1032 };
const std::vector<std::vector<size_t>> input2DNCHWFine = { { 1, 8, 20, 16 } };
const std::vector<std::vector<size_t>> input2DNCHWWithInC56 = { { 1, 56, 20, 16 } };
const std::vector<std::vector<size_t>> input2DNCHWWithInC120 = { { 1, 120, 20, 16 } };
const std::vector<std::vector<size_t>> input2DNCHWInvalidInputC = {
{ 1, 7, 20, 16 },
{ 1, 9, 20, 16 },
@@ -80,6 +101,27 @@ const auto conv2DParametersInvalidKernel = ::testing::Combine(
::testing::ValuesIn(numOutChannels2D),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const auto conv2DParametersInvalidKernelFor56InC = ::testing::Combine(
::testing::ValuesIn(kernels2DInvalidFor56InC),
::testing::ValuesIn(strides2D),
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutChannels2D),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const auto conv2DParametersInvalidKernelFor120InC = ::testing::Combine(
::testing::ValuesIn(kernels2DInvalidFor120InC),
::testing::ValuesIn(strides2D),
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutChannels2D),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const auto conv2DParametersInvalidFilterNumber = ::testing::Combine(
::testing::ValuesIn(kernels2D),
::testing::ValuesIn(strides2D),
@@ -165,6 +207,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GnaConv2DNegativeTestInvalid##whats_wrong, GnaCon
GNA_NEG_INSTANTIATE(FilterNumber, InvalidFilterNumber, Fine, "Unsupported number of kernels")
GNA_NEG_INSTANTIATE(Kernel, InvalidKernel, Fine, "Unsupported kernel shape")
GNA_NEG_INSTANTIATE(BigKernelFor56InC, InvalidKernelFor56InC, WithInC56, "Unsupported kernel shape")
GNA_NEG_INSTANTIATE(BigKernelFor120InC, InvalidKernelFor120InC, WithInC120, "Unsupported kernel shape")
GNA_NEG_INSTANTIATE(InputH, Fine, InvalidInputH, "Unsupported input height")
GNA_NEG_INSTANTIATE(InputW, Fine, InvalidInputW, "Unsupported input width")
GNA_NEG_INSTANTIATE(InputC, Fine, InvalidInputC, "Unsupported number of input channels")
@@ -172,4 +216,4 @@ GNA_NEG_INSTANTIATE(Padding, InvalidPadding, Fine, "Convolution's input padding
GNA_NEG_INSTANTIATE(Stride, InvalidStride, Fine, "Unsupported convolution stride shape")
GNA_NEG_INSTANTIATE(Dilation, InvalidDilation, Fine, "dilation is not supported on GNA")
} // namespace
} // namespace

View File

@@ -1,47 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <openvino/op/ops.hpp>
#include <openvino/op/parameter.hpp>
#include <utils/shape_inference/shape_inference.hpp>
#include <utils/shape_inference/static_shape.hpp>
using namespace ov;
template <class T>
std::shared_ptr<T> constructGraph();
template <>
std::shared_ptr<op::v3::Assign> constructGraph() {
auto input = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
auto read_value = std::make_shared<op::v3::ReadValue>(input, "variable_id");
return std::make_shared<op::v3::Assign>(read_value, "variable_id");
}
template <>
std::shared_ptr<op::v6::Assign> constructGraph() {
auto input = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
auto variable = std::make_shared<ov::op::util::Variable>(
ov::op::util::VariableInfo{PartialShape::dynamic(), element::dynamic, "ID"});
auto read_value = std::make_shared<op::v6::Assign>(input, variable);
return std::make_shared<op::v6::Assign>(read_value, variable);
}
template <class T>
void assignTest() {
auto assign = constructGraph<T>();
// Test StaticShape
std::vector<StaticShape> static_input_shapes = {StaticShape{1, 2, 64, 64}}, static_output_shapes = {StaticShape{}};
shape_inference(assign.get(), static_input_shapes, static_output_shapes);
ASSERT_EQ(static_input_shapes[0], (StaticShape{1, 2, 64, 64}));
}
TEST(StaticShapeInferenceTest, AssignTest) {
// Test v3 Assign
assignTest<op::v3::Assign>();
// Test v6 Assign
assignTest<op::v6::Assign>();
}

View File

@@ -1,37 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <openvino/op/experimental_detectron_prior_grid_generator.hpp>
#include <openvino/op/ops.hpp>
#include <openvino/op/parameter.hpp>
#include <utils/shape_inference/shape_inference.hpp>
#include <utils/shape_inference/static_shape.hpp>
using namespace ov;
TEST(StaticShapeInferenceTest, PriorGridGenerator) {
op::v6::ExperimentalDetectronPriorGridGenerator::Attributes attrs;
attrs.flatten = false;
attrs.h = 0;
attrs.w = 0;
attrs.stride_x = 4.0f;
attrs.stride_y = 4.0f;
auto priors = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1});
auto feature_map = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
auto im_data = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
auto grid_gen =
std::make_shared<ov::op::v6::ExperimentalDetectronPriorGridGenerator>(priors, feature_map, im_data, attrs);
std::vector<StaticShape> static_input_shapes = {StaticShape{3, 4},
StaticShape{1, 256, 200, 336},
StaticShape{1, 3, 800, 1344}},
static_output_shapes = {StaticShape{}};
shape_inference(grid_gen.get(), static_input_shapes, static_output_shapes);
ASSERT_EQ(static_output_shapes[0], StaticShape({200, 336, 3, 4}));
}

View File

@@ -1,38 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <openvino/op/ops.hpp>
#include <openvino/op/parameter.hpp>
#include <utils/shape_inference/shape_inference.hpp>
#include <utils/shape_inference/static_shape.hpp>
using namespace ov;
TEST(StaticShapeInferenceTest, LstmCellTest) {
const size_t batch_size = 2;
const size_t input_size = 3;
const size_t hidden_size = 3;
const size_t gates_count = 4;
const auto X = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
const auto W = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
const auto R = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
const auto H_t = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
const auto C_t = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
const auto Bias = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1});
const auto lstm_cell = std::make_shared<op::v4::LSTMCell>(X, H_t, C_t, W, R, Bias, hidden_size);
std::vector<StaticShape> static_input_shapes = {StaticShape{batch_size, input_size},
StaticShape{batch_size, hidden_size},
StaticShape{batch_size, hidden_size},
StaticShape{gates_count * hidden_size, input_size},
StaticShape{gates_count * hidden_size, hidden_size},
StaticShape{gates_count * hidden_size}},
static_output_shapes = {StaticShape{}, StaticShape{}};
shape_inference(lstm_cell.get(), static_input_shapes, static_output_shapes);
ASSERT_EQ(static_output_shapes[0], StaticShape({batch_size, hidden_size}));
ASSERT_EQ(static_output_shapes[1], StaticShape({batch_size, hidden_size}));
}

View File

@@ -1,45 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <openvino/op/ops.hpp>
#include <openvino/op/parameter.hpp>
#include <utils/shape_inference/shape_inference.hpp>
#include <utils/shape_inference/static_shape.hpp>
using namespace ov;
template <class T>
std::shared_ptr<T> constructGraph();
template <>
std::shared_ptr<op::v3::ReadValue> constructGraph() {
auto input = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
return std::make_shared<op::v3::ReadValue>(input, "variable_id");
}
template <>
std::shared_ptr<op::v6::ReadValue> constructGraph() {
auto input = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
auto variable = std::make_shared<ov::op::util::Variable>(
ov::op::util::VariableInfo{PartialShape::dynamic(), element::dynamic, "ID"});
return std::make_shared<op::v6::ReadValue>(input, variable);
}
template <class T>
void readValueTest() {
auto readValue = constructGraph<T>();
// Test StaticShape
std::vector<StaticShape> static_input_shapes = {StaticShape{1, 2, 64, 64}}, static_output_shapes = {StaticShape{}};
shape_inference(readValue.get(), static_input_shapes, static_output_shapes);
ASSERT_EQ(static_output_shapes[0], (StaticShape{1, 2, 64, 64}));
}
TEST(StaticShapeInferenceTest, ReadValueTest) {
// Test v3 ReadValue
readValueTest<op::v3::ReadValue>();
// Test v6 ReadValue
readValueTest<op::v6::ReadValue>();
}

View File

@@ -1,50 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <openvino/op/ops.hpp>
#include <openvino/op/parameter.hpp>
#include <utils/shape_inference/shape_inference.hpp>
#include <utils/shape_inference/static_shape.hpp>
using namespace ov;
TEST(StaticShapeInferenceTest, TileTest) {
auto param0 = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1});
auto param1 = std::make_shared<ov::op::v0::Constant>(element::i64, ov::Shape{3}, std::vector<int>{3, 4, 1});
auto tile = std::make_shared<op::v0::Tile>(param0, param1);
// Test Static Shape
std::vector<StaticShape> static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{3}},
static_output_shapes = {StaticShape{}};
shape_inference(tile.get(), static_input_shapes, static_output_shapes);
ASSERT_EQ(static_output_shapes[0], StaticShape({18, 32, 10}));
// Test Wrong Static Shape
std::vector<StaticShape> wrong_static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{}},
wrong_static_output_shapes = {StaticShape{}};
ASSERT_THROW(shape_inference(tile.get(), wrong_static_input_shapes, wrong_static_output_shapes), ov::AssertFailure);
}
TEST(StaticShapeInferenceTest, TileFewRepeatsTest) {
auto param0 = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1});
auto param1 = ov::op::v0::Constant::create(element::i64, Shape{2}, {4, 1});
auto tile = std::make_shared<op::v0::Tile>(param0, param1);
// Test Static Shape
std::vector<StaticShape> static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{2}},
static_output_shapes = {StaticShape{}};
shape_inference(tile.get(), static_input_shapes, static_output_shapes);
ASSERT_EQ(static_output_shapes[0], StaticShape({6, 32, 10}));
}
TEST(StaticShapeInferenceTest, TileSmallDataRankTest) {
auto param0 = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1});
auto param1 = ov::op::v0::Constant::create(element::i64, Shape{3}, {3, 4, 1});
auto tile = std::make_shared<op::v0::Tile>(param0, param1);
// Test Static Shape
std::vector<StaticShape> static_input_shapes = {StaticShape{8, 10}, StaticShape{3}},
static_output_shapes = {StaticShape{}};
shape_inference(tile.get(), static_input_shapes, static_output_shapes);
ASSERT_EQ(static_output_shapes[0], StaticShape({3, 32, 10}));
}

View File

@@ -84,8 +84,8 @@ openvino_developer_export_targets(COMPONENT inference_engine TARGETS ${TARGET_NA
if(BUILD_SHARED_LIBS)
install(TARGETS ${TARGET_NAME}
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core OPTIONAL
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core OPTIONAL
NAMELINK_COMPONENT core_dev)
else()
ov_install_static_lib(${TARGET_NAME} core)

View File

@@ -48,10 +48,6 @@ set_target_properties(${TARGET_NAME} PROPERTIES SOVERSION 2022.1.1)
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
# developer package
openvino_developer_export_targets(COMPONENT inference_engine TARGETS ${TARGET_NAME})
# install
# TODO: uncomment once snippets are integrated into CPU plugin

View File

@@ -0,0 +1,32 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <memory>
#include "transformations_visibility.hpp"
#include "ngraph/pass/graph_rewrite.hpp"
namespace ngraph {
namespace pass {
class TRANSFORMATIONS_API TransposeReshapeEliminationForMatmul;
} // namespace pass
} // namespace ngraph
/**
* @ingroup ie_transformation_common_api
* @brief TransposeReshapeEliminationForMatmul transformation eliminates Transpose and Reshape which were created to
* align input and output dimension ranks before second MatMul input and after MatMul output
* (for example, after Einsum Decomposition inside TensorFlow 1 and nGraph EinsumDecomposition transformation)
*/
class ngraph::pass::TransposeReshapeEliminationForMatmul: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
TransposeReshapeEliminationForMatmul();
};

View File

@@ -51,6 +51,7 @@
#include "transformations/common_optimizations/mul_conv_fusion.hpp"
#include "transformations/common_optimizations/interpolate_sequence_fusion.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include <transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp>
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
#include "transformations/op_conversions/convert_pad_to_group_conv.hpp"
#include "transformations/op_conversions/convert_divide.hpp"
@@ -149,6 +150,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
decomp->add_matcher<ngraph::pass::SoftmaxDecomposition, false>();
decomp->add_matcher<ngraph::pass::GatherNegativeConstIndicesNormalize>();
decomp->add_matcher<ngraph::pass::DropoutWithRandomUniformReplacer>();
decomp->add_matcher<ngraph::pass::TransposeReshapeEliminationForMatmul>();
decomp->set_name("ngraph::pass::CommonDecompositions");
// CF is required after all decompositions

View File

@@ -0,0 +1,175 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp"
#include <memory>
#include <vector>
#include "ngraph/opsets/opset1.hpp"
#include "ngraph/rt_info.hpp"
#include "ngraph/pattern/op/wrap_type.hpp"
#include "ngraph/validation_util.hpp"
#include "itt.hpp"
namespace {
/// \brief Check that the Transpose orders before and after MatMul are correct: the Transpose after MatMul must
/// invert the Transpose before MatMul
///
/// \param before_order Order of Transpose which is before MatMul
/// \param after_order Order of Transpose which is after MatMul
/// \param transposed_b true - second MatMul input is transposed, otherwise, it's not transposed
///
/// \return True - Transposes have right orders, otherwise, Transposes have incorrect order for transformation
///
bool check_transposes(const std::vector<int64_t>& before_order, const std::vector<int64_t>& after_order, const bool transposed_b) {
const size_t rank = before_order.size();
if (rank < 3)
return false;
if (before_order.size() != after_order.size())
return false;
if (transposed_b) {
// before order must be : 0, 1, 2, ..., N-1, N-2
std::vector<int64_t> start_order(rank);
std::iota(start_order.begin(), start_order.begin() + rank - 2, 0);
start_order[rank - 1] = rank - 2;
start_order[rank - 2] = rank - 1;
if (before_order != start_order)
return false;
// after order must be : 1, ..., N-2, 0, N-1
std::vector<int64_t> back_order(rank);
std::iota(back_order.begin(), back_order.begin() + rank - 2, 1);
back_order[rank - 2] = 0;
back_order[rank - 1] = rank - 1;
if (after_order != back_order)
return false;
} else {
// before order must be : N-2, N-1, 0, 1, 2, ...
std::vector<int64_t> needed_transpose_order_before(rank);
std::iota(needed_transpose_order_before.begin() + 2, needed_transpose_order_before.end(), 0);
needed_transpose_order_before[0] = rank - 2;
needed_transpose_order_before[1] = rank - 1;
if (before_order != needed_transpose_order_before)
return false;
// the transpose order after matmul must invert the transpose order before it
std::vector<int64_t> back_order(rank);
for (size_t i = 0; i < rank; i++)
back_order[i] = std::distance(after_order.begin(), std::find(after_order.begin(), after_order.end(), i));
if (before_order != back_order)
return false;
}
return true;
}
/// \brief Check the input Reshape which is before MatMul
///
/// \param reshape Reshape which is before MatMul
/// \param new_shape New shape for Reshape
/// \param transposed_b true - second MatMul input is transposed, otherwise, it's not transposed
///
/// \return True - Reshape has right new shape for reshaping, otherwise, Reshape has incorrect new shape for transformation
///
bool check_input_reshape(const std::shared_ptr<ngraph::opset1::Reshape>& reshape,
const std::vector<int64_t>& new_shape, const bool transposed_b) {
const auto input_shape = reshape->get_input_shape(0);
const size_t input_rank = input_shape.size();
const size_t output_rank = reshape->get_output_shape(0).size();
if (input_rank < 3 || output_rank != 2)
return false;
if (transposed_b) {
const int64_t k = input_shape.back();
const int64_t new_n = ov::shape_size(input_shape) / k;
if (new_shape != std::vector<int64_t>{new_n, k})
return false;
} else {
const int64_t k = input_shape.front();
const int64_t new_n = ov::shape_size(input_shape) / k;
if (new_shape != std::vector<int64_t>{k, -1} && new_shape != std::vector<int64_t>{k, new_n})
return false;
}
return true;
}
} // namespace
NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeReshapeEliminationForMatmul, "TransposeReshapeEliminationForMatmul", 0);
ngraph::pass::TransposeReshapeEliminationForMatmul::TransposeReshapeEliminationForMatmul() {
MATCHER_SCOPE(TransposeReshapeEliminationForMatmul);
auto input_1_pattern = ngraph::pattern::any_input([] (const Output<Node>& node) -> bool {
const auto& shape = node.get_partial_shape();
const auto& rank = shape.rank();
return rank.is_static() && rank.get_length() == 2 && shape.is_static();
});
auto input_2_pattern = ngraph::pattern::any_input([] (const Output<Node>& node) -> bool {
return node.get_partial_shape().is_static();
});
auto const_transpose_before_pattern = ngraph::pattern::wrap_type<opset1::Constant>();
auto transpose_before_pattern = ngraph::pattern::wrap_type<opset1::Transpose>({input_2_pattern, const_transpose_before_pattern});
auto const_reshape_before_pattern = ngraph::pattern::wrap_type<opset1::Constant>();
auto reshape_before_pattern = ngraph::pattern::wrap_type<opset1::Reshape>({transpose_before_pattern, const_reshape_before_pattern});
auto matmul_pattern = ngraph::pattern::wrap_type<opset1::MatMul>({input_1_pattern, reshape_before_pattern});
auto const_reshape_after_pattern = ngraph::pattern::wrap_type<opset1::Constant>();
auto reshape_after_pattern = ngraph::pattern::wrap_type<opset1::Reshape>({matmul_pattern, const_reshape_after_pattern});
auto const_transpose_after_pattern = ngraph::pattern::wrap_type<opset1::Constant>();
auto transpose_after_pattern = ngraph::pattern::wrap_type<opset1::Transpose>({reshape_after_pattern, const_transpose_after_pattern});
ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
const auto& pattern_value_map = m.get_pattern_value_map();
const auto& input_1 = pattern_value_map.at(input_1_pattern);
const auto& input_2 = pattern_value_map.at(input_2_pattern);
auto matmul = std::dynamic_pointer_cast<opset1::MatMul>(pattern_value_map.at(matmul_pattern).get_node_shared_ptr());
if (!matmul)
return false;
const bool transposed_a = matmul->get_transpose_a();
const bool transposed_b = matmul->get_transpose_b();
auto reshape_before = std::dynamic_pointer_cast<opset1::Reshape>(pattern_value_map.at(reshape_before_pattern).get_node_shared_ptr());
auto reshape_after = std::dynamic_pointer_cast<opset1::Reshape>(pattern_value_map.at(reshape_after_pattern).get_node_shared_ptr());
auto reshape_before_constant = std::dynamic_pointer_cast<ngraph::opset1::Constant>(
pattern_value_map.at(const_reshape_before_pattern).get_node_shared_ptr());
if (!reshape_before || !reshape_after || !reshape_before_constant)
return false;
if (!check_input_reshape(reshape_before, reshape_before_constant->cast_vector<int64_t>(), transposed_b))
return false;
// check transpose order before and after matmul
auto transpose_before = std::dynamic_pointer_cast<opset1::Transpose>(pattern_value_map.at(transpose_before_pattern).get_node_shared_ptr());
auto transpose_after = std::dynamic_pointer_cast<opset1::Transpose>(pattern_value_map.at(transpose_after_pattern).get_node_shared_ptr());
auto transpose_before_constant = std::dynamic_pointer_cast<ngraph::opset1::Constant>(transpose_before->get_input_node_shared_ptr(1));
auto transpose_after_constant = std::dynamic_pointer_cast<ngraph::opset1::Constant>(transpose_after->get_input_node_shared_ptr(1));
if (!transpose_before || !transpose_after || !transpose_before_constant || !transpose_after_constant)
return false;
auto transpose_before_order = transpose_before_constant->cast_vector<int64_t>();
auto transpose_after_order = transpose_after_constant->cast_vector<int64_t>();
// need to check that input shape is correctly contracted and output shape is correctly unpacked using transposes
if (!check_transposes(transpose_before_order, transpose_after_order, transposed_b))
return false;
const auto new_matmul = std::make_shared<opset1::MatMul>(input_1, input_2, transposed_a, false);
new_matmul->set_friendly_name(transpose_after->get_friendly_name());
copy_runtime_info({transpose_before, reshape_before, matmul, reshape_after, transpose_after}, new_matmul);
replace_node(transpose_after, new_matmul);
return true;
};
auto m = std::make_shared<ngraph::pattern::Matcher>(transpose_after_pattern, matcher_name);
this->register_matcher(m, callback);
}

View File

@@ -34,8 +34,6 @@ public:
private:
std::string m_variable_id;
template <class T>
friend void shape_infer(const Assign* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes);
};
} // namespace v3
@@ -72,10 +70,6 @@ public:
OPENVINO_SUPPRESS_DEPRECATED_END
bool has_evaluate() const override;
bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override;
private:
template <class T>
friend void shape_infer(const Assign* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes);
};
} // namespace v6
} // namespace op

View File

@@ -60,10 +60,8 @@ public:
private:
Attributes m_attrs;
template <class T>
friend void shape_infer(const ExperimentalDetectronPriorGridGenerator* op,
const std::vector<T>& input_shapes,
std::vector<T>& output_shapes);
void validate();
};
} // namespace v6
} // namespace op

View File

@@ -241,8 +241,6 @@ private:
static constexpr std::size_t s_gates_count{4};
static constexpr std::size_t s_peepholes_count{3};
template <class T>
friend void shape_infer(const LSTMCell* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes);
};
} // namespace v0
@@ -380,8 +378,6 @@ private:
util::ActivationFunction m_activation_h;
static constexpr std::size_t s_gates_count{4};
template <class T>
friend void shape_infer(const LSTMCell* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes);
};
} // namespace v4
} // namespace op

View File

@@ -1,41 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/core/graph_util.hpp>
#include <openvino/op/assign.hpp>
#include "utils.hpp"
namespace ov {
namespace op {
namespace v3 {
template <class T>
void shape_infer(const Assign* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
NODE_VALIDATION_CHECK(op, input_shapes.size() == 1 && output_shapes.size() == 1);
const auto& input_shape = input_shapes[0];
const auto& variable_info = op->m_variable->get_info();
NODE_VALIDATION_CHECK(op,
op->m_variable_id == variable_info.variable_id,
"Variables identifiers are inconsistent.");
const auto& arg_t = op->get_input_element_type(0);
NODE_VALIDATION_CHECK(op, arg_t == variable_info.data_type, "Variables types are inconsistent.");
if (input_shape.is_static() && variable_info.data_shape.is_static()) {
NODE_VALIDATION_CHECK(op,
input_shape.to_shape() == variable_info.data_shape.to_shape(),
"Variables output shapes are inconsistent.");
}
copy_shape_infer(op, input_shapes, output_shapes);
}
} // namespace v3
namespace v6 {
template <class T>
void shape_infer(const Assign* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
copy_shape_infer(op, input_shapes, output_shapes);
}
} // namespace v6
} // namespace op
} // namespace ov

View File

@@ -1,76 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/op/experimental_detectron_prior_grid_generator.hpp>
namespace ov {
namespace op {
namespace v6 {
template <class T>
void shape_infer(const ExperimentalDetectronPriorGridGenerator* op,
const std::vector<T>& input_shapes,
std::vector<T>& output_shapes) {
NODE_VALIDATION_CHECK(op, input_shapes.size() == 3 && output_shapes.size() == 1);
const auto& priors_shape = input_shapes[0];
const auto& featmap_shape = input_shapes[1];
const auto& im_data_shape = input_shapes[2];
auto& output_shape = output_shapes[0];
size_t output_size = op->m_attrs.flatten ? 2 : 4;
output_shape.resize(output_size);
output_shape[output_size - 1] = 4;
bool prior_rank_static = priors_shape.rank().is_static();
bool featmap_rank_static = featmap_shape.rank().is_static();
bool im_data_rank_static = im_data_shape.rank().is_static();
if (prior_rank_static) {
NODE_VALIDATION_CHECK(op, priors_shape.size() == 2, "Priors rank must be equal to 2.");
NODE_VALIDATION_CHECK(op,
priors_shape[1].compatible(4),
"The last dimension of the 'priors' input must be equal to 4. Got: ",
priors_shape[1]);
}
if (featmap_rank_static) {
NODE_VALIDATION_CHECK(op, featmap_shape.size() == 4, "Feature_map rank must be equal to 4.");
}
if (im_data_rank_static) {
NODE_VALIDATION_CHECK(op, im_data_shape.size() == 4, "Im_data rank must be equal to 4.");
}
if (featmap_rank_static && im_data_rank_static) {
const auto& num_batches_featmap = featmap_shape[0];
const auto& num_batches_im_data = im_data_shape[0];
NODE_VALIDATION_CHECK(op,
num_batches_featmap.compatible(num_batches_im_data),
"The first dimension of both 'feature_map' and 'im_data' must match. "
"Feature_map: ",
num_batches_featmap,
"; Im_data: ",
num_batches_im_data);
}
if (op->m_attrs.flatten) {
if (prior_rank_static && featmap_rank_static) {
output_shape[0] = featmap_shape[2] * featmap_shape[3] * priors_shape[0];
}
} else {
if (featmap_rank_static) {
output_shape[0] = featmap_shape[2];
output_shape[1] = featmap_shape[3];
}
if (prior_rank_static) {
output_shape[2] = priors_shape[0];
}
}
}
} // namespace v6
} // namespace op
} // namespace ov

View File

@@ -1,191 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/op/lstm_cell.hpp>
#include "utils.hpp"
namespace ov {
namespace op {
namespace ShapeInferLSTM {
template <class OpsType, class ShapeType>
void lstm_shape_infer(const OpsType* op,
const std::vector<ShapeType>& input_shapes,
std::vector<ShapeType>& output_shapes,
std::size_t gates_count) {
using DimType = typename std::iterator_traits<typename ShapeType::iterator>::value_type;
enum { X, initial_hidden_state, initial_cell_state, W, R, B };
std::vector<bool> input_rank_static(6, false);
bool all_rank_dynamic = false;
bool all_rank_static = true;
// Prepare OutShape
auto& hidden_shape = output_shapes[0];
auto& cell_shape = output_shapes[1];
hidden_shape.resize(2);
cell_shape.resize(2);
// If rank is dynamic, then output_shape is undefined
for (size_t i = 0; i < input_shapes.size(); i++) {
input_rank_static[i] = input_shapes[i].rank().is_static();
all_rank_dynamic &= !input_rank_static[i];
all_rank_static &= input_rank_static[i];
}
if (all_rank_dynamic) {
return;
}
const auto& x_pshape = input_shapes[0];
const auto& w_pshape = input_shapes[3];
DimType output_batch_size;
DimType output_hidden_size;
bool is_batch_init = false;
bool is_hidden_init = false;
// deduce batch/hidden_size
for (size_t i = 0; i < input_shapes.size(); i++) {
const auto& input = input_shapes[i];
if (input_rank_static[i]) {
// batch could be deduced from x, cell_state or hidden_state
if (i == X || i == initial_cell_state || i == initial_hidden_state) {
NODE_VALIDATION_CHECK(op,
(input.size() == 2),
"LSTMCell input rank is not correct for ",
i,
" input parameter. Current rank: ",
input.size(),
", expected: 2.");
if (!is_batch_init) {
output_batch_size = input[0];
is_batch_init = true;
} else {
NODE_VALIDATION_CHECK(
op,
DimType::merge(output_batch_size, output_batch_size, input[0]),
"Parameter batch_size not matched for X, initial_hidden_state or initial_cell_state "
"inputs.");
}
if (i == initial_cell_state || i == initial_hidden_state) {
if (!is_hidden_init) {
output_hidden_size = input[1];
is_hidden_init = true;
} else {
NODE_VALIDATION_CHECK(op,
DimType::merge(output_hidden_size, output_hidden_size, input[1]),
"Parameter hidden_size not matched for W, R, B, initial_hidden_state and "
"initial_cell_state "
"inputs.");
}
}
} else if (i == W || i == R || i == B) {
// check input dimension
if (i == B) {
NODE_VALIDATION_CHECK(op,
(input.size() == 1),
"LSTMCell input tensor dimension is not correct for ",
i,
" input parameter. Current input length: ",
input.size(),
", expected: 1.");
if (input[0].is_static()) {
if (!is_hidden_init) {
output_hidden_size = input[0].get_length() / gates_count;
is_hidden_init = true;
} else {
NODE_VALIDATION_CHECK(
op,
DimType::merge(output_hidden_size, output_hidden_size, input[0].get_length() / gates_count),
"Parameter hidden_size not matched for W, R, B, initial_hidden_state and "
"initial_cell_state "
"inputs.");
}
}
} else {
NODE_VALIDATION_CHECK(op,
(input.size() == 2),
"LSTMCell input rank is not correct for ",
i,
" input parameter. Current rank: ",
input.size(),
", expected: 2.");
if (input[0].is_static()) {
if (!is_hidden_init) {
output_hidden_size = input[0].get_length() / gates_count;
is_hidden_init = true;
} else {
NODE_VALIDATION_CHECK(
op,
DimType::merge(output_hidden_size, output_hidden_size, input[0].get_length() / gates_count),
"Parameter hidden_size not matched for W, R, B, initial_hidden_state and "
"initial_cell_state "
"inputs.");
}
}
if (i == R) {
if (!is_hidden_init) {
output_hidden_size = input[1];
is_hidden_init = true;
} else {
NODE_VALIDATION_CHECK(op,
DimType::merge(output_hidden_size, output_hidden_size, input[1]),
"Parameter hidden_size not matched for W, R, B, initial_hidden_state "
"and initial_cell_state "
"inputs.");
}
}
}
}
}
}
// Check peepholes
if (input_shapes.size() == 7) {
const auto& p_pshape = input_shapes[6];
NODE_VALIDATION_CHECK(op,
(p_pshape.rank().compatible(1)),
"LSTMCell input tensor P shall have dimension 1D.");
}
// check input size
if (input_rank_static[X] && input_rank_static[W]) {
NODE_VALIDATION_CHECK(op, (x_pshape[1].compatible(w_pshape[1])), "LSTMCell mismatched input_size dimension.");
}
hidden_shape[0] = output_batch_size;
hidden_shape[1] = output_hidden_size;
cell_shape[0] = output_batch_size;
cell_shape[1] = output_hidden_size;
}
} // namespace ShapeInferLSTM
namespace v0 {
using ShapeInferLSTM::lstm_shape_infer;
template <class T>
void shape_infer(const LSTMCell* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
NODE_VALIDATION_CHECK(op, input_shapes.size() == 7 && output_shapes.size() == 2);
const auto& p_pshape = input_shapes[6];
lstm_shape_infer(op, input_shapes, output_shapes, op->s_gates_count);
const auto& hidden_size = output_shapes[0][1];
if (p_pshape[0].is_static() && hidden_size.is_static()) {
NODE_VALIDATION_CHECK(op,
p_pshape[0].compatible(hidden_size * op->s_peepholes_count),
"Parameter hidden_size mistmatched in P input. Current value is: ",
p_pshape[0].get_length(),
", expected: ",
hidden_size.get_length() * op->s_peepholes_count,
".");
}
}
} // namespace v0
namespace v4 {
using ShapeInferLSTM::lstm_shape_infer;
template <class T>
void shape_infer(const LSTMCell* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
NODE_VALIDATION_CHECK(op, input_shapes.size() == 6 && output_shapes.size() == 2);
lstm_shape_infer(op, input_shapes, output_shapes, op->s_gates_count);
}
} // namespace v4
} // namespace op
} // namespace ov

View File

@@ -1,29 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/op/read_value.hpp>
#include "utils.hpp"
namespace ov {
namespace op {
template <class OpType, class ShapeType>
void read_value_shape_infer(const OpType* op, const std::vector<ShapeType>& input_shapes, std::vector<ShapeType>& output_shapes) {
copy_shape_infer(op, input_shapes, output_shapes);
}
namespace v3 {
template <class T>
void shape_infer(const ReadValue* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
read_value_shape_infer(op, input_shapes, output_shapes);
}
} // namespace v3
namespace v6 {
template <class T>
void shape_infer(const ReadValue* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
read_value_shape_infer(op, input_shapes, output_shapes);
}
} // namespace v6
} // namespace op
} // namespace ov

View File

@@ -1,52 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/op/tile.hpp>
#include "utils.hpp"
namespace ov {
namespace op {
namespace v0 {
template <class T>
void shape_infer(const Tile* op,
const std::vector<T>& input_shapes,
std::vector<T>& output_shapes,
const std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>>& constant_data = {}) {
NODE_VALIDATION_CHECK(op, input_shapes.size() == 2 && output_shapes.size() == 1);
const auto& arg_shape = input_shapes[0];
auto& repeats_shape = input_shapes[1];
auto& output_shape = output_shapes[0];
using DimType = typename std::iterator_traits<typename T::iterator>::value_type;
std::vector<int64_t> axes_val;
NODE_VALIDATION_CHECK(op, repeats_shape.rank().compatible(1), "PartialShape of repeats must be of rank 1");
    // Get the repeats values; axes_are_known reports whether they are constant
bool axes_are_known = get_data_as_int64<T>(1, op, axes_val, constant_data);
const auto arg_rank = arg_shape.rank();
if (arg_rank.is_static() && (axes_are_known || repeats_shape[0].is_static())) {
        // Try to deduce the output rank
int64_t data_rank = arg_shape.size();
int64_t repeats_rank = axes_are_known ? axes_val.size() : repeats_shape[0].get_length();
auto output_rank = std::max(data_rank, repeats_rank);
output_shape.resize(output_rank);
        // If the repeats are known constants, compute the output dimensions
if (axes_are_known) {
auto remain_arg = output_rank - data_rank;
auto remain_axes = output_rank - repeats_rank;
for (size_t i = 0; i < output_rank; i++) {
auto data_tmp = i < remain_arg ? DimType(1) : arg_shape[i - (remain_arg)];
                auto repeat_tmp = i < remain_axes ? DimType(1) : axes_val[i - remain_axes];
output_shape[i] = data_tmp * repeat_tmp;
}
}
} else {
        // Cannot deduce the shape, fall back to a fully dynamic output
output_shape = PartialShape::dynamic();
}
}
} // namespace v0
} // namespace op
} // namespace ov
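Illustration (not part of this change): the Tile output-shape rule above aligns the data shape and the repeats to a common rank by left-padding the shorter one with 1s and then multiplies them element-wise. A self-contained sketch of that rule on plain integers:

// Illustration only, not part of this patch; mirrors the Tile output-shape rule above.
#include <algorithm>
#include <cstdint>
#include <vector>

std::vector<int64_t> tile_output_shape(std::vector<int64_t> data, std::vector<int64_t> repeats) {
    const size_t rank = std::max(data.size(), repeats.size());
    data.insert(data.begin(), rank - data.size(), 1);           // left-pad data shape with 1s
    repeats.insert(repeats.begin(), rank - repeats.size(), 1);  // left-pad repeats with 1s
    std::vector<int64_t> out(rank);
    for (size_t i = 0; i < rank; ++i)
        out[i] = data[i] * repeats[i];
    return out;  // e.g. data {8, 10} with repeats {2, 3, 4} -> {2, 24, 40}
}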

View File

@@ -4,8 +4,6 @@
#include "ngraph/op/assign.hpp"
#include <assign_shape_inference.hpp>
#include "itt.hpp"
#include "ngraph/op/read_value.hpp"
#include "ngraph/op/util/variable.hpp"
@@ -28,7 +26,7 @@ void op::v3::Assign::validate_and_infer_types() {
NGRAPH_OP_SCOPE(v3_Assign_validate_and_infer_types);
auto value = input_value(0);
auto arg_t = get_input_element_type(0);
const auto& input_shape = get_input_partial_shape(0);
auto output_shape = get_input_partial_shape(0);
if (!m_variable) {
NodeVector start_nodes;
for (const auto& input : inputs()) {
@@ -43,10 +41,20 @@ void op::v3::Assign::validate_and_infer_types() {
}
NODE_VALIDATION_CHECK(this, m_variable != nullptr, "Can't find variable with id = ", m_variable_id);
}
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
std::vector<ov::PartialShape> input_shapes = {input_shape};
shape_infer(this, input_shapes, output_shapes);
set_output_type(0, arg_t, output_shapes[0]);
auto variable_info = m_variable->get_info();
NODE_VALIDATION_CHECK(this, m_variable_id == variable_info.variable_id, "Variables identifiers are inconsistent.");
NODE_VALIDATION_CHECK(this, arg_t == variable_info.data_type, "Variables types are inconsistent.");
if (output_shape.is_static() && variable_info.data_shape.is_static()) {
NODE_VALIDATION_CHECK(this,
output_shape == variable_info.data_shape,
"Variables output shapes are inconsistent.");
set_output_type(0, arg_t, output_shape);
} else {
set_output_type(0, arg_t, ov::PartialShape::dynamic());
}
}
shared_ptr<Node> op::v3::Assign::clone_with_new_inputs(const OutputVector& new_args) const {
@@ -70,10 +78,7 @@ op::v6::Assign::Assign(const Output<Node>& new_value, const std::shared_ptr<Vari
void op::v6::Assign::validate_and_infer_types() {
NGRAPH_OP_SCOPE(v6_Assign_validate_and_infer_types);
m_variable->update({get_input_partial_shape(0), get_input_element_type(0), m_variable->get_info().variable_id});
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
std::vector<ov::PartialShape> input_shapes = {get_input_partial_shape(0)};
shape_infer(this, input_shapes, output_shapes);
set_output_type(0, get_input_element_type(0), output_shapes[0]);
set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
}
shared_ptr<Node> op::v6::Assign::clone_with_new_inputs(const OutputVector& new_args) const {

View File

@@ -4,7 +4,6 @@
#include "ngraph/op/experimental_detectron_prior_grid_generator.hpp"
#include <experimental_detectron_prior_grid_generator_shape_inference.hpp>
#include <memory>
#include "itt.hpp"
@@ -50,15 +49,71 @@ static constexpr size_t priors_port = 0;
static constexpr size_t featmap_port = 1;
static constexpr size_t im_data_port = 2;
void op::v6::ExperimentalDetectronPriorGridGenerator::validate() {
auto priors_shape = get_input_partial_shape(priors_port);
auto featmap_shape = get_input_partial_shape(featmap_port);
auto im_data_shape = get_input_partial_shape(im_data_port);
if (priors_shape.rank().is_dynamic() || featmap_shape.rank().is_dynamic()) {
return;
}
NODE_VALIDATION_CHECK(this, priors_shape.rank().get_length() == 2, "Priors rank must be equal to 2.");
if (priors_shape[1].is_static()) {
NODE_VALIDATION_CHECK(this,
priors_shape[1].is_static() && priors_shape[1].get_length() == 4u,
"The last dimension of the 'priors' input must be equal to 4. Got: ",
priors_shape[1]);
}
NODE_VALIDATION_CHECK(this, featmap_shape.rank().get_length() == 4, "Feature_map rank must be equal to 4.");
if (im_data_shape.rank().is_dynamic()) {
return;
}
NODE_VALIDATION_CHECK(this, im_data_shape.rank().get_length() == 4, "Im_data rank must be equal to 4.");
const auto num_batches_featmap = featmap_shape[0];
const auto num_batches_im_data = im_data_shape[0];
const auto batches_intersection = num_batches_featmap & num_batches_im_data;
NODE_VALIDATION_CHECK(this,
!batches_intersection.get_interval().empty(),
"The first dimension of both 'feature_map' and 'im_data' must match. "
"Feature_map: ",
num_batches_featmap,
"; Im_data: ",
num_batches_im_data);
}
void op::v6::ExperimentalDetectronPriorGridGenerator::validate_and_infer_types() {
NGRAPH_OP_SCOPE(v6_ExperimentalDetectronPriorGridGenerator_validate_and_infer_types);
const auto& priors_shape = get_input_partial_shape(priors_port);
const auto& featmap_shape = get_input_partial_shape(featmap_port);
const auto& input_et = get_input_element_type(0);
auto priors_shape = get_input_partial_shape(priors_port);
auto featmap_shape = get_input_partial_shape(featmap_port);
auto input_et = get_input_element_type(0);
validate();
set_output_size(1);
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
std::vector<ov::PartialShape> input_shapes = {priors_shape, featmap_shape, get_input_partial_shape(im_data_port)};
shape_infer(this, input_shapes, output_shapes);
set_output_type(0, input_et, output_shapes[0]);
ov::PartialShape out_shape = {Dimension::dynamic(), Dimension::dynamic(), Dimension::dynamic(), 4};
if (m_attrs.flatten) {
out_shape = ov::PartialShape{Dimension::dynamic(), 4};
}
if (priors_shape.rank().is_dynamic() || featmap_shape.rank().is_dynamic()) {
set_output_type(0, input_et, out_shape);
return;
}
auto num_priors = priors_shape[0];
auto featmap_height = featmap_shape[2];
auto featmap_width = featmap_shape[3];
if (m_attrs.flatten) {
out_shape = ov::PartialShape{featmap_height * featmap_width * num_priors, 4};
} else {
out_shape = ov::PartialShape{featmap_height, featmap_width, num_priors, 4};
}
set_output_type(0, input_et, out_shape);
}
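Illustration (not part of this change): with static inputs, the output shape computed above depends only on the priors count and the feature-map spatial dimensions. A minimal sketch:

// Illustration only, not part of this patch.
#include <cstdint>
#include <vector>

std::vector<int64_t> prior_grid_output_shape(int64_t num_priors, int64_t featmap_h, int64_t featmap_w, bool flatten) {
    if (flatten)
        return {featmap_h * featmap_w * num_priors, 4};  // e.g. priors {3, 4}, feature_map {1, 256, 25, 42} -> {3150, 4}
    return {featmap_h, featmap_w, num_priors, 4};        // same inputs -> {25, 42, 3, 4}
}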

View File

@@ -6,7 +6,6 @@
#include <cmath>
#include <functional>
#include <lstm_cell_shape_inference.hpp>
#include "itt.hpp"
#include "ngraph/attribute_visitor.hpp"
@@ -140,7 +139,30 @@ void op::v0::LSTMCell::validate_and_infer_types() {
set_argument(6, get_default_peepholes_input());
}
for (const auto& input : inputs()) {
if (input.get_partial_shape().rank().is_dynamic()) {
set_output_type(0, get_input_element_type(0), ov::PartialShape::dynamic());
set_output_type(1, get_input_element_type(0), ov::PartialShape::dynamic());
return;
}
}
std::vector<ov::PartialShape> input_param{};
auto merged_batch_size = Dimension::dynamic();
auto merged_hidden_size = Dimension::dynamic();
auto result_et = element::dynamic;
    // Copy all input shapes except peepholes (the 7th input) and initial_cell_state
    // (input index 2) for further rank/dimension validation
for (size_t i = 0; i < get_input_size() - 1; i++) {
// exclude initial_cell_state input
if (i != 2) {
input_param.push_back(get_input_partial_shape(i));
}
}
// Get input partial shape for all inputs
const auto& x_pshape = get_input_partial_shape(0);
const auto& ht_pshape = get_input_partial_shape(1);
@@ -150,6 +172,24 @@ void op::v0::LSTMCell::validate_and_infer_types() {
const auto& b_pshape = get_input_partial_shape(5);
const auto& p_pshape = get_input_partial_shape(6);
validate_input_rank_dimension(input_param);
// Validate rank and dimension for initial_cell_state input
NODE_VALIDATION_CHECK(this,
(ct_pshape.rank().is_static()),
"LSTMCell input tensor initial_cell_state shall have static rank.");
NODE_VALIDATION_CHECK(this,
(ct_pshape.rank().get_length() == 2),
"LSTMCell input tensor initial_cell_state shall have dimension 2D.");
// Validate rank and dimension for P input
NODE_VALIDATION_CHECK(this, (p_pshape.rank().is_static()), "LSTMCell input tensor P shall have static rank.");
NODE_VALIDATION_CHECK(this,
(p_pshape.rank().get_length() == 1),
"LSTMCell input tensor P shall have dimension 1D.");
// Validate input element types and save result for output type
NODE_VALIDATION_CHECK(this,
element::Type::merge(result_et, result_et, get_input_element_type(0)) &&
@@ -161,10 +201,65 @@ void op::v0::LSTMCell::validate_and_infer_types() {
"Element types for X, initial_hidden_state, initial_cell_state, W, R and B do not "
"match.");
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}, ov::PartialShape{}};
std::vector<ov::PartialShape> input_shapes =
{x_pshape, ht_pshape, ct_pshape, w_pshape, r_pshape, b_pshape, p_pshape};
shape_infer(this, input_shapes, output_shapes);
// Merge batch_size dimension across all inputs to evaluate output[0] dimension
NODE_VALIDATION_CHECK(this,
Dimension::merge(merged_batch_size, merged_batch_size, ht_pshape[0]) &&
Dimension::merge(merged_batch_size, merged_batch_size, ct_pshape[0]) &&
Dimension::merge(merged_batch_size, merged_batch_size, x_pshape[0]),
"Parameter batch_size not matched for X, initial_hidden_state or initial_cell_state "
"inputs.");
// Merge hidden_size dimension across all inputs to evaluate output[1] dimension
NODE_VALIDATION_CHECK(this,
Dimension::merge(merged_hidden_size, merged_hidden_size, ht_pshape[1]) &&
Dimension::merge(merged_hidden_size, merged_hidden_size, ct_pshape[1]) &&
Dimension::merge(merged_hidden_size, merged_hidden_size, r_pshape[1]),
"Parameter hidden_size not matched for R, initial_hidden_state and initial_cell_state "
"inputs.");
// Validate hidden_size value for W, R and P inputs
if (merged_hidden_size.is_static()) {
if (w_pshape[0].is_static()) {
NODE_VALIDATION_CHECK(this,
w_pshape[0].compatible(merged_hidden_size * s_gates_count),
"Parameter hidden_size mistmatched in W input. Current value is: ",
w_pshape[0].get_length(),
", expected: ",
merged_hidden_size.get_length() * s_gates_count,
".");
}
if (r_pshape[0].is_static()) {
NODE_VALIDATION_CHECK(this,
r_pshape[0].compatible(merged_hidden_size * s_gates_count),
"Parameter hidden_size mistmatched in R input. Current value is: ",
r_pshape[0].get_length(),
", expected: ",
merged_hidden_size.get_length() * s_gates_count,
".");
}
if (b_pshape[0].is_static()) {
NODE_VALIDATION_CHECK(this,
b_pshape[0].compatible(merged_hidden_size * s_gates_count),
"Parameter hidden_size mistmatched in B input. Current value is: ",
b_pshape[0].get_length(),
", expected: ",
merged_hidden_size.get_length() * s_gates_count,
".");
}
if (p_pshape[0].is_static()) {
NODE_VALIDATION_CHECK(this,
p_pshape[0].compatible(merged_hidden_size * s_peepholes_count),
"Parameter hidden_size mistmatched in P input. Current value is: ",
p_pshape[0].get_length(),
", expected: ",
merged_hidden_size.get_length() * s_peepholes_count,
".");
}
}
// Mark inputs which are relevant to output parameters
set_input_is_relevant_to_shape(0);
set_input_is_relevant_to_shape(1);
@@ -173,8 +268,8 @@ void op::v0::LSTMCell::validate_and_infer_types() {
// Set output size, type and shape
set_output_size(2);
set_output_type(0, result_et, output_shapes[0]);
set_output_type(1, result_et, output_shapes[1]);
set_output_type(0, result_et, {merged_batch_size, merged_hidden_size});
set_output_type(1, result_et, {merged_batch_size, merged_hidden_size});
}
Output<Node> op::v0::LSTMCell::get_default_bias_input() const {
@@ -319,7 +414,15 @@ bool ngraph::op::v4::LSTMCell::visit_attributes(AttributeVisitor& visitor) {
void op::v4::LSTMCell::validate_and_infer_types() {
NGRAPH_OP_SCOPE(v4_LSTMCell_validate_and_infer_types);
for (const auto& input : inputs()) {
if (input.get_partial_shape().rank().is_dynamic()) {
set_output_type(0, get_input_element_type(0), ov::PartialShape::dynamic());
set_output_type(1, get_input_element_type(0), ov::PartialShape::dynamic());
return;
}
}
auto merged_batch_size = Dimension::dynamic();
auto merged_hidden_size = Dimension::dynamic();
auto result_et = element::dynamic;
// Get input partial shape for all inputs
@@ -330,6 +433,12 @@ void op::v4::LSTMCell::validate_and_infer_types() {
const auto& r_pshape = get_input_partial_shape(4);
const auto& b_pshape = get_input_partial_shape(5);
NODE_VALIDATION_CHECK(this,
(ct_pshape.rank().get_length() == 2),
"LSTMCell input tensor initial_cell_state shall have dimension 2D.");
validate_input_rank_dimension({x_pshape, ht_pshape, w_pshape, r_pshape, b_pshape});
// Validate input element types and save result for output type
NODE_VALIDATION_CHECK(this,
element::Type::merge(result_et, result_et, get_input_element_type(0)) &&
@@ -341,9 +450,54 @@ void op::v4::LSTMCell::validate_and_infer_types() {
"Element types for X, initial_hidden_state, initial_cell_state, W, R and B do not "
"match.");
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}, ov::PartialShape{}};
std::vector<ov::PartialShape> input_shapes = {x_pshape, ht_pshape, ct_pshape, w_pshape, r_pshape, b_pshape};
shape_infer(this, input_shapes, output_shapes);
// Merge batch_size dimension across all inputs to evaluate output[0] dimension
NODE_VALIDATION_CHECK(this,
Dimension::merge(merged_batch_size, merged_batch_size, ht_pshape[0]) &&
Dimension::merge(merged_batch_size, merged_batch_size, ct_pshape[0]) &&
Dimension::merge(merged_batch_size, merged_batch_size, x_pshape[0]),
"Parameter batch_size not matched for X, initial_hidden_state or initial_cell_state "
"inputs.");
// Merge hidden_size dimension across all inputs to evaluate output[1] dimension
NODE_VALIDATION_CHECK(this,
Dimension::merge(merged_hidden_size, merged_hidden_size, ht_pshape[1]) &&
Dimension::merge(merged_hidden_size, merged_hidden_size, ct_pshape[1]) &&
Dimension::merge(merged_hidden_size, merged_hidden_size, r_pshape[1]),
"Parameter hidden_size not matched for R, initial_hidden_state and initial_cell_state "
"inputs.");
// Validate hidden_size value for W, R and P inputs
if (merged_hidden_size.is_static()) {
if (w_pshape[0].is_static()) {
NODE_VALIDATION_CHECK(this,
w_pshape[0].compatible(merged_hidden_size * s_gates_count),
"Parameter hidden_size mistmatched in W input. Current value is: ",
w_pshape[0].get_length(),
", expected: ",
merged_hidden_size.get_length() * s_gates_count,
".");
}
if (r_pshape[0].is_static()) {
NODE_VALIDATION_CHECK(this,
r_pshape[0].compatible(merged_hidden_size * s_gates_count),
"Parameter hidden_size mistmatched in R input. Current value is: ",
r_pshape[0].get_length(),
", expected: ",
merged_hidden_size.get_length() * s_gates_count,
".");
}
if (b_pshape[0].is_static()) {
NODE_VALIDATION_CHECK(this,
b_pshape[0].compatible(merged_hidden_size * s_gates_count),
"Parameter hidden_size mistmatched in B input. Current value is: ",
b_pshape[0].get_length(),
", expected: ",
merged_hidden_size.get_length() * s_gates_count,
".");
}
}
// Mark inputs which are relevant to output parameters
set_input_is_relevant_to_shape(0);
@@ -353,8 +507,8 @@ void op::v4::LSTMCell::validate_and_infer_types() {
// Set output size, type and shape
set_output_size(2);
set_output_type(0, result_et, output_shapes[0]);
set_output_type(1, result_et, output_shapes[1]);
set_output_type(0, result_et, {merged_batch_size, merged_hidden_size});
set_output_type(1, result_et, {merged_batch_size, merged_hidden_size});
}
Output<Node> op::v4::LSTMCell::get_default_bias_input() const {

View File

@@ -4,8 +4,6 @@
#include "ngraph/op/read_value.hpp"
#include <read_value_shape_inference.hpp>
#include "itt.hpp"
#include "ngraph/op/util/variable_context.hpp"
#include "ngraph/ops.hpp"
@@ -25,13 +23,8 @@ op::v3::ReadValue::ReadValue(const Output<Node>& init_value, const std::string&
void op::v3::ReadValue::validate_and_infer_types() {
NGRAPH_OP_SCOPE(v3_ReadValue_validate_and_infer_types);
auto arg_t = get_input_element_type(0);
auto input_shape = get_input_partial_shape(0);
auto output_shape = get_input_partial_shape(0);
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
std::vector<ov::PartialShape> input_shapes = {input_shape};
shape_infer(this, input_shapes, output_shapes);
const auto& output_shape = output_shapes[0];
VariableInfo info = {output_shape, arg_t, m_variable_id};
if (m_variable == nullptr)
m_variable = std::make_shared<Variable>(info);
@@ -61,11 +54,7 @@ op::v6::ReadValue::ReadValue(const Output<Node>& init_value, const shared_ptr<Va
void op::v6::ReadValue::validate_and_infer_types() {
NGRAPH_OP_SCOPE(v6_ReadValue_validate_and_infer_types);
const auto arg_t = get_input_element_type(0);
auto input_shape = get_input_partial_shape(0);
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
std::vector<ov::PartialShape> input_shapes = {input_shape};
shape_infer(this, input_shapes, output_shapes);
const auto& output_shape = output_shapes[0];
auto output_shape = get_input_partial_shape(0);
NGRAPH_CHECK(m_variable, "Variable is not initialized.");
VariableInfo var_info = {output_shape, element::dynamic, m_variable->get_info().variable_id};
NODE_VALIDATION_CHECK(this,

View File

@@ -5,7 +5,6 @@
#include "ngraph/op/tile.hpp"
#include <ngraph/validation_util.hpp>
#include <tile_shape_inference.hpp>
#include "itt.hpp"
#include "ngraph/op/constant.hpp"
@@ -38,10 +37,37 @@ void op::v0::Tile::validate_and_infer_types() {
"Tile repeats must have any integer element type, but has ",
repeats_et);
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
std::vector<ov::PartialShape> input_shapes = {get_input_partial_shape(0), get_input_partial_shape(1)};
shape_infer(this, input_shapes, output_shapes);
set_output_type(0, arg_et, output_shapes[0]);
auto arg_shape = get_input_partial_shape(0);
auto repeats_shape = get_input_partial_shape(1);
NODE_VALIDATION_CHECK(this, repeats_shape.rank().compatible(1), "PartialShape of repeats must be of rank 1");
ov::PartialShape repeats_as_pshape;
bool repeats_are_known = evaluate_as_partial_shape(get_input_source_output(1), repeats_as_pshape);
std::vector<Dimension> repeats_value(repeats_as_pshape);
if (repeats_are_known && !repeats_value.empty() && arg_shape.rank().is_static()) {
std::vector<Dimension> data_shape(arg_shape);
auto data_rank = data_shape.size();
auto repeats_rank = repeats_value.size();
auto output_rank = std::max(data_rank, repeats_rank);
// expand data shape and repeats to output rank
data_shape.insert(data_shape.begin(), output_rank - data_rank, 1);
repeats_value.insert(repeats_value.begin(), output_rank - repeats_rank, 1);
auto output_shape = ov::PartialShape::dynamic(output_rank);
for (size_t i = 0; i < output_rank; i++)
output_shape[i] = data_shape[i] * repeats_value[i];
set_output_type(0, arg_et, output_shape);
} else {
Rank outRank = Rank::dynamic();
if (arg_shape.rank().is_static() && repeats_shape.is_static()) {
std::vector<Dimension> data_shape(arg_shape);
auto data_rank = data_shape.size();
auto repeats_rank = repeats_value.size();
auto output_rank = std::max(data_rank, repeats_rank);
outRank = Rank(output_rank);
}
set_output_type(0, arg_et, ov::PartialShape::dynamic(outRank));
}
set_input_is_relevant_to_shape(0);
set_input_is_relevant_to_shape(1);
@@ -58,16 +84,24 @@ bool op::v0::Tile::evaluate_tile(const HostTensorVector& outputs, const HostTens
const auto& axis = inputs[1];
auto& output = outputs[0];
auto repeats_val = read_index_vector(axis);
const auto repeats_rank = repeats_val.size();
auto repeats_rank = repeats_val.size();
ov::Shape data_shape = data->get_shape();
auto data_rank = data_shape.size();
auto output_rank = std::max(data_rank, repeats_rank);
// expand data shape and repeats to output rank
data_shape.insert(data_shape.begin(), output_rank - data_rank, 1);
repeats_val.insert(repeats_val.begin(), output_rank - repeats_rank, 1);
ov::Shape output_shape(output_rank);
for (size_t i = 0; i < output_rank; i++) {
output_shape[i] = data_shape[i] * repeats_val[i];
}
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
std::vector<ov::PartialShape> input_shapes = {data->get_shape(), axis->get_shape()};
shape_infer(this, input_shapes, output_shapes, {{1, axis}});
const auto& output_shape = output_shapes[0].to_shape();
if (!output->get_is_allocated()) {
output->set_shape(output_shape);
}
repeats_val.insert(repeats_val.begin(), output_shape.size() - repeats_rank, 1);
ngraph::runtime::reference::tile(data->get_data_ptr<const char>(),
output->get_data_ptr<char>(),
data->get_shape(),

View File

@@ -53,9 +53,7 @@ TEST(type_prop, lstm_cell_invalid_input) {
const auto lstm_cell = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
FAIL() << "LSTMCell node was created with invalid data.";
} catch (const NodeValidationFailure& error) {
EXPECT_HAS_SUBSTRING(
error.what(),
std::string("Parameter hidden_size not matched for W, R, B, initial_hidden_state and initial_cell_state"));
EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size mistmatched in W input."));
}
// Invalid R tensor shape.
@@ -66,7 +64,7 @@ TEST(type_prop, lstm_cell_invalid_input) {
FAIL() << "LSTMCell node was created with invalid data.";
} catch (const NodeValidationFailure& error) {
EXPECT_HAS_SUBSTRING(error.what(),
std::string("Parameter hidden_size not matched for W, R, B, "
std::string("Parameter hidden_size not matched for R, "
"initial_hidden_state and initial_cell_state inputs."));
}
@@ -102,7 +100,7 @@ TEST(type_prop, lstm_cell_invalid_input) {
const auto lstm_cell = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, B, hidden_size);
FAIL() << "LSTMCell node was created with invalid data.";
} catch (const NodeValidationFailure& error) {
EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size not matched for W, R, B"));
EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size mistmatched in B input."));
}
}
@@ -140,8 +138,8 @@ TEST(type_prop, lstm_cell_dynamic_hidden_size) {
const auto lstm_cell = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, 3);
EXPECT_EQ(lstm_cell->get_output_partial_shape(0), (PartialShape{batch_size, 3}));
EXPECT_EQ(lstm_cell->get_output_partial_shape(1), (PartialShape{batch_size, 3}));
EXPECT_EQ(lstm_cell->get_output_partial_shape(0), (PartialShape{batch_size, hidden_size}));
EXPECT_EQ(lstm_cell->get_output_partial_shape(1), (PartialShape{batch_size, hidden_size}));
EXPECT_EQ(lstm_cell->get_output_element_type(0), element::f32);
EXPECT_EQ(lstm_cell->get_output_element_type(1), element::f32);
}
@@ -160,8 +158,8 @@ TEST(type_prop, lstm_cell_dynamic_inputs) {
const auto lstm_cell = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, 3);
EXPECT_EQ(lstm_cell->get_output_partial_shape(0), (PartialShape{batch_size, 3}));
EXPECT_EQ(lstm_cell->get_output_partial_shape(1), (PartialShape{batch_size, 3}));
EXPECT_EQ(lstm_cell->get_output_partial_shape(0), (PartialShape{batch_size, hidden_size}));
EXPECT_EQ(lstm_cell->get_output_partial_shape(1), (PartialShape{batch_size, hidden_size}));
EXPECT_EQ(lstm_cell->get_output_element_type(0), element::f32);
EXPECT_EQ(lstm_cell->get_output_element_type(1), element::f32);
}
@@ -226,11 +224,9 @@ TEST(type_prop, lstm_cell_invalid_input_dynamic_rank) {
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, hidden_size});
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, hidden_size});
auto check_dynamic_lstm = [=](const shared_ptr<opset4::LSTMCell>& lstm) -> bool {
const int64_t target_batch_size = batch_size;
const int64_t target_hidden_size = hidden_size;
return lstm->output(0).get_partial_shape() == PartialShape{target_batch_size, target_hidden_size} &&
lstm->output(1).get_partial_shape() == PartialShape{target_batch_size, target_hidden_size} &&
auto check_dynamic_lstm = [](const shared_ptr<opset4::LSTMCell>& lstm) -> bool {
return lstm->output(0).get_partial_shape() == PartialShape::dynamic() &&
lstm->output(1).get_partial_shape() == PartialShape::dynamic() &&
lstm->output(0).get_element_type() == lstm->input(0).get_element_type();
};
@@ -269,61 +265,3 @@ TEST(type_prop, lstm_cell_invalid_input_dynamic_rank) {
lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, B, hidden_size);
EXPECT_EQ(check_dynamic_lstm(lstm), true);
}
TEST(type_prop, lstm_cell_shape_from_partial) {
const size_t batch_size = 2;
const size_t input_size = 3;
const size_t hidden_size = 3;
const size_t gates_count = 4;
auto check_dynamic_lstm = [=](const shared_ptr<opset4::LSTMCell>& lstm) -> bool {
const int64_t target_batch_size = batch_size;
const int64_t target_hidden_size = hidden_size;
return lstm->output(0).get_partial_shape() == PartialShape{target_batch_size, target_hidden_size} &&
lstm->output(1).get_partial_shape() == PartialShape{target_batch_size, target_hidden_size} &&
lstm->output(0).get_element_type() == lstm->input(0).get_element_type();
};
{
// from h & w
auto X = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
auto W = make_shared<opset4::Parameter>(element::f32, PartialShape{gates_count * hidden_size, input_size});
auto R = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, -1});
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
auto lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
EXPECT_EQ(check_dynamic_lstm(lstm), true);
}
{
// from x & w
auto X = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, input_size});
auto W = make_shared<opset4::Parameter>(element::f32, PartialShape{gates_count * hidden_size, input_size});
auto R = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
auto lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
EXPECT_EQ(check_dynamic_lstm(lstm), true);
}
{
// only valid rank for H_t tensor.
auto X = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, input_size});
auto W = make_shared<opset4::Parameter>(element::f32, PartialShape{gates_count * hidden_size, input_size});
auto R = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, input_size});
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
auto lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
EXPECT_EQ(check_dynamic_lstm(lstm), true);
}
{
// batch from x, hidden from h_t
auto X = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, input_size});
auto W = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
auto R = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape{-1, hidden_size});
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
auto lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
EXPECT_EQ(check_dynamic_lstm(lstm), true);
}
}
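For reference, a minimal sketch of the shape contract that the restored batch_size/hidden_size checks enforce, built with the same ngraph::opset4 API the tests above use (values illustrative; for the v0 opset the extra peepholes input P is additionally expected to have shape {3 * hidden_size}):

// Illustration only, not part of this patch.
#include <memory>
#include <ngraph/opsets/opset4.hpp>

std::shared_ptr<ngraph::opset4::LSTMCell> make_example_lstm_cell() {
    using ngraph::opset4::Parameter;
    const size_t batch = 2, input = 3, hidden = 4, gates = 4;  // 4 gates are packed along the first axis of W, R and B
    auto X   = std::make_shared<Parameter>(ngraph::element::f32, ngraph::Shape{batch, input});
    auto H_t = std::make_shared<Parameter>(ngraph::element::f32, ngraph::Shape{batch, hidden});
    auto C_t = std::make_shared<Parameter>(ngraph::element::f32, ngraph::Shape{batch, hidden});
    auto W   = std::make_shared<Parameter>(ngraph::element::f32, ngraph::Shape{gates * hidden, input});
    auto R   = std::make_shared<Parameter>(ngraph::element::f32, ngraph::Shape{gates * hidden, hidden});
    auto B   = std::make_shared<Parameter>(ngraph::element::f32, ngraph::Shape{gates * hidden});
    // Both outputs (Ho and Co) are validated to {batch, hidden} = {2, 4}.
    return std::make_shared<ngraph::opset4::LSTMCell>(X, H_t, C_t, W, R, B, hidden);
}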

View File

@@ -40,11 +40,3 @@ TEST(type_prop, tile_few_repeats_dyn_input) {
ASSERT_EQ(top->get_element_type(), element::f32);
ASSERT_EQ(top->get_output_partial_shape(0), (PartialShape{6, Dimension(32, 40), 10}));
}
TEST(type_prop, tile_out_rank_from_repeats) {
auto param0 = make_shared<op::Parameter>(element::f32, Shape{6, 8, 10});
auto param1 = make_shared<op::Parameter>(element::i32, Shape{5});
auto top = make_shared<op::v0::Tile>(param0, param1);
ASSERT_EQ(top->get_element_type(), element::f32);
ASSERT_EQ(top->get_output_partial_shape(0).size(), 5);
}

View File

@@ -61,35 +61,55 @@ bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) const {
std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
std::ostringstream out;
if (!isValid(h, w)) {
out << "Unsupported " << what << " shape, actual WxH: " << w << "x" << h <<
", only vertical vector up to 1x" << maxVectorHeight << ", horizontal up to " << maxVectorWidth <<
"x1 or square up to " << maxSquare << "x" << maxSquare << " are valid\n";
out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w <<
", only vertical vector up to " << maxVectorHeight << "x1, horizontal up to 1x" << maxVectorWidth <<
" or square up to " << maxSquare << "x" << maxSquare << " are valid\n";
}
return out.str();
}
VectorOrSquareLimit VectorOrSquareLimitByChannels::GetByChannels(const uint32_t channels) const {
return channels <= smallChannelMax ? smallChannel : bigChannel;
bool RectLimit::isValid(const uint32_t h, const uint32_t w) const {
if (h >= 1 && h <= maxVectorHeight && w >= 1 && w <= maxVectorWidth) return true;
return false;
}
bool VectorOrSquareLimitByChannels::isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
std::string RectLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
std::ostringstream out;
if (!isValid(h, w)) {
out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w <<
", only rectangular shapes up to " << maxVectorHeight << "x" << maxVectorWidth << " are valid\n";
}
return out.str();
}
RectLimit RectLimitByChannels::GetByChannels(const uint32_t channels) const {
for (auto&& limit : limitPerChannel) {
if (limit.first >= channels) {
return limit.second;
}
}
return RectLimit{ 0, 0 };
}
bool RectLimitByChannels::isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
return GetByChannels(channels).isValid(h, w);
}
std::string VectorOrSquareLimitByChannels::GetErrorOrEmpty(const uint32_t h, const uint32_t w,
std::string RectLimitByChannels::GetErrorOrEmpty(const uint32_t h, const uint32_t w,
const uint32_t channels, std::string what) const {
return GetByChannels(channels).GetErrorOrEmpty(h, w, what);
}
VectorOrSquareLimitByChannels VectorOrSquareLimitByChannelsAndPrecision::GetByPrecision(const OvGnaType precision) const {
RectLimitByChannels RectLimitByChannelsAndPrecision::GetByPrecision(const OvGnaType precision) const {
return precision == OvGnaTypeInt8 ? lowPrecision : defaultPrecision;
}
bool VectorOrSquareLimitByChannelsAndPrecision::isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const {
bool RectLimitByChannelsAndPrecision::isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const {
return GetByPrecision(precision).isValid(h, w, channels);
}
std::string VectorOrSquareLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h, const uint32_t w,
std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h, const uint32_t w,
const OvGnaType precision, const uint32_t channels, std::string what) const {
return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what);
}

View File

@@ -67,6 +67,13 @@ struct RangeMultipleLimit : public RangeLimit {
std::string GetErrorOrEmpty(const uint32_t val) const;
};
struct RectLimit {
uint32_t maxVectorHeight;
uint32_t maxVectorWidth;
bool isValid(const uint32_t h, const uint32_t w) const;
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
};
struct VectorOrSquareLimit {
uint32_t maxSquare;
uint32_t maxVectorHeight;
@@ -75,20 +82,18 @@ struct VectorOrSquareLimit {
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
};
struct VectorOrSquareLimitByChannels {
uint32_t smallChannelMax;
VectorOrSquareLimit smallChannel;
VectorOrSquareLimit bigChannel;
VectorOrSquareLimit GetByChannels(const uint32_t channels) const;
struct RectLimitByChannels {
std::vector<std::pair<uint32_t, RectLimit> > limitPerChannel;
RectLimit GetByChannels(const uint32_t channels) const;
bool isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w,
const uint32_t channels, std::string what) const;
};
struct VectorOrSquareLimitByChannelsAndPrecision {
VectorOrSquareLimitByChannels lowPrecision;
VectorOrSquareLimitByChannels defaultPrecision;
VectorOrSquareLimitByChannels GetByPrecision(const OvGnaType precision) const;
struct RectLimitByChannelsAndPrecision {
RectLimitByChannels lowPrecision;
RectLimitByChannels defaultPrecision;
RectLimitByChannels GetByPrecision(const OvGnaType precision) const;
bool isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w,
const OvGnaType precision, const uint32_t channels, std::string what) const;
@@ -98,11 +103,20 @@ class Validator {
RangeLimit2D inputHWLimit{ { 16, 384, "input height"} , { 16, 240, "input width"} };
RangeMultipleLimit inputChannelsNumberLimit{ {8, 384, "number of input channels"}, 8 };
RangeMultipleLimit kernelNumberLimit{ {8, 256, "number of kernels"}, 8 };
VectorOrSquareLimitByChannelsAndPrecision kernelLimit {
{ 240, { 3, 7, 3 }, { 2, 7, 2 } },
{ 120, { 3, 7, 3 }, { 1, 7, 1 } } };
VectorOrSquareLimitByChannelsAndPrecision& strideLimit = kernelLimit;
RangeMultipleLimit kernelNumberLimit{ {8, 1024, "number of kernels"}, 8 };
RectLimitByChannelsAndPrecision kernelLimit {
{ { {96, {7, 7}},
{136, {7, 5}},
{168, {7, 4}},
{240, {7, 3}},
{384, {7, 2}} } },
{ { {48, {7, 7}},
{64, {7, 5}},
{80, {7, 4}},
{120, {7, 3}},
{384, {7, 1}} } },
};
RectLimitByChannelsAndPrecision& strideLimit = kernelLimit;
RangeLimit2D dilationLimit{ {convDilationHeight, convDilationHeight, "dilation height" },
{ convDilationWidth, convDilationWidth, "dilation width" } };
const VectorOrSquareLimit poolingWindowLimit{ 3, 1, 1 };
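Illustration (not part of this change) of how the RectLimitByChannels lookup above resolves a channel count: GetByChannels returns the first table entry whose channel threshold is greater than or equal to the requested number of channels, so larger channel counts map to narrower kernels. A standalone sketch:

// Illustration only, not part of this patch; ExampleRectLimit stands in for RectLimit.
#include <cstdint>
#include <utility>
#include <vector>

struct ExampleRectLimit { uint32_t maxH; uint32_t maxW; };

ExampleRectLimit pick_limit(const std::vector<std::pair<uint32_t, ExampleRectLimit>>& table, uint32_t channels) {
    for (const auto& entry : table)
        if (entry.first >= channels)
            return entry.second;
    return {0, 0};  // no entry fits: the shape is rejected
}

// With the first kernel table above, {{96, {7, 7}}, {136, {7, 5}}, {168, {7, 4}}, {240, {7, 3}}, {384, {7, 2}}}:
//   pick_limit(table, 64)  -> {7, 7}
//   pick_limit(table, 100) -> {7, 5}   (the first threshold >= 100 is 136)
//   pick_limit(table, 300) -> {7, 2}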

View File

@@ -30,9 +30,10 @@ double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
using KRT = std::pair<uint32_t, double>;
// Empirically determined weights reducers for 2D Convolution
// i.e.:
    // for kernelSize >= 49 -> 3.0
    // for kernelSize >= 36 -> 2.6
    // for kernelSize >= 21 -> 2.3
    // for kernelSize >= 14 -> 1.7
    // for kernelSize >= 9 -> 1.3
    // for kernelSize in {7, 8} -> 1.2
const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
const std::vector< KRT > reducers{ {49, 3.0}, {36, 2.6}, {21, 2.3}, {14, 1.7}, {9, 1.3}, {7, 1.2} };
auto reducer = 1.0;
const auto inDepth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
const auto inHeight = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
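Illustration (not part of this change): the code that scans this list is outside the hunk, so the exact lookup is an assumption here; matching the comment above, the first threshold that the kernel size reaches (the list is ordered by descending threshold) would select the reducer:

// Illustration only, not part of this patch; the scan order is an assumption based on the comment above.
#include <cstdint>
#include <utility>
#include <vector>

double pick_weights_reducer(const std::vector<std::pair<uint32_t, double>>& reducers, uint32_t kernel_size) {
    for (const auto& entry : reducers)      // reducers are ordered by descending threshold
        if (kernel_size >= entry.first)
            return entry.second;            // e.g. kernel_size 25 -> 2.3, kernel_size 9 -> 1.3
    return 1.0;                             // kernels below the smallest threshold are not reduced
}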

View File

@@ -135,6 +135,10 @@ def getting_samples_data_zip(url, samples_path, size_of_chunk=128):
print("\nExtracting of samples_smoke_tests_data.zip...")
with zipfile.ZipFile(samples_path, 'r') as samples_zip:
samples_zip.extractall(Environment.env['smoke_tests_path'])
nameFolder = str(Environment.env['samples_data_zip'])[Environment.env['samples_data_zip'].rfind('/')+1:][:-4]
smoke_tests_path = os.path.join(Environment.env['smoke_tests_path'])
if os.path.exists(os.path.join(smoke_tests_path,nameFolder)):
os.rename(os.path.join(smoke_tests_path, nameFolder), os.path.join(smoke_tests_path, 'samples_smoke_tests_data') )
if os.path.exists(samples_path):
print("\nRemoving samples_smoke_tests_data.zip...")
os.remove(samples_path)
@@ -169,10 +173,16 @@ class SamplesCommonTestClass():
@staticmethod
def reset_models_path(model):
if ('FP32' in os.path.split(model)[0] or 'FP16' in os.path.split(model)[0]):
model = search_model_path_recursively(config_key=Environment.env['icv_model_zoo_models'], model_name=model)
else:
model = os.path.join(Environment.env['public_models'], model)
pathList = model.split(os.sep)
modelName = pathList[len(pathList)-1]
precision = pathList[len(pathList)-2]
for root, subFolder, files in os.walk(Environment.env['models_path']):
for item in files:
if item.endswith(modelName) :
if precision in root :
model = str(os.path.join(root,item))
else :
model = os.path.join(Environment.env['models_path'], model)
return model
@staticmethod
@@ -328,10 +338,8 @@ class SamplesCommonTestClass():
def setup_class(cls):
getting_samples_data_zip(Environment.env['samples_data_zip'], Environment.env['samples_path'])
assert os.environ.get('IE_APP_PATH') is not None, "IE_APP_PATH environment variable is not specified!"
assert os.path.exists(Environment.env['public_models']), \
"Path for public models {} is not exist!".format(Environment.env['public_models'])
assert os.path.exists(Environment.env['icv_model_zoo_models']), \
"Path for icv models {} is not exist!".format(Environment.env['icv_model_zoo_models'])
assert os.path.exists(Environment.env['models_path']), \
"Path for public models {} is not exist!".format(Environment.env['models_path'])
assert os.path.exists(Environment.env['test_data']), \
"Path for test data {} is not exist!".format(Environment.env['test_data'])
cls.output_dir = Environment.env['out_directory']

View File

@@ -45,7 +45,7 @@ def pytest_configure(config):
try:
Environment.env = fix_env_conf(yaml.safe_load(env_conf))
# Check mandatory env variables:
mandatory_env_varibales = ['out_directory', 'public_models', 'icv_model_zoo_models', 'test_data', 'samples_data_zip', 'smoke_tests_path', 'samples_path']
mandatory_env_varibales = ['out_directory', 'models_path', 'test_data', 'samples_data_zip', 'smoke_tests_path', 'samples_path']
missing_variables = []
for variable in mandatory_env_varibales:
if variable not in Environment.env:

View File

@@ -1,9 +1,8 @@
out_directory: ${WORKSPACE}/out
public_models: ${SHARE}/models/public/
icv_model_zoo_models: ${SHARE}/models/omz_models/
models_path: ${SHARE}/models/
test_data: ${SHARE}/validation_set/
#Performance data:
perf_result_path: ${SHARE}/validation_set/performance_result/
samples_data_zip: "https://storage.openvinotoolkit.org/repositories/openvino/ci_dependencies/test/samples_smoke_tests_data.zip"
samples_data_zip: "https://storage.openvinotoolkit.org/repositories/openvino/ci_dependencies/test/2021.4/samples_smoke_tests_data_2021.4.zip"
smoke_tests_path: ${WORKSPACE}/tests/smoke_tests
samples_path: ${WORKSPACE}/tests/smoke_tests/samples_smoke_tests_data.zip

View File

@@ -21,7 +21,7 @@ log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=s
test_data_fp32_async = get_tests \
(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'batch': [1],
'sample_type': ['C++', 'Python'],
'd': ['CPU'],
@@ -33,7 +33,7 @@ test_data_fp32_async = get_tests \
test_data_fp32_sync = get_tests \
(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'batch': [1],
'sample_type': ['C++', 'Python'],
'd': ['CPU'],

View File

@@ -21,7 +21,7 @@ from common.samples_common_test_clas import get_tests
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'nt': ['1'],
'sample_type': ['C++','Python'],
'batch': [1, 2, 4],
@@ -30,7 +30,7 @@ test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')]
)
test_data_fp16 = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP16_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'nt': ['1'],
'sample_type': ['C++','Python'],
'batch': [1, 2, 4],

View File

@@ -26,15 +26,13 @@ import shutil
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1',
'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'd': ['CPU'],
'sample_type': ['C++', 'C']},
use_device=['d'])
test_data_fp32_unicode = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1',
'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'd': ['CPU'],
'sample_type': ['C++', 'C']},
use_device=['d'])
@@ -91,8 +89,8 @@ class TestHello(SamplesCommonTestClass):
# Copy files
shutil.copy(Path(Environment.env['test_data']) / Path(param['i']), tmp_image_dir)
shutil.copy(Path(Environment.env['public_models']) / Path(param['m']), tmp_model_dir)
shutil.copy(Path(Environment.env['public_models']) / Path(param['m'].replace('.xml', '.bin')), tmp_model_dir)
shutil.copy(Path(Environment.env['models_path']) / 'public' / Path(param['m']), tmp_model_dir)
shutil.copy(Path(Environment.env['models_path']) / 'public' / Path(param['m'].replace('.xml', '.bin')), tmp_model_dir)
image_path = tmp_image_dir / Path(param['i']).name
original_image_name = image_path.name.split(sep='.')[0]

View File

@@ -21,7 +21,7 @@ from common.samples_common_test_clas import SamplesCommonTestClass
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('224x224', 'dog6.yuv')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'size': ['224x224'],
'sample_type': ['C++', 'C'],
'd': ['CPU']},

View File

@@ -21,8 +21,7 @@ from common.specific_samples_parsers import parse_hello_reshape_ssd
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('500x500', 'cat.bmp')],
'm': [os.path.join('ssd300',
'caffe_ssd_300_FP32_v10.xml')],
'm': [os.path.join('ssd512', 'FP32', 'ssd512.xml')],
'd': ['CPU'],
'batch': [1, 2, 4]}, use_device=['d'], use_batch=True
)

View File

@@ -21,7 +21,7 @@ from common.common_utils import parse_avg_err
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
test_data_nthreads = get_tests(cmd_params={'i': [os.path.join('ark', 'dev93_10.ark')],
'm': [os.path.join('FP32', 'wsj_dnn5b.xml')],
'm': [os.path.join('wsj', 'FP32', 'wsj_dnn5b.xml')],
'bs': [1, 2],
'o': ['res_output.ark'],
'r': [os.path.join('ark', 'dev93_scores_10.ark')],
@@ -32,7 +32,7 @@ test_data_nthreads = get_tests(cmd_params={'i': [os.path.join('ark', 'dev93_10.a
)
test_data_nthreads_negative = get_tests(cmd_params={'i': [os.path.join('ark', 'dev93_10.ark')],
'm': [os.path.join('FP32', 'wsj_dnn5b.xml')],
'm': [os.path.join('wsj', 'FP32', 'wsj_dnn5b.xml')],
'bs': [1],
'o': ['res_output.ark'],
'r': [os.path.join('ark', 'dev93_scores_10.ark')],

View File

@@ -30,3 +30,243 @@
precision: FP16-INT8
framework: caffe2
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/faster_rcnn_resnet101_coco/tf/FP16/faster_rcnn_resnet101_coco.xml
name: faster_rcnn_resnet101_coco
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/faster_rcnn_resnet101_coco/tf/FP16-INT8/faster_rcnn_resnet101_coco.xml
name: faster_rcnn_resnet101_coco
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml
name: faster-rcnn-resnet101-coco-sparse-60-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml
name: faster-rcnn-resnet101-coco-sparse-60-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml
name: googlenet-v1
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml
name: googlenet-v1
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml
name: googlenet-v3
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml
name: googlenet-v3
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml
name: ssd512
precision: FP16
framework: caffe
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml
name: ssd512
precision: FP16-INT8
framework: caffe
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml
name: yolo-v2-ava-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml
name: yolo-v2-ava-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml
name: yolo-v2-ava-sparse-35-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml
name: yolo-v2-ava-sparse-35-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml
name: yolo-v2-ava-sparse-70-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml
name: yolo-v2-ava-sparse-70-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml
name: yolo-v2-tiny-ava-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml
name: yolo-v2-tiny-ava-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml
name: yolo-v2-tiny-ava-sparse-30-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml
name: yolo-v2-tiny-ava-sparse-30-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml
name: yolo-v2-tiny-ava-sparse-60-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml
name: yolo-v2-tiny-ava-sparse-60-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe2/FP16/squeezenet1.1.xml
name: squeezenet1.1
precision: FP16
framework: caffe2
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe2/FP16-INT8/squeezenet1.1.xml
name: squeezenet1.1
precision: FP16-INT8
framework: caffe2
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml
name: icnet-camvid-ava-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml
name: icnet-camvid-ava-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml
name: icnet-camvid-ava-sparse-30-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml
name: icnet-camvid-ava-sparse-30-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml
name: icnet-camvid-ava-sparse-60-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml
name: icnet-camvid-ava-sparse-60-0001
precision: FP16-INT8
framework: tf
use_model_cache: true

View File

@@ -58,15 +58,33 @@ def aggregate_stats(stats: dict):
def prepare_executable_cmd(args: dict):
"""Generate common part of cmd from arguments to execute"""
return [str(args["executable"].resolve(strict=True)),
"-m", str(args["model"].resolve(strict=True)),
"-d", args["device"]]
return [
str(args["executable"].resolve(strict=True)),
"-m", str(args["model"].resolve(strict=True)),
"-d", args["device"],
"-p", args["perf_hint"],
"-v" if args["vpu_compiler"] else "", args['vpu_compiler'] if args["vpu_compiler"] else "",
"-c" if args["cpu_cache"] else "",
]
def get_cache_stats(flatten_data):
"""Update statistics for run with models cache"""
data_cache = {
"full_run_using_cache": flatten_data["full_run"],
"time_to_inference_using_cache": flatten_data["time_to_inference"],
"load_plugin": flatten_data["load_plugin"],
"load_network_using_cache": flatten_data["load_network"],
"first_inference": flatten_data["first_inference"],
"fill_inputs": flatten_data["fill_inputs"],
}
return data_cache
def run_timetest(args: dict, log=None):
"""Run provided executable several times and aggregate collected statistics"""
if log is None:
log = logging.getLogger('run_timetest')
log = logging.getLogger("run_timetest")
cmd_common = prepare_executable_cmd(args)
@@ -90,6 +108,9 @@ def run_timetest(args: dict, log=None):
flatten_data = {}
parse_stats(raw_data[0], flatten_data)
if run_iter > 0 and args["cpu_cache"]:
flatten_data = get_cache_stats(flatten_data)
log.debug(f"Statistics after run of executable #{run_iter}: {flatten_data}")
# Combine statistics from several runs
@@ -108,29 +129,45 @@ def run_timetest(args: dict, log=None):
def cli_parser():
"""parse command-line arguments"""
parser = argparse.ArgumentParser(description='Run timetest executable')
parser.add_argument('executable',
parser = argparse.ArgumentParser(description="Run timetest executable")
parser.add_argument("executable",
type=Path,
help='binary to execute')
parser.add_argument('-m',
help="Binary to execute")
parser.add_argument("-m",
required=True,
dest="model",
type=Path,
help='path to an .xml/.onnx file with a trained model or'
' to a .blob files with a trained compiled model')
parser.add_argument('-d',
help="Path to an .xml/.onnx file with a trained model or"
" to a .blob files with a trained compiled model")
parser.add_argument("-d",
required=True,
dest="device",
type=str,
help='target device to infer on')
parser.add_argument('-niter',
help="Target device to infer on")
parser.add_argument("-niter",
default=10,
type=check_positive_int,
help='number of times to execute binary to aggregate statistics of')
parser.add_argument('-s',
help="Number of times to execute binary to aggregate statistics of")
parser.add_argument("-s",
dest="stats_path",
type=Path,
help='path to a file to save aggregated statistics')
help="path to a file to save aggregated statistics")
parser.add_argument("-p",
dest="perf_hint",
choices=["LATENCY", "THROUGHPUT"],
default="LATENCY",
type=str,
help="Enables performance hint for specified device. Default hint is LATENCY")
exclusive_group = parser.add_mutually_exclusive_group(required=False)
exclusive_group.add_argument("-c",
dest="cpu_cache",
action="store_true",
help="Enable CPU model cache usage")
exclusive_group.add_argument("-v",
dest="vpu_compiler",
choices=["MCM", "MLIR"],
type=str,
help="Change VPUX compiler type")
args = parser.parse_args()
@@ -143,6 +180,12 @@ if __name__ == "__main__":
logging.basicConfig(format="[ %(levelname)s ] %(message)s",
level=logging.DEBUG, stream=sys.stdout)
assert not (args.cpu_cache and args.device != "CPU"), \
"The cache option is used only for the CPU device."
assert not (args.vpu_compiler and "VPUX" not in args.device), \
"The VPUX compiler option is used only for the VPUX device."
exit_code, _, aggr_stats, _ = run_timetest(
dict(args._get_kwargs()), log=logging) # pylint: disable=protected-access
if args.stats_path:
@@ -159,15 +202,15 @@ if __name__ == "__main__":
def test_timetest_parser():
# Example of timetest yml file
raw_data_example = [{'full_run': [1, {'first_inference_latency': [2, {'load_plugin': [3]}, {
'create_exenetwork': [4, {'read_network': [5]}, {'load_network': [6]}]}]},
{'first_inference': [7, {'fill_inputs': [8]}]}]}]
raw_data_example = [{"full_run": [1, {"first_inference_latency": [2, {"load_plugin": [3]}, {
"create_exenetwork": [4, {"read_network": [5]}, {"load_network": [6]}]}]},
{"first_inference": [7, {"fill_inputs": [8]}]}]}]
# Refactoring raw data from yml
flatten_dict = {}
parse_stats(raw_data_example, flatten_dict)
expected_result = {'full_run': 1, 'first_inference_latency': 2, 'load_plugin': 3, 'create_exenetwork': 4,
'read_network': 5, 'load_network': 6, 'first_inference': 7, 'fill_inputs': 8}
expected_result = {"full_run": 1, "first_inference_latency": 2, "load_plugin": 3, "create_exenetwork": 4,
"read_network": 5, "load_network": 6, "first_inference": 7, "fill_inputs": 8}
assert flatten_dict == expected_result, "Statistics parsing is performed incorrectly!"
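With the new options, a typical invocation of this wrapper (script, binary and model paths illustrative) might look like `python3 run_timetest.py ./timetest_infer -m model.xml -d CPU -p THROUGHPUT -c -niter 5`; `-c` (CPU model cache) and `-v` (VPUX compiler type) are mutually exclusive, and the asserts in the main block additionally reject `-c` for non-CPU devices and `-v` for non-VPUX devices.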

View File

@@ -17,51 +17,87 @@ using namespace InferenceEngine;
 * main(). The function should not throw any exceptions and is responsible for
 * handling them by itself.
*/
int runPipeline(const std::string &model, const std::string &device) {
auto pipeline = [](const std::string &model, const std::string &device) {
int runPipeline(const std::string &model, const std::string &device, const std::string &performanceHint,
const bool isCacheEnabled, const std::string &vpuCompiler) {
auto pipeline = [](const std::string &model, const std::string &device, const std::string &performanceHint,
const bool isCacheEnabled, const std::string &vpuCompiler) {
Core ie;
CNNNetwork cnnNetwork;
ExecutableNetwork exeNetwork;
InferRequest inferRequest;
size_t batchSize = 0;
if (!performanceHint.empty()) {
std::vector<std::string> supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
// enables performance hint for specified device
std::string performanceConfig;
if (performanceHint == "THROUGHPUT")
performanceConfig = CONFIG_VALUE(THROUGHPUT);
else if (performanceHint == "LATENCY")
performanceConfig = CONFIG_VALUE(LATENCY);
if (std::find(supported_config_keys.begin(), supported_config_keys.end(), "PERFORMANCE_HINT") ==
supported_config_keys.end()) {
std::cerr << "Device " << device << " doesn't support config key 'PERFORMANCE_HINT'!\n"
<< "Performance config was not set.";
}
else
ie.SetConfig({{CONFIG_KEY(PERFORMANCE_HINT), performanceConfig}}, device);
}
// set config for VPUX device
std::map<std::string, std::string> vpuConfig = {};
if (vpuCompiler == "MCM")
vpuConfig = {{"VPUX_COMPILER_TYPE", "MCM"}};
else if (vpuCompiler == "MLIR")
vpuConfig = {{"VPUX_COMPILER_TYPE", "MLIR"}};
// first_inference_latency = time_to_inference + first_inference
{
SCOPED_TIMER(first_inference_latency);
SCOPED_TIMER(time_to_inference);
{
SCOPED_TIMER(load_plugin);
ie.GetVersions(device);
// enables performance hint for specified device
ie.SetConfig({{CONFIG_KEY(PERFORMANCE_HINT), CONFIG_VALUE(LATENCY)}}, device);
if (isCacheEnabled)
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "models_cache"}});
}
{
SCOPED_TIMER(create_exenetwork);
if (TimeTest::fileExt(model) == "blob") {
SCOPED_TIMER(import_network);
exeNetwork = ie.ImportNetwork(model, device);
if (!isCacheEnabled) {
SCOPED_TIMER(create_exenetwork);
if (TimeTest::fileExt(model) == "blob") {
SCOPED_TIMER(import_network);
exeNetwork = ie.ImportNetwork(model, device);
}
else {
{
SCOPED_TIMER(read_network);
cnnNetwork = ie.ReadNetwork(model);
batchSize = cnnNetwork.getBatchSize();
}
{
SCOPED_TIMER(load_network);
exeNetwork = ie.LoadNetwork(cnnNetwork, device, vpuConfig);
}
}
}
else {
{
SCOPED_TIMER(read_network);
cnnNetwork = ie.ReadNetwork(model);
batchSize = cnnNetwork.getBatchSize();
}
{
SCOPED_TIMER(load_network);
exeNetwork = ie.LoadNetwork(cnnNetwork, device);
}
SCOPED_TIMER(load_network);
exeNetwork = ie.LoadNetwork(model, device);
}
}
inferRequest = exeNetwork.CreateInferRequest();
}
{
SCOPED_TIMER(first_inference);
inferRequest = exeNetwork.CreateInferRequest();
{
SCOPED_TIMER(fill_inputs)
batchSize = batchSize != 0 ? batchSize : 1;
SCOPED_TIMER(fill_inputs);
const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
batchSize = batchSize != 0 ? batchSize : 1;
fillBlobs(inferRequest, inputsInfo, batchSize);
}
inferRequest.Infer();
@@ -69,7 +105,7 @@ int runPipeline(const std::string &model, const std::string &device) {
};
try {
pipeline(model, device);
pipeline(model, device, performanceHint, isCacheEnabled, vpuCompiler);
} catch (const InferenceEngine::Exception &iex) {
std::cerr
<< "Inference Engine pipeline failed with Inference Engine exception:\n"

View File

@@ -1,68 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <inference_engine.hpp>
#include <ie_plugin_config.hpp>
#include <iostream>
#include "common_utils.h"
#include "timetests_helper/timer.h"
#include "timetests_helper/utils.h"
using namespace InferenceEngine;
/**
* @brief Function that contain executable pipeline which will be called from
* main(). The function should not throw any exceptions and responsible for
* handling it by itself.
*/
int runPipeline(const std::string &model, const std::string &device) {
auto pipeline = [](const std::string &model, const std::string &device) {
Core ie;
CNNNetwork cnnNetwork;
ExecutableNetwork exeNetwork;
InferRequest inferRequest;
{
SCOPED_TIMER(first_inference_latency);
{
SCOPED_TIMER(load_plugin);
ie.GetVersions(device);
}
{
SCOPED_TIMER(load_network);
// enables cache
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "models_cache"}});
exeNetwork = ie.LoadNetwork(model, device);
}
{
SCOPED_TIMER(first_inference);
inferRequest = exeNetwork.CreateInferRequest();
{
SCOPED_TIMER(fill_inputs)
const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
fillBlobs(inferRequest, inputsInfo, 1);
}
inferRequest.Infer();
}
}
};
try {
pipeline(model, device);
} catch (const InferenceEngine::Exception &iex) {
std::cerr
<< "Inference Engine pipeline failed with Inference Engine exception:\n"
<< iex.what();
return 1;
} catch (const std::exception &ex) {
std::cerr << "Inference Engine pipeline failed with exception:\n"
<< ex.what();
return 2;
} catch (...) {
std::cerr << "Inference Engine pipeline failed\n";
return 3;
}
return 0;
}

View File

@@ -1,84 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <inference_engine.hpp>
#include <iostream>
#include "common_utils.h"
#include "timetests_helper/timer.h"
#include "timetests_helper/utils.h"
using namespace InferenceEngine;
/**
* @brief Function that contain executable pipeline which will be called from
* main(). The function should not throw any exceptions and responsible for
* handling it by itself.
*/
int runPipeline(const std::string &model, const std::string &device) {
auto pipeline = [](const std::string &model, const std::string &device) {
Core ie;
CNNNetwork cnnNetwork;
ExecutableNetwork exeNetwork;
InferRequest inferRequest;
size_t batchSize = 0;
{
SCOPED_TIMER(first_inference_latency);
{
SCOPED_TIMER(load_plugin);
ie.GetVersions(device);
}
{
SCOPED_TIMER(create_exenetwork);
if (TimeTest::fileExt(model) == "blob") {
SCOPED_TIMER(import_network);
exeNetwork = ie.ImportNetwork(model, device);
}
else {
{
SCOPED_TIMER(read_network);
cnnNetwork = ie.ReadNetwork(model);
batchSize = cnnNetwork.getBatchSize();
}
{
SCOPED_TIMER(load_network);
exeNetwork = ie.LoadNetwork(cnnNetwork, device, {{"VPUX_COMPILER_TYPE", "MLIR"}});
}
}
}
}
{
SCOPED_TIMER(first_inference);
inferRequest = exeNetwork.CreateInferRequest();
{
SCOPED_TIMER(fill_inputs)
batchSize = batchSize != 0 ? batchSize : 1;
const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
fillBlobs(inferRequest, inputsInfo, batchSize);
}
inferRequest.Infer();
}
};
try {
pipeline(model, device);
} catch (const InferenceEngine::Exception &iex) {
std::cerr
<< "Inference Engine pipeline failed with Inference Engine exception:\n"
<< iex.what();
return 1;
} catch (const std::exception &ex) {
std::cerr << "Inference Engine pipeline failed with exception:\n"
<< ex.what();
return 2;
} catch (...) {
std::cerr << "Inference Engine pipeline failed\n";
return 3;
}
return 0;
}

View File

@@ -26,6 +26,18 @@ static const char target_device_message[] =
"plugin. "
"The application looks for a suitable plugin for the specified device.";
/// @brief message for performance hint argument
static const char performance_hint_message[] =
"Not required. Enables performance hint for specified device. Available hints are LATENCY and THROUGHPUT.";
/// @brief message for cache argument
static const char cpu_cache_message[] =
"Not required. Use this key to run timetests with CPU models caching.";
/// @brief message for vpu argument
static const char vpu_compiler_message[] =
"Not required. Use this key to run timetests using MLIR or MCM VPUX compiler type.";
/// @brief message for statistics path argument
static const char statistics_path_message[] =
"Required. Path to a file to write statistics.";
@@ -44,6 +56,18 @@ DEFINE_string(m, "", model_message);
/// It is a required parameter
DEFINE_string(d, "", target_device_message);
/// @brief Define parameter for set performance hint for target device <br>
/// It is a non-required parameter
DEFINE_string(p, "", performance_hint_message);
/// @brief Define parameter for set CPU models caching <br>
/// It is a non-required parameter
DEFINE_bool(c, false, cpu_cache_message);
/// @brief Define parameter VPU compiler type <br>
/// It is a non-required parameter
DEFINE_string(v, "", vpu_compiler_message);
/// @brief Define parameter for set path to a file to write statistics <br>
/// It is a required parameter
DEFINE_string(s, "", statistics_path_message);
@@ -56,10 +80,13 @@ static void showUsage() {
std::cout << "TimeTests [OPTION]" << std::endl;
std::cout << "Options:" << std::endl;
std::cout << std::endl;
std::cout << " -h, --help " << help_message << std::endl;
std::cout << " -h, --help " << help_message << std::endl;
std::cout << " -m \"<path>\" " << model_message << std::endl;
std::cout << " -d \"<device>\" " << target_device_message
<< std::endl;
std::cout << " -s \"<path>\" " << statistics_path_message
<< std::endl;
std::cout << " -p \"<perf_hint>\" " << performance_hint_message << std::endl;
std::cout << " -c " << cpu_cache_message << std::endl;
std::cout << " -v \"<compiler_type>\" " << vpu_compiler_message << std::endl;
}
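A hedged sketch of how a caller could assemble the command line for the updated binary from the flags defined above; the executable path, model and option values are placeholders:

from pathlib import Path
import subprocess

cmd = [str(Path("bin") / "timetests"),
       "-m", "model.xml",          # model to load
       "-d", "CPU",                # target device
       "-s", "stats.yml",          # statistics output file
       "-p", "THROUGHPUT"]         # performance hint
cpu_cache = True                   # placeholder toggles for the optional flags
vpu_compiler = ""
if cpu_cache:
    cmd.append("-c")
if vpu_compiler:
    cmd += ["-v", vpu_compiler]
returncode = subprocess.run(cmd).returncode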

View File

@@ -8,7 +8,8 @@
#include <iostream>
int runPipeline(const std::string &model, const std::string &device);
int runPipeline(const std::string &model, const std::string &device, const std::string &performanceHint,
const bool isCacheEnabled, const std::string &vpuCompiler);
/**
* @brief Parses command line and check required arguments
@@ -40,7 +41,7 @@ bool parseAndCheckCommandLine(int argc, char **argv) {
*/
int _runPipeline() {
SCOPED_TIMER(full_run);
return runPipeline(FLAGS_m, FLAGS_d);
return runPipeline(FLAGS_m, FLAGS_d, FLAGS_p, FLAGS_c, FLAGS_v);
}
/**
@@ -54,4 +55,4 @@ int main(int argc, char **argv) {
StatisticsWriter::Instance().setFile(FLAGS_s);
StatisticsWriter::Instance().write();
return status;
}
}

View File

@@ -43,7 +43,7 @@ def pytest_addoption(parser):
test_args_parser.addoption(
"--test_conf",
type=Path,
help="path to a test config",
help="Path to a test config",
default=Path(__file__).parent / "test_config.yml"
)
test_args_parser.addoption(
@@ -51,20 +51,38 @@ def pytest_addoption(parser):
required=True,
dest="executable",
type=Path,
help="path to a timetest binary to execute"
help="Path to a timetest binary to execute"
)
test_args_parser.addoption(
"--niter",
type=check_positive_int,
help="number of iterations to run executable and aggregate results",
help="Number of iterations to run executable and aggregate results",
default=3
)
test_args_parser.addoption(
"--cpu_cache",
action='store_true',
help="Enable model CPU cache usage",
)
test_args_parser.addoption(
"--perf_hint",
choices=['LATENCY', 'THROUGHPUT'],
default='LATENCY',
type=str,
help='Enables performance hint for specified device. Default hint is LATENCY'
)
test_args_parser.addoption(
"--vpu_compiler",
choices=["MCM", "MLIR"],
type=str,
help="Change VPUX compiler type",
)
db_args_parser = parser.getgroup("timetest database use")
db_args_parser.addoption(
'--db_submit',
metavar="RUN_ID",
type=str,
help='submit results to the database. ' \
help='Submit results to the database. ' \
'`RUN_ID` should be a string uniquely identifying the run' \
' (like Jenkins URL or time)'
)
@@ -79,19 +97,21 @@ def pytest_addoption(parser):
'--db_collection',
type=str,
required=is_db_used,
help='collection name in database',
help='Collection name in database',
choices=DB_COLLECTIONS
)
db_args_parser.addoption(
'--db_metadata',
type=str,
default=None,
help='path to JSON-formatted file to extract additional information')
help='Path to JSON-formatted file to extract additional information'
)
db_args_parser.addoption(
'--manifest',
type=Path,
required=is_db_used,
help='path to build manifest to extract commit information')
help='Path to build manifest to extract commit information'
)
@pytest.fixture(scope="session")
@@ -112,8 +132,26 @@ def niter(request):
return request.config.getoption('niter')
@pytest.fixture(scope="session")
def cpu_cache(request):
"""Fixture function for command-line option."""
return request.config.getoption('cpu_cache')
@pytest.fixture(scope="session")
def perf_hint(request):
"""Fixture function for command-line option."""
return request.config.getoption('perf_hint')
@pytest.fixture(scope="session")
def vpu_compiler(request):
"""Fixture function for command-line option."""
return request.config.getoption('vpu_compiler')
# -------------------- CLI options --------------------
@pytest.fixture(scope="function")
def temp_dir(pytestconfig):
"""Create temporary directory for test purposes.

View File

@@ -34,14 +34,17 @@ from scripts.run_timetest import run_timetest
REFS_FACTOR = 1.2 # 120%
def test_timetest(instance, executable, niter, cl_cache_dir, model_cache_dir, test_info, temp_dir, validate_test_case,
prepare_db_info):
def test_timetest(instance, executable, niter, cl_cache_dir, cpu_cache, vpu_compiler, perf_hint, model_cache_dir,
test_info, temp_dir, validate_test_case, prepare_db_info):
"""Parameterized test.
:param instance: test instance. Should not be changed during test run
:param executable: timetest executable to run
:param niter: number of times to run executable
:param cl_cache_dir: directory to store OpenCL cache
:param cpu_cache: flag to enable model CPU cache
:param vpu_compiler: flag to change VPUX compiler type
:param perf_hint: performance hint (optimize device for latency or throughput settings)
:param model_cache_dir: directory to store IE model cache
:param test_info: custom `test_info` field of built-in `request` pytest fixture
:param temp_dir: path to a temporary directory. Will be cleaned up after test run
@@ -63,7 +66,10 @@ def test_timetest(instance, executable, niter, cl_cache_dir, model_cache_dir, te
"executable": Path(executable),
"model": Path(model_path),
"device": instance["device"]["name"],
"niter": niter
"niter": niter,
"perf_hint": perf_hint,
"cpu_cache": cpu_cache,
"vpu_compiler": vpu_compiler if vpu_compiler else ""
}
logging.info("Run timetest once to generate any cache")
retcode, msg, _, _ = run_timetest({**exe_args, "niter": 1}, log=logging)
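For illustration, with a CPU instance, caching enabled and the default LATENCY hint, the dictionary assembled above looks roughly like this (paths are placeholders):

from pathlib import Path

exe_args = {
    "executable": Path("bin/timetests"),
    "model": Path("resnet-50.xml"),
    "device": "CPU",
    "niter": 3,
    "perf_hint": "LATENCY",
    "cpu_cache": True,
    "vpu_compiler": "",
}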

View File

@@ -5,6 +5,8 @@ import os
import sys
from datetime import datetime
from openvino.runtime import Dimension
from openvino.tools.benchmark.benchmark import Benchmark
from openvino.tools.benchmark.parameters import parse_args
from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, \
@@ -15,8 +17,8 @@ from openvino.tools.benchmark.utils.progress_bar import ProgressBar
from openvino.tools.benchmark.utils.utils import next_step, get_number_iterations, pre_post_processing, \
process_help_inference_string, print_perf_counters, dump_exec_graph, get_duration_in_milliseconds, \
get_command_line_arguments, parse_nstreams_value_per_device, parse_devices, get_inputs_info, \
print_inputs_and_outputs_info, get_batch_size, load_config, dump_config, get_latency_groups, \
check_for_static
print_inputs_and_outputs_info, get_network_batch_size, load_config, dump_config, get_latency_groups, \
check_for_static, can_measure_as_static
from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, averageCntReport, detailedCntReport
@@ -225,9 +227,7 @@ def run(args):
('load network time (ms)', duration_ms)
])
app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.input_scale, args.input_mean, exe_network.get_runtime_function().get_parameters())
batch_size = get_batch_size(app_inputs_info)
if batch_size.is_dynamic and benchmark.api_type == 'sync':
raise Exception("Dynamic batch size is supported only in async mode")
batch_size = get_network_batch_size(app_inputs_info)
elif not is_network_compiled:
# --------------------- 4. Read the Intermediate Representation of the network -----------------------------
next_step()
@@ -262,10 +262,7 @@ def run(args):
])
# use batch size according to provided layout and shapes
batch_size = get_batch_size(app_inputs_info)
if batch_size.is_dynamic and benchmark.api_type == 'sync':
raise Exception("Dynamic batch size is supported only in async mode")
batch_size = get_network_batch_size(app_inputs_info)
logger.info(f'Network batch size: {batch_size}')
# --------------------- 6. Configuring inputs and outputs of the model --------------------------------------------------
@@ -307,10 +304,7 @@ def run(args):
('import network time (ms)', duration_ms)
])
app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.input_scale, args.input_mean, exe_network.get_runtime_function().get_parameters())
batch_size = get_batch_size(app_inputs_info)
if batch_size.is_dynamic and benchmark.api_type == 'sync':
raise Exception("Dynamic batch size is supported only in async mode")
batch_size = get_network_batch_size(app_inputs_info)
# --------------------- 8. Querying optimal runtime parameters --------------------------------------------------
next_step()
@@ -353,7 +347,8 @@ def run(args):
data_queue = get_input_data(paths_to_input, app_inputs_info)
static_mode = check_for_static(app_inputs_info)
if not static_mode and benchmark.api_type == 'sync':
allow_inference_only_or_sync = can_measure_as_static(app_inputs_info)
if not allow_inference_only_or_sync and benchmark.api_type == 'sync':
raise Exception("Benchmarking of the model with dynamic shapes is available for async API only."
"Please use -api async -nstreams 1 -nireq 1 to emulate sync behavior.")
@@ -362,9 +357,13 @@ def run(args):
benchmark.inference_only = True
else:
benchmark.inference_only = False
elif benchmark.inference_only and not static_mode:
elif benchmark.inference_only and not allow_inference_only_or_sync:
raise Exception("Benchmarking dynamic model available with input filling in measurement loop only!")
# update batch size in case dynamic network with one data_shape
if benchmark.inference_only and batch_size.is_dynamic:
batch_size = Dimension(data_queue.batch_sizes[data_queue.current_group_id])
benchmark.latency_groups = get_latency_groups(app_inputs_info)
if len(benchmark.latency_groups) > 1:
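A hedged illustration of the batch-size update in the hunk above, with placeholder values; it assumes the openvino.runtime.Dimension(min, max) constructor for a dynamic dimension:

from openvino.runtime import Dimension

batch_size = Dimension(1, 8)     # dynamic batch, e.g. from get_network_batch_size()
batch_sizes = [4]                # analogue of data_queue.batch_sizes (one data_shape group)
current_group_id = 0

if batch_size.is_dynamic:        # inference-only run with a dynamic batch
    batch_size = Dimension(batch_sizes[current_group_id])
# batch_size is now the static Dimension 4 used in the throughput calculation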

View File

@@ -236,11 +236,17 @@ def get_duration_in_secs(target_device):
def check_for_static(app_input_info):
is_static = True
for info in app_input_info:
if info.is_dynamic:
return False
return is_static
return True
def can_measure_as_static(app_input_info):
for info in app_input_info:
if info.is_dynamic and (len(info.shapes) > 1 or info.original_shape.is_static):
return False
return True
def parse_devices(device_string):
@@ -428,6 +434,7 @@ class AppInputInfo:
def __init__(self):
self.element_type = None
self.layout = Layout()
self.original_shape = None
self.partial_shape = None
self.data_shapes = []
self.scale = []
@@ -550,6 +557,7 @@ def get_inputs_info(shape_string, data_shape_string, layout_string, batch_size,
# Input name
info.name = input_names[i]
# Shape
info.original_shape = parameters[i].get_partial_shape()
if info.name in shape_map.keys():
info.partial_shape = parse_partial_shape(shape_map[info.name])
reshape = True
@@ -625,7 +633,7 @@ def get_inputs_info(shape_string, data_shape_string, layout_string, batch_size,
return input_info, reshape
def get_batch_size(inputs_info):
def get_network_batch_size(inputs_info):
null_dimension = Dimension(0)
batch_size = null_dimension
for info in inputs_info:
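To make the can_measure_as_static() predicate introduced above concrete, a small self-contained illustration with stand-in objects (not the real AppInputInfo): an input that is dynamic in the original model and receives exactly one data shape still qualifies for inference-only or sync measurement, while multiple data shapes do not:

from types import SimpleNamespace as Input

def can_measure_as_static(app_input_info):
    for info in app_input_info:
        if info.is_dynamic and (len(info.shapes) > 1 or info.original_shape.is_static):
            return False
    return True

one_shape = Input(is_dynamic=True, shapes=[(1, 3, 224, 224)],
                  original_shape=Input(is_static=False))
two_shapes = Input(is_dynamic=True, shapes=[(1, 3, 224, 224), (2, 3, 224, 224)],
                   original_shape=Input(is_static=False))

assert can_measure_as_static([one_shape]) is True
assert can_measure_as_static([two_shapes]) is False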

View File

@@ -124,12 +124,14 @@ class CanonicalizePathCheckExistenceIfNeededAction(CanonicalizePathCheckExistenc
class DeprecatedCanonicalizePathCheckExistenceAction(CanonicalizePathCheckExistenceAction):
def __call__(self, parser, namespace, values, option_string=None):
super().__call__(parser, namespace, values, option_string)
dep_msg = "Use of deprecated cli option {} detected. Option use in the following releases will be fatal. ".format(
option_string)
if 'tensorflow_use_custom_operations_config' in option_string:
dep_msg += 'Please use --transformations_config cli option instead'
if 'mean_file' in option_string or 'mean_offset' in option_string:
dep_msg += 'Please use --mean_values cli option instead.'
log.error(dep_msg, extra={'is_warning': True})
super().__call__(parser, namespace, values, option_string)
def readable_file(path: str):
@@ -377,7 +379,7 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None):
'the Inference Engine API in runtime may fail for such an IR.',
action='store_true', default=False)
common_group.add_argument('--keep_shape_ops',
help='The option is ignored. Expected behavior is enabled by default.',
help=argparse.SUPPRESS,
action=IgnoredAction, default=True)
common_group.add_argument('--disable_weights_compression',
help='Disable compression and store weights with original precision.',
@@ -524,11 +526,13 @@ def get_caffe_cli_parser(parser: argparse.ArgumentParser = None):
'CustomLayersMapping.xml'),
action=CanonicalizePathCheckExistenceAction)
caffe_group.add_argument('--mean_file', '-mf',
help='Mean image to be used for the input. Should be a binaryproto file',
help='[DEPRECATED] ' +
'Mean image to be used for the input. Should be a binaryproto file',
default=None,
action=CanonicalizePathCheckExistenceAction)
action=DeprecatedCanonicalizePathCheckExistenceAction)
caffe_group.add_argument('--mean_file_offsets', '-mo',
help='Mean image offsets to be used for the input binaryproto file. ' +
help='[DEPRECATED] ' +
'Mean image offsets to be used for the input binaryproto file. ' +
'When the mean image is bigger than the expected input, it is cropped. By default, centers ' +
'of the input image and the mean image are the same and the mean image is cropped by ' +
'dimensions of the input image. The format to pass this option is the following: "-mo (x,y)". In this ' +