Merge remote-tracking branch 'upstream/master' into debian-packages
@@ -287,8 +287,8 @@ if(ENABLE_INTEL_GNA)
set(GNA_HASH "cc954e67525006bf8bd353a6682e38bf208f6d74e973e0fc292850e721f17452")
endif()
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
set(GNA_VERSION "03.00.00.1377")
set(GNA_HASH "d45fb48994d8c2803a16e88e29ae48851066325b97c1c6c4a5bf4f4573d55c65")
set(GNA_VERSION "03.00.00.1455")
set(GNA_HASH "8ac1af18eb32777b00193f4f8c252ee4f8bd64a9069138b4a5aaeebd82ead464")
endif()
set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include)
@@ -63,10 +63,10 @@ Caffe*-specific parameters:
-k K Path to CustomLayersMapping.xml to register custom
layers
--mean_file MEAN_FILE, -mf MEAN_FILE
Mean image to be used for the input. Should be a
[DEPRECATED] Mean image to be used for the input. Should be a
binaryproto file
--mean_file_offsets MEAN_FILE_OFFSETS, -mo MEAN_FILE_OFFSETS
Mean image offsets to be used for the input
[DEPRECATED] Mean image offsets to be used for the input
binaryproto file. When the mean image is bigger than
the expected input, it is cropped. By default, centers
of the input image and the mean image are the same and
@@ -42,7 +42,7 @@ To convert a Paddle\* model:
Parameters to convert your model:
* [Framework-agnostic parameters](Converting_Model_General.md): These parameters are used to convert a model trained with any supported framework.
> **NOTE:** `--scale`, `--scale_values`, `--mean_values`, `--mean_file` are not supported in the current version of mo_paddle.
> **NOTE:** `--scale`, `--scale_values`, `--mean_values` are not supported in the current version of mo_paddle.
### Example of Converting a Paddle* Model
Below is the example command to convert yolo v3 Paddle\* network to OpenVINO IR network with Model Optimizer.
@@ -6,7 +6,7 @@ mo --input_model INPUT_MODEL --output_dir <OUTPUT_MODEL_DIR>
```
You need to have write permissions for an output directory.
> **NOTE**: Some models require using additional arguments to specify conversion parameters, such as `--input_shape`, `--scale`, `--scale_values`, `--mean_values`, `--mean_file`. To learn about when you need to use these parameters, refer to [Converting a Model Using General Conversion Parameters](Converting_Model_General.md).
> **NOTE**: Some models require using additional arguments to specify conversion parameters, such as `--input_shape`, `--scale`, `--scale_values`, `--mean_values`. To learn about when you need to use these parameters, refer to [Converting a Model Using General Conversion Parameters](Converting_Model_General.md).
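For illustration only (the model file name and the shape and value numbers below are placeholders, not taken from this guide), a conversion command that passes such parameters might look like:

```
mo --input_model model.onnx --output_dir <OUTPUT_MODEL_DIR> --input_shape [1,3,224,224] --mean_values [123.675,116.28,103.53] --scale_values [58.395,57.12,57.375]
```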
To adjust the conversion process, you may use general parameters defined in the [Converting a Model Using General Conversion Parameters](Converting_Model_General.md) and
Framework-specific parameters for:
@@ -151,7 +151,7 @@ Usually neural network models are trained with the normalized input data. This m
In the first case, the Model Optimizer generates the IR with required pre-processing layers and Inference Engine samples may be used to infer the model.
In the second case, information about mean/scale values should be provided to the Model Optimizer to embed it to the generated IR. Model Optimizer provides a number of command line parameters to specify them: `--scale`, `--scale_values`, `--mean_values`, `--mean_file`.
In the second case, information about mean/scale values should be provided to the Model Optimizer to embed it to the generated IR. Model Optimizer provides a number of command line parameters to specify them: `--scale`, `--scale_values`, `--mean_values`.
If both mean and scale values are specified, the mean is subtracted first and then scale is applied. Input values are *divided* by the scale value(s).
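As a small worked example (the numbers are purely illustrative): with `--mean_values [127.5]` and `--scale_values [128]`, each input value `x` is pre-processed as `(x - 127.5) / 128`, so an input of 255 maps to approximately 0.996.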
@@ -2,30 +2,23 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "shape_inference.hpp"
|
||||
|
||||
#include <ngraph/runtime/host_tensor.hpp>
|
||||
#include <openvino/core/node.hpp>
|
||||
#include <ngraph/runtime/host_tensor.hpp>
|
||||
#include <openvino/opsets/opset1.hpp>
|
||||
#include <openvino/opsets/opset2.hpp>
|
||||
#include <openvino/opsets/opset4.hpp>
|
||||
#include <openvino/opsets/opset5.hpp>
|
||||
#include <openvino/opsets/opset6.hpp>
|
||||
#include <openvino/opsets/opset8.hpp>
|
||||
|
||||
#include "assign_shape_inference.hpp"
|
||||
#include "convolution_shape_inference.hpp"
|
||||
#include "experimental_detectron_detection_output_shape_inference.hpp"
|
||||
#include "experimental_detectron_prior_grid_generator_shape_inference.hpp"
|
||||
#include "fake_quantize.hpp"
|
||||
#include "lstm_cell_shape_inference.hpp"
|
||||
#include "read_value_shape_inference.hpp"
|
||||
#include "reduce_shape_inference.hpp"
|
||||
#include "shape_inference.hpp"
|
||||
#include "shape_nodes.hpp"
|
||||
#include "static_shape.hpp"
|
||||
#include "tile_shape_inference.hpp"
|
||||
#include "utils.hpp"
|
||||
#include "shape_inference.hpp"
|
||||
#include "convolution_shape_inference.hpp"
|
||||
#include "reduce_shape_inference.hpp"
|
||||
#include "shape_nodes.hpp"
|
||||
#include "fake_quantize.hpp"
|
||||
#include "experimental_detectron_detection_output_shape_inference.hpp"
|
||||
|
||||
|
||||
void shape_inference(ov::Node* op,
|
||||
const std::vector<ov::StaticShape>& input_shapes,
|
||||
@@ -34,53 +27,44 @@ void shape_inference(ov::Node* op,
|
||||
if (auto node = ov::as_type<ov::opset8::Convolution>(op)) {
|
||||
ov::CoordinateDiff pads_begin, pads_end;
|
||||
bool status = resolve_auto_pad_for_shape(node, pads_begin, pads_end, input_shapes, 2, 2);
|
||||
OPENVINO_ASSERT(status,
|
||||
"Convolution shape inference doesn't have enough information to calculate static shapes");
|
||||
OPENVINO_ASSERT(status, "Convolution shape inference doesn't have enough information to calculate static shapes");
|
||||
shape_infer(node, pads_begin, pads_end, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset8::GroupConvolution>(op)) {
|
||||
ov::CoordinateDiff pads_begin, pads_end;
|
||||
bool status = resolve_auto_pad_for_shape(node, pads_begin, pads_end, input_shapes, 2, 3);
|
||||
OPENVINO_ASSERT(status,
|
||||
"GroupConvolution shape inference doesn't have enough information to calculate static shapes");
|
||||
OPENVINO_ASSERT(status, "GroupConvolution shape inference doesn't have enough information to calculate static shapes");
|
||||
shape_infer(node, pads_begin, pads_end, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset8::ConvolutionBackpropData>(op)) {
|
||||
ov::CoordinateDiff pads_begin, pads_end;
|
||||
ov::StaticShape output_shape_input;
|
||||
if (node->get_input_size() == 3)
|
||||
get_data_as_shape<ov::StaticShape>(2, op, output_shape_input, constant_data);
|
||||
bool status =
|
||||
resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 2);
|
||||
OPENVINO_ASSERT(
|
||||
status,
|
||||
"ConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
|
||||
bool status = resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 2);
|
||||
OPENVINO_ASSERT(status, "ConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
|
||||
shape_infer(node, pads_begin, pads_end, output_shape_input, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset8::GroupConvolutionBackpropData>(op)) {
|
||||
ov::CoordinateDiff pads_begin, pads_end;
|
||||
ov::StaticShape output_shape_input;
|
||||
if (node->get_input_size() == 3)
|
||||
get_data_as_shape<ov::StaticShape>(2, op, output_shape_input, constant_data);
|
||||
bool status =
|
||||
resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 3);
|
||||
OPENVINO_ASSERT(
|
||||
status,
|
||||
"GroupConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
|
||||
bool status = resolve_auto_pad_for_shape_back_prop(node, pads_begin, pads_end, input_shapes, output_shape_input, 2, 3);
|
||||
OPENVINO_ASSERT(status, "GroupConvolutionBackpropData shape inference doesn't have enough information to calculate static shapes");
|
||||
shape_infer(node, pads_begin, pads_end, output_shape_input, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::op::util::ArithmeticReductionKeepDims>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes, constant_data);
|
||||
} else if (auto node = ov::as_type<ov::op::util::LogicalReductionKeepDims>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes, constant_data);
|
||||
} else if (ov::is_type<ov::op::util::UnaryElementwiseArithmetic>(op) || ov::is_type<ov::opset1::Convert>(op) ||
|
||||
ov::is_type<ov::opset1::Clamp>(op) || ov::is_type<ov::opset1::GRN>(op) ||
|
||||
ov::is_type<ov::opset1::LRN>(op) || ov::is_type<ov::opset1::LogicalNot>(op) ||
|
||||
ov::is_type<ov::opset4::Mish>(op) || ov::is_type<ov::opset2::MVN>(op) ||
|
||||
ov::is_type<ov::opset6::MVN>(op) || ov::is_type<ov::opset1::PRelu>(op) ||
|
||||
ov::is_type<ov::opset1::Relu>(op) || ov::is_type<ov::opset4::Swish>(op) ||
|
||||
ov::is_type<ov::opset1::Softmax>(op) || ov::is_type<ov::opset1::Elu>(op) ||
|
||||
ov::is_type<ov::opset5::Round>(op)) {
|
||||
} else if (ov::is_type<ov::op::util::UnaryElementwiseArithmetic>(op) ||
|
||||
ov::is_type<ov::opset1::Convert>(op) || ov::is_type<ov::opset1::Clamp>(op) ||
|
||||
ov::is_type<ov::opset1::GRN>(op) || ov::is_type<ov::opset1::LRN>(op) ||
|
||||
ov::is_type<ov::opset1::LogicalNot>(op) || ov::is_type<ov::opset4::Mish>(op) ||
|
||||
ov::is_type<ov::opset2::MVN>(op) || ov::is_type<ov::opset6::MVN>(op) ||
|
||||
ov::is_type<ov::opset1::PRelu>(op) || ov::is_type<ov::opset1::Relu>(op) ||
|
||||
ov::is_type<ov::opset4::Swish>(op) || ov::is_type<ov::opset1::Softmax>(op) ||
|
||||
ov::is_type<ov::opset1::Elu>(op) || ov::is_type<ov::opset5::Round>(op)) {
|
||||
copy_shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (ov::is_type<ov::op::util::BinaryElementwiseArithmetic>(op) ||
|
||||
ov::is_type<ov::op::util::BinaryElementwiseComparison>(op) ||
|
||||
ov::is_type<ov::op::util::BinaryElementwiseLogical>(op)) {
|
||||
ov::is_type<ov::op::util::BinaryElementwiseComparison>(op) || ov::is_type<ov::op::util::BinaryElementwiseLogical>(op)) {
|
||||
eltwise_shape_infer(op, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset1::FakeQuantize>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
@@ -96,30 +80,15 @@ void shape_inference(ov::Node* op,
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset6::ExperimentalDetectronDetectionOutput>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset3::Assign>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset6::Assign>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset6::ExperimentalDetectronPriorGridGenerator>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset1::LSTMCell>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset6::LSTMCell>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset3::ReadValue>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset6::ReadValue>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes);
|
||||
} else if (auto node = ov::as_type<ov::opset6::Tile>(op)) {
|
||||
shape_infer(node, input_shapes, output_shapes, constant_data);
|
||||
} else {
|
||||
ngraph::OutputVector new_inputs;
|
||||
for (size_t i = 0; i < op->get_input_size(); ++i) {
|
||||
if (constant_data.count(i)) {
|
||||
new_inputs.push_back(std::make_shared<ov::opset1::Constant>(constant_data.at(i)));
|
||||
} else {
|
||||
new_inputs.push_back(std::make_shared<ov::opset1::Parameter>(op->get_input_element_type(i),
|
||||
input_shapes[i].to_partial_shape()));
|
||||
new_inputs.push_back(
|
||||
std::make_shared<ov::opset1::Parameter>(
|
||||
op->get_input_element_type(i), input_shapes[i].to_partial_shape()));
|
||||
}
|
||||
}
|
||||
const auto local_op = op->clone_with_new_inputs(new_inputs);
|
||||
@@ -127,10 +96,8 @@ void shape_inference(ov::Node* op,
|
||||
|
||||
output_shapes.resize(op->get_output_size());
|
||||
for (size_t i = 0; i < output_shapes.size(); ++i) {
|
||||
const auto& partial_shape = local_op->get_output_partial_shape(i);
|
||||
OPENVINO_ASSERT(
|
||||
partial_shape.is_static(),
|
||||
"On device shape infer shouldn't support default shape infer for nodes with internal dynamism");
|
||||
const auto &partial_shape = local_op->get_output_partial_shape(i);
|
||||
OPENVINO_ASSERT(partial_shape.is_static(), "On device shape infer shouldn't support default shape infer for nodes with internal dynamism");
|
||||
output_shapes[i] = ov::StaticShape(partial_shape.to_shape());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,149 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
|
||||
#include <ngraph/function.hpp>
|
||||
#include <ngraph/opsets/opset7.hpp>
|
||||
#include <transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp>
|
||||
#include <transformations/op_conversions/einsum_decomposition.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
#include <ngraph/pass/manager.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
|
||||
|
||||
using namespace testing;
|
||||
using namespace ngraph;
|
||||
|
||||
|
||||
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul) {
|
||||
Shape data_shape_1{10, 2};
|
||||
Shape data_shape_2{10, 2, 25};
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto const_transpose_before = opset1::Constant::create(element::i32, Shape{3}, {1, 2, 0});
|
||||
auto transpose_before = std::make_shared<opset1::Transpose>(data_2, const_transpose_before);
|
||||
auto const_reshape_before = opset1::Constant::create(element::i32, Shape{2}, {2, 250});
|
||||
auto reshape_before = std::make_shared<opset1::Reshape>(transpose_before, const_reshape_before, false);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, reshape_before);
|
||||
auto const_reshape_after = opset1::Constant::create(element::i32, Shape{3}, {10, 10, 25});
|
||||
auto reshape_after = std::make_shared<opset1::Reshape>(matmul, const_reshape_after, false);
|
||||
auto const_tranpose_after = opset1::Constant::create(element::i32, Shape{3}, {2, 0, 1});
|
||||
auto tranpose_after = std::make_shared<opset1::Transpose>(reshape_after, const_tranpose_after);
|
||||
function = std::make_shared<Function>(NodeVector{tranpose_after}, ParameterVector{data_1, data_2});
|
||||
manager.register_pass<pass::InitNodeInfo>();
|
||||
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
|
||||
}
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, data_2);
|
||||
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul_TransposedA) {
|
||||
Shape data_shape_1{2, 10};
|
||||
Shape data_shape_2{10, 2, 25};
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto const_transpose_before = opset1::Constant::create(element::i32, Shape{3}, {1, 2, 0});
|
||||
auto transpose_before = std::make_shared<opset1::Transpose>(data_2, const_transpose_before);
|
||||
auto const_reshape_before = opset1::Constant::create(element::i32, Shape{2}, {2, 250});
|
||||
auto reshape_before = std::make_shared<opset1::Reshape>(transpose_before, const_reshape_before, false);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, reshape_before, true, false);
|
||||
auto const_reshape_after = opset1::Constant::create(element::i32, Shape{3}, {10, 10, 25});
|
||||
auto reshape_after = std::make_shared<opset1::Reshape>(matmul, const_reshape_after, false);
|
||||
auto const_tranpose_after = opset1::Constant::create(element::i32, Shape{3}, {2, 0, 1});
|
||||
auto tranpose_after = std::make_shared<opset1::Transpose>(reshape_after, const_tranpose_after);
|
||||
function = std::make_shared<Function>(NodeVector{tranpose_after}, ParameterVector{data_1, data_2});
|
||||
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
|
||||
}
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, data_2, true, false);
|
||||
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul_TransposedB) {
|
||||
Shape data_shape_1{10, 2};
|
||||
Shape data_shape_2{10, 2, 25};
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto const_transpose_before = opset1::Constant::create(element::i32, Shape{3}, {0, 2, 1});
|
||||
auto transpose_before = std::make_shared<opset1::Transpose>(data_2, const_transpose_before);
|
||||
auto const_reshape_before = opset1::Constant::create(element::i32, Shape{2}, {250, 2});
|
||||
auto reshape_before = std::make_shared<opset1::Reshape>(transpose_before, const_reshape_before, false);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, reshape_before, false, true);
|
||||
auto const_reshape_after = opset1::Constant::create(element::i32, Shape{3}, {10, 10, 25});
|
||||
auto reshape_after = std::make_shared<opset1::Reshape>(matmul, const_reshape_after, false);
|
||||
auto const_tranpose_after = opset1::Constant::create(element::i32, Shape{3}, {1, 0, 2});
|
||||
auto tranpose_after = std::make_shared<opset1::Transpose>(reshape_after, const_tranpose_after);
|
||||
function = std::make_shared<Function>(NodeVector{tranpose_after}, ParameterVector{data_1, data_2});
|
||||
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
|
||||
}
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, data_2);
|
||||
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul_TransposedAB) {
|
||||
Shape data_shape_1{2, 10};
|
||||
Shape data_shape_2{10, 2, 25};
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto const_transpose_before = opset1::Constant::create(element::i32, Shape{3}, {0, 2, 1});
|
||||
auto transpose_before = std::make_shared<opset1::Transpose>(data_2, const_transpose_before);
|
||||
auto const_reshape_before = opset1::Constant::create(element::i32, Shape{2}, {250, 2});
|
||||
auto reshape_before = std::make_shared<opset1::Reshape>(transpose_before, const_reshape_before, false);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, reshape_before, true, true);
|
||||
auto const_reshape_after = opset1::Constant::create(element::i32, Shape{3}, {10, 10, 25});
|
||||
auto reshape_after = std::make_shared<opset1::Reshape>(matmul, const_reshape_after, false);
|
||||
auto const_tranpose_after = opset1::Constant::create(element::i32, Shape{3}, {1, 0, 2});
|
||||
auto tranpose_after = std::make_shared<opset1::Transpose>(reshape_after, const_tranpose_after);
|
||||
function = std::make_shared<Function>(NodeVector{tranpose_after}, ParameterVector{data_1, data_2});
|
||||
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
|
||||
}
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(data_1, data_2, true, false);
|
||||
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, TransposeReshapeEliminationForMatMul_Einsum) {
|
||||
Shape data_shape_1{5, 2};
|
||||
Shape data_shape_2{10, 2, 25};
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
auto einsum = std::make_shared<opset7::Einsum>(OutputVector{data_1, data_2}, "kl,mlj->mkj");
|
||||
function = std::make_shared<Function>(NodeVector{einsum}, ParameterVector{data_1, data_2});
|
||||
manager.register_pass<pass::EinsumDecomposition>();
|
||||
manager.register_pass<pass::TransposeReshapeEliminationForMatmul>();
|
||||
}
|
||||
{
|
||||
auto data_1 = std::make_shared<opset1::Parameter>(element::f32, data_shape_1);
|
||||
auto data_2 = std::make_shared<opset1::Parameter>(element::f32, data_shape_2);
|
||||
// for some cases Reshape may be first input for Matmul
|
||||
auto shape_constant = std::make_shared<opset1::Constant>(element::i64, Shape{data_shape_1.size()}, data_shape_1);
|
||||
auto reshape = std::make_shared<opset1::Reshape>(data_1, shape_constant, false);
|
||||
auto matmul = std::make_shared<opset1::MatMul>(reshape, data_2, false, false);
|
||||
function_ref = std::make_shared<Function>(NodeVector{matmul}, ParameterVector{data_1, data_2});
|
||||
}
|
||||
}
|
||||
@@ -74,6 +74,15 @@ const std::vector<std::vector<size_t >> kernels2D = {
|
||||
{7, 1},
|
||||
{3, 3},
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t >> kernels2D_big = {
|
||||
{7, 2},
|
||||
{2, 7},
|
||||
{3, 7},
|
||||
{6, 6},
|
||||
{7, 7},
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t >> strides2D = {
|
||||
{1, 1},
|
||||
};
|
||||
@@ -100,6 +109,16 @@ const auto conv2DParams_Kernels2D = ::testing::Combine(
|
||||
::testing::ValuesIn(numOutCannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
const auto conv2DParams_Kernels2D_big = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels2D_big),
|
||||
::testing::ValuesIn(strides2D),
|
||||
::testing::ValuesIn(padBegins2D),
|
||||
::testing::ValuesIn(padEnds2D),
|
||||
::testing::ValuesIn(dilations2D),
|
||||
::testing::ValuesIn(numOutCannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
const auto conv2DParams_ExplicitPadding_Height1 = ::testing::Combine(
|
||||
::testing::ValuesIn(kernelsH1),
|
||||
::testing::ValuesIn(stridesH1),
|
||||
@@ -218,4 +237,16 @@ INSTANTIATE_TEST_SUITE_P(smoke_Convolution2D_Kernels2D, GnaConvolutionLayerTest,
|
||||
::testing::Values(input2DNCHW),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA)),
|
||||
GnaConvolutionLayerTest::getTestCaseName);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_Convolution2D_Kernels2D_big, GnaConvolutionLayerTest,
|
||||
::testing::Combine(
|
||||
conv2DParams_Kernels2D_big,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(InferenceEngine::Layout::ANY),
|
||||
::testing::Values(input2DNCHW),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA)),
|
||||
GnaConvolutionLayerTest::getTestCaseName);
|
||||
} // namespace
|
||||
|
||||
@@ -22,21 +22,39 @@ const std::vector<std::vector<size_t >> kernels2D = {
|
||||
{1, 3},
|
||||
{7, 1},
|
||||
{3, 3},
|
||||
{7, 2},
|
||||
{2, 7}
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t >> kernels2DInvalid = {
|
||||
{1, 4},
|
||||
{2, 3},
|
||||
{3, 2},
|
||||
{9, 3},
|
||||
{1, 9},
|
||||
{1, 8},
|
||||
{8, 1},
|
||||
{4, 4},
|
||||
{8, 8},
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t >> kernels2DInvalidFor56InC = {
|
||||
{1, 6},
|
||||
{2, 6},
|
||||
{7, 7},
|
||||
{1, 7},
|
||||
{4, 7},
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t >> kernels2DInvalidFor120InC = {
|
||||
{1, 4},
|
||||
{8, 3},
|
||||
{7, 5},
|
||||
{1, 6},
|
||||
{4, 7},
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t >> strides2D = {
|
||||
{1, 1},
|
||||
};
|
||||
const std::vector<std::vector<size_t >> strides2DInvalid = {
|
||||
{4, 4}, {1, 4}
|
||||
{8, 8}, {1, 8}
|
||||
};
|
||||
const std::vector<std::vector<ptrdiff_t>> padBegins2D = { {0, 0},
|
||||
};
|
||||
@@ -51,10 +69,13 @@ const std::vector<std::vector<size_t >> dilations2D = { {1, 1},
|
||||
const std::vector<std::vector<size_t >> dilations2DInvalid = { {2, 2},
|
||||
};
|
||||
const std::vector<size_t> numOutChannels2D = { 32 };
|
||||
const std::vector<size_t> numOutChannels2DInvalid = { 1, 7, 9, 400 };
|
||||
const std::vector<size_t> numOutChannels2DInvalid = { 1, 7, 9, 1032 };
|
||||
|
||||
const std::vector<std::vector<size_t>> input2DNCHWFine = { { 1, 8, 20, 16 } };
|
||||
|
||||
const std::vector<std::vector<size_t>> input2DNCHWWithInC56 = { { 1, 56, 20, 16 } };
|
||||
const std::vector<std::vector<size_t>> input2DNCHWWithInC120 = { { 1, 120, 20, 16 } };
|
||||
|
||||
const std::vector<std::vector<size_t>> input2DNCHWInvalidInputC = {
|
||||
{ 1, 7, 20, 16 },
|
||||
{ 1, 9, 20, 16 },
|
||||
@@ -80,6 +101,27 @@ const auto conv2DParametersInvalidKernel = ::testing::Combine(
|
||||
::testing::ValuesIn(numOutChannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
const auto conv2DParametersInvalidKernelFor56InC = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels2DInvalidFor56InC),
|
||||
::testing::ValuesIn(strides2D),
|
||||
::testing::ValuesIn(padBegins2D),
|
||||
::testing::ValuesIn(padEnds2D),
|
||||
::testing::ValuesIn(dilations2D),
|
||||
::testing::ValuesIn(numOutChannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
const auto conv2DParametersInvalidKernelFor120InC = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels2DInvalidFor120InC),
|
||||
::testing::ValuesIn(strides2D),
|
||||
::testing::ValuesIn(padBegins2D),
|
||||
::testing::ValuesIn(padEnds2D),
|
||||
::testing::ValuesIn(dilations2D),
|
||||
::testing::ValuesIn(numOutChannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
const auto conv2DParametersInvalidFilterNumber = ::testing::Combine(
|
||||
::testing::ValuesIn(kernels2D),
|
||||
::testing::ValuesIn(strides2D),
|
||||
@@ -165,6 +207,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_GnaConv2DNegativeTestInvalid##whats_wrong, GnaCon
|
||||
|
||||
GNA_NEG_INSTANTIATE(FilterNumber, InvalidFilterNumber, Fine, "Unsupported number of kernels")
|
||||
GNA_NEG_INSTANTIATE(Kernel, InvalidKernel, Fine, "Unsupported kernel shape")
|
||||
GNA_NEG_INSTANTIATE(BigKernelFor56InC, InvalidKernelFor56InC, WithInC56, "Unsupported kernel shape")
|
||||
GNA_NEG_INSTANTIATE(BigKernelFor120InC, InvalidKernelFor120InC, WithInC120, "Unsupported kernel shape")
|
||||
GNA_NEG_INSTANTIATE(InputH, Fine, InvalidInputH, "Unsupported input height")
|
||||
GNA_NEG_INSTANTIATE(InputW, Fine, InvalidInputW, "Unsupported input width")
|
||||
GNA_NEG_INSTANTIATE(InputC, Fine, InvalidInputC, "Unsupported number of input channels")
|
||||
@@ -172,4 +216,4 @@ GNA_NEG_INSTANTIATE(Padding, InvalidPadding, Fine, "Convolution's input padding
|
||||
GNA_NEG_INSTANTIATE(Stride, InvalidStride, Fine, "Unsupported convolution stride shape")
|
||||
GNA_NEG_INSTANTIATE(Dilation, InvalidDilation, Fine, "dilation is not supported on GNA")
|
||||
|
||||
} // namespace
|
||||
} // namespace
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/op/ops.hpp>
|
||||
#include <openvino/op/parameter.hpp>
|
||||
#include <utils/shape_inference/shape_inference.hpp>
|
||||
#include <utils/shape_inference/static_shape.hpp>
|
||||
|
||||
using namespace ov;
|
||||
template <class T>
|
||||
std::shared_ptr<T> constructGraph();
|
||||
|
||||
template <>
|
||||
std::shared_ptr<op::v3::Assign> constructGraph() {
|
||||
auto input = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
|
||||
auto read_value = std::make_shared<op::v3::ReadValue>(input, "variable_id");
|
||||
return std::make_shared<op::v3::Assign>(read_value, "variable_id");
|
||||
}
|
||||
|
||||
template <>
|
||||
std::shared_ptr<op::v6::Assign> constructGraph() {
|
||||
auto input = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
|
||||
auto variable = std::make_shared<ov::op::util::Variable>(
|
||||
ov::op::util::VariableInfo{PartialShape::dynamic(), element::dynamic, "ID"});
|
||||
auto read_value = std::make_shared<op::v6::Assign>(input, variable);
|
||||
return std::make_shared<op::v6::Assign>(read_value, variable);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void assignTest() {
|
||||
auto assign = constructGraph<T>();
|
||||
|
||||
// Test StaticShape
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{1, 2, 64, 64}}, static_output_shapes = {StaticShape{}};
|
||||
shape_inference(assign.get(), static_input_shapes, static_output_shapes);
|
||||
ASSERT_EQ(static_input_shapes[0], (StaticShape{1, 2, 64, 64}));
|
||||
}
|
||||
|
||||
TEST(StaticShapeInferenceTest, AssignTest) {
|
||||
// Test v3 Assign
|
||||
assignTest<op::v3::Assign>();
|
||||
// Test v6 Assign
|
||||
assignTest<op::v6::Assign>();
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/op/experimental_detectron_prior_grid_generator.hpp>
|
||||
#include <openvino/op/ops.hpp>
|
||||
#include <openvino/op/parameter.hpp>
|
||||
#include <utils/shape_inference/shape_inference.hpp>
|
||||
#include <utils/shape_inference/static_shape.hpp>
|
||||
|
||||
using namespace ov;
|
||||
|
||||
TEST(StaticShapeInferenceTest, PriorGridGenerator) {
|
||||
op::v6::ExperimentalDetectronPriorGridGenerator::Attributes attrs;
|
||||
attrs.flatten = false;
|
||||
attrs.h = 0;
|
||||
attrs.w = 0;
|
||||
attrs.stride_x = 4.0f;
|
||||
attrs.stride_y = 4.0f;
|
||||
|
||||
auto priors = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
auto feature_map = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
|
||||
auto im_data = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
|
||||
|
||||
auto grid_gen =
|
||||
std::make_shared<ov::op::v6::ExperimentalDetectronPriorGridGenerator>(priors, feature_map, im_data, attrs);
|
||||
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{3, 4},
|
||||
StaticShape{1, 256, 200, 336},
|
||||
StaticShape{1, 3, 800, 1344}},
|
||||
static_output_shapes = {StaticShape{}};
|
||||
shape_inference(grid_gen.get(), static_input_shapes, static_output_shapes);
|
||||
|
||||
ASSERT_EQ(static_output_shapes[0], StaticShape({200, 336, 3, 4}));
|
||||
}
|
||||
@@ -1,38 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/op/ops.hpp>
|
||||
#include <openvino/op/parameter.hpp>
|
||||
#include <utils/shape_inference/shape_inference.hpp>
|
||||
#include <utils/shape_inference/static_shape.hpp>
|
||||
|
||||
using namespace ov;
|
||||
|
||||
TEST(StaticShapeInferenceTest, LstmCellTest) {
|
||||
const size_t batch_size = 2;
|
||||
const size_t input_size = 3;
|
||||
const size_t hidden_size = 3;
|
||||
const size_t gates_count = 4;
|
||||
|
||||
const auto X = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
const auto W = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
const auto R = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
const auto H_t = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
const auto C_t = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
const auto Bias = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1});
|
||||
const auto lstm_cell = std::make_shared<op::v4::LSTMCell>(X, H_t, C_t, W, R, Bias, hidden_size);
|
||||
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{batch_size, input_size},
|
||||
StaticShape{batch_size, hidden_size},
|
||||
StaticShape{batch_size, hidden_size},
|
||||
StaticShape{gates_count * hidden_size, input_size},
|
||||
StaticShape{gates_count * hidden_size, hidden_size},
|
||||
StaticShape{gates_count * hidden_size}},
|
||||
static_output_shapes = {StaticShape{}, StaticShape{}};
|
||||
shape_inference(lstm_cell.get(), static_input_shapes, static_output_shapes);
|
||||
ASSERT_EQ(static_output_shapes[0], StaticShape({batch_size, hidden_size}));
|
||||
ASSERT_EQ(static_output_shapes[1], StaticShape({batch_size, hidden_size}));
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/op/ops.hpp>
|
||||
#include <openvino/op/parameter.hpp>
|
||||
#include <utils/shape_inference/shape_inference.hpp>
|
||||
#include <utils/shape_inference/static_shape.hpp>
|
||||
|
||||
using namespace ov;
|
||||
|
||||
template <class T>
|
||||
std::shared_ptr<T> constructGraph();
|
||||
|
||||
template <>
|
||||
std::shared_ptr<op::v3::ReadValue> constructGraph() {
|
||||
auto input = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
|
||||
return std::make_shared<op::v3::ReadValue>(input, "variable_id");
|
||||
}
|
||||
|
||||
template <>
|
||||
std::shared_ptr<op::v6::ReadValue> constructGraph() {
|
||||
auto input = std::make_shared<op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
|
||||
auto variable = std::make_shared<ov::op::util::Variable>(
|
||||
ov::op::util::VariableInfo{PartialShape::dynamic(), element::dynamic, "ID"});
|
||||
return std::make_shared<op::v6::ReadValue>(input, variable);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void readValueTest() {
|
||||
auto readValue = constructGraph<T>();
|
||||
|
||||
// Test StaticShape
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{1, 2, 64, 64}}, static_output_shapes = {StaticShape{}};
|
||||
shape_inference(readValue.get(), static_input_shapes, static_output_shapes);
|
||||
ASSERT_EQ(static_output_shapes[0], (StaticShape{1, 2, 64, 64}));
|
||||
}
|
||||
|
||||
TEST(StaticShapeInferenceTest, ReadValueTest) {
|
||||
// Test v3 ReadValue
|
||||
readValueTest<op::v3::ReadValue>();
|
||||
// Test v6 ReadValue
|
||||
readValueTest<op::v6::ReadValue>();
|
||||
}
|
||||
@@ -1,50 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/op/ops.hpp>
|
||||
#include <openvino/op/parameter.hpp>
|
||||
#include <utils/shape_inference/shape_inference.hpp>
|
||||
#include <utils/shape_inference/static_shape.hpp>
|
||||
|
||||
using namespace ov;
|
||||
|
||||
TEST(StaticShapeInferenceTest, TileTest) {
|
||||
auto param0 = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1});
|
||||
auto param1 = std::make_shared<ov::op::v0::Constant>(element::i64, ov::Shape{3}, std::vector<int>{3, 4, 1});
|
||||
auto tile = std::make_shared<op::v0::Tile>(param0, param1);
|
||||
// Test Static Shape
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{3}},
|
||||
static_output_shapes = {StaticShape{}};
|
||||
shape_inference(tile.get(), static_input_shapes, static_output_shapes);
|
||||
ASSERT_EQ(static_output_shapes[0], StaticShape({18, 32, 10}));
|
||||
// Test Wrong Static Shape
|
||||
std::vector<StaticShape> wrong_static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{}},
|
||||
wrong_static_output_shapes = {StaticShape{}};
|
||||
|
||||
ASSERT_THROW(shape_inference(tile.get(), wrong_static_input_shapes, wrong_static_output_shapes), ov::AssertFailure);
|
||||
}
|
||||
|
||||
TEST(StaticShapeInferenceTest, TileFewRepeatsTest) {
|
||||
auto param0 = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1});
|
||||
auto param1 = ov::op::v0::Constant::create(element::i64, Shape{2}, {4, 1});
|
||||
auto tile = std::make_shared<op::v0::Tile>(param0, param1);
|
||||
// Test Static Shape
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{6, 8, 10}, StaticShape{2}},
|
||||
static_output_shapes = {StaticShape{}};
|
||||
shape_inference(tile.get(), static_input_shapes, static_output_shapes);
|
||||
ASSERT_EQ(static_output_shapes[0], StaticShape({6, 32, 10}));
|
||||
}
|
||||
|
||||
TEST(StaticShapeInferenceTest, TileSmallDataRankTest) {
|
||||
auto param0 = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1});
|
||||
auto param1 = ov::op::v0::Constant::create(element::i64, Shape{3}, {3, 4, 1});
|
||||
auto tile = std::make_shared<op::v0::Tile>(param0, param1);
|
||||
// Test Static Shape
|
||||
std::vector<StaticShape> static_input_shapes = {StaticShape{8, 10}, StaticShape{3}},
|
||||
static_output_shapes = {StaticShape{}};
|
||||
shape_inference(tile.get(), static_input_shapes, static_output_shapes);
|
||||
ASSERT_EQ(static_output_shapes[0], StaticShape({3, 32, 10}));
|
||||
}
|
||||
@@ -84,8 +84,8 @@ openvino_developer_export_targets(COMPONENT inference_engine TARGETS ${TARGET_NA
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
install(TARGETS ${TARGET_NAME}
|
||||
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core
|
||||
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core
|
||||
RUNTIME DESTINATION ${IE_CPACK_RUNTIME_PATH} COMPONENT core OPTIONAL
|
||||
LIBRARY DESTINATION ${IE_CPACK_LIBRARY_PATH} COMPONENT core OPTIONAL
|
||||
NAMELINK_COMPONENT core_dev)
|
||||
else()
|
||||
ov_install_static_lib(${TARGET_NAME} core)
|
||||
|
||||
@@ -48,10 +48,6 @@ set_target_properties(${TARGET_NAME} PROPERTIES SOVERSION 2022.1.1)
|
||||
|
||||
set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
|
||||
|
||||
# developer package
|
||||
|
||||
openvino_developer_export_targets(COMPONENT inference_engine TARGETS ${TARGET_NAME})
|
||||
|
||||
# install
|
||||
|
||||
# TODO: uncomment once snippets are integrated into CPU plugin
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "transformations_visibility.hpp"
|
||||
|
||||
#include "ngraph/pass/graph_rewrite.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
|
||||
class TRANSFORMATIONS_API TransposeReshapeEliminationForMatmul;
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief TransposeReshapeEliminationForMatmul transformation eliminates Transpose and Reshape which were created to
|
||||
* align input and output dimension ranks before second MatMul input and after MatMul output
|
||||
* (for example, after Einsum Decomposition inside TensorFlow 1 and nGraph EinsumDecomposition transformation)
|
||||
*/
|
||||
class ngraph::pass::TransposeReshapeEliminationForMatmul: public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
TransposeReshapeEliminationForMatmul();
|
||||
};
|
||||
@@ -51,6 +51,7 @@
|
||||
#include "transformations/common_optimizations/mul_conv_fusion.hpp"
|
||||
#include "transformations/common_optimizations/interpolate_sequence_fusion.hpp"
|
||||
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
|
||||
#include <transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp>
|
||||
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
|
||||
#include "transformations/op_conversions/convert_pad_to_group_conv.hpp"
|
||||
#include "transformations/op_conversions/convert_divide.hpp"
|
||||
@@ -149,6 +150,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
|
||||
decomp->add_matcher<ngraph::pass::SoftmaxDecomposition, false>();
|
||||
decomp->add_matcher<ngraph::pass::GatherNegativeConstIndicesNormalize>();
|
||||
decomp->add_matcher<ngraph::pass::DropoutWithRandomUniformReplacer>();
|
||||
decomp->add_matcher<ngraph::pass::TransposeReshapeEliminationForMatmul>();
|
||||
decomp->set_name("ngraph::pass::CommonDecompositions");
|
||||
|
||||
// CF is required after all decompositions
|
||||
|
||||
@@ -0,0 +1,175 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "ngraph/opsets/opset1.hpp"
|
||||
#include "ngraph/rt_info.hpp"
|
||||
#include "ngraph/pattern/op/wrap_type.hpp"
|
||||
#include "ngraph/validation_util.hpp"
|
||||
#include "itt.hpp"
|
||||
|
||||
namespace {
|
||||
/// \brief Check for correct Transpose orders which are before and after MatMul. Second Transpose must be back for
|
||||
/// first Transpose before MatMul
|
||||
///
|
||||
/// \param before_order Order of Transpose which is before MatMul
|
||||
/// \param after_order Order of Transpose which is after MatMul
|
||||
/// \param transposed_b true - second MatMul input is transposed, otherwise, it's not transposed
|
||||
///
|
||||
/// \return True - Transposes have right orders, otherwise, Transposes have incorrect order for transformation
|
||||
///
|
||||
bool check_transposes(const std::vector<int64_t>& before_order, const std::vector<int64_t>& after_order, const bool transposed_b) {
|
||||
const size_t rank = before_order.size();
|
||||
if (rank < 3)
|
||||
return false;
|
||||
|
||||
if (before_order.size() != after_order.size())
|
||||
return false;
|
||||
|
||||
if (transposed_b) {
|
||||
// before order must be : 0, 1, 2, ..., N-1, N-2
|
||||
std::vector<int64_t> start_order(rank);
|
||||
std::iota(start_order.begin(), start_order.begin() + rank - 2, 0);
|
||||
start_order[rank - 1] = rank - 2;
|
||||
start_order[rank - 2] = rank - 1;
|
||||
|
||||
if (before_order != start_order)
|
||||
return false;
|
||||
|
||||
// after order must be : 1, ..., N-2, 0, N-1
|
||||
std::vector<int64_t> back_order(rank);
|
||||
std::iota(back_order.begin(), back_order.begin() + rank - 2, 1);
|
||||
back_order[rank - 2] = 0;
|
||||
back_order[rank - 1] = rank - 1;
|
||||
|
||||
if (after_order != back_order)
|
||||
return false;
|
||||
} else {
|
||||
// before order must be : N-2, N-1, 0, 1, 2, ...
|
||||
std::vector<int64_t> needed_transpose_order_before(rank);
|
||||
std::iota(needed_transpose_order_before.begin() + 2, needed_transpose_order_before.end(), 0);
|
||||
needed_transpose_order_before[0] = rank - 2;
|
||||
needed_transpose_order_before[1] = rank - 1;
|
||||
|
||||
if (before_order != needed_transpose_order_before)
|
||||
return false;
|
||||
|
||||
// transpose order after matmul must be back for transpose before
|
||||
std::vector<int64_t> back_order(rank);
|
||||
for (size_t i = 0; i < rank; i++)
|
||||
back_order[i] = std::distance(after_order.begin(), std::find(after_order.begin(), after_order.end(), i));
|
||||
|
||||
if (before_order != back_order)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Check for input Reshape which are before MatMul
|
||||
///
|
||||
/// \param reshape Reshape which is before MatMul
|
||||
/// \param new_shape New shape for Reshape
|
||||
/// \param transposed_b true - second MatMul input is transposed, otherwise, it's not transposed
|
||||
///
|
||||
/// \return True - Reshape has right new shape for reshaping, otherwise, Reshape has incorrect new shape for transformation
|
||||
///
|
||||
bool check_input_reshape(const std::shared_ptr<ngraph::opset1::Reshape>& reshape,
|
||||
const std::vector<int64_t>& new_shape, const bool transposed_b) {
|
||||
const auto input_shape = reshape->get_input_shape(0);
|
||||
const size_t input_rank = input_shape.size();
|
||||
const size_t output_rank = reshape->get_output_shape(0).size();
|
||||
if (input_rank < 3 || output_rank != 2)
|
||||
return false;
|
||||
|
||||
if (transposed_b) {
|
||||
const int64_t k = input_shape.back();
|
||||
const int64_t new_n = ov::shape_size(input_shape) / k;
|
||||
if (new_shape != std::vector<int64_t>{new_n, k})
|
||||
return false;
|
||||
} else {
|
||||
const int64_t k = input_shape.front();
|
||||
const int64_t new_n = ov::shape_size(input_shape) / k;
|
||||
if (new_shape != std::vector<int64_t>{k, -1} && new_shape != std::vector<int64_t>{k, new_n})
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeReshapeEliminationForMatmul, "TransposeReshapeEliminationForMatmul", 0);
|
||||
|
||||
ngraph::pass::TransposeReshapeEliminationForMatmul::TransposeReshapeEliminationForMatmul() {
|
||||
MATCHER_SCOPE(TransposeReshapeEliminationForMatmul);
|
||||
auto input_1_pattern = ngraph::pattern::any_input([] (const Output<Node>& node) -> bool {
|
||||
const auto& shape = node.get_partial_shape();
|
||||
const auto& rank = shape.rank();
|
||||
return rank.is_static() && rank.get_length() == 2 && shape.is_static();
|
||||
});
|
||||
auto input_2_pattern = ngraph::pattern::any_input([] (const Output<Node>& node) -> bool {
|
||||
return node.get_partial_shape().is_static();
|
||||
});
|
||||
|
||||
auto const_transpose_before_pattern = ngraph::pattern::wrap_type<opset1::Constant>();
|
||||
auto transpose_before_pattern = ngraph::pattern::wrap_type<opset1::Transpose>({input_2_pattern, const_transpose_before_pattern});
|
||||
|
||||
auto const_reshape_before_pattern = ngraph::pattern::wrap_type<opset1::Constant>();
|
||||
auto reshape_before_pattern = ngraph::pattern::wrap_type<opset1::Reshape>({transpose_before_pattern, const_reshape_before_pattern});
|
||||
|
||||
auto matmul_pattern = ngraph::pattern::wrap_type<opset1::MatMul>({input_1_pattern, reshape_before_pattern});
|
||||
|
||||
auto const_reshape_after_pattern = ngraph::pattern::wrap_type<opset1::Constant>();
|
||||
auto reshape_after_pattern = ngraph::pattern::wrap_type<opset1::Reshape>({matmul_pattern, const_reshape_after_pattern});
|
||||
|
||||
auto const_transpose_after_pattern = ngraph::pattern::wrap_type<opset1::Constant>();
|
||||
auto transpose_after_pattern = ngraph::pattern::wrap_type<opset1::Transpose>({reshape_after_pattern, const_transpose_after_pattern});
|
||||
|
||||
ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
|
||||
const auto& pattern_value_map = m.get_pattern_value_map();
|
||||
const auto& input_1 = pattern_value_map.at(input_1_pattern);
|
||||
const auto& input_2 = pattern_value_map.at(input_2_pattern);
|
||||
|
||||
auto matmul = std::dynamic_pointer_cast<opset1::MatMul>(pattern_value_map.at(matmul_pattern).get_node_shared_ptr());
|
||||
if (!matmul)
|
||||
return false;
|
||||
const bool transposed_a = matmul->get_transpose_a();
|
||||
const bool transposed_b = matmul->get_transpose_b();
|
||||
|
||||
auto reshape_before = std::dynamic_pointer_cast<opset1::Reshape>(pattern_value_map.at(reshape_before_pattern).get_node_shared_ptr());
|
||||
auto reshape_after = std::dynamic_pointer_cast<opset1::Reshape>(pattern_value_map.at(reshape_after_pattern).get_node_shared_ptr());
|
||||
auto reshape_before_constant = std::dynamic_pointer_cast<ngraph::opset1::Constant>(
|
||||
pattern_value_map.at(const_reshape_before_pattern).get_node_shared_ptr());
|
||||
if (!reshape_before || !reshape_after || !reshape_before_constant)
|
||||
return false;
|
||||
if (!check_input_reshape(reshape_before, reshape_before_constant->cast_vector<int64_t>(), transposed_b))
|
||||
return false;
|
||||
|
||||
// check transpose order before and after matmul
|
||||
auto transpose_before = std::dynamic_pointer_cast<opset1::Transpose>(pattern_value_map.at(transpose_before_pattern).get_node_shared_ptr());
|
||||
auto transpose_after = std::dynamic_pointer_cast<opset1::Transpose>(pattern_value_map.at(transpose_after_pattern).get_node_shared_ptr());
|
||||
auto transpose_before_constant = std::dynamic_pointer_cast<ngraph::opset1::Constant>(transpose_before->get_input_node_shared_ptr(1));
|
||||
auto transpose_after_constant = std::dynamic_pointer_cast<ngraph::opset1::Constant>(transpose_after->get_input_node_shared_ptr(1));
|
||||
if (!transpose_before || !transpose_after || !transpose_before_constant || !transpose_after_constant)
|
||||
return false;
|
||||
auto transpose_before_order = transpose_before_constant->cast_vector<int64_t>();
|
||||
auto transpose_after_order = transpose_after_constant->cast_vector<int64_t>();
|
||||
// need to check that input shape is correctly contracted and output shape is correctly unpacked using transposes
|
||||
if (!check_transposes(transpose_before_order, transpose_after_order, transposed_b))
|
||||
return false;
|
||||
|
||||
const auto new_matmul = std::make_shared<opset1::MatMul>(input_1, input_2, transposed_a, false);
|
||||
new_matmul->set_friendly_name(transpose_after->get_friendly_name());
|
||||
copy_runtime_info({transpose_before, reshape_before, matmul, reshape_after, transpose_after}, new_matmul);
|
||||
replace_node(transpose_after, new_matmul);
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(transpose_after_pattern, matcher_name);
|
||||
this->register_matcher(m, callback);
|
||||
}
|
||||
@@ -34,8 +34,6 @@ public:
|
||||
|
||||
private:
|
||||
std::string m_variable_id;
|
||||
template <class T>
|
||||
friend void shape_infer(const Assign* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes);
|
||||
};
|
||||
} // namespace v3
|
||||
|
||||
@@ -72,10 +70,6 @@ public:
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
bool has_evaluate() const override;
|
||||
bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override;
|
||||
|
||||
private:
|
||||
template <class T>
|
||||
friend void shape_infer(const Assign* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes);
|
||||
};
|
||||
} // namespace v6
|
||||
} // namespace op
|
||||
|
||||
@@ -60,10 +60,8 @@ public:
|
||||
|
||||
private:
|
||||
Attributes m_attrs;
|
||||
template <class T>
|
||||
friend void shape_infer(const ExperimentalDetectronPriorGridGenerator* op,
|
||||
const std::vector<T>& input_shapes,
|
||||
std::vector<T>& output_shapes);
|
||||
|
||||
void validate();
|
||||
};
|
||||
} // namespace v6
|
||||
} // namespace op
|
||||
|
||||
@@ -241,8 +241,6 @@ private:
|
||||
|
||||
static constexpr std::size_t s_gates_count{4};
|
||||
static constexpr std::size_t s_peepholes_count{3};
|
||||
template <class T>
|
||||
friend void shape_infer(const LSTMCell* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes);
|
||||
};
|
||||
} // namespace v0
|
||||
|
||||
@@ -380,8 +378,6 @@ private:
|
||||
util::ActivationFunction m_activation_h;
|
||||
|
||||
static constexpr std::size_t s_gates_count{4};
|
||||
template <class T>
|
||||
friend void shape_infer(const LSTMCell* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes);
|
||||
};
|
||||
} // namespace v4
|
||||
} // namespace op
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#pragma once
|
||||
#include <openvino/core/graph_util.hpp>
|
||||
#include <openvino/op/assign.hpp>
|
||||
|
||||
#include "utils.hpp"
|
||||
namespace ov {
|
||||
namespace op {
|
||||
namespace v3 {
|
||||
|
||||
template <class T>
|
||||
void shape_infer(const Assign* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
|
||||
NODE_VALIDATION_CHECK(op, input_shapes.size() == 1 && output_shapes.size() == 1);
|
||||
const auto& input_shape = input_shapes[0];
|
||||
const auto& variable_info = op->m_variable->get_info();
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
op->m_variable_id == variable_info.variable_id,
|
||||
"Variables identifiers are inconsistent.");
|
||||
const auto& arg_t = op->get_input_element_type(0);
|
||||
NODE_VALIDATION_CHECK(op, arg_t == variable_info.data_type, "Variables types are inconsistent.");
|
||||
|
||||
if (input_shape.is_static() && variable_info.data_shape.is_static()) {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
input_shape.to_shape() == variable_info.data_shape.to_shape(),
|
||||
"Variables output shapes are inconsistent.");
|
||||
}
|
||||
copy_shape_infer(op, input_shapes, output_shapes);
|
||||
}
|
||||
} // namespace v3
|
||||
|
||||
namespace v6 {
|
||||
|
||||
template <class T>
|
||||
void shape_infer(const Assign* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
|
||||
copy_shape_infer(op, input_shapes, output_shapes);
|
||||
}
|
||||
} // namespace v6
|
||||
} // namespace op
|
||||
} // namespace ov
|
||||
@@ -1,76 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#pragma once
|
||||
#include <openvino/op/experimental_detectron_prior_grid_generator.hpp>
|
||||
|
||||
namespace ov {
|
||||
namespace op {
|
||||
namespace v6 {
|
||||
|
||||
template <class T>
|
||||
void shape_infer(const ExperimentalDetectronPriorGridGenerator* op,
|
||||
const std::vector<T>& input_shapes,
|
||||
std::vector<T>& output_shapes) {
|
||||
NODE_VALIDATION_CHECK(op, input_shapes.size() == 3 && output_shapes.size() == 1);
|
||||
const auto& priors_shape = input_shapes[0];
|
||||
const auto& featmap_shape = input_shapes[1];
|
||||
const auto& im_data_shape = input_shapes[2];
|
||||
|
||||
auto& output_shape = output_shapes[0];
|
||||
size_t output_size = op->m_attrs.flatten ? 2 : 4;
|
||||
|
||||
output_shape.resize(output_size);
|
||||
output_shape[output_size - 1] = 4;
|
||||
|
||||
bool prior_rank_static = priors_shape.rank().is_static();
|
||||
bool featmap_rank_static = featmap_shape.rank().is_static();
|
||||
bool im_data_rank_static = im_data_shape.rank().is_static();
|
||||
|
||||
if (prior_rank_static) {
|
||||
NODE_VALIDATION_CHECK(op, priors_shape.size() == 2, "Priors rank must be equal to 2.");
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
priors_shape[1].compatible(4),
|
||||
"The last dimension of the 'priors' input must be equal to 4. Got: ",
|
||||
priors_shape[1]);
|
||||
}
|
||||
|
||||
if (featmap_rank_static) {
|
||||
NODE_VALIDATION_CHECK(op, featmap_shape.size() == 4, "Feature_map rank must be equal to 4.");
|
||||
}
|
||||
|
||||
if (im_data_rank_static) {
|
||||
NODE_VALIDATION_CHECK(op, im_data_shape.size() == 4, "Im_data rank must be equal to 4.");
|
||||
}
|
||||
|
||||
if (featmap_rank_static && im_data_rank_static) {
|
||||
const auto& num_batches_featmap = featmap_shape[0];
|
||||
const auto& num_batches_im_data = im_data_shape[0];
|
||||
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
num_batches_featmap.compatible(num_batches_im_data),
|
||||
"The first dimension of both 'feature_map' and 'im_data' must match. "
|
||||
"Feature_map: ",
|
||||
num_batches_featmap,
|
||||
"; Im_data: ",
|
||||
num_batches_im_data);
|
||||
}
|
||||
|
||||
if (op->m_attrs.flatten) {
|
||||
if (prior_rank_static && featmap_rank_static) {
|
||||
output_shape[0] = featmap_shape[2] * featmap_shape[3] * priors_shape[0];
|
||||
}
|
||||
} else {
|
||||
if (featmap_rank_static) {
|
||||
output_shape[0] = featmap_shape[2];
|
||||
output_shape[1] = featmap_shape[3];
|
||||
}
|
||||
if (prior_rank_static) {
|
||||
output_shape[2] = priors_shape[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace v6
|
||||
} // namespace op
|
||||
} // namespace ov
|
||||
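A concrete reading of the output-shape rule above, with invented shapes for illustration; `grid_gen` stands for an existing v6::ExperimentalDetectronPriorGridGenerator node and is an assumption of the sketch:

    // Example values only: priors {3, 4}, feature_map {1, 256, 25, 42}, im_data {1, 3, 800, 1344}
    std::vector<ov::PartialShape> input_shapes = {{3, 4}, {1, 256, 25, 42}, {1, 3, 800, 1344}};
    std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
    ov::op::v6::shape_infer(grid_gen.get(), input_shapes, output_shapes);
    // flatten == true  -> output_shapes[0] is {25 * 42 * 3, 4}, i.e. {3150, 4}
    // flatten == false -> output_shapes[0] is {25, 42, 3, 4}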
@@ -1,191 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#pragma once
|
||||
#include <openvino/op/lstm_cell.hpp>
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace op {
|
||||
namespace ShapeInferLSTM {
|
||||
template <class OpsType, class ShapeType>
|
||||
void lstm_shape_infer(const OpsType* op,
|
||||
const std::vector<ShapeType>& input_shapes,
|
||||
std::vector<ShapeType>& output_shapes,
|
||||
std::size_t gates_count) {
|
||||
using DimType = typename std::iterator_traits<typename ShapeType::iterator>::value_type;
|
||||
enum { X, initial_hidden_state, initial_cell_state, W, R, B };
|
||||
std::vector<bool> input_rank_static(6, false);
|
||||
bool all_rank_dynamic = false;
|
||||
bool all_rank_static = true;
|
||||
// Prepare OutShape
|
||||
auto& hidden_shape = output_shapes[0];
|
||||
auto& cell_shape = output_shapes[1];
|
||||
hidden_shape.resize(2);
|
||||
cell_shape.resize(2);
|
||||
|
||||
// If rank is dynamic, then output_shape is undefined
|
||||
for (size_t i = 0; i < input_shapes.size(); i++) {
|
||||
input_rank_static[i] = input_shapes[i].rank().is_static();
|
||||
all_rank_dynamic &= !input_rank_static[i];
|
||||
all_rank_static &= input_rank_static[i];
|
||||
}
|
||||
|
||||
if (all_rank_dynamic) {
|
||||
return;
|
||||
}
|
||||
const auto& x_pshape = input_shapes[0];
|
||||
const auto& w_pshape = input_shapes[3];
|
||||
|
||||
DimType output_batch_size;
|
||||
DimType output_hidden_size;
|
||||
bool is_batch_init = false;
|
||||
bool is_hidden_init = false;
|
||||
|
||||
// deduce batch/hidden_size
|
||||
for (size_t i = 0; i < input_shapes.size(); i++) {
|
||||
const auto& input = input_shapes[i];
|
||||
if (input_rank_static[i]) {
|
||||
// batch could be deduced from x, cell_state or hidden_state
|
||||
if (i == X || i == initial_cell_state || i == initial_hidden_state) {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
(input.size() == 2),
|
||||
"LSTMCell input rank is not correct for ",
|
||||
i,
|
||||
" input parameter. Current rank: ",
|
||||
input.size(),
|
||||
", expected: 2.");
|
||||
if (!is_batch_init) {
|
||||
output_batch_size = input[0];
|
||||
is_batch_init = true;
|
||||
} else {
|
||||
NODE_VALIDATION_CHECK(
|
||||
op,
|
||||
DimType::merge(output_batch_size, output_batch_size, input[0]),
|
||||
"Parameter batch_size not matched for X, initial_hidden_state or initial_cell_state "
|
||||
"inputs.");
|
||||
}
|
||||
if (i == initial_cell_state || i == initial_hidden_state) {
|
||||
if (!is_hidden_init) {
|
||||
output_hidden_size = input[1];
|
||||
is_hidden_init = true;
|
||||
} else {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
DimType::merge(output_hidden_size, output_hidden_size, input[1]),
|
||||
"Parameter hidden_size not matched for W, R, B, initial_hidden_state and "
|
||||
"initial_cell_state "
|
||||
"inputs.");
|
||||
}
|
||||
}
|
||||
} else if (i == W || i == R || i == B) {
|
||||
// check input dimension
|
||||
if (i == B) {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
(input.size() == 1),
|
||||
"LSTMCell input tensor dimension is not correct for ",
|
||||
i,
|
||||
" input parameter. Current input length: ",
|
||||
input.size(),
|
||||
", expected: 1.");
|
||||
if (input[0].is_static()) {
|
||||
if (!is_hidden_init) {
|
||||
output_hidden_size = input[0].get_length() / gates_count;
|
||||
is_hidden_init = true;
|
||||
} else {
|
||||
NODE_VALIDATION_CHECK(
|
||||
op,
|
||||
DimType::merge(output_hidden_size, output_hidden_size, input[0].get_length() / gates_count),
|
||||
"Parameter hidden_size not matched for W, R, B, initial_hidden_state and "
|
||||
"initial_cell_state "
|
||||
"inputs.");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
(input.size() == 2),
|
||||
"LSTMCell input rank is not correct for ",
|
||||
i,
|
||||
" input parameter. Current rank: ",
|
||||
input.size(),
|
||||
", expected: 2.");
|
||||
if (input[0].is_static()) {
|
||||
if (!is_hidden_init) {
|
||||
output_hidden_size = input[0].get_length() / gates_count;
|
||||
is_hidden_init = true;
|
||||
} else {
|
||||
NODE_VALIDATION_CHECK(
|
||||
op,
|
||||
DimType::merge(output_hidden_size, output_hidden_size, input[0].get_length() / gates_count),
|
||||
"Parameter hidden_size not matched for W, R, B, initial_hidden_state and "
|
||||
"initial_cell_state "
|
||||
"inputs.");
|
||||
}
|
||||
}
|
||||
if (i == R) {
|
||||
if (!is_hidden_init) {
|
||||
output_hidden_size = input[1];
|
||||
is_hidden_init = true;
|
||||
} else {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
DimType::merge(output_hidden_size, output_hidden_size, input[1]),
|
||||
"Parameter hidden_size not matched for W, R, B, initial_hidden_state "
|
||||
"and initial_cell_state "
|
||||
"inputs.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Check peepholes
|
||||
if (input_shapes.size() == 7) {
|
||||
const auto& p_pshape = input_shapes[6];
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
(p_pshape.rank().compatible(1)),
|
||||
"LSTMCell input tensor P shall have dimension 1D.");
|
||||
}
|
||||
|
||||
// check input size
|
||||
if (input_rank_static[X] && input_rank_static[W]) {
|
||||
NODE_VALIDATION_CHECK(op, (x_pshape[1].compatible(w_pshape[1])), "LSTMCell mismatched input_size dimension.");
|
||||
}
|
||||
|
||||
hidden_shape[0] = output_batch_size;
|
||||
hidden_shape[1] = output_hidden_size;
|
||||
cell_shape[0] = output_batch_size;
|
||||
cell_shape[1] = output_hidden_size;
|
||||
}
|
||||
|
||||
} // namespace ShapeInferLSTM
|
||||
|
||||
namespace v0 {
|
||||
using ShapeInferLSTM::lstm_shape_infer;
|
||||
template <class T>
|
||||
void shape_infer(const LSTMCell* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
|
||||
NODE_VALIDATION_CHECK(op, input_shapes.size() == 7 && output_shapes.size() == 2);
|
||||
const auto& p_pshape = input_shapes[6];
|
||||
|
||||
lstm_shape_infer(op, input_shapes, output_shapes, op->s_gates_count);
|
||||
const auto& hidden_size = output_shapes[0][1];
|
||||
if (p_pshape[0].is_static() && hidden_size.is_static()) {
|
||||
NODE_VALIDATION_CHECK(op,
|
||||
p_pshape[0].compatible(hidden_size * op->s_peepholes_count),
|
||||
"Parameter hidden_size mistmatched in P input. Current value is: ",
|
||||
p_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
hidden_size.get_length() * op->s_peepholes_count,
|
||||
".");
|
||||
}
|
||||
}
|
||||
} // namespace v0
|
||||
|
||||
namespace v4 {
|
||||
using ShapeInferLSTM::lstm_shape_infer;
|
||||
template <class T>
|
||||
void shape_infer(const LSTMCell* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
|
||||
NODE_VALIDATION_CHECK(op, input_shapes.size() == 6 && output_shapes.size() == 2);
|
||||
lstm_shape_infer(op, input_shapes, output_shapes, op->s_gates_count);
|
||||
}
|
||||
} // namespace v4
|
||||
} // namespace op
|
||||
} // namespace ov
|
||||
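In practice the deduction above recovers both output shapes from partial information; a short sketch matching the "from x & w" case exercised in the type_prop tests further down in this diff (`cell` is an assumed, already-built opset4 LSTMCell):

    // Example values only: X {2, 3}, W {4 * 3, 3}; H_t, C_t, R and B left rank-dynamic.
    std::vector<ov::PartialShape> input_shapes = {{2, 3},
                                                  ov::PartialShape::dynamic(),
                                                  ov::PartialShape::dynamic(),
                                                  {12, 3},
                                                  ov::PartialShape::dynamic(),
                                                  ov::PartialShape::dynamic()};
    std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}, ov::PartialShape{}};
    ov::op::v4::shape_infer(cell.get(), input_shapes, output_shapes);
    // batch_size comes from X[0], hidden_size from W[0] / gates_count (12 / 4 = 3):
    // output_shapes[0] == output_shapes[1] == PartialShape{2, 3}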
@@ -1,29 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/op/read_value.hpp>
#include "utils.hpp"
namespace ov {
namespace op {

template <class OpType, class ShapeType>
void read_value_shape_infer(const OpType* op, const std::vector<ShapeType>& input_shapes, std::vector<ShapeType>& output_shapes) {
    copy_shape_infer(op, input_shapes, output_shapes);
}

namespace v3 {
template <class T>
void shape_infer(const ReadValue* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
    read_value_shape_infer(op, input_shapes, output_shapes);
}
} // namespace v3

namespace v6 {
template <class T>
void shape_infer(const ReadValue* op, const std::vector<T>& input_shapes, std::vector<T>& output_shapes) {
    read_value_shape_infer(op, input_shapes, output_shapes);
}
} // namespace v6
} // namespace op
} // namespace ov
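Both opset versions simply forward to read_value_shape_infer, so the output mirrors the init-value shape; a short sketch with an invented node and shape (`read_value` is assumed, not from the diff):

    // read_value is assumed to be an existing ov::op::v6::ReadValue node.
    std::vector<ov::PartialShape> input_shapes = {{1, 3, 224, 224}};
    std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
    ov::op::v6::shape_infer(read_value.get(), input_shapes, output_shapes);
    // output_shapes[0] == PartialShape{1, 3, 224, 224}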
@@ -1,52 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/op/tile.hpp>

#include "utils.hpp"
namespace ov {
namespace op {
namespace v0 {

template <class T>
void shape_infer(const Tile* op,
                 const std::vector<T>& input_shapes,
                 std::vector<T>& output_shapes,
                 const std::map<size_t, std::shared_ptr<ngraph::runtime::HostTensor>>& constant_data = {}) {
    NODE_VALIDATION_CHECK(op, input_shapes.size() == 2 && output_shapes.size() == 1);
    const auto& arg_shape = input_shapes[0];
    auto& repeats_shape = input_shapes[1];
    auto& output_shape = output_shapes[0];
    using DimType = typename std::iterator_traits<typename T::iterator>::value_type;
    std::vector<int64_t> axes_val;
    NODE_VALIDATION_CHECK(op, repeats_shape.rank().compatible(1), "PartialShape of repeats must be of rank 1");

    //Get repeats
    bool axes_are_known = get_data_as_int64<T>(1, op, axes_val, constant_data);
    const auto arg_rank = arg_shape.rank();
    if (arg_rank.is_static() && (axes_are_known || repeats_shape[0].is_static())) {
        //try to specify rank
        int64_t data_rank = arg_shape.size();
        int64_t repeats_rank = axes_are_known ? axes_val.size() : repeats_shape[0].get_length();
        auto output_rank = std::max(data_rank, repeats_rank);
        output_shape.resize(output_rank);
        //if have constant axes, compute new axes
        if (axes_are_known) {
            auto remain_arg = output_rank - data_rank;
            auto remain_axes = output_rank - repeats_rank;
            for (size_t i = 0; i < output_rank; i++) {
                auto data_tmp = i < remain_arg ? DimType(1) : arg_shape[i - (remain_arg)];
                auto repeat_tmp =
                    i < remain_axes ? DimType(1) : axes_val[i - remain_axes];
                output_shape[i] = data_tmp * repeat_tmp;
            }
        }
    } else {
        //can't deduce shape, set default value
        output_shape = PartialShape::dynamic();
    }
}
} // namespace v0
} // namespace op
} // namespace ov
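To make the rank broadcasting between data and repeats concrete, a sketch with made-up values; `tile_node` and `repeats_tensor` (a HostTensor holding {1, 2, 3, 4}) are assumptions of the example, not part of the commit:

    // Example values only: data {6, 8, 10}, repeats constant {1, 2, 3, 4}.
    std::vector<ov::PartialShape> input_shapes = {{6, 8, 10}, {4}};
    std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
    ov::op::v0::shape_infer(tile_node.get(), input_shapes, output_shapes, {{1, repeats_tensor}});
    // Output rank is max(3, 4) = 4; the data shape is left-padded with 1s, so
    // output_shapes[0] == PartialShape{1 * 1, 6 * 2, 8 * 3, 10 * 4} == {1, 12, 24, 40}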
@@ -4,8 +4,6 @@
|
||||
|
||||
#include "ngraph/op/assign.hpp"
|
||||
|
||||
#include <assign_shape_inference.hpp>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "ngraph/op/read_value.hpp"
|
||||
#include "ngraph/op/util/variable.hpp"
|
||||
@@ -28,7 +26,7 @@ void op::v3::Assign::validate_and_infer_types() {
|
||||
NGRAPH_OP_SCOPE(v3_Assign_validate_and_infer_types);
|
||||
auto value = input_value(0);
|
||||
auto arg_t = get_input_element_type(0);
|
||||
const auto& input_shape = get_input_partial_shape(0);
|
||||
auto output_shape = get_input_partial_shape(0);
|
||||
if (!m_variable) {
|
||||
NodeVector start_nodes;
|
||||
for (const auto& input : inputs()) {
|
||||
@@ -43,10 +41,20 @@ void op::v3::Assign::validate_and_infer_types() {
|
||||
}
|
||||
NODE_VALIDATION_CHECK(this, m_variable != nullptr, "Can't find variable with id = ", m_variable_id);
|
||||
}
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {input_shape};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
set_output_type(0, arg_t, output_shapes[0]);
|
||||
|
||||
auto variable_info = m_variable->get_info();
|
||||
NODE_VALIDATION_CHECK(this, m_variable_id == variable_info.variable_id, "Variables identifiers are inconsistent.");
|
||||
NODE_VALIDATION_CHECK(this, arg_t == variable_info.data_type, "Variables types are inconsistent.");
|
||||
|
||||
if (output_shape.is_static() && variable_info.data_shape.is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
output_shape == variable_info.data_shape,
|
||||
"Variables output shapes are inconsistent.");
|
||||
|
||||
set_output_type(0, arg_t, output_shape);
|
||||
} else {
|
||||
set_output_type(0, arg_t, ov::PartialShape::dynamic());
|
||||
}
|
||||
}
|
||||
|
||||
shared_ptr<Node> op::v3::Assign::clone_with_new_inputs(const OutputVector& new_args) const {
|
||||
@@ -70,10 +78,7 @@ op::v6::Assign::Assign(const Output<Node>& new_value, const std::shared_ptr<Vari
|
||||
void op::v6::Assign::validate_and_infer_types() {
|
||||
NGRAPH_OP_SCOPE(v6_Assign_validate_and_infer_types);
|
||||
m_variable->update({get_input_partial_shape(0), get_input_element_type(0), m_variable->get_info().variable_id});
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {get_input_partial_shape(0)};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
set_output_type(0, get_input_element_type(0), output_shapes[0]);
|
||||
set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
|
||||
}
|
||||
|
||||
shared_ptr<Node> op::v6::Assign::clone_with_new_inputs(const OutputVector& new_args) const {
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include "ngraph/op/experimental_detectron_prior_grid_generator.hpp"
|
||||
|
||||
#include <experimental_detectron_prior_grid_generator_shape_inference.hpp>
|
||||
#include <memory>
|
||||
|
||||
#include "itt.hpp"
|
||||
@@ -50,15 +49,71 @@ static constexpr size_t priors_port = 0;
|
||||
static constexpr size_t featmap_port = 1;
|
||||
static constexpr size_t im_data_port = 2;
|
||||
|
||||
void op::v6::ExperimentalDetectronPriorGridGenerator::validate() {
|
||||
auto priors_shape = get_input_partial_shape(priors_port);
|
||||
auto featmap_shape = get_input_partial_shape(featmap_port);
|
||||
auto im_data_shape = get_input_partial_shape(im_data_port);
|
||||
|
||||
if (priors_shape.rank().is_dynamic() || featmap_shape.rank().is_dynamic()) {
|
||||
return;
|
||||
}
|
||||
|
||||
NODE_VALIDATION_CHECK(this, priors_shape.rank().get_length() == 2, "Priors rank must be equal to 2.");
|
||||
|
||||
if (priors_shape[1].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
priors_shape[1].is_static() && priors_shape[1].get_length() == 4u,
|
||||
"The last dimension of the 'priors' input must be equal to 4. Got: ",
|
||||
priors_shape[1]);
|
||||
}
|
||||
|
||||
NODE_VALIDATION_CHECK(this, featmap_shape.rank().get_length() == 4, "Feature_map rank must be equal to 4.");
|
||||
|
||||
if (im_data_shape.rank().is_dynamic()) {
|
||||
return;
|
||||
}
|
||||
|
||||
NODE_VALIDATION_CHECK(this, im_data_shape.rank().get_length() == 4, "Im_data rank must be equal to 4.");
|
||||
|
||||
const auto num_batches_featmap = featmap_shape[0];
|
||||
const auto num_batches_im_data = im_data_shape[0];
|
||||
const auto batches_intersection = num_batches_featmap & num_batches_im_data;
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
!batches_intersection.get_interval().empty(),
|
||||
"The first dimension of both 'feature_map' and 'im_data' must match. "
|
||||
"Feature_map: ",
|
||||
num_batches_featmap,
|
||||
"; Im_data: ",
|
||||
num_batches_im_data);
|
||||
}
|
||||
|
||||
void op::v6::ExperimentalDetectronPriorGridGenerator::validate_and_infer_types() {
|
||||
NGRAPH_OP_SCOPE(v6_ExperimentalDetectronPriorGridGenerator_validate_and_infer_types);
|
||||
const auto& priors_shape = get_input_partial_shape(priors_port);
|
||||
const auto& featmap_shape = get_input_partial_shape(featmap_port);
|
||||
const auto& input_et = get_input_element_type(0);
|
||||
auto priors_shape = get_input_partial_shape(priors_port);
|
||||
auto featmap_shape = get_input_partial_shape(featmap_port);
|
||||
auto input_et = get_input_element_type(0);
|
||||
|
||||
validate();
|
||||
|
||||
set_output_size(1);
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {priors_shape, featmap_shape, get_input_partial_shape(im_data_port)};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
set_output_type(0, input_et, output_shapes[0]);
|
||||
ov::PartialShape out_shape = {Dimension::dynamic(), Dimension::dynamic(), Dimension::dynamic(), 4};
|
||||
if (m_attrs.flatten) {
|
||||
out_shape = ov::PartialShape{Dimension::dynamic(), 4};
|
||||
}
|
||||
|
||||
if (priors_shape.rank().is_dynamic() || featmap_shape.rank().is_dynamic()) {
|
||||
set_output_type(0, input_et, out_shape);
|
||||
return;
|
||||
}
|
||||
|
||||
auto num_priors = priors_shape[0];
|
||||
auto featmap_height = featmap_shape[2];
|
||||
auto featmap_width = featmap_shape[3];
|
||||
|
||||
if (m_attrs.flatten) {
|
||||
out_shape = ov::PartialShape{featmap_height * featmap_width * num_priors, 4};
|
||||
} else {
|
||||
out_shape = ov::PartialShape{featmap_height, featmap_width, num_priors, 4};
|
||||
}
|
||||
set_output_type(0, input_et, out_shape);
|
||||
}
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
#include <cmath>
|
||||
#include <functional>
|
||||
#include <lstm_cell_shape_inference.hpp>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "ngraph/attribute_visitor.hpp"
|
||||
@@ -140,7 +139,30 @@ void op::v0::LSTMCell::validate_and_infer_types() {
|
||||
set_argument(6, get_default_peepholes_input());
|
||||
}
|
||||
|
||||
for (const auto& input : inputs()) {
|
||||
if (input.get_partial_shape().rank().is_dynamic()) {
|
||||
set_output_type(0, get_input_element_type(0), ov::PartialShape::dynamic());
|
||||
set_output_type(1, get_input_element_type(0), ov::PartialShape::dynamic());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<ov::PartialShape> input_param{};
|
||||
|
||||
auto merged_batch_size = Dimension::dynamic();
|
||||
auto merged_hidden_size = Dimension::dynamic();
|
||||
auto result_et = element::dynamic;
|
||||
|
||||
// Copy all inputs without peephole (7th input) and initial_cell_state (2nd input)
|
||||
// information
|
||||
// for further validation
|
||||
for (size_t i = 0; i < get_input_size() - 1; i++) {
|
||||
// exclude initial_cell_state input
|
||||
if (i != 2) {
|
||||
input_param.push_back(get_input_partial_shape(i));
|
||||
}
|
||||
}
|
||||
|
||||
// Get input partial shape for all inputs
|
||||
const auto& x_pshape = get_input_partial_shape(0);
|
||||
const auto& ht_pshape = get_input_partial_shape(1);
|
||||
@@ -150,6 +172,24 @@ void op::v0::LSTMCell::validate_and_infer_types() {
|
||||
const auto& b_pshape = get_input_partial_shape(5);
|
||||
const auto& p_pshape = get_input_partial_shape(6);
|
||||
|
||||
validate_input_rank_dimension(input_param);
|
||||
|
||||
// Validate rank and dimension for initial_cell_state input
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
(ct_pshape.rank().is_static()),
|
||||
"LSTMCell input tensor initial_cell_state shall have static rank.");
|
||||
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
(ct_pshape.rank().get_length() == 2),
|
||||
"LSTMCell input tensor initial_cell_state shall have dimension 2D.");
|
||||
|
||||
// Validate rank and dimension for P input
|
||||
NODE_VALIDATION_CHECK(this, (p_pshape.rank().is_static()), "LSTMCell input tensor P shall have static rank.");
|
||||
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
(p_pshape.rank().get_length() == 1),
|
||||
"LSTMCell input tensor P shall have dimension 1D.");
|
||||
|
||||
// Validate input element types and save result for output type
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
element::Type::merge(result_et, result_et, get_input_element_type(0)) &&
|
||||
@@ -161,10 +201,65 @@ void op::v0::LSTMCell::validate_and_infer_types() {
|
||||
"Element types for X, initial_hidden_state, initial_cell_state, W, R and B do not "
|
||||
"match.");
|
||||
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}, ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes =
|
||||
{x_pshape, ht_pshape, ct_pshape, w_pshape, r_pshape, b_pshape, p_pshape};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
// Merge batch_size dimension across all inputs to evaluate output[0] dimension
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
Dimension::merge(merged_batch_size, merged_batch_size, ht_pshape[0]) &&
|
||||
Dimension::merge(merged_batch_size, merged_batch_size, ct_pshape[0]) &&
|
||||
Dimension::merge(merged_batch_size, merged_batch_size, x_pshape[0]),
|
||||
"Parameter batch_size not matched for X, initial_hidden_state or initial_cell_state "
|
||||
"inputs.");
|
||||
|
||||
// Merge hidden_size dimension across all inputs to evaluate output[1] dimension
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
Dimension::merge(merged_hidden_size, merged_hidden_size, ht_pshape[1]) &&
|
||||
Dimension::merge(merged_hidden_size, merged_hidden_size, ct_pshape[1]) &&
|
||||
Dimension::merge(merged_hidden_size, merged_hidden_size, r_pshape[1]),
|
||||
"Parameter hidden_size not matched for R, initial_hidden_state and initial_cell_state "
|
||||
"inputs.");
|
||||
|
||||
// Validate hidden_size value for W, R and P inputs
|
||||
if (merged_hidden_size.is_static()) {
|
||||
if (w_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
w_pshape[0].compatible(merged_hidden_size * s_gates_count),
|
||||
"Parameter hidden_size mistmatched in W input. Current value is: ",
|
||||
w_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_gates_count,
|
||||
".");
|
||||
}
|
||||
|
||||
if (r_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
r_pshape[0].compatible(merged_hidden_size * s_gates_count),
|
||||
"Parameter hidden_size mistmatched in R input. Current value is: ",
|
||||
r_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_gates_count,
|
||||
".");
|
||||
}
|
||||
|
||||
if (b_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
b_pshape[0].compatible(merged_hidden_size * s_gates_count),
|
||||
"Parameter hidden_size mistmatched in B input. Current value is: ",
|
||||
b_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_gates_count,
|
||||
".");
|
||||
}
|
||||
|
||||
if (p_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
p_pshape[0].compatible(merged_hidden_size * s_peepholes_count),
|
||||
"Parameter hidden_size mistmatched in P input. Current value is: ",
|
||||
p_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_peepholes_count,
|
||||
".");
|
||||
}
|
||||
}
|
||||
|
||||
// Mark inputs which are relevant to output parameters
|
||||
set_input_is_relevant_to_shape(0);
|
||||
set_input_is_relevant_to_shape(1);
|
||||
@@ -173,8 +268,8 @@ void op::v0::LSTMCell::validate_and_infer_types() {
|
||||
|
||||
// Set output size, type and shape
|
||||
set_output_size(2);
|
||||
set_output_type(0, result_et, output_shapes[0]);
|
||||
set_output_type(1, result_et, output_shapes[1]);
|
||||
set_output_type(0, result_et, {merged_batch_size, merged_hidden_size});
|
||||
set_output_type(1, result_et, {merged_batch_size, merged_hidden_size});
|
||||
}
|
||||
|
||||
Output<Node> op::v0::LSTMCell::get_default_bias_input() const {
|
||||
@@ -319,7 +414,15 @@ bool ngraph::op::v4::LSTMCell::visit_attributes(AttributeVisitor& visitor) {
|
||||
|
||||
void op::v4::LSTMCell::validate_and_infer_types() {
|
||||
NGRAPH_OP_SCOPE(v4_LSTMCell_validate_and_infer_types);
|
||||
|
||||
for (const auto& input : inputs()) {
|
||||
if (input.get_partial_shape().rank().is_dynamic()) {
|
||||
set_output_type(0, get_input_element_type(0), ov::PartialShape::dynamic());
|
||||
set_output_type(1, get_input_element_type(0), ov::PartialShape::dynamic());
|
||||
return;
|
||||
}
|
||||
}
|
||||
auto merged_batch_size = Dimension::dynamic();
|
||||
auto merged_hidden_size = Dimension::dynamic();
|
||||
auto result_et = element::dynamic;
|
||||
|
||||
// Get input partial shape for all inputs
|
||||
@@ -330,6 +433,12 @@ void op::v4::LSTMCell::validate_and_infer_types() {
|
||||
const auto& r_pshape = get_input_partial_shape(4);
|
||||
const auto& b_pshape = get_input_partial_shape(5);
|
||||
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
(ct_pshape.rank().get_length() == 2),
|
||||
"LSTMCell input tensor initial_cell_state shall have dimension 2D.");
|
||||
|
||||
validate_input_rank_dimension({x_pshape, ht_pshape, w_pshape, r_pshape, b_pshape});
|
||||
|
||||
// Validate input element types and save result for output type
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
element::Type::merge(result_et, result_et, get_input_element_type(0)) &&
|
||||
@@ -341,9 +450,54 @@ void op::v4::LSTMCell::validate_and_infer_types() {
|
||||
"Element types for X, initial_hidden_state, initial_cell_state, W, R and B do not "
|
||||
"match.");
|
||||
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}, ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {x_pshape, ht_pshape, ct_pshape, w_pshape, r_pshape, b_pshape};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
// Merge batch_size dimension across all inputs to evaluate output[0] dimension
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
Dimension::merge(merged_batch_size, merged_batch_size, ht_pshape[0]) &&
|
||||
Dimension::merge(merged_batch_size, merged_batch_size, ct_pshape[0]) &&
|
||||
Dimension::merge(merged_batch_size, merged_batch_size, x_pshape[0]),
|
||||
"Parameter batch_size not matched for X, initial_hidden_state or initial_cell_state "
|
||||
"inputs.");
|
||||
|
||||
// Merge hidden_size dimension across all inputs to evaluate output[1] dimension
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
Dimension::merge(merged_hidden_size, merged_hidden_size, ht_pshape[1]) &&
|
||||
Dimension::merge(merged_hidden_size, merged_hidden_size, ct_pshape[1]) &&
|
||||
Dimension::merge(merged_hidden_size, merged_hidden_size, r_pshape[1]),
|
||||
"Parameter hidden_size not matched for R, initial_hidden_state and initial_cell_state "
|
||||
"inputs.");
|
||||
|
||||
// Validate hidden_size value for W, R and P inputs
|
||||
if (merged_hidden_size.is_static()) {
|
||||
if (w_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
w_pshape[0].compatible(merged_hidden_size * s_gates_count),
|
||||
"Parameter hidden_size mistmatched in W input. Current value is: ",
|
||||
w_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_gates_count,
|
||||
".");
|
||||
}
|
||||
|
||||
if (r_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
r_pshape[0].compatible(merged_hidden_size * s_gates_count),
|
||||
"Parameter hidden_size mistmatched in R input. Current value is: ",
|
||||
r_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_gates_count,
|
||||
".");
|
||||
}
|
||||
|
||||
if (b_pshape[0].is_static()) {
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
b_pshape[0].compatible(merged_hidden_size * s_gates_count),
|
||||
"Parameter hidden_size mistmatched in B input. Current value is: ",
|
||||
b_pshape[0].get_length(),
|
||||
", expected: ",
|
||||
merged_hidden_size.get_length() * s_gates_count,
|
||||
".");
|
||||
}
|
||||
}
|
||||
|
||||
// Mark inputs which are relevant to output parameters
|
||||
set_input_is_relevant_to_shape(0);
|
||||
@@ -353,8 +507,8 @@ void op::v4::LSTMCell::validate_and_infer_types() {
|
||||
|
||||
// Set output size, type and shape
|
||||
set_output_size(2);
|
||||
set_output_type(0, result_et, output_shapes[0]);
|
||||
set_output_type(1, result_et, output_shapes[1]);
|
||||
set_output_type(0, result_et, {merged_batch_size, merged_hidden_size});
|
||||
set_output_type(1, result_et, {merged_batch_size, merged_hidden_size});
|
||||
}
|
||||
|
||||
Output<Node> op::v4::LSTMCell::get_default_bias_input() const {
|
||||
|
||||
@@ -4,8 +4,6 @@
|
||||
|
||||
#include "ngraph/op/read_value.hpp"
|
||||
|
||||
#include <read_value_shape_inference.hpp>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "ngraph/op/util/variable_context.hpp"
|
||||
#include "ngraph/ops.hpp"
|
||||
@@ -25,13 +23,8 @@ op::v3::ReadValue::ReadValue(const Output<Node>& init_value, const std::string&
|
||||
void op::v3::ReadValue::validate_and_infer_types() {
|
||||
NGRAPH_OP_SCOPE(v3_ReadValue_validate_and_infer_types);
|
||||
auto arg_t = get_input_element_type(0);
|
||||
auto input_shape = get_input_partial_shape(0);
|
||||
auto output_shape = get_input_partial_shape(0);
|
||||
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {input_shape};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
|
||||
const auto& output_shape = output_shapes[0];
|
||||
VariableInfo info = {output_shape, arg_t, m_variable_id};
|
||||
if (m_variable == nullptr)
|
||||
m_variable = std::make_shared<Variable>(info);
|
||||
@@ -61,11 +54,7 @@ op::v6::ReadValue::ReadValue(const Output<Node>& init_value, const shared_ptr<Va
|
||||
void op::v6::ReadValue::validate_and_infer_types() {
|
||||
NGRAPH_OP_SCOPE(v6_ReadValue_validate_and_infer_types);
|
||||
const auto arg_t = get_input_element_type(0);
|
||||
auto input_shape = get_input_partial_shape(0);
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {input_shape};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
const auto& output_shape = output_shapes[0];
|
||||
auto output_shape = get_input_partial_shape(0);
|
||||
NGRAPH_CHECK(m_variable, "Variable is not initialized.");
|
||||
VariableInfo var_info = {output_shape, element::dynamic, m_variable->get_info().variable_id};
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
#include "ngraph/op/tile.hpp"
|
||||
|
||||
#include <ngraph/validation_util.hpp>
|
||||
#include <tile_shape_inference.hpp>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "ngraph/op/constant.hpp"
|
||||
@@ -38,10 +37,37 @@ void op::v0::Tile::validate_and_infer_types() {
|
||||
"Tile repeats must have any integer element type, but has ",
|
||||
repeats_et);
|
||||
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {get_input_partial_shape(0), get_input_partial_shape(1)};
|
||||
shape_infer(this, input_shapes, output_shapes);
|
||||
set_output_type(0, arg_et, output_shapes[0]);
|
||||
auto arg_shape = get_input_partial_shape(0);
|
||||
auto repeats_shape = get_input_partial_shape(1);
|
||||
NODE_VALIDATION_CHECK(this, repeats_shape.rank().compatible(1), "PartialShape of repeats must be of rank 1");
|
||||
ov::PartialShape repeats_as_pshape;
|
||||
bool repeats_are_known = evaluate_as_partial_shape(get_input_source_output(1), repeats_as_pshape);
|
||||
std::vector<Dimension> repeats_value(repeats_as_pshape);
|
||||
if (repeats_are_known && !repeats_value.empty() && arg_shape.rank().is_static()) {
|
||||
std::vector<Dimension> data_shape(arg_shape);
|
||||
auto data_rank = data_shape.size();
|
||||
auto repeats_rank = repeats_value.size();
|
||||
auto output_rank = std::max(data_rank, repeats_rank);
|
||||
|
||||
// expand data shape and repeats to output rank
|
||||
data_shape.insert(data_shape.begin(), output_rank - data_rank, 1);
|
||||
repeats_value.insert(repeats_value.begin(), output_rank - repeats_rank, 1);
|
||||
|
||||
auto output_shape = ov::PartialShape::dynamic(output_rank);
|
||||
for (size_t i = 0; i < output_rank; i++)
|
||||
output_shape[i] = data_shape[i] * repeats_value[i];
|
||||
set_output_type(0, arg_et, output_shape);
|
||||
} else {
|
||||
Rank outRank = Rank::dynamic();
|
||||
if (arg_shape.rank().is_static() && repeats_shape.is_static()) {
|
||||
std::vector<Dimension> data_shape(arg_shape);
|
||||
auto data_rank = data_shape.size();
|
||||
auto repeats_rank = repeats_value.size();
|
||||
auto output_rank = std::max(data_rank, repeats_rank);
|
||||
outRank = Rank(output_rank);
|
||||
}
|
||||
set_output_type(0, arg_et, ov::PartialShape::dynamic(outRank));
|
||||
}
|
||||
|
||||
set_input_is_relevant_to_shape(0);
|
||||
set_input_is_relevant_to_shape(1);
|
||||
@@ -58,16 +84,24 @@ bool op::v0::Tile::evaluate_tile(const HostTensorVector& outputs, const HostTens
|
||||
const auto& axis = inputs[1];
|
||||
auto& output = outputs[0];
|
||||
auto repeats_val = read_index_vector(axis);
|
||||
const auto repeats_rank = repeats_val.size();
|
||||
auto repeats_rank = repeats_val.size();
|
||||
ov::Shape data_shape = data->get_shape();
|
||||
auto data_rank = data_shape.size();
|
||||
auto output_rank = std::max(data_rank, repeats_rank);
|
||||
|
||||
// expand data shape and repeats to output rank
|
||||
data_shape.insert(data_shape.begin(), output_rank - data_rank, 1);
|
||||
repeats_val.insert(repeats_val.begin(), output_rank - repeats_rank, 1);
|
||||
|
||||
ov::Shape output_shape(output_rank);
|
||||
for (size_t i = 0; i < output_rank; i++) {
|
||||
output_shape[i] = data_shape[i] * repeats_val[i];
|
||||
}
|
||||
|
||||
std::vector<ov::PartialShape> output_shapes = {ov::PartialShape{}};
|
||||
std::vector<ov::PartialShape> input_shapes = {data->get_shape(), axis->get_shape()};
|
||||
shape_infer(this, input_shapes, output_shapes, {{1, axis}});
|
||||
const auto& output_shape = output_shapes[0].to_shape();
|
||||
if (!output->get_is_allocated()) {
|
||||
output->set_shape(output_shape);
|
||||
}
|
||||
repeats_val.insert(repeats_val.begin(), output_shape.size() - repeats_rank, 1);
|
||||
|
||||
ngraph::runtime::reference::tile(data->get_data_ptr<const char>(),
|
||||
output->get_data_ptr<char>(),
|
||||
data->get_shape(),
|
||||
|
||||
@@ -53,9 +53,7 @@ TEST(type_prop, lstm_cell_invalid_input) {
|
||||
const auto lstm_cell = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
|
||||
FAIL() << "LSTMCell node was created with invalid data.";
|
||||
} catch (const NodeValidationFailure& error) {
|
||||
EXPECT_HAS_SUBSTRING(
|
||||
error.what(),
|
||||
std::string("Parameter hidden_size not matched for W, R, B, initial_hidden_state and initial_cell_state"));
|
||||
EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size mistmatched in W input."));
|
||||
}
|
||||
|
||||
// Invalid R tensor shape.
|
||||
@@ -66,7 +64,7 @@ TEST(type_prop, lstm_cell_invalid_input) {
|
||||
FAIL() << "LSTMCell node was created with invalid data.";
|
||||
} catch (const NodeValidationFailure& error) {
|
||||
EXPECT_HAS_SUBSTRING(error.what(),
|
||||
std::string("Parameter hidden_size not matched for W, R, B, "
|
||||
std::string("Parameter hidden_size not matched for R, "
|
||||
"initial_hidden_state and initial_cell_state inputs."));
|
||||
}
|
||||
|
||||
@@ -102,7 +100,7 @@ TEST(type_prop, lstm_cell_invalid_input) {
|
||||
const auto lstm_cell = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, B, hidden_size);
|
||||
FAIL() << "LSTMCell node was created with invalid data.";
|
||||
} catch (const NodeValidationFailure& error) {
|
||||
EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size not matched for W, R, B"));
|
||||
EXPECT_HAS_SUBSTRING(error.what(), std::string("Parameter hidden_size mistmatched in B input."));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -140,8 +138,8 @@ TEST(type_prop, lstm_cell_dynamic_hidden_size) {
|
||||
|
||||
const auto lstm_cell = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, 3);
|
||||
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(0), (PartialShape{batch_size, 3}));
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(1), (PartialShape{batch_size, 3}));
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(0), (PartialShape{batch_size, hidden_size}));
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(1), (PartialShape{batch_size, hidden_size}));
|
||||
EXPECT_EQ(lstm_cell->get_output_element_type(0), element::f32);
|
||||
EXPECT_EQ(lstm_cell->get_output_element_type(1), element::f32);
|
||||
}
|
||||
@@ -160,8 +158,8 @@ TEST(type_prop, lstm_cell_dynamic_inputs) {
|
||||
|
||||
const auto lstm_cell = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, 3);
|
||||
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(0), (PartialShape{batch_size, 3}));
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(1), (PartialShape{batch_size, 3}));
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(0), (PartialShape{batch_size, hidden_size}));
|
||||
EXPECT_EQ(lstm_cell->get_output_partial_shape(1), (PartialShape{batch_size, hidden_size}));
|
||||
EXPECT_EQ(lstm_cell->get_output_element_type(0), element::f32);
|
||||
EXPECT_EQ(lstm_cell->get_output_element_type(1), element::f32);
|
||||
}
|
||||
@@ -226,11 +224,9 @@ TEST(type_prop, lstm_cell_invalid_input_dynamic_rank) {
|
||||
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, hidden_size});
|
||||
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, hidden_size});
|
||||
|
||||
auto check_dynamic_lstm = [=](const shared_ptr<opset4::LSTMCell>& lstm) -> bool {
|
||||
const int64_t target_batch_size = batch_size;
|
||||
const int64_t target_hidden_size = hidden_size;
|
||||
return lstm->output(0).get_partial_shape() == PartialShape{target_batch_size, target_hidden_size} &&
|
||||
lstm->output(1).get_partial_shape() == PartialShape{target_batch_size, target_hidden_size} &&
|
||||
auto check_dynamic_lstm = [](const shared_ptr<opset4::LSTMCell>& lstm) -> bool {
|
||||
return lstm->output(0).get_partial_shape() == PartialShape::dynamic() &&
|
||||
lstm->output(1).get_partial_shape() == PartialShape::dynamic() &&
|
||||
lstm->output(0).get_element_type() == lstm->input(0).get_element_type();
|
||||
};
|
||||
|
||||
@@ -269,61 +265,3 @@ TEST(type_prop, lstm_cell_invalid_input_dynamic_rank) {
|
||||
lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, B, hidden_size);
|
||||
EXPECT_EQ(check_dynamic_lstm(lstm), true);
|
||||
}
|
||||
|
||||
TEST(type_prop, lstm_cell_shape_from_partial) {
|
||||
const size_t batch_size = 2;
|
||||
const size_t input_size = 3;
|
||||
const size_t hidden_size = 3;
|
||||
const size_t gates_count = 4;
|
||||
|
||||
auto check_dynamic_lstm = [=](const shared_ptr<opset4::LSTMCell>& lstm) -> bool {
|
||||
const int64_t target_batch_size = batch_size;
|
||||
const int64_t target_hidden_size = hidden_size;
|
||||
return lstm->output(0).get_partial_shape() == PartialShape{target_batch_size, target_hidden_size} &&
|
||||
lstm->output(1).get_partial_shape() == PartialShape{target_batch_size, target_hidden_size} &&
|
||||
lstm->output(0).get_element_type() == lstm->input(0).get_element_type();
|
||||
};
|
||||
{
|
||||
// from h & w
|
||||
auto X = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto W = make_shared<opset4::Parameter>(element::f32, PartialShape{gates_count * hidden_size, input_size});
|
||||
auto R = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, -1});
|
||||
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
|
||||
EXPECT_EQ(check_dynamic_lstm(lstm), true);
|
||||
}
|
||||
|
||||
{
|
||||
// from x & w
|
||||
auto X = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, input_size});
|
||||
auto W = make_shared<opset4::Parameter>(element::f32, PartialShape{gates_count * hidden_size, input_size});
|
||||
auto R = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
|
||||
EXPECT_EQ(check_dynamic_lstm(lstm), true);
|
||||
}
|
||||
|
||||
{
|
||||
// only valid rank for H_t tensor.
|
||||
auto X = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, input_size});
|
||||
auto W = make_shared<opset4::Parameter>(element::f32, PartialShape{gates_count * hidden_size, input_size});
|
||||
auto R = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, input_size});
|
||||
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
|
||||
EXPECT_EQ(check_dynamic_lstm(lstm), true);
|
||||
}
|
||||
|
||||
{
|
||||
// batch from x, hidden from h_t
|
||||
auto X = make_shared<opset4::Parameter>(element::f32, PartialShape{batch_size, input_size});
|
||||
auto W = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto R = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto H_t = make_shared<opset4::Parameter>(element::f32, PartialShape{-1, hidden_size});
|
||||
auto C_t = make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(Rank::dynamic()));
|
||||
auto lstm = make_shared<opset4::LSTMCell>(X, H_t, C_t, W, R, hidden_size);
|
||||
EXPECT_EQ(check_dynamic_lstm(lstm), true);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,11 +40,3 @@ TEST(type_prop, tile_few_repeats_dyn_input) {
|
||||
ASSERT_EQ(top->get_element_type(), element::f32);
|
||||
ASSERT_EQ(top->get_output_partial_shape(0), (PartialShape{6, Dimension(32, 40), 10}));
|
||||
}
|
||||
|
||||
TEST(type_prop, tile_out_rank_from_repeats) {
|
||||
auto param0 = make_shared<op::Parameter>(element::f32, Shape{6, 8, 10});
|
||||
auto param1 = make_shared<op::Parameter>(element::i32, Shape{5});
|
||||
auto top = make_shared<op::v0::Tile>(param0, param1);
|
||||
ASSERT_EQ(top->get_element_type(), element::f32);
|
||||
ASSERT_EQ(top->get_output_partial_shape(0).size(), 5);
|
||||
}
|
||||
|
||||
@@ -61,35 +61,55 @@ bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) const {
|
||||
std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
|
||||
std::ostringstream out;
|
||||
if (!isValid(h, w)) {
|
||||
out << "Unsupported " << what << " shape, actual WxH: " << w << "x" << h <<
|
||||
", only vertical vector up to 1x" << maxVectorHeight << ", horizontal up to " << maxVectorWidth <<
|
||||
"x1 or square up to " << maxSquare << "x" << maxSquare << " are valid\n";
|
||||
out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w <<
|
||||
", only vertical vector up to " << maxVectorHeight << "x1, horizontal up to 1x" << maxVectorWidth <<
|
||||
" or square up to " << maxSquare << "x" << maxSquare << " are valid\n";
|
||||
}
|
||||
return out.str();
|
||||
}
|
||||
|
||||
VectorOrSquareLimit VectorOrSquareLimitByChannels::GetByChannels(const uint32_t channels) const {
|
||||
return channels <= smallChannelMax ? smallChannel : bigChannel;
|
||||
|
||||
bool RectLimit::isValid(const uint32_t h, const uint32_t w) const {
|
||||
if (h >= 1 && h <= maxVectorHeight && w >= 1 && w <= maxVectorWidth) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool VectorOrSquareLimitByChannels::isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
|
||||
std::string RectLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
|
||||
std::ostringstream out;
|
||||
if (!isValid(h, w)) {
|
||||
out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w <<
|
||||
", only rectangular shapes up to " << maxVectorHeight << "x" << maxVectorWidth << " are valid\n";
|
||||
}
|
||||
return out.str();
|
||||
}
|
||||
|
||||
RectLimit RectLimitByChannels::GetByChannels(const uint32_t channels) const {
|
||||
for (auto&& limit : limitPerChannel) {
|
||||
if (limit.first >= channels) {
|
||||
return limit.second;
|
||||
}
|
||||
}
|
||||
return RectLimit{ 0, 0 };
|
||||
}
|
||||
|
||||
bool RectLimitByChannels::isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
|
||||
return GetByChannels(channels).isValid(h, w);
|
||||
}
|
||||
|
||||
std::string VectorOrSquareLimitByChannels::GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
std::string RectLimitByChannels::GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
const uint32_t channels, std::string what) const {
|
||||
return GetByChannels(channels).GetErrorOrEmpty(h, w, what);
|
||||
}
|
||||
|
||||
VectorOrSquareLimitByChannels VectorOrSquareLimitByChannelsAndPrecision::GetByPrecision(const OvGnaType precision) const {
|
||||
RectLimitByChannels RectLimitByChannelsAndPrecision::GetByPrecision(const OvGnaType precision) const {
|
||||
return precision == OvGnaTypeInt8 ? lowPrecision : defaultPrecision;
|
||||
}
|
||||
|
||||
bool VectorOrSquareLimitByChannelsAndPrecision::isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const {
|
||||
bool RectLimitByChannelsAndPrecision::isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const {
|
||||
return GetByPrecision(precision).isValid(h, w, channels);
|
||||
}
|
||||
|
||||
std::string VectorOrSquareLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
const OvGnaType precision, const uint32_t channels, std::string what) const {
|
||||
return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what);
|
||||
}
|
||||
|
||||
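The per-channel lookup introduced here returns the first limit whose channel threshold is at least the requested channel count; a quick sketch against the default-precision kernelLimit table defined further below in this diff (the table values are copied from the diff, the standalone call itself is illustrative):

    // Default-precision table from the Validator below.
    RectLimitByChannels table{{{96, {7, 7}}, {136, {7, 5}}, {168, {7, 4}}, {240, {7, 3}}, {384, {7, 2}}}};
    auto limit = table.GetByChannels(100);    // first entry with limit.first >= 100 -> RectLimit{7, 5}
    bool ok = table.isValid(7, 5, 100);       // true: 7x5 fits within 7x5
    bool too_wide = table.isValid(7, 6, 100); // false: width 6 exceeds maxVectorWidth 5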
@@ -67,6 +67,13 @@ struct RangeMultipleLimit : public RangeLimit {
|
||||
std::string GetErrorOrEmpty(const uint32_t val) const;
|
||||
};
|
||||
|
||||
struct RectLimit {
|
||||
uint32_t maxVectorHeight;
|
||||
uint32_t maxVectorWidth;
|
||||
bool isValid(const uint32_t h, const uint32_t w) const;
|
||||
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
|
||||
};
|
||||
|
||||
struct VectorOrSquareLimit {
|
||||
uint32_t maxSquare;
|
||||
uint32_t maxVectorHeight;
|
||||
@@ -75,20 +82,18 @@ struct VectorOrSquareLimit {
|
||||
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
|
||||
};
|
||||
|
||||
struct VectorOrSquareLimitByChannels {
|
||||
uint32_t smallChannelMax;
|
||||
VectorOrSquareLimit smallChannel;
|
||||
VectorOrSquareLimit bigChannel;
|
||||
VectorOrSquareLimit GetByChannels(const uint32_t channels) const;
|
||||
struct RectLimitByChannels {
|
||||
std::vector<std::pair<uint32_t, RectLimit> > limitPerChannel;
|
||||
RectLimit GetByChannels(const uint32_t channels) const;
|
||||
bool isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
|
||||
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
const uint32_t channels, std::string what) const;
|
||||
};
|
||||
|
||||
struct VectorOrSquareLimitByChannelsAndPrecision {
|
||||
VectorOrSquareLimitByChannels lowPrecision;
|
||||
VectorOrSquareLimitByChannels defaultPrecision;
|
||||
VectorOrSquareLimitByChannels GetByPrecision(const OvGnaType precision) const;
|
||||
struct RectLimitByChannelsAndPrecision {
|
||||
RectLimitByChannels lowPrecision;
|
||||
RectLimitByChannels defaultPrecision;
|
||||
RectLimitByChannels GetByPrecision(const OvGnaType precision) const;
|
||||
bool isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
|
||||
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
const OvGnaType precision, const uint32_t channels, std::string what) const;
|
||||
@@ -98,11 +103,20 @@ class Validator {
|
||||
RangeLimit2D inputHWLimit{ { 16, 384, "input height"} , { 16, 240, "input width"} };
|
||||
RangeMultipleLimit inputChannelsNumberLimit{ {8, 384, "number of input channels"}, 8 };
|
||||
|
||||
RangeMultipleLimit kernelNumberLimit{ {8, 256, "number of kernels"}, 8 };
|
||||
VectorOrSquareLimitByChannelsAndPrecision kernelLimit {
|
||||
{ 240, { 3, 7, 3 }, { 2, 7, 2 } },
|
||||
{ 120, { 3, 7, 3 }, { 1, 7, 1 } } };
|
||||
VectorOrSquareLimitByChannelsAndPrecision& strideLimit = kernelLimit;
|
||||
RangeMultipleLimit kernelNumberLimit{ {8, 1024, "number of kernels"}, 8 };
|
||||
RectLimitByChannelsAndPrecision kernelLimit {
|
||||
{ { {96, {7, 7}},
|
||||
{136, {7, 5}},
|
||||
{168, {7, 4}},
|
||||
{240, {7, 3}},
|
||||
{384, {7, 2}} } },
|
||||
{ { {48, {7, 7}},
|
||||
{64, {7, 5}},
|
||||
{80, {7, 4}},
|
||||
{120, {7, 3}},
|
||||
{384, {7, 1}} } },
|
||||
};
|
||||
RectLimitByChannelsAndPrecision& strideLimit = kernelLimit;
|
||||
RangeLimit2D dilationLimit{ {convDilationHeight, convDilationHeight, "dilation height" },
|
||||
{ convDilationWidth, convDilationWidth, "dilation width" } };
|
||||
const VectorOrSquareLimit poolingWindowLimit{ 3, 1, 1 };
|
||||
|
||||
@@ -30,9 +30,10 @@ double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
    using KRT = std::pair<uint32_t, double>;
    // Empirically determined weights reducers for 2D Convolution
    // i.e.:
    // for kernelSize >= 14 -> 1.7
    // for kernelSize >= 9 -> 1.3
    // for kernelSize in {7, 8} -> 1.2
    const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
    const std::vector< KRT > reducers{ {49, 3.0}, {36, 2.6}, {21, 2.3}, {14, 1.7}, {9, 1.3}, {7, 1.2} };
    auto reducer = 1.0;
    const auto inDepth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
    const auto inHeight = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::H);

@@ -135,6 +135,10 @@ def getting_samples_data_zip(url, samples_path, size_of_chunk=128):
|
||||
print("\nExtracting of samples_smoke_tests_data.zip...")
|
||||
with zipfile.ZipFile(samples_path, 'r') as samples_zip:
|
||||
samples_zip.extractall(Environment.env['smoke_tests_path'])
|
||||
nameFolder = str(Environment.env['samples_data_zip'])[Environment.env['samples_data_zip'].rfind('/')+1:][:-4]
|
||||
smoke_tests_path = os.path.join(Environment.env['smoke_tests_path'])
|
||||
if os.path.exists(os.path.join(smoke_tests_path,nameFolder)):
|
||||
os.rename(os.path.join(smoke_tests_path, nameFolder), os.path.join(smoke_tests_path, 'samples_smoke_tests_data') )
|
||||
if os.path.exists(samples_path):
|
||||
print("\nRemoving samples_smoke_tests_data.zip...")
|
||||
os.remove(samples_path)
|
||||
@@ -169,10 +173,16 @@ class SamplesCommonTestClass():
|
||||
|
||||
@staticmethod
|
||||
def reset_models_path(model):
|
||||
if ('FP32' in os.path.split(model)[0] or 'FP16' in os.path.split(model)[0]):
|
||||
model = search_model_path_recursively(config_key=Environment.env['icv_model_zoo_models'], model_name=model)
|
||||
else:
|
||||
model = os.path.join(Environment.env['public_models'], model)
|
||||
pathList = model.split(os.sep)
|
||||
modelName = pathList[len(pathList)-1]
|
||||
precision = pathList[len(pathList)-2]
|
||||
for root, subFolder, files in os.walk(Environment.env['models_path']):
|
||||
for item in files:
|
||||
if item.endswith(modelName) :
|
||||
if precision in root :
|
||||
model = str(os.path.join(root,item))
|
||||
else :
|
||||
model = os.path.join(Environment.env['models_path'], model)
|
||||
return model
|
||||
|
||||
@staticmethod
|
||||
@@ -328,10 +338,8 @@ class SamplesCommonTestClass():
|
||||
def setup_class(cls):
|
||||
getting_samples_data_zip(Environment.env['samples_data_zip'], Environment.env['samples_path'])
|
||||
assert os.environ.get('IE_APP_PATH') is not None, "IE_APP_PATH environment variable is not specified!"
|
||||
assert os.path.exists(Environment.env['public_models']), \
|
||||
"Path for public models {} is not exist!".format(Environment.env['public_models'])
|
||||
assert os.path.exists(Environment.env['icv_model_zoo_models']), \
|
||||
"Path for icv models {} is not exist!".format(Environment.env['icv_model_zoo_models'])
|
||||
assert os.path.exists(Environment.env['models_path']), \
|
||||
"Path for public models {} is not exist!".format(Environment.env['models_path'])
|
||||
assert os.path.exists(Environment.env['test_data']), \
|
||||
"Path for test data {} is not exist!".format(Environment.env['test_data'])
|
||||
cls.output_dir = Environment.env['out_directory']
|
||||
|
||||
@@ -45,7 +45,7 @@ def pytest_configure(config):
|
||||
try:
|
||||
Environment.env = fix_env_conf(yaml.safe_load(env_conf))
|
||||
# Check mandatory env variables:
|
||||
mandatory_env_varibales = ['out_directory', 'public_models', 'icv_model_zoo_models', 'test_data', 'samples_data_zip', 'smoke_tests_path', 'samples_path']
|
||||
mandatory_env_varibales = ['out_directory', 'models_path', 'test_data', 'samples_data_zip', 'smoke_tests_path', 'samples_path']
|
||||
missing_variables = []
|
||||
for variable in mandatory_env_varibales:
|
||||
if variable not in Environment.env:
|
||||
|
||||
@@ -1,9 +1,8 @@
out_directory: ${WORKSPACE}/out
public_models: ${SHARE}/models/public/
icv_model_zoo_models: ${SHARE}/models/omz_models/
models_path: ${SHARE}/models/
test_data: ${SHARE}/validation_set/
#Performance data:
perf_result_path: ${SHARE}/validation_set/performance_result/
samples_data_zip: "https://storage.openvinotoolkit.org/repositories/openvino/ci_dependencies/test/samples_smoke_tests_data.zip"
samples_data_zip: "https://storage.openvinotoolkit.org/repositories/openvino/ci_dependencies/test/2021.4/samples_smoke_tests_data_2021.4.zip"
smoke_tests_path: ${WORKSPACE}/tests/smoke_tests
samples_path: ${WORKSPACE}/tests/smoke_tests/samples_smoke_tests_data.zip

@@ -21,7 +21,7 @@ log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=s

test_data_fp32_async = get_tests \
(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'batch': [1],
'sample_type': ['C++', 'Python'],
'd': ['CPU'],
@@ -33,7 +33,7 @@ test_data_fp32_async = get_tests \

test_data_fp32_sync = get_tests \
(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'batch': [1],
'sample_type': ['C++', 'Python'],
'd': ['CPU'],

@@ -21,7 +21,7 @@ from common.samples_common_test_clas import get_tests
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)

test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'nt': ['1'],
'sample_type': ['C++','Python'],
'batch': [1, 2, 4],
@@ -30,7 +30,7 @@ test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')]
)

test_data_fp16 = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP16_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'nt': ['1'],
'sample_type': ['C++','Python'],
'batch': [1, 2, 4],

@@ -26,15 +26,13 @@ import shutil
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)

test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1',
'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'd': ['CPU'],
'sample_type': ['C++', 'C']},
use_device=['d'])

test_data_fp32_unicode = get_tests(cmd_params={'i': [os.path.join('227x227', 'dog.bmp')],
'm': [os.path.join('squeezenet1.1',
'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'd': ['CPU'],
'sample_type': ['C++', 'C']},
use_device=['d'])
@@ -91,8 +89,8 @@ class TestHello(SamplesCommonTestClass):

# Copy files
shutil.copy(Path(Environment.env['test_data']) / Path(param['i']), tmp_image_dir)
shutil.copy(Path(Environment.env['public_models']) / Path(param['m']), tmp_model_dir)
shutil.copy(Path(Environment.env['public_models']) / Path(param['m'].replace('.xml', '.bin')), tmp_model_dir)
shutil.copy(Path(Environment.env['models_path']) / 'public' / Path(param['m']), tmp_model_dir)
shutil.copy(Path(Environment.env['models_path']) / 'public' / Path(param['m'].replace('.xml', '.bin')), tmp_model_dir)

image_path = tmp_image_dir / Path(param['i']).name
original_image_name = image_path.name.split(sep='.')[0]

@@ -21,7 +21,7 @@ from common.samples_common_test_clas import SamplesCommonTestClass
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)

test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('224x224', 'dog6.yuv')],
'm': [os.path.join('squeezenet1.1', 'caffe_squeezenet_v1_1_FP32_batch_1_seqlen_[1]_v10.xml')],
'm': [os.path.join('squeezenet1.1', 'FP32', 'squeezenet1.1.xml')],
'size': ['224x224'],
'sample_type': ['C++', 'C'],
'd': ['CPU']},

@@ -21,8 +21,7 @@ from common.specific_samples_parsers import parse_hello_reshape_ssd
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)

test_data_fp32 = get_tests(cmd_params={'i': [os.path.join('500x500', 'cat.bmp')],
'm': [os.path.join('ssd300',
'caffe_ssd_300_FP32_v10.xml')],
'm': [os.path.join('ssd512', 'FP32', 'ssd512.xml')],
'd': ['CPU'],
'batch': [1, 2, 4]}, use_device=['d'], use_batch=True
)

@@ -21,7 +21,7 @@ from common.common_utils import parse_avg_err
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)

test_data_nthreads = get_tests(cmd_params={'i': [os.path.join('ark', 'dev93_10.ark')],
'm': [os.path.join('FP32', 'wsj_dnn5b.xml')],
'm': [os.path.join('wsj', 'FP32', 'wsj_dnn5b.xml')],
'bs': [1, 2],
'o': ['res_output.ark'],
'r': [os.path.join('ark', 'dev93_scores_10.ark')],
@@ -32,7 +32,7 @@ test_data_nthreads = get_tests(cmd_params={'i': [os.path.join('ark', 'dev93_10.a
)

test_data_nthreads_negative = get_tests(cmd_params={'i': [os.path.join('ark', 'dev93_10.ark')],
'm': [os.path.join('FP32', 'wsj_dnn5b.xml')],
'm': [os.path.join('wsj', 'FP32', 'wsj_dnn5b.xml')],
'bs': [1],
'o': ['res_output.ark'],
'r': [os.path.join('ark', 'dev93_scores_10.ark')],

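For orientation only: `get_tests` is defined in `common/samples_common_test_clas.py` and is not shown in this diff, but the `cmd_params` dictionaries above imply a cross-product expansion of parameter values, roughly like this sketch:

    import itertools

    def expand_cmd_params(cmd_params):
        # Every key maps to a list of candidate values; each combination becomes one test case.
        keys = list(cmd_params)
        for combination in itertools.product(*(cmd_params[key] for key in keys)):
            yield dict(zip(keys, combination))

    # e.g. 'batch': [1, 2, 4] and 'sample_type': ['C++', 'Python'] expand into 6 parameter sets.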
@@ -30,3 +30,243 @@
precision: FP16-INT8
framework: caffe2
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/faster_rcnn_resnet101_coco/tf/FP16/faster_rcnn_resnet101_coco.xml
name: faster_rcnn_resnet101_coco
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/faster_rcnn_resnet101_coco/tf/FP16-INT8/faster_rcnn_resnet101_coco.xml
name: faster_rcnn_resnet101_coco
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16/faster-rcnn-resnet101-coco-sparse-60-0001.xml
name: faster-rcnn-resnet101-coco-sparse-60-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/faster-rcnn-resnet101-coco-sparse-60-0001/tf/FP16-INT8/faster-rcnn-resnet101-coco-sparse-60-0001.xml
name: faster-rcnn-resnet101-coco-sparse-60-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16/googlenet-v1.xml
name: googlenet-v1
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/googlenet-v1/tf/FP16-INT8/googlenet-v1.xml
name: googlenet-v1
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16/googlenet-v3.xml
name: googlenet-v3
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/googlenet-v3/tf/FP16-INT8/googlenet-v3.xml
name: googlenet-v3
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16/ssd512.xml
name: ssd512
precision: FP16
framework: caffe
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/ssd512/caffe/FP16-INT8/ssd512.xml
name: ssd512
precision: FP16-INT8
framework: caffe
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16/yolo-v2-ava-0001.xml
name: yolo-v2-ava-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-0001/tf/FP16-INT8/yolo-v2-ava-0001.xml
name: yolo-v2-ava-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16/yolo-v2-ava-sparse-35-0001.xml
name: yolo-v2-ava-sparse-35-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-35-0001/tf/FP16-INT8/yolo-v2-ava-sparse-35-0001.xml
name: yolo-v2-ava-sparse-35-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16/yolo-v2-ava-sparse-70-0001.xml
name: yolo-v2-ava-sparse-70-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-ava-sparse-70-0001/tf/FP16-INT8/yolo-v2-ava-sparse-70-0001.xml
name: yolo-v2-ava-sparse-70-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16/yolo-v2-tiny-ava-0001.xml
name: yolo-v2-tiny-ava-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-0001/tf/FP16-INT8/yolo-v2-tiny-ava-0001.xml
name: yolo-v2-tiny-ava-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16/yolo-v2-tiny-ava-sparse-30-0001.xml
name: yolo-v2-tiny-ava-sparse-30-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-30-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-30-0001.xml
name: yolo-v2-tiny-ava-sparse-30-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16/yolo-v2-tiny-ava-sparse-60-0001.xml
name: yolo-v2-tiny-ava-sparse-60-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/yolo-v2-tiny-ava-sparse-60-0001/tf/FP16-INT8/yolo-v2-tiny-ava-sparse-60-0001.xml
name: yolo-v2-tiny-ava-sparse-60-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe2/FP16/squeezenet1.1.xml
name: squeezenet1.1
precision: FP16
framework: caffe2
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/squeezenet1.1/caffe2/FP16-INT8/squeezenet1.1.xml
name: squeezenet1.1
precision: FP16-INT8
framework: caffe2
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16/icnet-camvid-ava-0001.xml
name: icnet-camvid-ava-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-0001/tf/FP16-INT8/icnet-camvid-ava-0001.xml
name: icnet-camvid-ava-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16/icnet-camvid-ava-sparse-30-0001.xml
name: icnet-camvid-ava-sparse-30-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-30-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-30-0001.xml
name: icnet-camvid-ava-sparse-30-0001
precision: FP16-INT8
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16/icnet-camvid-ava-sparse-60-0001.xml
name: icnet-camvid-ava-sparse-60-0001
precision: FP16
framework: tf
use_model_cache: true
- device:
name: CPU
model:
path: ${VPUX_MODELS_PKG}/icnet-camvid-ava-sparse-60-0001/tf/FP16-INT8/icnet-camvid-ava-sparse-60-0001.xml
name: icnet-camvid-ava-sparse-60-0001
precision: FP16-INT8
framework: tf
use_model_cache: true

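A rough sketch (the loader is an assumption; the entry layout follows `instance["device"]["name"]` as used by test_timetest.py further below) of iterating such entries once `${VPUX_MODELS_PKG}` is resolved:

    import os
    import yaml

    with open("test_config.yml") as config_file:          # file name per the conftest default below
        entries = yaml.safe_load(config_file)

    for entry in entries:
        model = entry["model"]
        model_path = os.path.expandvars(model["path"])    # resolves ${VPUX_MODELS_PKG}
        print(entry["device"]["name"], model["name"], model["precision"], model_path)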
@@ -58,15 +58,33 @@ def aggregate_stats(stats: dict):

def prepare_executable_cmd(args: dict):
"""Generate common part of cmd from arguments to execute"""
return [str(args["executable"].resolve(strict=True)),
"-m", str(args["model"].resolve(strict=True)),
"-d", args["device"]]
return [
str(args["executable"].resolve(strict=True)),
"-m", str(args["model"].resolve(strict=True)),
"-d", args["device"],
"-p", args["perf_hint"],
"-v" if args["vpu_compiler"] else "", args['vpu_compiler'] if args["vpu_compiler"] else "",
"-c" if args["cpu_cache"] else "",
]

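For illustration (paths and values are placeholders), the updated `prepare_executable_cmd` yields a flat argument list; note that switched-off options contribute empty strings rather than being dropped:

    from pathlib import Path

    args = {"executable": Path("./timetest_infer"), "model": Path("model.xml"),
            "device": "CPU", "perf_hint": "LATENCY", "cpu_cache": True, "vpu_compiler": ""}
    # prepare_executable_cmd(args) would return roughly:
    # ['<abs>/timetest_infer', '-m', '<abs>/model.xml', '-d', 'CPU', '-p', 'LATENCY', '', '', '-c']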
def get_cache_stats(flatten_data):
"""Update statistics for run with models cache"""
data_cache = {
"full_run_using_cache": flatten_data["full_run"],
"time_to_inference_using_cache": flatten_data["time_to_inference"],
"load_plugin": flatten_data["load_plugin"],
"load_network_using_cache": flatten_data["load_network"],
"first_inference": flatten_data["first_inference"],
"fill_inputs": flatten_data["fill_inputs"],
}
return data_cache

def run_timetest(args: dict, log=None):
"""Run provided executable several times and aggregate collected statistics"""
if log is None:
log = logging.getLogger('run_timetest')
log = logging.getLogger("run_timetest")

cmd_common = prepare_executable_cmd(args)

@@ -90,6 +108,9 @@ def run_timetest(args: dict, log=None):
flatten_data = {}
parse_stats(raw_data[0], flatten_data)

if run_iter > 0 and args["cpu_cache"]:
flatten_data = get_cache_stats(flatten_data)

log.debug(f"Statistics after run of executable #{run_iter}: {flatten_data}")

# Combine statistics from several runs
@@ -108,29 +129,45 @@ def run_timetest(args: dict, log=None):

def cli_parser():
"""parse command-line arguments"""
parser = argparse.ArgumentParser(description='Run timetest executable')
parser.add_argument('executable',
parser = argparse.ArgumentParser(description="Run timetest executable")
parser.add_argument("executable",
type=Path,
help='binary to execute')
parser.add_argument('-m',
help="Binary to execute")
parser.add_argument("-m",
required=True,
dest="model",
type=Path,
help='path to an .xml/.onnx file with a trained model or'
' to a .blob files with a trained compiled model')
parser.add_argument('-d',
help="Path to an .xml/.onnx file with a trained model or"
" to a .blob files with a trained compiled model")
parser.add_argument("-d",
required=True,
dest="device",
type=str,
help='target device to infer on')
parser.add_argument('-niter',
help="Target device to infer on")
parser.add_argument("-niter",
default=10,
type=check_positive_int,
help='number of times to execute binary to aggregate statistics of')
parser.add_argument('-s',
help="Number of times to execute binary to aggregate statistics of")
parser.add_argument("-s",
dest="stats_path",
type=Path,
help='path to a file to save aggregated statistics')
help="path to a file to save aggregated statistics")
parser.add_argument("-p",
dest="perf_hint",
choices=["LATENCY", "THROUGHPUT"],
default="LATENCY",
type=str,
help="Enables performance hint for specified device. Default hint is LATENCY")
exclusive_group = parser.add_mutually_exclusive_group(required=False)
exclusive_group.add_argument("-c",
dest="cpu_cache",
action="store_true",
help="Enable CPU model cache usage")
exclusive_group.add_argument("-v",
dest="vpu_compiler",
choices=["MCM", "MLIR"],
type=str,
help="Change VPUX compiler type")

args = parser.parse_args()

@@ -143,6 +180,12 @@ if __name__ == "__main__":
logging.basicConfig(format="[ %(levelname)s ] %(message)s",
level=logging.DEBUG, stream=sys.stdout)

assert not (args.cpu_cache and args.device != "CPU"), \
"The cache option is used only for the CPU device."

assert not (args.vpu_compiler and "VPUX" not in args.device), \
"The VPUX compiler option is used only for the VPUX device."

exit_code, _, aggr_stats, _ = run_timetest(
dict(args._get_kwargs()), log=logging)  # pylint: disable=protected-access
if args.stats_path:
@@ -159,15 +202,15 @@ if __name__ == "__main__":

def test_timetest_parser():
# Example of timetest yml file
raw_data_example = [{'full_run': [1, {'first_inference_latency': [2, {'load_plugin': [3]}, {
'create_exenetwork': [4, {'read_network': [5]}, {'load_network': [6]}]}]},
{'first_inference': [7, {'fill_inputs': [8]}]}]}]
raw_data_example = [{"full_run": [1, {"first_inference_latency": [2, {"load_plugin": [3]}, {
"create_exenetwork": [4, {"read_network": [5]}, {"load_network": [6]}]}]},
{"first_inference": [7, {"fill_inputs": [8]}]}]}]

# Refactoring raw data from yml
flatten_dict = {}
parse_stats(raw_data_example, flatten_dict)

expected_result = {'full_run': 1, 'first_inference_latency': 2, 'load_plugin': 3, 'create_exenetwork': 4,
'read_network': 5, 'load_network': 6, 'first_inference': 7, 'fill_inputs': 8}
expected_result = {"full_run": 1, "first_inference_latency": 2, "load_plugin": 3, "create_exenetwork": 4,
"read_network": 5, "load_network": 6, "first_inference": 7, "fill_inputs": 8}

assert flatten_dict == expected_result, "Statistics parsing is performed incorrectly!"

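A hedged usage sketch (paths are placeholders) of calling the updated script programmatically, mirroring the `__main__` block above and the argument keys built in test_timetest.py:

    import logging
    from pathlib import Path
    from scripts.run_timetest import run_timetest

    exe_args = {"executable": Path("./timetest_infer"), "model": Path("model.xml"),
                "device": "CPU", "niter": 3, "perf_hint": "LATENCY",
                "cpu_cache": False, "vpu_compiler": ""}
    exit_code, msg, aggr_stats, raw_stats = run_timetest(exe_args, log=logging)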
@@ -17,51 +17,87 @@ using namespace InferenceEngine;
* main(). The function should not throw any exceptions and responsible for
* handling it by itself.
*/
int runPipeline(const std::string &model, const std::string &device) {
auto pipeline = [](const std::string &model, const std::string &device) {
int runPipeline(const std::string &model, const std::string &device, const std::string &performanceHint,
const bool isCacheEnabled, const std::string &vpuCompiler) {
auto pipeline = [](const std::string &model, const std::string &device, const std::string &performanceHint,
const bool isCacheEnabled, const std::string &vpuCompiler) {
Core ie;
CNNNetwork cnnNetwork;
ExecutableNetwork exeNetwork;
InferRequest inferRequest;
size_t batchSize = 0;

if (!performanceHint.empty()) {
std::vector<std::string> supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));

// enables performance hint for specified device
std::string performanceConfig;
if (performanceHint == "THROUGHPUT")
performanceConfig = CONFIG_VALUE(THROUGHPUT);
else if (performanceHint == "LATENCY")
performanceConfig = CONFIG_VALUE(LATENCY);

if (std::find(supported_config_keys.begin(), supported_config_keys.end(), "PERFORMANCE_HINT") ==
supported_config_keys.end()) {
std::cerr << "Device " << device << " doesn't support config key 'PERFORMANCE_HINT'!\n"
<< "Performance config was not set.";
}
else
ie.SetConfig({{CONFIG_KEY(PERFORMANCE_HINT), performanceConfig}}, device);
}

// set config for VPUX device
std::map<std::string, std::string> vpuConfig = {};
if (vpuCompiler == "MCM")
vpuConfig = {{"VPUX_COMPILER_TYPE", "MCM"}};
else if (vpuCompiler == "MLIR")
vpuConfig = {{"VPUX_COMPILER_TYPE", "MLIR"}};

// first_inference_latency = time_to_inference + first_inference
{
SCOPED_TIMER(first_inference_latency);
SCOPED_TIMER(time_to_inference);
{
SCOPED_TIMER(load_plugin);
ie.GetVersions(device);
// enables performance hint for specified device
ie.SetConfig({{CONFIG_KEY(PERFORMANCE_HINT), CONFIG_VALUE(LATENCY)}}, device);

if (isCacheEnabled)
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "models_cache"}});
}
{
SCOPED_TIMER(create_exenetwork);
if (TimeTest::fileExt(model) == "blob") {
SCOPED_TIMER(import_network);
exeNetwork = ie.ImportNetwork(model, device);
if (!isCacheEnabled) {
SCOPED_TIMER(create_exenetwork);

if (TimeTest::fileExt(model) == "blob") {
SCOPED_TIMER(import_network);
exeNetwork = ie.ImportNetwork(model, device);
}
else {
{
SCOPED_TIMER(read_network);
cnnNetwork = ie.ReadNetwork(model);
batchSize = cnnNetwork.getBatchSize();
}

{
SCOPED_TIMER(load_network);
exeNetwork = ie.LoadNetwork(cnnNetwork, device, vpuConfig);
}
}
}
else {
{
SCOPED_TIMER(read_network);
cnnNetwork = ie.ReadNetwork(model);
batchSize = cnnNetwork.getBatchSize();
}

{
SCOPED_TIMER(load_network);
exeNetwork = ie.LoadNetwork(cnnNetwork, device);
}
SCOPED_TIMER(load_network);
exeNetwork = ie.LoadNetwork(model, device);
}
}
inferRequest = exeNetwork.CreateInferRequest();
}

{
SCOPED_TIMER(first_inference);
inferRequest = exeNetwork.CreateInferRequest();

{
SCOPED_TIMER(fill_inputs)
batchSize = batchSize != 0 ? batchSize : 1;
SCOPED_TIMER(fill_inputs);
const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
batchSize = batchSize != 0 ? batchSize : 1;
fillBlobs(inferRequest, inputsInfo, batchSize);
}
inferRequest.Infer();
@@ -69,7 +105,7 @@ int runPipeline(const std::string &model, const std::string &device) {
};

try {
pipeline(model, device);
pipeline(model, device, performanceHint, isCacheEnabled, vpuCompiler);
} catch (const InferenceEngine::Exception &iex) {
std::cerr
<< "Inference Engine pipeline failed with Inference Engine exception:\n"

@@ -1,68 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <inference_engine.hpp>
#include <ie_plugin_config.hpp>
#include <iostream>

#include "common_utils.h"
#include "timetests_helper/timer.h"
#include "timetests_helper/utils.h"
using namespace InferenceEngine;

/**
* @brief Function that contain executable pipeline which will be called from
* main(). The function should not throw any exceptions and responsible for
* handling it by itself.
*/
int runPipeline(const std::string &model, const std::string &device) {
auto pipeline = [](const std::string &model, const std::string &device) {
Core ie;
CNNNetwork cnnNetwork;
ExecutableNetwork exeNetwork;
InferRequest inferRequest;

{
SCOPED_TIMER(first_inference_latency);
{
SCOPED_TIMER(load_plugin);
ie.GetVersions(device);
}
{
SCOPED_TIMER(load_network);
// enables cache
ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "models_cache"}});
exeNetwork = ie.LoadNetwork(model, device);
}
{
SCOPED_TIMER(first_inference);
inferRequest = exeNetwork.CreateInferRequest();
{
SCOPED_TIMER(fill_inputs)
const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
fillBlobs(inferRequest, inputsInfo, 1);
}
inferRequest.Infer();
}
}
};

try {
pipeline(model, device);
} catch (const InferenceEngine::Exception &iex) {
std::cerr
<< "Inference Engine pipeline failed with Inference Engine exception:\n"
<< iex.what();
return 1;
} catch (const std::exception &ex) {
std::cerr << "Inference Engine pipeline failed with exception:\n"
<< ex.what();
return 2;
} catch (...) {
std::cerr << "Inference Engine pipeline failed\n";
return 3;
}
return 0;
}
@@ -1,84 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <inference_engine.hpp>
#include <iostream>

#include "common_utils.h"
#include "timetests_helper/timer.h"
#include "timetests_helper/utils.h"
using namespace InferenceEngine;

/**
* @brief Function that contain executable pipeline which will be called from
* main(). The function should not throw any exceptions and responsible for
* handling it by itself.
*/
int runPipeline(const std::string &model, const std::string &device) {
auto pipeline = [](const std::string &model, const std::string &device) {
Core ie;
CNNNetwork cnnNetwork;
ExecutableNetwork exeNetwork;
InferRequest inferRequest;
size_t batchSize = 0;

{
SCOPED_TIMER(first_inference_latency);
{
SCOPED_TIMER(load_plugin);
ie.GetVersions(device);
}
{
SCOPED_TIMER(create_exenetwork);
if (TimeTest::fileExt(model) == "blob") {
SCOPED_TIMER(import_network);
exeNetwork = ie.ImportNetwork(model, device);
}
else {
{
SCOPED_TIMER(read_network);
cnnNetwork = ie.ReadNetwork(model);
batchSize = cnnNetwork.getBatchSize();
}

{
SCOPED_TIMER(load_network);
exeNetwork = ie.LoadNetwork(cnnNetwork, device, {{"VPUX_COMPILER_TYPE", "MLIR"}});
}
}
}
}

{
SCOPED_TIMER(first_inference);
inferRequest = exeNetwork.CreateInferRequest();

{
SCOPED_TIMER(fill_inputs)
batchSize = batchSize != 0 ? batchSize : 1;
const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
fillBlobs(inferRequest, inputsInfo, batchSize);
}
inferRequest.Infer();
}
};

try {
pipeline(model, device);
} catch (const InferenceEngine::Exception &iex) {
std::cerr
<< "Inference Engine pipeline failed with Inference Engine exception:\n"
<< iex.what();
return 1;
} catch (const std::exception &ex) {
std::cerr << "Inference Engine pipeline failed with exception:\n"
<< ex.what();
return 2;
} catch (...) {
std::cerr << "Inference Engine pipeline failed\n";
return 3;
}
return 0;
}
@@ -26,6 +26,18 @@ static const char target_device_message[] =
"plugin. "
"The application looks for a suitable plugin for the specified device.";

/// @brief message for performance hint argument
static const char performance_hint_message[] =
"Not required. Enables performance hint for specified device. Available hints are LATENCY and THROUGHPUT.";

/// @brief message for cache argument
static const char cpu_cache_message[] =
"Not required. Use this key to run timetests with CPU models caching.";

/// @brief message for vpu argument
static const char vpu_compiler_message[] =
"Not required. Use this key to run timetests using MLIR or MCM VPUX compiler type.";

/// @brief message for statistics path argument
static const char statistics_path_message[] =
"Required. Path to a file to write statistics.";
@@ -44,6 +56,18 @@ DEFINE_string(m, "", model_message);
/// It is a required parameter
DEFINE_string(d, "", target_device_message);

/// @brief Define parameter for set performance hint for target device <br>
/// It is a non-required parameter
DEFINE_string(p, "", performance_hint_message);

/// @brief Define parameter for set CPU models caching <br>
/// It is a non-required parameter
DEFINE_bool(c, false, cpu_cache_message);

/// @brief Define parameter VPU compiler type <br>
/// It is a non-required parameter
DEFINE_string(v, "", vpu_compiler_message);

/// @brief Define parameter for set path to a file to write statistics <br>
/// It is a required parameter
DEFINE_string(s, "", statistics_path_message);
@@ -56,10 +80,13 @@ static void showUsage() {
std::cout << "TimeTests [OPTION]" << std::endl;
std::cout << "Options:" << std::endl;
std::cout << std::endl;
std::cout << " -h, --help " << help_message << std::endl;
std::cout << " -h, --help " << help_message << std::endl;
std::cout << " -m \"<path>\" " << model_message << std::endl;
std::cout << " -d \"<device>\" " << target_device_message
<< std::endl;
std::cout << " -s \"<path>\" " << statistics_path_message
<< std::endl;
std::cout << " -p \"<perf_hint>\" " << performance_hint_message << std::endl;
std::cout << " -c " << cpu_cache_message << std::endl;
std::cout << " -v \"<compiler_type>\" " << vpu_compiler_message << std::endl;
}

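As an illustration only (binary name and paths are assumptions), the flags defined above could be driven from a Python wrapper like this:

    import subprocess

    cmd = ["./timeTests", "-m", "model.xml", "-d", "CPU",
           "-p", "LATENCY",          # performance hint: LATENCY or THROUGHPUT
           "-c",                     # enable CPU models caching
           "-s", "statistics.yml"]   # file to write statistics to
    subprocess.run(cmd, check=True)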
@@ -8,7 +8,8 @@

#include <iostream>

int runPipeline(const std::string &model, const std::string &device);
int runPipeline(const std::string &model, const std::string &device, const std::string &performanceHint,
const bool isCacheEnabled, const std::string &vpuCompiler);

/**
* @brief Parses command line and check required arguments
@@ -40,7 +41,7 @@ bool parseAndCheckCommandLine(int argc, char **argv) {
*/
int _runPipeline() {
SCOPED_TIMER(full_run);
return runPipeline(FLAGS_m, FLAGS_d);
return runPipeline(FLAGS_m, FLAGS_d, FLAGS_p, FLAGS_c, FLAGS_v);
}

/**
@@ -54,4 +55,4 @@ int main(int argc, char **argv) {
StatisticsWriter::Instance().setFile(FLAGS_s);
StatisticsWriter::Instance().write();
return status;
}
}

@@ -43,7 +43,7 @@ def pytest_addoption(parser):
test_args_parser.addoption(
"--test_conf",
type=Path,
help="path to a test config",
help="Path to a test config",
default=Path(__file__).parent / "test_config.yml"
)
test_args_parser.addoption(
@@ -51,20 +51,38 @@ def pytest_addoption(parser):
required=True,
dest="executable",
type=Path,
help="path to a timetest binary to execute"
help="Path to a timetest binary to execute"
)
test_args_parser.addoption(
"--niter",
type=check_positive_int,
help="number of iterations to run executable and aggregate results",
help="Number of iterations to run executable and aggregate results",
default=3
)
test_args_parser.addoption(
"--cpu_cache",
action='store_true',
help="Enable model CPU cache usage",
)
test_args_parser.addoption(
"--perf_hint",
choices=['LATENCY', 'THROUGHPUT'],
default='LATENCY',
type=str,
help='Enables performance hint for specified device. Default hint is LATENCY'
)
test_args_parser.addoption(
"--vpu_compiler",
choices=["MCM", "MLIR"],
type=str,
help="Change VPUX compiler type",
)
db_args_parser = parser.getgroup("timetest database use")
db_args_parser.addoption(
'--db_submit',
metavar="RUN_ID",
type=str,
help='submit results to the database. ' \
help='Submit results to the database. ' \
'`RUN_ID` should be a string uniquely identifying the run' \
' (like Jenkins URL or time)'
)
@@ -79,19 +97,21 @@ def pytest_addoption(parser):
'--db_collection',
type=str,
required=is_db_used,
help='collection name in database',
help='Collection name in database',
choices=DB_COLLECTIONS
)
db_args_parser.addoption(
'--db_metadata',
type=str,
default=None,
help='path to JSON-formatted file to extract additional information')
help='Path to JSON-formatted file to extract additional information'
)
db_args_parser.addoption(
'--manifest',
type=Path,
required=is_db_used,
help='path to build manifest to extract commit information')
help='Path to build manifest to extract commit information'
)

@pytest.fixture(scope="session")
@@ -112,8 +132,26 @@ def niter(request):
return request.config.getoption('niter')

@pytest.fixture(scope="session")
def cpu_cache(request):
"""Fixture function for command-line option."""
return request.config.getoption('cpu_cache')

@pytest.fixture(scope="session")
def perf_hint(request):
"""Fixture function for command-line option."""
return request.config.getoption('perf_hint')

@pytest.fixture(scope="session")
def vpu_compiler(request):
"""Fixture function for command-line option."""
return request.config.getoption('vpu_compiler')

# -------------------- CLI options --------------------

@pytest.fixture(scope="function")
def temp_dir(pytestconfig):
"""Create temporary directory for test purposes.

@@ -34,14 +34,17 @@ from scripts.run_timetest import run_timetest
REFS_FACTOR = 1.2  # 120%

def test_timetest(instance, executable, niter, cl_cache_dir, model_cache_dir, test_info, temp_dir, validate_test_case,
prepare_db_info):
def test_timetest(instance, executable, niter, cl_cache_dir, cpu_cache, vpu_compiler, perf_hint, model_cache_dir,
test_info, temp_dir, validate_test_case, prepare_db_info):
"""Parameterized test.

:param instance: test instance. Should not be changed during test run
:param executable: timetest executable to run
:param niter: number of times to run executable
:param cl_cache_dir: directory to store OpenCL cache
:param cpu_cache: flag to enable model CPU cache
:param vpu_compiler: flag to change VPUX compiler type
:param perf_hint: performance hint (optimize device for latency or throughput settings)
:param model_cache_dir: directory to store IE model cache
:param test_info: custom `test_info` field of built-in `request` pytest fixture
:param temp_dir: path to a temporary directory. Will be cleaned up after test run
@@ -63,7 +66,10 @@ def test_timetest(instance, executable, niter, cl_cache_dir, model_cache_dir, te
"executable": Path(executable),
"model": Path(model_path),
"device": instance["device"]["name"],
"niter": niter
"niter": niter,
"perf_hint": perf_hint,
"cpu_cache": cpu_cache,
"vpu_compiler": vpu_compiler if vpu_compiler else ""
}
logging.info("Run timetest once to generate any cache")
retcode, msg, _, _ = run_timetest({**exe_args, "niter": 1}, log=logging)

@@ -5,6 +5,8 @@ import os
import sys
from datetime import datetime

from openvino.runtime import Dimension

from openvino.tools.benchmark.benchmark import Benchmark
from openvino.tools.benchmark.parameters import parse_args
from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, \
@@ -15,8 +17,8 @@ from openvino.tools.benchmark.utils.progress_bar import ProgressBar
from openvino.tools.benchmark.utils.utils import next_step, get_number_iterations, pre_post_processing, \
process_help_inference_string, print_perf_counters, dump_exec_graph, get_duration_in_milliseconds, \
get_command_line_arguments, parse_nstreams_value_per_device, parse_devices, get_inputs_info, \
print_inputs_and_outputs_info, get_batch_size, load_config, dump_config, get_latency_groups, \
check_for_static
print_inputs_and_outputs_info, get_network_batch_size, load_config, dump_config, get_latency_groups, \
check_for_static, can_measure_as_static
from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, averageCntReport, detailedCntReport

@@ -225,9 +227,7 @@ def run(args):
('load network time (ms)', duration_ms)
])
app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.input_scale, args.input_mean, exe_network.get_runtime_function().get_parameters())
batch_size = get_batch_size(app_inputs_info)
if batch_size.is_dynamic and benchmark.api_type == 'sync':
raise Exception("Dynamic batch size is supported only in async mode")
batch_size = get_network_batch_size(app_inputs_info)
elif not is_network_compiled:
# --------------------- 4. Read the Intermediate Representation of the network -----------------------------
next_step()
@@ -262,10 +262,7 @@ def run(args):
])

# use batch size according to provided layout and shapes
batch_size = get_batch_size(app_inputs_info)
if batch_size.is_dynamic and benchmark.api_type == 'sync':
raise Exception("Dynamic batch size is supported only in async mode")

batch_size = get_network_batch_size(app_inputs_info)
logger.info(f'Network batch size: {batch_size}')

# --------------------- 6. Configuring inputs and outputs of the model --------------------------------------------------
@@ -307,10 +304,7 @@ def run(args):
('import network time (ms)', duration_ms)
])
app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.input_scale, args.input_mean, exe_network.get_runtime_function().get_parameters())
batch_size = get_batch_size(app_inputs_info)
if batch_size.is_dynamic and benchmark.api_type == 'sync':
raise Exception("Dynamic batch size is supported only in async mode")

batch_size = get_network_batch_size(app_inputs_info)

# --------------------- 8. Querying optimal runtime parameters --------------------------------------------------
next_step()
@@ -353,7 +347,8 @@ def run(args):
data_queue = get_input_data(paths_to_input, app_inputs_info)

static_mode = check_for_static(app_inputs_info)
if not static_mode and benchmark.api_type == 'sync':
allow_inference_only_or_sync = can_measure_as_static(app_inputs_info)
if not allow_inference_only_or_sync and benchmark.api_type == 'sync':
raise Exception("Benchmarking of the model with dynamic shapes is available for async API only."
"Please use -api async -nstreams 1 -nireq 1 to emulate sync behavior.")

@@ -362,9 +357,13 @@ def run(args):
benchmark.inference_only = True
else:
benchmark.inference_only = False
elif benchmark.inference_only and not static_mode:
elif benchmark.inference_only and not allow_inference_only_or_sync:
raise Exception("Benchmarking dynamic model available with input filling in measurement loop only!")

# update batch size in case dynamic network with one data_shape
if benchmark.inference_only and batch_size.is_dynamic:
batch_size = Dimension(data_queue.batch_sizes[data_queue.current_group_id])

benchmark.latency_groups = get_latency_groups(app_inputs_info)

if len(benchmark.latency_groups) > 1:

@@ -236,11 +236,17 @@ def get_duration_in_secs(target_device):

def check_for_static(app_input_info):
is_static = True
for info in app_input_info:
if info.is_dynamic:
return False
return is_static
return True

def can_measure_as_static(app_input_info):
for info in app_input_info:
if info.is_dynamic and (len(info.shapes) > 1 or info.original_shape.is_static):
return False
return True

def parse_devices(device_string):
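A quick sketch (the dummy object is a stand-in for `AppInputInfo`, not the real class) of how the new `can_measure_as_static` differs from `check_for_static` for a dynamic input that was given exactly one data shape:

    from types import SimpleNamespace

    dyn_input = SimpleNamespace(is_dynamic=True,
                                shapes=[(1, 3, 224, 224)],                        # one concrete data shape
                                original_shape=SimpleNamespace(is_static=False))  # genuinely dynamic model input

    print(check_for_static([dyn_input]))       # False - at least one input is dynamic
    print(can_measure_as_static([dyn_input]))  # True - single data shape on an originally dynamic input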
@@ -428,6 +434,7 @@ class AppInputInfo:
def __init__(self):
self.element_type = None
self.layout = Layout()
self.original_shape = None
self.partial_shape = None
self.data_shapes = []
self.scale = []
@@ -550,6 +557,7 @@ def get_inputs_info(shape_string, data_shape_string, layout_string, batch_size,
# Input name
info.name = input_names[i]
# Shape
info.original_shape = parameters[i].get_partial_shape()
if info.name in shape_map.keys():
info.partial_shape = parse_partial_shape(shape_map[info.name])
reshape = True
@@ -625,7 +633,7 @@ def get_inputs_info(shape_string, data_shape_string, layout_string, batch_size,
return input_info, reshape

def get_batch_size(inputs_info):
def get_network_batch_size(inputs_info):
null_dimension = Dimension(0)
batch_size = null_dimension
for info in inputs_info:

@@ -124,12 +124,14 @@ class CanonicalizePathCheckExistenceIfNeededAction(CanonicalizePathCheckExistenc

class DeprecatedCanonicalizePathCheckExistenceAction(CanonicalizePathCheckExistenceAction):
def __call__(self, parser, namespace, values, option_string=None):
super().__call__(parser, namespace, values, option_string)
dep_msg = "Use of deprecated cli option {} detected. Option use in the following releases will be fatal. ".format(
option_string)
if 'tensorflow_use_custom_operations_config' in option_string:
dep_msg += 'Please use --transformations_config cli option instead'
if 'mean_file' in option_string or 'mean_offset' in option_string:
dep_msg += 'Please use --mean_values cli option instead.'
log.error(dep_msg, extra={'is_warning': True})
super().__call__(parser, namespace, values, option_string)

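For reference, a small reconstruction (exact logger output may differ) of the warning this action builds when `--mean_file` is passed:

    option_string = '--mean_file'
    dep_msg = "Use of deprecated cli option {} detected. Option use in the following releases will be fatal. ".format(option_string)
    if 'mean_file' in option_string or 'mean_offset' in option_string:
        dep_msg += 'Please use --mean_values cli option instead.'
    print(dep_msg)
    # Use of deprecated cli option --mean_file detected. Option use in the following releases will be fatal. Please use --mean_values cli option instead.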
def readable_file(path: str):
@@ -377,7 +379,7 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None):
'the Inference Engine API in runtime may fail for such an IR.',
action='store_true', default=False)
common_group.add_argument('--keep_shape_ops',
help='The option is ignored. Expected behavior is enabled by default.',
help=argparse.SUPPRESS,
action=IgnoredAction, default=True)
common_group.add_argument('--disable_weights_compression',
help='Disable compression and store weights with original precision.',
@@ -524,11 +526,13 @@ def get_caffe_cli_parser(parser: argparse.ArgumentParser = None):
'CustomLayersMapping.xml'),
action=CanonicalizePathCheckExistenceAction)
caffe_group.add_argument('--mean_file', '-mf',
help='Mean image to be used for the input. Should be a binaryproto file',
help='[DEPRECATED] ' +
'Mean image to be used for the input. Should be a binaryproto file',
default=None,
action=CanonicalizePathCheckExistenceAction)
action=DeprecatedCanonicalizePathCheckExistenceAction)
caffe_group.add_argument('--mean_file_offsets', '-mo',
help='Mean image offsets to be used for the input binaryproto file. ' +
help='[DEPRECATED] ' +
'Mean image offsets to be used for the input binaryproto file. ' +
'When the mean image is bigger than the expected input, it is cropped. By default, centers ' +
'of the input image and the mean image are the same and the mean image is cropped by ' +
'dimensions of the input image. The format to pass this option is the following: "-mo (x,y)". In this ' +