diff --git a/docs/template_plugin/tests/functional/op_reference/detection_output.cpp b/docs/template_plugin/tests/functional/op_reference/detection_output.cpp index fa05c887089..cc0a6a2a7f4 100644 --- a/docs/template_plugin/tests/functional/op_reference/detection_output.cpp +++ b/docs/template_plugin/tests/functional/op_reference/detection_output.cpp @@ -45,21 +45,21 @@ struct DetectionOutputParams { refData(CreateTensor(iType, oValues)), testcaseName(test_name) { attrs.num_classes = num_classes; - attrs.background_label_id = background_label_id; - attrs.top_k = top_k; - attrs.variance_encoded_in_target = variance_encoded_in_target; - attrs.keep_top_k = keep_top_k; - attrs.code_type = code_type; - attrs.share_location = share_location; - attrs.nms_threshold = nms_threshold; - attrs.confidence_threshold = confidence_threshold; - attrs.clip_after_nms = clip_after_nms; - attrs.clip_before_nms = clip_before_nms; - attrs.decrease_label_id = decrease_label_id; - attrs.normalized = normalized; - attrs.input_height = input_height; - attrs.input_width = input_width; - attrs.objectness_score = objectness_score; + attrs_v8.background_label_id = attrs.background_label_id = background_label_id; + attrs_v8.top_k = attrs.top_k = top_k; + attrs_v8.variance_encoded_in_target = attrs.variance_encoded_in_target = variance_encoded_in_target; + attrs_v8.keep_top_k = attrs.keep_top_k = keep_top_k; + attrs_v8.code_type = attrs.code_type = code_type; + attrs_v8.share_location = attrs.share_location = share_location; + attrs_v8.nms_threshold = attrs.nms_threshold = nms_threshold; + attrs_v8.confidence_threshold = attrs.confidence_threshold = confidence_threshold; + attrs_v8.clip_after_nms = attrs.clip_after_nms = clip_after_nms; + attrs_v8.clip_before_nms = attrs.clip_before_nms = clip_before_nms; + attrs_v8.decrease_label_id = attrs.decrease_label_id = decrease_label_id; + attrs_v8.normalized = attrs.normalized = normalized; + attrs_v8.input_height = attrs.input_height = input_height; + 
attrs_v8.input_width = attrs.input_width = input_width; + attrs_v8.objectness_score = attrs.objectness_score = objectness_score; size_t num_loc_classes = attrs.share_location ? 1 : attrs.num_classes; size_t prior_box_size = attrs.normalized ? 4 : 5; @@ -107,21 +107,21 @@ template auxConfData(CreateTensor(iType, auxConfValues)), testcaseName(test_name) { attrs.num_classes = num_classes; - attrs.background_label_id = background_label_id; - attrs.top_k = top_k; - attrs.variance_encoded_in_target = variance_encoded_in_target; - attrs.keep_top_k = keep_top_k; - attrs.code_type = code_type; - attrs.share_location = share_location; - attrs.nms_threshold = nms_threshold; - attrs.confidence_threshold = confidence_threshold; - attrs.clip_after_nms = clip_after_nms; - attrs.clip_before_nms = clip_before_nms; - attrs.decrease_label_id = decrease_label_id; - attrs.normalized = normalized; - attrs.input_height = input_height; - attrs.input_width = input_width; - attrs.objectness_score = objectness_score; + attrs_v8.background_label_id = attrs.background_label_id = background_label_id; + attrs_v8.top_k = attrs.top_k = top_k; + attrs_v8.variance_encoded_in_target = attrs.variance_encoded_in_target = variance_encoded_in_target; + attrs_v8.keep_top_k = attrs.keep_top_k = keep_top_k; + attrs_v8.code_type = attrs.code_type = code_type; + attrs_v8.share_location = attrs.share_location = share_location; + attrs_v8.nms_threshold = attrs.nms_threshold = nms_threshold; + attrs_v8.confidence_threshold = attrs.confidence_threshold = confidence_threshold; + attrs_v8.clip_after_nms = attrs.clip_after_nms = clip_after_nms; + attrs_v8.clip_before_nms = attrs.clip_before_nms = clip_before_nms; + attrs_v8.decrease_label_id = attrs.decrease_label_id = decrease_label_id; + attrs_v8.normalized = attrs.normalized = normalized; + attrs_v8.input_height = attrs.input_height = input_height; + attrs_v8.input_width = attrs.input_width = input_width; + attrs_v8.objectness_score = attrs.objectness_score = 
objectness_score; size_t num_loc_classes = attrs.share_location ? 1 : attrs.num_classes; size_t prior_box_size = attrs.normalized ? 4 : 5; @@ -135,6 +135,7 @@ template } ov::op::v0::DetectionOutput::Attributes attrs; + ov::op::v8::DetectionOutput::Attributes attrs_v8; ov::PartialShape locShape; ov::PartialShape confShape; ov::PartialShape priorBoxesShape; @@ -194,10 +195,61 @@ private: } }; +class ReferenceDetectionOutputV8LayerTest : public testing::TestWithParam, + public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params); + if ((params.auxLocShape.size() != 0) && (params.auxConfShape.size() != 0)) + inputData = {params.locData, params.confData, params.priorBoxesData, params.auxConfData, params.auxLocData}; + else + inputData = {params.locData, params.confData, params.priorBoxesData}; + refOutData = {params.refData}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "locShape=" << param.locShape << "_"; + result << "confShape=" << param.confShape << "_"; + result << "priorBoxesShape=" << param.priorBoxesShape << "_"; + if ((param.auxLocShape.size() != 0) && (param.auxConfShape.size() != 0)) { + result << "auxLocShape=" << param.auxLocShape << "_"; + result << "auxConfShape=" << param.auxConfShape << "_"; + } + result << "iType=" << param.inType; + if (param.testcaseName != "") + result << "_" << param.testcaseName; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const DetectionOutputParams& params) { + const auto loc = std::make_shared(params.inType, params.locShape); + const auto conf = std::make_shared(params.inType, params.confShape); + const auto priorBoxes = std::make_shared(params.inType, params.priorBoxesShape); + if ((params.auxLocShape.size() != 0) && (params.auxConfShape.size() != 0)) { + const auto auxConf = std::make_shared(params.inType, params.auxConfShape); + 
const auto auxLoc = std::make_shared(params.inType, params.auxLocShape); + const auto DetectionOutput = + std::make_shared(loc, conf, priorBoxes, auxConf, auxLoc, params.attrs_v8); + return std::make_shared(NodeVector{DetectionOutput}, + ParameterVector{loc, conf, priorBoxes, auxConf, auxLoc}); + } else { + const auto DetectionOutput = std::make_shared(loc, conf, priorBoxes, params.attrs_v8); + return std::make_shared(NodeVector{DetectionOutput}, ParameterVector{loc, conf, priorBoxes}); + } + } +}; + TEST_P(ReferenceDetectionOutputLayerTest, CompareWithRefs) { Exec(); } +TEST_P(ReferenceDetectionOutputV8LayerTest, CompareWithRefs) { + Exec(); +} + template std::vector generateDetectionOutputFloatParams() { using T = typename element_type_traits::value_type; @@ -517,4 +569,9 @@ std::vector generateDetectionOutputCombinedParams() { INSTANTIATE_TEST_SUITE_P(smoke_DetectionOutput_With_Hardcoded_Refs, ReferenceDetectionOutputLayerTest, testing::ValuesIn(generateDetectionOutputCombinedParams()), ReferenceDetectionOutputLayerTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_DetectionOutput_With_Hardcoded_Refs, + ReferenceDetectionOutputV8LayerTest, + testing::ValuesIn(generateDetectionOutputCombinedParams()), + ReferenceDetectionOutputV8LayerTest::getTestCaseName); + } // namespace \ No newline at end of file diff --git a/inference-engine/tests/functional/inference_engine/transformations/detection_output_downgrade_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/detection_output_downgrade_test.cpp new file mode 100644 index 00000000000..55c40cd9541 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/detection_output_downgrade_test.cpp @@ -0,0 +1,189 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/detection_output_downgrade.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include 
+#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "transformations/init_node_info.hpp" + +using namespace ngraph; +using namespace testing; + +namespace { +void create_attributes_vectors(std::vector& attrs_v1_vector, + std::vector& attrs_v8_vector) { + // initialize attributes affecting shape inference + // others remain by default + for (int keep_top_k : {10, -1}) { + for (int top_k : {5, -1}) { + for (bool variance_encoded_in_target : {true, false}) { + for (bool share_location : {true, false}) { + for (bool normalized : {true, false}) { + opset1::DetectionOutput::Attributes attrs_v1; + opset8::DetectionOutput::Attributes attrs_v8; + attrs_v1.top_k = attrs_v8.top_k = top_k; + attrs_v1.keep_top_k = attrs_v8.keep_top_k = {keep_top_k}; + attrs_v1.variance_encoded_in_target = attrs_v8.variance_encoded_in_target = + variance_encoded_in_target; + attrs_v1.share_location = attrs_v8.share_location = share_location; + attrs_v1.normalized = attrs_v8.normalized = normalized; + attrs_v1_vector.push_back(attrs_v1); + attrs_v8_vector.push_back(attrs_v8); + } + } + } + } + } +} +} // namespace + +TEST(TransformationTests, DetectionOutput8ToDetectionOutput1) { + std::vector attrs_v1_vector; + std::vector attrs_v8_vector; + Dimension N = 5; + Dimension num_prior_boxes = 100; + Dimension priors_batch_size = N; + Dimension num_classes = 23; + + create_attributes_vectors(attrs_v1_vector, attrs_v8_vector); + ASSERT_TRUE(attrs_v1_vector.size() == attrs_v8_vector.size()) << "Sizes of attribute test vectors must be equal"; + for (size_t ind = 0; ind < attrs_v1_vector.size(); ++ind) { + std::shared_ptr f(nullptr), f_ref(nullptr); + // this case covers deducing a number of classes value + // since this value is not saved in attributes + opset8::DetectionOutput::Attributes attributes_v8 = attrs_v8_vector[ind]; + opset1::DetectionOutput::Attributes attributes_v1 = attrs_v1_vector[ind]; + if (num_classes.is_static()) { + attributes_v1.num_classes = 
num_classes.get_length(); + } + + Dimension num_loc_classes = attributes_v8.share_location ? 1 : num_classes; + Dimension prior_box_size = attributes_v8.normalized ? 4 : 5; + + PartialShape box_logits_shape = {N, num_prior_boxes * num_loc_classes * 4}; + PartialShape class_preds_shape = {N, num_prior_boxes * num_classes}; + PartialShape proposals_shape = {priors_batch_size, + attributes_v8.variance_encoded_in_target ? 1 : 2, + num_prior_boxes * prior_box_size}; + + { + auto box_logits = std::make_shared(ngraph::element::f32, box_logits_shape); + auto class_preds = std::make_shared(ngraph::element::f32, class_preds_shape); + auto proposals = std::make_shared(ngraph::element::f32, proposals_shape); + + auto detection_output_v8 = + std::make_shared(box_logits, class_preds, proposals, attributes_v8); + + f = std::make_shared(ngraph::NodeVector{detection_output_v8}, + ngraph::ParameterVector{box_logits, class_preds, proposals}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.run_passes(f); + } + + { + auto box_logits = std::make_shared(ngraph::element::f32, box_logits_shape); + auto class_preds = std::make_shared(ngraph::element::f32, class_preds_shape); + auto proposals = std::make_shared(ngraph::element::f32, proposals_shape); + + auto detection_output_v1 = + std::make_shared(box_logits, class_preds, proposals, attributes_v1); + + f_ref = std::make_shared(ngraph::NodeVector{detection_output_v1}, + ngraph::ParameterVector{box_logits, class_preds, proposals}); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; + } +} + +TEST(TransformationTests, DetectionOutput8ToDetectionOutput1FiveArguments) { + // In this case num_classes attribute value is deduced using inputs shapes + std::vector attrs_v1_vector; + std::vector attrs_v8_vector; + Dimension N = 5; + Dimension num_prior_boxes = 15; + Dimension priors_batch_size = N; + Dimension num_classes = 23; + + create_attributes_vectors(attrs_v1_vector, attrs_v8_vector); 
+ ASSERT_TRUE(attrs_v1_vector.size() == attrs_v8_vector.size()) << "Sizes of attribute test vectors must be equal"; + for (size_t ind = 0; ind < attrs_v1_vector.size(); ++ind) { + std::shared_ptr f(nullptr), f_ref(nullptr); + opset8::DetectionOutput::Attributes attributes_v8 = attrs_v8_vector[ind]; + opset1::DetectionOutput::Attributes attributes_v1 = attrs_v1_vector[ind]; + if (num_classes.is_static()) { + attributes_v1.num_classes = num_classes.get_length(); + } + + Dimension num_loc_classes = attributes_v8.share_location ? 1 : num_classes; + Dimension prior_box_size = attributes_v8.normalized ? 4 : 5; + + PartialShape box_logits_shape = {N, num_prior_boxes * num_loc_classes * 4}; + PartialShape class_preds_shape = {N, num_prior_boxes * num_classes}; + PartialShape proposals_shape = {priors_batch_size, + attributes_v8.variance_encoded_in_target ? 1 : 2, + num_prior_boxes * prior_box_size}; + PartialShape ad_class_preds_shape = {N, num_prior_boxes * 2}; + PartialShape ad_box_preds_shape = {N, num_prior_boxes * num_loc_classes * 4}; + + { + auto box_logits = std::make_shared(ngraph::element::f32, box_logits_shape); + auto class_preds = std::make_shared(ngraph::element::f32, class_preds_shape); + auto proposals = std::make_shared(ngraph::element::f32, proposals_shape); + auto ad_class_preds = + std::make_shared(ngraph::element::f32, ad_class_preds_shape); + auto ad_box_preds = std::make_shared(ngraph::element::f32, ad_box_preds_shape); + + auto detection_output_v8 = std::make_shared(box_logits, + class_preds, + proposals, + ad_class_preds, + ad_box_preds, + attributes_v8); + + f = std::make_shared( + ngraph::NodeVector{detection_output_v8}, + ngraph::ParameterVector{box_logits, class_preds, proposals, ad_class_preds, ad_box_preds}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.run_passes(f); + } + + { + auto box_logits = std::make_shared(ngraph::element::f32, box_logits_shape); + auto class_preds = std::make_shared(ngraph::element::f32, 
class_preds_shape); + auto proposals = std::make_shared(ngraph::element::f32, proposals_shape); + auto ad_class_preds = + std::make_shared(ngraph::element::f32, ad_class_preds_shape); + auto ad_box_preds = std::make_shared(ngraph::element::f32, ad_box_preds_shape); + + auto detection_output_v1 = std::make_shared(box_logits, + class_preds, + proposals, + ad_class_preds, + ad_box_preds, + attributes_v1); + + f_ref = std::make_shared( + ngraph::NodeVector{detection_output_v1}, + ngraph::ParameterVector{box_logits, class_preds, proposals, ad_class_preds, ad_box_preds}); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; + } +} diff --git a/inference-engine/tests/functional/inference_engine/transformations/detection_output_upgrade_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/detection_output_upgrade_test.cpp new file mode 100644 index 00000000000..c20411dc48b --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/detection_output_upgrade_test.cpp @@ -0,0 +1,189 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/detection_output_upgrade.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" +#include "transformations/init_node_info.hpp" + +using namespace ngraph; +using namespace testing; + +namespace { +void create_attributes_vectors(std::vector& attrs_v1_vector, + std::vector& attrs_v8_vector) { + // initialize attributes affecting shape inference + // others remain by default + for (int keep_top_k : {10, -1}) { + for (int top_k : {5, -1}) { + for (bool variance_encoded_in_target : {true, false}) { + for (bool share_location : {true, false}) { + for (bool normalized : {true, false}) { + opset1::DetectionOutput::Attributes attrs_v1; + opset8::DetectionOutput::Attributes attrs_v8; + 
attrs_v1.top_k = attrs_v8.top_k = top_k; + attrs_v1.keep_top_k = attrs_v8.keep_top_k = {keep_top_k}; + attrs_v1.variance_encoded_in_target = attrs_v8.variance_encoded_in_target = + variance_encoded_in_target; + attrs_v1.share_location = attrs_v8.share_location = share_location; + attrs_v1.normalized = attrs_v8.normalized = normalized; + attrs_v1_vector.push_back(attrs_v1); + attrs_v8_vector.push_back(attrs_v8); + } + } + } + } + } +} +} // namespace + +TEST(TransformationTests, DetectionOutput1ToDetectionOutput8) { + std::vector attrs_v1_vector; + std::vector attrs_v8_vector; + Dimension N = 5; + Dimension num_prior_boxes = 100; + Dimension priors_batch_size = N; + Dimension num_classes = 23; + + create_attributes_vectors(attrs_v1_vector, attrs_v8_vector); + ASSERT_TRUE(attrs_v1_vector.size() == attrs_v8_vector.size()) << "Sizes of attribute test vectors must be equal"; + for (size_t ind = 0; ind < attrs_v1_vector.size(); ++ind) { + std::shared_ptr f(nullptr), f_ref(nullptr); + // this case covers deducing a number of classes value + // since this value is not saved in attributes + opset8::DetectionOutput::Attributes attributes_v8 = attrs_v8_vector[ind]; + opset1::DetectionOutput::Attributes attributes_v1 = attrs_v1_vector[ind]; + if (num_classes.is_static()) { + attributes_v1.num_classes = num_classes.get_length(); + } + + Dimension num_loc_classes = attributes_v8.share_location ? 1 : num_classes; + Dimension prior_box_size = attributes_v8.normalized ? 4 : 5; + + PartialShape box_logits_shape = {N, num_prior_boxes * num_loc_classes * 4}; + PartialShape class_preds_shape = {N, num_prior_boxes * num_classes}; + PartialShape proposals_shape = {priors_batch_size, + attributes_v8.variance_encoded_in_target ? 
1 : 2, + num_prior_boxes * prior_box_size}; + + { + auto box_logits = std::make_shared(ngraph::element::f32, box_logits_shape); + auto class_preds = std::make_shared(ngraph::element::f32, class_preds_shape); + auto proposals = std::make_shared(ngraph::element::f32, proposals_shape); + + auto detection_output_v1 = + std::make_shared(box_logits, class_preds, proposals, attributes_v1); + + f = std::make_shared(ngraph::NodeVector{detection_output_v1}, + ngraph::ParameterVector{box_logits, class_preds, proposals}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.run_passes(f); + } + + { + auto box_logits = std::make_shared(ngraph::element::f32, box_logits_shape); + auto class_preds = std::make_shared(ngraph::element::f32, class_preds_shape); + auto proposals = std::make_shared(ngraph::element::f32, proposals_shape); + + auto detection_output_v8 = + std::make_shared(box_logits, class_preds, proposals, attributes_v8); + + f_ref = std::make_shared(ngraph::NodeVector{detection_output_v8}, + ngraph::ParameterVector{box_logits, class_preds, proposals}); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; + } +} + +TEST(TransformationTests, DetectionOutput1ToDetectionOutput8FiveArguments) { + // In this case num_classes attribute value is deduced using inputs shapes + std::vector attrs_v1_vector; + std::vector attrs_v8_vector; + Dimension N = 5; + Dimension num_prior_boxes = 15; + Dimension priors_batch_size = N; + Dimension num_classes = 23; + + create_attributes_vectors(attrs_v1_vector, attrs_v8_vector); + ASSERT_TRUE(attrs_v1_vector.size() == attrs_v8_vector.size()) << "Sizes of attribute test vectors must be equal"; + for (size_t ind = 0; ind < attrs_v1_vector.size(); ++ind) { + std::shared_ptr f(nullptr), f_ref(nullptr); + opset8::DetectionOutput::Attributes attributes_v8 = attrs_v8_vector[ind]; + opset1::DetectionOutput::Attributes attributes_v1 = attrs_v1_vector[ind]; + if (num_classes.is_static()) { + 
attributes_v1.num_classes = num_classes.get_length(); + } + + Dimension num_loc_classes = attributes_v8.share_location ? 1 : num_classes; + Dimension prior_box_size = attributes_v8.normalized ? 4 : 5; + + PartialShape box_logits_shape = {N, num_prior_boxes * num_loc_classes * 4}; + PartialShape class_preds_shape = {N, num_prior_boxes * num_classes}; + PartialShape proposals_shape = {priors_batch_size, + attributes_v8.variance_encoded_in_target ? 1 : 2, + num_prior_boxes * prior_box_size}; + PartialShape ad_class_preds_shape = {N, num_prior_boxes * 2}; + PartialShape ad_box_preds_shape = {N, num_prior_boxes * num_loc_classes * 4}; + + { + auto box_logits = std::make_shared(ngraph::element::f32, box_logits_shape); + auto class_preds = std::make_shared(ngraph::element::f32, class_preds_shape); + auto proposals = std::make_shared(ngraph::element::f32, proposals_shape); + auto ad_class_preds = + std::make_shared(ngraph::element::f32, ad_class_preds_shape); + auto ad_box_preds = std::make_shared(ngraph::element::f32, ad_box_preds_shape); + + auto detection_output_v1 = std::make_shared(box_logits, + class_preds, + proposals, + ad_class_preds, + ad_box_preds, + attributes_v1); + + f = std::make_shared( + ngraph::NodeVector{detection_output_v1}, + ngraph::ParameterVector{box_logits, class_preds, proposals, ad_class_preds, ad_box_preds}); + + ngraph::pass::Manager manager; + manager.register_pass(); + manager.run_passes(f); + } + + { + auto box_logits = std::make_shared(ngraph::element::f32, box_logits_shape); + auto class_preds = std::make_shared(ngraph::element::f32, class_preds_shape); + auto proposals = std::make_shared(ngraph::element::f32, proposals_shape); + auto ad_class_preds = + std::make_shared(ngraph::element::f32, ad_class_preds_shape); + auto ad_box_preds = std::make_shared(ngraph::element::f32, ad_box_preds_shape); + + auto detection_output_v8 = std::make_shared(box_logits, + class_preds, + proposals, + ad_class_preds, + ad_box_preds, + attributes_v8); + + 
f_ref = std::make_shared( + ngraph::NodeVector{detection_output_v8}, + ngraph::ParameterVector{box_logits, class_preds, proposals, ad_class_preds, ad_box_preds}); + } + auto res = compare_functions(f, f_ref); + ASSERT_TRUE(res.first) << res.second; + } +} diff --git a/model-optimizer/automation/package_BOM.txt b/model-optimizer/automation/package_BOM.txt index 9bfd6d68f98..82070da5ece 100644 --- a/model-optimizer/automation/package_BOM.txt +++ b/model-optimizer/automation/package_BOM.txt @@ -85,7 +85,7 @@ extensions/front/caffe/conv_ext.py extensions/front/caffe/crop_ext.py extensions/front/caffe/ctcgreedydecoder_ext.py extensions/front/caffe/CustomLayersMapping.xml.example -extensions/front/caffe/detection_output.py +extensions/front/caffe/detection_output_ext.py extensions/front/caffe/dropout_ext.py extensions/front/caffe/elementwise_ext.py extensions/front/caffe/eltwise_add_normalize.py @@ -271,8 +271,8 @@ extensions/front/onnx/cumsum_ext.py extensions/front/onnx/deformable_conv_ext.py extensions/front/onnx/depth_to_space_ext.py extensions/front/onnx/dequantize_linear_ext.py -extensions/front/onnx/detection_output.py -extensions/front/onnx/detectionoutput_ext.py +extensions/front/onnx/detection_output_ext.py +extensions/front/onnx/detection_output_onnx_ext.py extensions/front/onnx/dropout_ext.py extensions/front/onnx/einsum_ext.py extensions/front/onnx/elementwise_ext.py @@ -680,8 +680,8 @@ extensions/ops/ctc_loss.py extensions/ops/cumsum.py extensions/ops/depth_to_space.py extensions/ops/dequantize_linear.py +extensions/ops/detection_output_onnx.py extensions/ops/DetectionOutput.py -extensions/ops/detectionoutput_onnx.py extensions/ops/dft.py extensions/ops/einsum.py extensions/ops/elementwise.py @@ -837,7 +837,6 @@ mo/front/common/partial_infer/concat.py mo/front/common/partial_infer/crop.py mo/front/common/partial_infer/elemental.py mo/front/common/partial_infer/eltwise.py -mo/front/common/partial_infer/multi_box_detection.py 
mo/front/common/partial_infer/multi_box_prior.py mo/front/common/partial_infer/roipooling.py mo/front/common/partial_infer/utils.py diff --git a/model-optimizer/extensions/front/caffe/detection_output.py b/model-optimizer/extensions/front/caffe/detection_output_ext.py similarity index 94% rename from model-optimizer/extensions/front/caffe/detection_output.py rename to model-optimizer/extensions/front/caffe/detection_output_ext.py index 1371fe1ffbd..48559c1411d 100644 --- a/model-optimizer/extensions/front/caffe/detection_output.py +++ b/model-optimizer/extensions/front/caffe/detection_output_ext.py @@ -5,7 +5,6 @@ import logging as log from extensions.ops.DetectionOutput import DetectionOutput from mo.front.caffe.collect_attributes import merge_attrs -from mo.front.common.partial_infer.multi_box_detection import multi_box_detection_infer from mo.front.extractor import FrontExtractorOp @@ -85,7 +84,6 @@ class DetectionOutputFrontExtractor(FrontExtractorOp): interp_mode += interp_mode_values[x] attrs = { - 'num_classes': param.num_classes, 'share_location': int(param.share_location), 'background_label_id': param.background_label_id, 'code_type': code_type, @@ -132,9 +130,6 @@ class DetectionOutputFrontExtractor(FrontExtractorOp): mapping_rule = merge_attrs(param, attrs) - # force setting infer function because it doesn't exist in proto so merge_attrs will not set it - mapping_rule.update({'infer': multi_box_detection_infer}) - # update the attributes of the node DetectionOutput.update_node_stat(node, mapping_rule) return cls.enabled diff --git a/model-optimizer/extensions/front/mxnet/multibox_detection_ext.py b/model-optimizer/extensions/front/mxnet/multibox_detection_ext.py index 05a6dd49166..8961c5d2ea8 100644 --- a/model-optimizer/extensions/front/mxnet/multibox_detection_ext.py +++ b/model-optimizer/extensions/front/mxnet/multibox_detection_ext.py @@ -13,30 +13,20 @@ class MultiBoxDetectionOutputExtractor(FrontExtractorOp): @classmethod def extract(cls, node): 
attrs = get_mxnet_layer_attrs(node.symbol_dict) - # We can not get num_classes attribute from the operation, so it must be set to None. - # In this case num_classes attribute will be defined in the infer function in - # mo/front/common/partial_infer/multi_box_detection.py - num_classes = None top_k = attrs.int("nms_topk", -1) - keep_top_k = top_k - variance_encoded_in_target = 0 - code_type = "caffe.PriorBoxParameter.CENTER_SIZE" - share_location = 1 nms_threshold = attrs.float("nms_threshold", 0.5) confidence_threshold = attrs.float("threshold", 0.01) - background_label_id = 0 clip = 0 if not attrs.bool("clip", True) else 1 node_attrs = { 'type': 'DetectionOutput', 'op': __class__.op, - 'num_classes': num_classes, - 'keep_top_k': keep_top_k, - 'variance_encoded_in_target': variance_encoded_in_target, - 'code_type': code_type, - 'share_location': share_location, + 'keep_top_k': top_k, + 'variance_encoded_in_target': 0, + 'code_type': "caffe.PriorBoxParameter.CENTER_SIZE", + 'share_location': 1, 'confidence_threshold': confidence_threshold, - 'background_label_id': background_label_id, + 'background_label_id': 0, 'nms_threshold': nms_threshold, 'top_k': top_k, 'decrease_label_id': 1, diff --git a/model-optimizer/extensions/front/onnx/detection_output.py b/model-optimizer/extensions/front/onnx/detection_output_ext.py similarity index 100% rename from model-optimizer/extensions/front/onnx/detection_output.py rename to model-optimizer/extensions/front/onnx/detection_output_ext.py diff --git a/model-optimizer/extensions/front/onnx/detectionoutput_ext.py b/model-optimizer/extensions/front/onnx/detection_output_onnx_ext.py similarity index 93% rename from model-optimizer/extensions/front/onnx/detectionoutput_ext.py rename to model-optimizer/extensions/front/onnx/detection_output_onnx_ext.py index b87865012e2..728bb62025a 100644 --- a/model-optimizer/extensions/front/onnx/detectionoutput_ext.py +++ b/model-optimizer/extensions/front/onnx/detection_output_onnx_ext.py @@ 
-5,7 +5,7 @@ from math import log import numpy as np -from extensions.ops.detectionoutput_onnx import ExperimentalDetectronDetectionOutput +from extensions.ops.detection_output_onnx import ExperimentalDetectronDetectionOutput from mo.front.extractor import FrontExtractorOp from mo.front.onnx.extractors.utils import onnx_attr diff --git a/model-optimizer/extensions/front/onnx/mask_rcnn_conversion.py b/model-optimizer/extensions/front/onnx/mask_rcnn_conversion.py index ce11bb82fde..22c7128f09f 100644 --- a/model-optimizer/extensions/front/onnx/mask_rcnn_conversion.py +++ b/model-optimizer/extensions/front/onnx/mask_rcnn_conversion.py @@ -5,7 +5,7 @@ import numpy as np from extensions.front.onnx.softmaxONNX_to_softmax import SoftmaxONNXFrontReplacer from extensions.ops.Cast import Cast -from extensions.ops.detectionoutput_onnx import ExperimentalDetectronDetectionOutput +from extensions.ops.detection_output_onnx import ExperimentalDetectronDetectionOutput from extensions.ops.parameter import Parameter from extensions.ops.roifeatureextractor_onnx import ExperimentalDetectronROIFeatureExtractor from mo.front.common.partial_infer.utils import int64_array diff --git a/model-optimizer/extensions/front/tf/AutomlEfficientDet.py b/model-optimizer/extensions/front/tf/AutomlEfficientDet.py index 2ac2a166b66..1a75a868c10 100644 --- a/model-optimizer/extensions/front/tf/AutomlEfficientDet.py +++ b/model-optimizer/extensions/front/tf/AutomlEfficientDet.py @@ -115,7 +115,6 @@ class EfficientDet(FrontReplacementFromConfigFileGeneral): detection_output_node = DetectionOutput(graph, dict( name='detections', - num_classes=int(replacement_descriptions['num_classes']), share_location=1, background_label_id=int(replacement_descriptions['num_classes']) + 1, nms_threshold=replacement_descriptions['nms_threshold'], diff --git a/model-optimizer/extensions/front/tf/ObjectDetectionAPI.py b/model-optimizer/extensions/front/tf/ObjectDetectionAPI.py index 6c8937946f7..0fa2a54397a 100644 --- 
a/model-optimizer/extensions/front/tf/ObjectDetectionAPI.py +++ b/model-optimizer/extensions/front/tf/ObjectDetectionAPI.py @@ -1067,7 +1067,6 @@ class ObjectDetectionAPIDetectionOutputReplacement(FrontReplacementFromConfigFil code_type='caffe.PriorBoxParameter.CENTER_SIZE', pad_mode='caffe.ResizeParameter.CONSTANT', resize_mode='caffe.ResizeParameter.WARP', - num_classes=num_classes + 1, confidence_threshold=_value_or_raise(match, pipeline_config, 'postprocessing_score_threshold'), top_k=_value_or_raise(match, pipeline_config, 'postprocessing_max_detections_per_class'), keep_top_k=_value_or_raise(match, pipeline_config, 'postprocessing_max_total_detections'), @@ -1436,7 +1435,6 @@ class ObjectDetectionAPIProposalReplacement(FrontReplacementFromConfigFileSubGra 'input_width': 1, 'keep_top_k': max_proposals, 'normalized': True, - 'num_classes': 2, 'objectness_score': 0, 'share_location': True, 'top_k': 6000, @@ -1664,7 +1662,6 @@ class ObjectDetectionAPISSDPostprocessorReplacement(FrontReplacementFromConfigFi [reshape_offsets, reshape_conf_node, priors_node], dict(name=detection_output_op.attrs['type'], background_label_id=0 if has_background_class else -1, - num_classes=num_classes, variances_encoded_in_target=False, confidence_threshold=_value_or_raise(match, pipeline_config, 'postprocessing_score_threshold'), top_k=_value_or_raise(match, pipeline_config, 'postprocessing_max_detections_per_class'), diff --git a/model-optimizer/extensions/ops/DetectionOutput.py b/model-optimizer/extensions/ops/DetectionOutput.py index 55bf0bdcebd..a1ad570f4d8 100644 --- a/model-optimizer/extensions/ops/DetectionOutput.py +++ b/model-optimizer/extensions/ops/DetectionOutput.py @@ -3,10 +3,12 @@ import numpy as np -from mo.front.common.partial_infer.multi_box_detection import multi_box_detection_infer +from mo.front.common.partial_infer.utils import is_fully_defined, compatible_dims from mo.front.extractor import bool_to_str -from mo.graph.graph import Graph, Node +from 
mo.graph.graph import Graph +from mo.graph.graph import Node from mo.ops.op import Op +from mo.utils.error import Error class DetectionOutput(Op): @@ -17,10 +19,10 @@ class DetectionOutput(Op): super().__init__(graph, { 'type': self.op, 'op': self.op, - 'version': 'opset1', + 'version': 'opset8', 'in_ports_count': 3, 'out_ports_count': 1, - 'infer': multi_box_detection_infer, + 'infer': self.infer, 'input_width': 1, 'input_height': 1, 'normalized': True, @@ -33,7 +35,7 @@ class DetectionOutput(Op): }, attrs) def supported_attrs(self): - return [ + supported_attrs = [ 'background_label_id', ('clip_after_nms', lambda node: bool_to_str(node, 'clip_after_nms')), ('clip_before_nms', lambda node: bool_to_str(node, 'clip_before_nms')), @@ -45,13 +47,60 @@ class DetectionOutput(Op): 'keep_top_k', 'nms_threshold', ('normalized', lambda node: bool_to_str(node, 'normalized')), - 'num_classes', ('share_location', lambda node: bool_to_str(node, 'share_location')), 'top_k', ('variance_encoded_in_target', lambda node: bool_to_str(node, 'variance_encoded_in_target')), 'objectness_score', ] + opset = self.get_opset() + if opset == 'opset1': + supported_attrs += ['num_classes'] + return supported_attrs @staticmethod def type_infer(node: Node): node.out_port(0).set_data_type(np.float32) + + @staticmethod + def infer(node: Node): + node_name = node.soft_get('name', node.id) + loc_shape = node.in_port(0).data.get_shape() + conf_shape = node.in_port(1).data.get_shape() + prior_boxes_shape = node.in_port(2).data.get_shape() + + if loc_shape is None or conf_shape is None or prior_boxes_shape is None: + raise Error('Shapes for the Detection Output node "{}" are not defined'.format(node_name)) + + prior_size = 4 + if node.has('normalized') and not node.normalized: + prior_size = 5 + + if is_fully_defined(prior_boxes_shape[-1]) and prior_boxes_shape[-1] % prior_size != 0: + raise Error('Amount of confidences "{}" is not divisible by {} for node "{}"' + ''.format(prior_boxes_shape[-1], 
prior_size, node_name)) + + num_priors = prior_boxes_shape[-1] // prior_size + if not node.has_valid('keep_top_k') or node.keep_top_k == -1: + node['keep_top_k'] = num_priors + + num_classes = conf_shape[-1] // num_priors + num_loc_classes = num_classes + if node.has_and_set('share_location') and node.share_location: + num_loc_classes = 1 + + if not compatible_dims(num_priors * num_loc_classes * 4, loc_shape[-1]): + raise Error('Locations and prior boxes shapes mismatch: "{}" vs "{}" for node "{}"' + ''.format(loc_shape, prior_boxes_shape, node_name)) + + if not node.variance_encoded_in_target and not compatible_dims(prior_boxes_shape[-2], 2): + raise Error('The "-2" dimension of the prior boxes must be 2 but it is "{}" for node "{}".' + ''.format(prior_boxes_shape[-2], node_name)) + + if is_fully_defined(conf_shape[-1]) and is_fully_defined(num_priors) and conf_shape[-1] % num_priors != 0: + raise Error('Amount of confidences "{}" is not divisible by amount of priors "{}" for node "{}".' 
+ ''.format(conf_shape[-1], num_priors, node_name)) + + node.out_port(0).data.set_shape([1, 1, conf_shape[0] * node.keep_top_k, 7]) + + # the line below is needed for the TF framework so the MO will not change the layout + node.graph.node[node.out_node(0).id]['nchw_layout'] = True diff --git a/model-optimizer/extensions/ops/detectionoutput_onnx.py b/model-optimizer/extensions/ops/detection_output_onnx.py similarity index 100% rename from model-optimizer/extensions/ops/detectionoutput_onnx.py rename to model-optimizer/extensions/ops/detection_output_onnx.py diff --git a/model-optimizer/mo/front/common/partial_infer/multi_box_detection.py b/model-optimizer/mo/front/common/partial_infer/multi_box_detection.py deleted file mode 100644 index 67b6f5fdb09..00000000000 --- a/model-optimizer/mo/front/common/partial_infer/multi_box_detection.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (C) 2018-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging as log - -from mo.front.common.partial_infer.utils import is_fully_defined, compatible_dims -from mo.graph.graph import Node -from mo.utils.error import Error - - -def multi_box_detection_infer(node: Node): - loc_shape = node.in_node(0).shape - conf_shape = node.in_node(1).shape - prior_boxes_shape = node.in_node(2).shape - node_name = node.soft_get('name', node.id) - - if loc_shape is None or conf_shape is None or prior_boxes_shape is None: - raise Error('Shapes for the Detection Output node "{}" are not defined'.format(node_name)) - - prior_size = 4 - if node.has('normalized') and not node.normalized: - prior_size = 5 - - if is_fully_defined(prior_boxes_shape[-1]) and prior_boxes_shape[-1] % prior_size != 0: - raise Error('Amount of confidences "{}" is not divisible by {} for node "{}"' - ''.format(prior_boxes_shape[-1], prior_size, node_name)) - - num_priors = prior_boxes_shape[-1] // prior_size - if not node.has_valid('keep_top_k') or node.keep_top_k == -1: - node['keep_top_k'] = num_priors - - # do 
not try to infer number of classes because it is not possible in case when input shapes are partially defined - if not node.has_valid('num_classes'): - node['num_classes'] = conf_shape[-1] // num_priors - log.debug('Inferred amount of classes "{}"'.format(node.num_classes)) - - num_loc_classes = node.num_classes - if node.has_and_set('share_location') and node.share_location: - num_loc_classes = 1 - - if not compatible_dims(num_priors * num_loc_classes * 4, loc_shape[-1]): - raise Error('Locations and prior boxes shapes mismatch: "{}" vs "{}" for node "{}"' - ''.format(loc_shape, prior_boxes_shape, node_name)) - - if not node.variance_encoded_in_target and not compatible_dims(prior_boxes_shape[-2], 2): - raise Error('The "-2" dimension of the prior boxes must be 2 but it is "{}" for node "{}".' - ''.format(prior_boxes_shape[-2], node_name)) - - if is_fully_defined(conf_shape[-1]) and is_fully_defined(num_priors) and conf_shape[-1] % num_priors != 0: - raise Error('Amount of confidences "{}" is not divisible by amount of priors "{}" for node "{}".' 
- ''.format(conf_shape[-1], num_priors, node_name)) - - node.out_port(0).data.set_shape([1, 1, conf_shape[0] * node.keep_top_k, 7]) - - # the line below is needed for the TF framework so the MO will not change the layout - node.graph.node[node.out_node(0).id]['nchw_layout'] = True diff --git a/model-optimizer/unit_tests/extensions/front/mxnet/multibox_detection_test.py b/model-optimizer/unit_tests/extensions/front/mxnet/multibox_detection_test.py index c65feaeb055..a62893d900a 100644 --- a/model-optimizer/unit_tests/extensions/front/mxnet/multibox_detection_test.py +++ b/model-optimizer/unit_tests/extensions/front/mxnet/multibox_detection_test.py @@ -21,7 +21,6 @@ class TestMultiBoxDetection_Parsing(unittest.TestCase): exp_attrs = { 'type': 'DetectionOutput', - 'num_classes': None, 'keep_top_k': 400, 'variance_encoded_in_target': 0, 'code_type': "caffe.PriorBoxParameter.CENTER_SIZE", @@ -51,7 +50,6 @@ class TestMultiBoxDetection_Parsing(unittest.TestCase): exp_attrs = { 'type': 'DetectionOutput', - 'num_classes': None, 'keep_top_k': -1, 'variance_encoded_in_target': 0, 'code_type': "caffe.PriorBoxParameter.CENTER_SIZE", diff --git a/model-optimizer/unit_tests/extensions/front/onnx/detection_output_test.py b/model-optimizer/unit_tests/extensions/front/onnx/detection_output_test.py index 2a0fe9116d3..b106623c9f8 100644 --- a/model-optimizer/unit_tests/extensions/front/onnx/detection_output_test.py +++ b/model-optimizer/unit_tests/extensions/front/onnx/detection_output_test.py @@ -6,7 +6,7 @@ import unittest import numpy as np import onnx -from extensions.front.onnx.detection_output import DetectionOutputFrontExtractor +from extensions.front.onnx.detection_output_ext import DetectionOutputFrontExtractor from extensions.ops.DetectionOutput import DetectionOutput from mo.ops.op import Op from unit_tests.utils.extractors import PB diff --git a/model-optimizer/unit_tests/mo/front/common/partial_infer/multi_box_detection_test.py 
b/model-optimizer/unit_tests/mo/front/common/partial_infer/multi_box_detection_test.py deleted file mode 100644 index 1e73f840d39..00000000000 --- a/model-optimizer/unit_tests/mo/front/common/partial_infer/multi_box_detection_test.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright (C) 2018-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import unittest - -import numpy as np - -from mo.front.common.partial_infer.multi_box_detection import multi_box_detection_infer -from mo.front.common.partial_infer.utils import shape_array, dynamic_dimension_value, strict_compare_tensors -from mo.graph.graph import Node -from mo.utils.error import Error -from unit_tests.utils.graph import build_graph - -nodes_attributes = {'node_1': {'value': None, 'kind': 'data'}, - 'node_2': {'value': None, 'kind': 'data'}, - 'node_3': {'value': None, 'kind': 'data'}, - 'detection_output_1': {'type': 'DetectionOutput', 'value': None, 'kind': 'op'}, - 'node_4': {'value': None, 'kind': 'data'} - } - - -class TestMultiBoxDetectionInfer(unittest.TestCase): - def test_do_infer_ideal(self): - graph = build_graph(nodes_attributes, - [('node_1', 'detection_output_1'), - ('node_2', 'detection_output_1'), - ('node_3', 'detection_output_1'), - ('detection_output_1', 'node_4')], - {'node_1': {'shape': np.array([1, 34928])}, - 'node_2': {'shape': np.array([1, 183372])}, - 'node_3': {'shape': np.array([1, 2, 34928])}, - 'detection_output_1': {"background_label_id": 0, "clip": 1, - "code_type": "caffe.PriorBoxParameter.CENTER_SIZE", - "confidence_threshold": 0.01, "keep_top_k": 200, - "nms_threshold": 0.5, "num_classes": 21, - "share_location": 1, "top_k": 200, - "variance_encoded_in_target": 0}, - 'node_4': {'shape': np.array([1, 1, 200, 7])}, - }) - - multi_box_detection_node = Node(graph, 'detection_output_1') - print(multi_box_detection_node) - - multi_box_detection_infer(multi_box_detection_node) - exp_shape = np.array([1, 1, 200, 7]) - res_shape = graph.node['node_4']['shape'] - for i in 
range(0, len(exp_shape)): - self.assertEqual(exp_shape[i], res_shape[i]) - - self.assertEqual(multi_box_detection_node.background_label_id, 0) - self.assertEqual(multi_box_detection_node.clip, 1) - self.assertEqual(multi_box_detection_node.code_type, 'caffe.PriorBoxParameter.CENTER_SIZE') - self.assertEqual(multi_box_detection_node.confidence_threshold, 0.01) - self.assertEqual(multi_box_detection_node.keep_top_k, 200) - self.assertEqual(multi_box_detection_node.nms_threshold, 0.5) - self.assertEqual(multi_box_detection_node.num_classes, 21) - self.assertEqual(multi_box_detection_node.share_location, 1) - self.assertEqual(multi_box_detection_node.top_k, 200) - self.assertEqual(multi_box_detection_node.variance_encoded_in_target, 0) - - def test_do_infer_without_top_k(self): - graph = build_graph(nodes_attributes, - [('node_1', 'detection_output_1'), - ('node_2', 'detection_output_1'), - ('node_3', 'detection_output_1'), - ('detection_output_1', 'node_4')], - {'node_1': {'shape': np.array([1, 34928])}, - 'node_2': {'shape': np.array([1, 183372])}, - 'node_3': {'shape': np.array([1, 2, 34928])}, - 'detection_output_1': {"background_label_id": "0", "clip": "1", - "code_type": "caffe.PriorBoxParameter.CENTER_SIZE", - "confidence_threshold": "0.01", "keep_top_k": -1, - "nms_threshold": "0.5", "num_classes": 21, - "share_location": "1", "top_k": -1, - "variance_encoded_in_target": "0"}, - 'node_4': {'shape': np.array([1, 1, 69856, 7])}, - }) - - multi_box_detection_node = Node(graph, 'detection_output_1') - - multi_box_detection_infer(multi_box_detection_node) - exp_shape = np.array([1, 1, 8732, 7]) - res_shape = graph.node['node_4']['shape'] - for i in range(0, len(exp_shape)): - self.assertEqual(exp_shape[i], res_shape[i]) - - self.assertEqual(multi_box_detection_node.background_label_id, '0') - self.assertEqual(multi_box_detection_node.clip, '1') - self.assertEqual(multi_box_detection_node.code_type, 'caffe.PriorBoxParameter.CENTER_SIZE') - 
self.assertEqual(multi_box_detection_node.confidence_threshold, '0.01') - self.assertEqual(multi_box_detection_node.keep_top_k, 8732) - self.assertEqual(multi_box_detection_node.nms_threshold, '0.5') - self.assertEqual(multi_box_detection_node.num_classes, 21) - self.assertEqual(multi_box_detection_node.share_location, '1') - self.assertEqual(multi_box_detection_node.top_k, -1) - self.assertEqual(multi_box_detection_node.variance_encoded_in_target, '0') - - def test_do_infer_without_top_k_dynamic_shape(self): - graph = build_graph(nodes_attributes, - [('node_1', 'detection_output_1'), - ('node_2', 'detection_output_1'), - ('node_3', 'detection_output_1'), - ('detection_output_1', 'node_4')], - {'node_1': {'shape': np.array([1, 34928])}, - 'node_2': {'shape': shape_array([dynamic_dimension_value, 183372])}, - 'node_3': {'shape': np.array([1, 2, 34928])}, - 'detection_output_1': {"background_label_id": "0", "clip": "1", - "code_type": "caffe.PriorBoxParameter.CENTER_SIZE", - "confidence_threshold": "0.01", "keep_top_k": -1, - "nms_threshold": "0.5", "num_classes": 21, - "share_location": "1", "top_k": -1, - "variance_encoded_in_target": "0"}, - 'node_4': {'shape': np.array([1, 1, 69856, 7])}, - }) - - multi_box_detection_node = Node(graph, 'detection_output_1') - - multi_box_detection_infer(multi_box_detection_node) - exp_shape = shape_array([1, 1, dynamic_dimension_value, 7]) - res_shape = graph.node['node_4']['shape'] - self.assertTrue(strict_compare_tensors(exp_shape, res_shape)) - - self.assertEqual(multi_box_detection_node.background_label_id, '0') - self.assertEqual(multi_box_detection_node.clip, '1') - self.assertEqual(multi_box_detection_node.code_type, 'caffe.PriorBoxParameter.CENTER_SIZE') - self.assertEqual(multi_box_detection_node.confidence_threshold, '0.01') - self.assertEqual(multi_box_detection_node.keep_top_k, 8732) - self.assertEqual(multi_box_detection_node.nms_threshold, '0.5') - self.assertEqual(multi_box_detection_node.num_classes, 21) - 
self.assertEqual(multi_box_detection_node.share_location, '1') - self.assertEqual(multi_box_detection_node.top_k, -1) - self.assertEqual(multi_box_detection_node.variance_encoded_in_target, '0') - - def test_do_infer_raise_error(self): - graph = build_graph(nodes_attributes, - [('node_1', 'detection_output_1'), - ('node_2', 'detection_output_1'), - ('node_3', 'detection_output_1'), - ('detection_output_1', 'node_4')], - {'node_1': {'shape': np.array([1, 34928])}, - 'node_2': {'shape': np.array([1, 183372])}, - 'node_3': {'shape': np.array([1, 3, 34928])}, - 'detection_output_1': {"background_label_id": "0", "clip": "1", - "code_type": "caffe.PriorBoxParameter.CENTER_SIZE", - "confidence_threshold": "0.01", "keep_top_k": -1, - "nms_threshold": "0.5", "num_classes": 21, - "share_location": "1", "top_k": -1, - "variance_encoded_in_target": 0}, - 'node_4': {'shape': np.array([1, 1, 69856, 7])}, - }) - - multi_box_detection_node = Node(graph, 'detection_output_1') - - with self.assertRaisesRegex(Error, 'The "-2" dimension of the prior boxes must be 2 but it is "3" for node*'): - multi_box_detection_infer(multi_box_detection_node) diff --git a/src/bindings/python/src/compatibility/ngraph/opset8/__init__.py b/src/bindings/python/src/compatibility/ngraph/opset8/__init__.py index 1bf5cd254e4..55b1a5a8777 100644 --- a/src/bindings/python/src/compatibility/ngraph/opset8/__init__.py +++ b/src/bindings/python/src/compatibility/ngraph/opset8/__init__.py @@ -38,7 +38,7 @@ from ngraph.opset3.ops import cum_sum as cumsum from ngraph.opset8.ops import deformable_convolution from ngraph.opset1.ops import deformable_psroi_pooling from ngraph.opset1.ops import depth_to_space -from ngraph.opset1.ops import detection_output +from ngraph.opset8.ops import detection_output from ngraph.opset7.ops import dft from ngraph.opset1.ops import divide from ngraph.opset7.ops import einsum diff --git a/src/bindings/python/src/compatibility/ngraph/opset8/ops.py 
b/src/bindings/python/src/compatibility/ngraph/opset8/ops.py index dc59a8e7a93..d0f32933d66 100644 --- a/src/bindings/python/src/compatibility/ngraph/opset8/ops.py +++ b/src/bindings/python/src/compatibility/ngraph/opset8/ops.py @@ -3,41 +3,28 @@ """Factory functions for all ngraph ops.""" from functools import partial -from typing import Callable, Iterable, List, Optional, Set, Union, Tuple +from typing import List, Optional, Tuple import numpy as np from ngraph.exceptions import UserInputError -from ngraph.impl import Node, Shape -from ngraph.impl.op import Constant, Parameter +from ngraph.impl import Node from ngraph.opset_utils import _get_node_factory -from ngraph.utils.decorators import binary_op, nameable_op, unary_op +from ngraph.utils.decorators import nameable_op from ngraph.utils.input_validation import ( - assert_list_of_ints, check_valid_attributes, is_non_negative_value, is_positive_value, ) -from ngraph.utils.node_factory import NodeFactory from ngraph.utils.tensor_iterator_types import ( GraphBody, - TensorIteratorSliceInputDesc, - TensorIteratorMergedInputDesc, TensorIteratorInvariantInputDesc, TensorIteratorBodyOutputDesc, - TensorIteratorConcatOutputDesc, ) from ngraph.utils.types import ( NodeInput, - NumericData, - NumericType, - ScalarData, TensorShape, as_node, as_nodes, - get_dtype, - get_element_type, - get_element_type_str, - make_constant_node, ) _get_node_factory_opset8 = partial(_get_node_factory, "opset8") @@ -141,18 +128,18 @@ def adaptive_max_pool( @nameable_op def multiclass_nms( - boxes: NodeInput, - scores: NodeInput, - sort_result_type: str = "none", - sort_result_across_batch: bool = False, - output_type: str = "i64", - iou_threshold: float = 0.0, - score_threshold: float = 0.0, - nms_top_k: int = -1, - keep_top_k: int = -1, - background_class: int = -1, - nms_eta: float = 1.0, - normalized: bool = True + boxes: NodeInput, + scores: NodeInput, + sort_result_type: str = "none", + sort_result_across_batch: bool = False, + 
output_type: str = "i64", + iou_threshold: float = 0.0, + score_threshold: float = 0.0, + nms_top_k: int = -1, + keep_top_k: int = -1, + background_class: int = -1, + nms_eta: float = 1.0, + normalized: bool = True ) -> Node: """Return a node which performs MulticlassNms. @@ -197,19 +184,19 @@ def multiclass_nms( @nameable_op def matrix_nms( - boxes: NodeInput, - scores: NodeInput, - sort_result_type: str = "none", - sort_result_across_batch: bool = False, - output_type: str = "i64", - score_threshold: float = 0.0, - nms_top_k: int = -1, - keep_top_k: int = -1, - background_class: int = -1, - decay_function: str = "linear", - gaussian_sigma: float = 2.0, - post_threshold: float = 0.0, - normalized: bool = True + boxes: NodeInput, + scores: NodeInput, + sort_result_type: str = "none", + sort_result_across_batch: bool = False, + output_type: str = "i64", + score_threshold: float = 0.0, + nms_top_k: int = -1, + keep_top_k: int = -1, + background_class: int = -1, + decay_function: str = "linear", + gaussian_sigma: float = 2.0, + post_threshold: float = 0.0, + normalized: bool = True ) -> Node: """Return a node which performs MatrixNms. @@ -281,17 +268,17 @@ def gather( @nameable_op def max_pool( - data: NodeInput, - strides: List[int], - dilations: List[int], - pads_begin: List[int], - pads_end: List[int], - kernel_shape: TensorShape, - rounding_type: str = "floor", - auto_pad: Optional[str] = None, - index_element_type: Optional[str] = "i64", - axis: Optional[int] = 0, - name: Optional[str] = None, + data: NodeInput, + strides: List[int], + dilations: List[int], + pads_begin: List[int], + pads_end: List[int], + kernel_shape: TensorShape, + rounding_type: str = "floor", + auto_pad: Optional[str] = None, + index_element_type: Optional[str] = "i64", + axis: Optional[int] = 0, + name: Optional[str] = None, ) -> Node: """Perform max pooling operation and return both values and indices of the selected elements. 
@@ -457,7 +444,7 @@ def gather_nd( def prior_box( - layer_shape: Node, image_shape: NodeInput, attrs: dict, name: Optional[str] = None + layer_shape: Node, image_shape: NodeInput, attrs: dict, name: Optional[str] = None ) -> Node: """Generate prior boxes of specified sizes and aspect ratios across all dimensions. @@ -655,3 +642,138 @@ def nv12_to_rgb( inputs = as_nodes(arg, arg_uv) return _get_node_factory_opset8().create("NV12toRGB", inputs) + + +@nameable_op +def detection_output( + box_logits: NodeInput, + class_preds: NodeInput, + proposals: NodeInput, + attrs: dict, + aux_class_preds: Optional[NodeInput] = None, + aux_box_preds: Optional[NodeInput] = None, + name: Optional[str] = None, +) -> Node: + """Generate the detection output using information on location and confidence predictions. + + @param box_logits: The 2D input tensor with box logits. + @param class_preds: The 2D input tensor with class predictions. + @param proposals: The 3D input tensor with proposals. + @param attrs: The dictionary containing key, value pairs for attributes. + @param aux_class_preds: The 2D input tensor with additional class predictions information. + @param aux_box_preds: The 2D input tensor with additional box predictions information. + @param name: Optional name for the output node. + @return Node representing DetectionOutput operation. + Available attributes are: + * background_label_id The background label id. + Range of values: integer value + Default value: 0 + Required: no + * top_k Maximum number of results to be kept per batch after NMS step. + Range of values: integer value + Default value: -1 + Required: no + * variance_encoded_in_target The flag that denotes if variance is encoded in target. + Range of values: {False, True} + Default value: False + Required: no + * keep_top_k Maximum number of bounding boxes per batch to be kept after NMS step. 
+ Range of values: integer values + Default value: None + Required: yes + * code_type The type of coding method for bounding boxes. + Range of values: {'caffe.PriorBoxParameter.CENTER_SIZE', + 'caffe.PriorBoxParameter.CORNER'} + Default value: 'caffe.PriorBoxParameter.CORNER' + Required: no + * share_location The flag that denotes if bounding boxes are shared among different + classes. + Range of values: {True, False} + Default value: True + Required: no + * nms_threshold The threshold to be used in the NMS stage. + Range of values: floating point value + Default value: None + Required: yes + * confidence_threshold Specifies the minimum confidence threshold for detection boxes to be + considered. + Range of values: floating point value + Default value: 0 + Required: no + * clip_after_nms The flag that denotes whether to perform clip bounding boxes after + non-maximum suppression or not. + Range of values: {True, False} + Default value: False + Required: no + * clip_before_nms The flag that denotes whether to perform clip bounding boxes before + non-maximum suppression or not. + Range of values: {True, False} + Default value: False + Required: no + * decrease_label_id The flag that denotes how to perform NMS. + Range of values: False - perform NMS like in Caffe*. + True - perform NMS like in MxNet*. + Default value: False + Required: no + * normalized The flag that denotes whether input tensors with boxes are normalized. + Range of values: {True, False} + Default value: False + Required: no + * input_height The input image height. + Range of values: positive integer number + Default value: 1 + Required: no + * input_width The input image width. + Range of values: positive integer number + Default value: 1 + Required: no + * objectness_score The threshold to sort out confidence predictions. 
+ Range of values: non-negative float number + Default value: 0 + Required: no + Example of attribute dictionary: + @code{.py} + # just required ones + attrs = { + 'keep_top_k': [1, 2, 3], + 'nms_threshold': 0.645, + } + attrs = { + 'keep_top_k': [1, 2, 3], + 'nms_threshold': 0.645, + 'normalized': True, + 'clip_before_nms': True, + 'input_height': [32], + 'input_width': [32], + } + @endcode + Optional attributes which are absent from dictionary will be set with corresponding default. + """ + requirements = [ + ("background_label_id", False, np.integer, None), + ("top_k", False, np.integer, None), + ("variance_encoded_in_target", False, np.bool_, None), + ("keep_top_k", True, np.integer, None), + ("code_type", False, np.str_, None), + ("share_location", False, np.bool_, None), + ("nms_threshold", True, np.floating, None), + ("confidence_threshold", False, np.floating, None), + ("clip_after_nms", False, np.bool_, None), + ("clip_before_nms", False, np.bool_, None), + ("decrease_label_id", False, np.bool_, None), + ("normalized", False, np.bool_, None), + ("input_height", False, np.integer, is_positive_value), + ("input_width", False, np.integer, is_positive_value), + ("objectness_score", False, np.floating, is_non_negative_value), + ] + + check_valid_attributes("DetectionOutput", attrs, requirements) + + inputs = [box_logits, class_preds, proposals] + if aux_class_preds is not None: + inputs.append(aux_class_preds) + if aux_box_preds is not None: + inputs.append(aux_box_preds) + inputs = as_nodes(*inputs) + + return _get_node_factory_opset8().create("DetectionOutput", inputs, attrs) diff --git a/src/bindings/python/src/openvino/runtime/opset8/__init__.py b/src/bindings/python/src/openvino/runtime/opset8/__init__.py index ead6eb48426..7b3352edcfc 100644 --- a/src/bindings/python/src/openvino/runtime/opset8/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset8/__init__.py @@ -38,7 +38,7 @@ from openvino.runtime.opset3.ops import cum_sum as cumsum from 
openvino.runtime.opset8.ops import deformable_convolution from openvino.runtime.opset1.ops import deformable_psroi_pooling from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset1.ops import detection_output +from openvino.runtime.opset8.ops import detection_output from openvino.runtime.opset7.ops import dft from openvino.runtime.opset1.ops import divide from openvino.runtime.opset7.ops import einsum diff --git a/src/bindings/python/src/openvino/runtime/opset8/ops.py b/src/bindings/python/src/openvino/runtime/opset8/ops.py index bd2fde2625f..fda7230a825 100644 --- a/src/bindings/python/src/openvino/runtime/opset8/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset8/ops.py @@ -3,41 +3,28 @@ """Factory functions for all openvino ops.""" from functools import partial -from typing import Callable, Iterable, List, Optional, Set, Union, Tuple +from typing import List, Optional, Tuple import numpy as np from openvino.runtime.exceptions import UserInputError -from openvino.runtime.impl import Node, Shape -from openvino.runtime.impl.op import Constant, Parameter +from openvino.runtime.impl import Node from openvino.runtime.opset_utils import _get_node_factory -from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op +from openvino.runtime.utils.decorators import nameable_op from openvino.runtime.utils.input_validation import ( - assert_list_of_ints, check_valid_attributes, is_non_negative_value, is_positive_value, ) -from openvino.runtime.utils.node_factory import NodeFactory from openvino.runtime.utils.tensor_iterator_types import ( GraphBody, - TensorIteratorSliceInputDesc, - TensorIteratorMergedInputDesc, TensorIteratorInvariantInputDesc, TensorIteratorBodyOutputDesc, - TensorIteratorConcatOutputDesc, ) from openvino.runtime.utils.types import ( NodeInput, - NumericData, - NumericType, - ScalarData, TensorShape, as_node, as_nodes, - get_dtype, - get_element_type, - get_element_type_str, - make_constant_node, ) 
_get_node_factory_opset8 = partial(_get_node_factory, "opset8") @@ -141,18 +128,18 @@ def adaptive_max_pool( @nameable_op def multiclass_nms( - boxes: NodeInput, - scores: NodeInput, - sort_result_type: str = "none", - sort_result_across_batch: bool = False, - output_type: str = "i64", - iou_threshold: float = 0.0, - score_threshold: float = 0.0, - nms_top_k: int = -1, - keep_top_k: int = -1, - background_class: int = -1, - nms_eta: float = 1.0, - normalized: bool = True + boxes: NodeInput, + scores: NodeInput, + sort_result_type: str = "none", + sort_result_across_batch: bool = False, + output_type: str = "i64", + iou_threshold: float = 0.0, + score_threshold: float = 0.0, + nms_top_k: int = -1, + keep_top_k: int = -1, + background_class: int = -1, + nms_eta: float = 1.0, + normalized: bool = True ) -> Node: """Return a node which performs MulticlassNms. @@ -197,19 +184,19 @@ def multiclass_nms( @nameable_op def matrix_nms( - boxes: NodeInput, - scores: NodeInput, - sort_result_type: str = "none", - sort_result_across_batch: bool = False, - output_type: str = "i64", - score_threshold: float = 0.0, - nms_top_k: int = -1, - keep_top_k: int = -1, - background_class: int = -1, - decay_function: str = "linear", - gaussian_sigma: float = 2.0, - post_threshold: float = 0.0, - normalized: bool = True + boxes: NodeInput, + scores: NodeInput, + sort_result_type: str = "none", + sort_result_across_batch: bool = False, + output_type: str = "i64", + score_threshold: float = 0.0, + nms_top_k: int = -1, + keep_top_k: int = -1, + background_class: int = -1, + decay_function: str = "linear", + gaussian_sigma: float = 2.0, + post_threshold: float = 0.0, + normalized: bool = True ) -> Node: """Return a node which performs MatrixNms. 
@@ -281,17 +268,17 @@ def gather( @nameable_op def max_pool( - data: NodeInput, - strides: List[int], - dilations: List[int], - pads_begin: List[int], - pads_end: List[int], - kernel_shape: TensorShape, - rounding_type: str = "floor", - auto_pad: Optional[str] = None, - index_element_type: Optional[str] = "i64", - axis: Optional[int] = 0, - name: Optional[str] = None, + data: NodeInput, + strides: List[int], + dilations: List[int], + pads_begin: List[int], + pads_end: List[int], + kernel_shape: TensorShape, + rounding_type: str = "floor", + auto_pad: Optional[str] = None, + index_element_type: Optional[str] = "i64", + axis: Optional[int] = 0, + name: Optional[str] = None, ) -> Node: """Perform max pooling operation and return both values and indices of the selected elements. @@ -458,7 +445,7 @@ def gather_nd( @nameable_op def prior_box( - layer_shape: Node, image_shape: NodeInput, attrs: dict, name: Optional[str] = None + layer_shape: Node, image_shape: NodeInput, attrs: dict, name: Optional[str] = None ) -> Node: """Generate prior boxes of specified sizes and aspect ratios across all dimensions. @@ -656,3 +643,138 @@ def nv12_to_rgb( inputs = as_nodes(arg, arg_uv) return _get_node_factory_opset8().create("NV12toRGB", inputs) + + +@nameable_op +def detection_output( + box_logits: NodeInput, + class_preds: NodeInput, + proposals: NodeInput, + attrs: dict, + aux_class_preds: Optional[NodeInput] = None, + aux_box_preds: Optional[NodeInput] = None, + name: Optional[str] = None, +) -> Node: + """Generate the detection output using information on location and confidence predictions. + + @param box_logits: The 2D input tensor with box logits. + @param class_preds: The 2D input tensor with class predictions. + @param proposals: The 3D input tensor with proposals. + @param attrs: The dictionary containing key, value pairs for attributes. + @param aux_class_preds: The 2D input tensor with additional class predictions information. 
+ @param aux_box_preds: The 2D input tensor with additional box predictions information. + @param name: Optional name for the output node. + @return Node representing DetectionOutput operation. + Available attributes are: + * background_label_id The background label id. + Range of values: integer value + Default value: 0 + Required: no + * top_k Maximum number of results to be kept per batch after NMS step. + Range of values: integer value + Default value: -1 + Required: no + * variance_encoded_in_target The flag that denotes if variance is encoded in target. + Range of values: {False, True} + Default value: False + Required: no + * keep_top_k Maximum number of bounding boxes per batch to be kept after NMS step. + Range of values: integer values + Default value: None + Required: yes + * code_type The type of coding method for bounding boxes. + Range of values: {'caffe.PriorBoxParameter.CENTER_SIZE', + 'caffe.PriorBoxParameter.CORNER'} + Default value: 'caffe.PriorBoxParameter.CORNER' + Required: no + * share_location The flag that denotes if bounding boxes are shared among different + classes. + Range of values: {True, False} + Default value: True + Required: no + * nms_threshold The threshold to be used in the NMS stage. + Range of values: floating point value + Default value: None + Required: yes + * confidence_threshold Specifies the minimum confidence threshold for detection boxes to be + considered. + Range of values: floating point value + Default value: 0 + Required: no + * clip_after_nms The flag that denotes whether to perform clip bounding boxes after + non-maximum suppression or not. + Range of values: {True, False} + Default value: False + Required: no + * clip_before_nms The flag that denotes whether to perform clip bounding boxes before + non-maximum suppression or not. + Range of values: {True, False} + Default value: False + Required: no + * decrease_label_id The flag that denotes how to perform NMS. 
+ Range of values: False - perform NMS like in Caffe*. + True - perform NMS like in MxNet*. + Default value: False + Required: no + * normalized The flag that denotes whether input tensors with boxes are normalized. + Range of values: {True, False} + Default value: False + Required: no + * input_height The input image height. + Range of values: positive integer number + Default value: 1 + Required: no + * input_width The input image width. + Range of values: positive integer number + Default value: 1 + Required: no + * objectness_score The threshold to sort out confidence predictions. + Range of values: non-negative float number + Default value: 0 + Required: no + Example of attribute dictionary: + @code{.py} + # just required ones + attrs = { + 'keep_top_k': [1, 2, 3], + 'nms_threshold': 0.645, + } + attrs = { + 'keep_top_k': [1, 2, 3], + 'nms_threshold': 0.645, + 'normalized': True, + 'clip_before_nms': True, + 'input_height': [32], + 'input_width': [32], + } + @endcode + Optional attributes which are absent from dictionary will be set with corresponding default. 
+ """ + requirements = [ + ("background_label_id", False, np.integer, None), + ("top_k", False, np.integer, None), + ("variance_encoded_in_target", False, np.bool_, None), + ("keep_top_k", True, np.integer, None), + ("code_type", False, np.str_, None), + ("share_location", False, np.bool_, None), + ("nms_threshold", True, np.floating, None), + ("confidence_threshold", False, np.floating, None), + ("clip_after_nms", False, np.bool_, None), + ("clip_before_nms", False, np.bool_, None), + ("decrease_label_id", False, np.bool_, None), + ("normalized", False, np.bool_, None), + ("input_height", False, np.integer, is_positive_value), + ("input_width", False, np.integer, is_positive_value), + ("objectness_score", False, np.floating, is_non_negative_value), + ] + + check_valid_attributes("DetectionOutput", attrs, requirements) + + inputs = [box_logits, class_preds, proposals] + if aux_class_preds is not None: + inputs.append(aux_class_preds) + if aux_box_preds is not None: + inputs.append(aux_box_preds) + inputs = as_nodes(*inputs) + + return _get_node_factory_opset8().create("DetectionOutput", inputs, attrs) diff --git a/src/bindings/python/tests/test_ngraph/test_create_op.py b/src/bindings/python/tests/test_ngraph/test_create_op.py index 77d535573a0..35db15f640e 100644 --- a/src/bindings/python/tests/test_ngraph/test_create_op.py +++ b/src/bindings/python/tests/test_ngraph/test_create_op.py @@ -110,7 +110,7 @@ def test_ctc_greedy_decoder(dtype): (np.float64, np.int64, "i64", "i32", False, False), (np.float64, np.int64, "i32", "i64", False, False), (np.float64, np.int64, "i64", "i64", False, False) - ],) + ], ) def test_ctc_greedy_decoder_seq_len(fp_dtype, int_dtype, int_ci, int_sl, merge_repeated, blank_index): input0_shape = [8, 20, 128] input1_shape = [8] @@ -1093,41 +1093,6 @@ def test_prior_box_clustered(int_dtype, fp_dtype): assert list(node.get_output_shape(0)) == [2, 4332] -@pytest.mark.parametrize( - "int_dtype, fp_dtype", - [ - (np.int8, np.float32), - 
(np.int16, np.float32), - (np.int32, np.float32), - (np.int64, np.float32), - (np.uint8, np.float32), - (np.uint16, np.float32), - (np.uint32, np.float32), - (np.uint64, np.float32), - (np.int32, np.float16), - (np.int32, np.float64), - ], -) -def test_detection_output(int_dtype, fp_dtype): - attributes = { - "num_classes": int_dtype(85), - "keep_top_k": np.array([64], dtype=int_dtype), - "nms_threshold": fp_dtype(0.645), - } - - box_logits = ov.parameter([4, 8], fp_dtype, "box_logits") - class_preds = ov.parameter([4, 170], fp_dtype, "class_preds") - proposals = ov.parameter([4, 2, 10], fp_dtype, "proposals") - aux_class_preds = ov.parameter([4, 4], fp_dtype, "aux_class_preds") - aux_box_preds = ov.parameter([4, 8], fp_dtype, "aux_box_preds") - - node = ov.detection_output(box_logits, class_preds, proposals, attributes, aux_class_preds, aux_box_preds) - - assert node.get_type_name() == "DetectionOutput" - assert node.get_output_size() == 1 - assert list(node.get_output_shape(0)) == [1, 1, 256, 7] - - @pytest.mark.parametrize( "int_dtype, fp_dtype", [ diff --git a/src/bindings/python/tests/test_ngraph/test_detection_output.py b/src/bindings/python/tests/test_ngraph/test_detection_output.py new file mode 100644 index 00000000000..5534b2ea6f7 --- /dev/null +++ b/src/bindings/python/tests/test_ngraph/test_detection_output.py @@ -0,0 +1,111 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import openvino.runtime.opset8 as ov +import pytest + +np_types = [np.float32, np.int32] +integral_np_types = [ + np.int8, + np.int16, + np.int32, + np.int64, + np.uint8, + np.uint16, + np.uint32, + np.uint64, +] + + +@pytest.mark.parametrize( + "int_dtype, fp_dtype", + [ + (np.int8, np.float32), + (np.int16, np.float32), + (np.int32, np.float32), + (np.int64, np.float32), + (np.uint8, np.float32), + (np.uint16, np.float32), + (np.uint32, np.float32), + (np.uint64, np.float32), + (np.int32, np.float16), + (np.int32, 
np.float64), + ], +) +def test_detection_output(int_dtype, fp_dtype): + attributes = { + "keep_top_k": np.array([64], dtype=int_dtype), + "nms_threshold": fp_dtype(0.645), + } + + box_logits = ov.parameter([4, 8], fp_dtype, "box_logits") + class_preds = ov.parameter([4, 170], fp_dtype, "class_preds") + proposals = ov.parameter([4, 2, 10], fp_dtype, "proposals") + aux_class_preds = ov.parameter([4, 4], fp_dtype, "aux_class_preds") + aux_box_preds = ov.parameter([4, 8], fp_dtype, "aux_box_preds") + + node = ov.detection_output(box_logits, class_preds, proposals, attributes, aux_class_preds, aux_box_preds) + + assert node.get_type_name() == "DetectionOutput" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == [1, 1, 256, 7] + + +@pytest.mark.parametrize( + "int_dtype, fp_dtype", + [ + (np.int8, np.float32), + (np.int16, np.float32), + (np.int32, np.float32), + (np.int64, np.float32), + (np.uint8, np.float32), + (np.uint16, np.float32), + (np.uint32, np.float32), + (np.uint64, np.float32), + (np.int32, np.float16), + (np.int32, np.float64), + ], +) +def test_dynamic_get_attribute_value(int_dtype, fp_dtype): + attributes = { + "background_label_id": int_dtype(13), + "top_k": int_dtype(16), + "variance_encoded_in_target": True, + "keep_top_k": np.array([64, 32, 16, 8], dtype=int_dtype), + "code_type": "caffe.PriorBoxParameter.CENTER_SIZE", + "share_location": False, + "nms_threshold": fp_dtype(0.645), + "confidence_threshold": fp_dtype(0.111), + "clip_after_nms": True, + "clip_before_nms": False, + "decrease_label_id": True, + "normalized": True, + "input_height": int_dtype(86), + "input_width": int_dtype(79), + "objectness_score": fp_dtype(0.77), + } + + box_logits = ov.parameter([4, 680], fp_dtype, "box_logits") + class_preds = ov.parameter([4, 170], fp_dtype, "class_preds") + proposals = ov.parameter([4, 1, 8], fp_dtype, "proposals") + aux_class_preds = ov.parameter([4, 4], fp_dtype, "aux_class_preds") + aux_box_preds = ov.parameter([4, 
680], fp_dtype, "aux_box_preds") + + node = ov.detection_output(box_logits, class_preds, proposals, attributes, aux_class_preds, aux_box_preds) + + assert node.get_background_label_id() == int_dtype(13) + assert node.get_top_k() == int_dtype(16) + assert node.get_variance_encoded_in_target() + assert np.all(np.equal(node.get_keep_top_k(), np.array([64, 32, 16, 8], dtype=int_dtype))) + assert node.get_code_type() == "caffe.PriorBoxParameter.CENTER_SIZE" + assert not node.get_share_location() + assert np.isclose(node.get_nms_threshold(), fp_dtype(0.645)) + assert np.isclose(node.get_confidence_threshold(), fp_dtype(0.111)) + assert node.get_clip_after_nms() + assert not node.get_clip_before_nms() + assert node.get_decrease_label_id() + assert node.get_normalized() + assert node.get_input_height() == int_dtype(86) + assert node.get_input_width() == int_dtype(79) + assert np.isclose(node.get_objectness_score(), fp_dtype(0.77)) diff --git a/src/bindings/python/tests/test_ngraph/test_dyn_attributes.py b/src/bindings/python/tests/test_ngraph/test_dyn_attributes.py index 3d692f3b140..3a41281500c 100644 --- a/src/bindings/python/tests/test_ngraph/test_dyn_attributes.py +++ b/src/bindings/python/tests/test_ngraph/test_dyn_attributes.py @@ -2,9 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 import numpy as np -import pytest - import openvino.runtime.opset8 as ov +import pytest @pytest.fixture() @@ -37,68 +36,6 @@ def test_dynamic_attributes_softmax(): assert node.get_axis() == 3 -@pytest.mark.parametrize( - "int_dtype, fp_dtype", - [ - (np.int8, np.float32), - (np.int16, np.float32), - (np.int32, np.float32), - (np.int64, np.float32), - (np.uint8, np.float32), - (np.uint16, np.float32), - (np.uint32, np.float32), - (np.uint64, np.float32), - (np.int32, np.float16), - (np.int32, np.float64), - ], -) -def test_dynamic_get_attribute_value(int_dtype, fp_dtype): - attributes = { - "num_classes": int_dtype(85), - "background_label_id": int_dtype(13), - "top_k": int_dtype(16), - 
"variance_encoded_in_target": True, - "keep_top_k": np.array([64, 32, 16, 8], dtype=int_dtype), - "code_type": "caffe.PriorBoxParameter.CENTER_SIZE", - "share_location": False, - "nms_threshold": fp_dtype(0.645), - "confidence_threshold": fp_dtype(0.111), - "clip_after_nms": True, - "clip_before_nms": False, - "decrease_label_id": True, - "normalized": True, - "input_height": int_dtype(86), - "input_width": int_dtype(79), - "objectness_score": fp_dtype(0.77), - } - - box_logits = ov.parameter([4, 680], fp_dtype, "box_logits") - class_preds = ov.parameter([4, 170], fp_dtype, "class_preds") - proposals = ov.parameter([4, 1, 8], fp_dtype, "proposals") - aux_class_preds = ov.parameter([4, 4], fp_dtype, "aux_class_preds") - aux_box_preds = ov.parameter([4, 680], fp_dtype, "aux_box_preds") - - node = ov.detection_output(box_logits, class_preds, proposals, attributes, aux_class_preds, aux_box_preds) - - assert node.get_num_classes() == int_dtype(85) - assert node.get_background_label_id() == int_dtype(13) - assert node.get_top_k() == int_dtype(16) - assert node.get_variance_encoded_in_target() - assert np.all(np.equal(node.get_keep_top_k(), np.array([64, 32, 16, 8], dtype=int_dtype))) - assert node.get_code_type() == "caffe.PriorBoxParameter.CENTER_SIZE" - assert not node.get_share_location() - assert np.isclose(node.get_nms_threshold(), fp_dtype(0.645)) - assert np.isclose(node.get_confidence_threshold(), fp_dtype(0.111)) - assert node.get_clip_after_nms() - assert not node.get_clip_before_nms() - assert node.get_decrease_label_id() - assert node.get_normalized() - assert node.get_input_height() == int_dtype(86) - assert node.get_input_width() == int_dtype(79) - assert np.isclose(node.get_objectness_score(), fp_dtype(0.77)) - assert node.get_num_classes() == int_dtype(85) - - @pytest.mark.parametrize( "int_dtype, fp_dtype", [ diff --git a/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py 
b/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py index a1d1af7207e..ceeb71c126d 100644 --- a/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py @@ -110,7 +110,7 @@ def test_ctc_greedy_decoder(dtype): (np.float64, np.int64, "i64", "i32", False, False), (np.float64, np.int64, "i32", "i64", False, False), (np.float64, np.int64, "i64", "i64", False, False) - ],) + ], ) def test_ctc_greedy_decoder_seq_len(fp_dtype, int_dtype, int_ci, int_sl, merge_repeated, blank_index): input0_shape = [8, 20, 128] input1_shape = [8] @@ -1093,41 +1093,6 @@ def test_prior_box_clustered(int_dtype, fp_dtype): assert list(node.get_output_shape(0)) == [2, 4332] -@pytest.mark.parametrize( - "int_dtype, fp_dtype", - [ - (np.int8, np.float32), - (np.int16, np.float32), - (np.int32, np.float32), - (np.int64, np.float32), - (np.uint8, np.float32), - (np.uint16, np.float32), - (np.uint32, np.float32), - (np.uint64, np.float32), - (np.int32, np.float16), - (np.int32, np.float64), - ], -) -def test_detection_output(int_dtype, fp_dtype): - attributes = { - "num_classes": int_dtype(85), - "keep_top_k": np.array([64], dtype=int_dtype), - "nms_threshold": fp_dtype(0.645), - } - - box_logits = ng.parameter([4, 8], fp_dtype, "box_logits") - class_preds = ng.parameter([4, 170], fp_dtype, "class_preds") - proposals = ng.parameter([4, 2, 10], fp_dtype, "proposals") - aux_class_preds = ng.parameter([4, 4], fp_dtype, "aux_class_preds") - aux_box_preds = ng.parameter([4, 8], fp_dtype, "aux_box_preds") - - node = ng.detection_output(box_logits, class_preds, proposals, attributes, aux_class_preds, aux_box_preds) - - assert node.get_type_name() == "DetectionOutput" - assert node.get_output_size() == 1 - assert list(node.get_output_shape(0)) == [1, 1, 256, 7] - - @pytest.mark.parametrize( "int_dtype, fp_dtype", [ diff --git 
a/src/bindings/python/tests_compatibility/test_ngraph/test_detection_output.py b/src/bindings/python/tests_compatibility/test_ngraph/test_detection_output.py new file mode 100644 index 00000000000..83082d33d0c --- /dev/null +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_detection_output.py @@ -0,0 +1,111 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import ngraph as ng +import numpy as np +import pytest + +np_types = [np.float32, np.int32] +integral_np_types = [ + np.int8, + np.int16, + np.int32, + np.int64, + np.uint8, + np.uint16, + np.uint32, + np.uint64, +] + + +@pytest.mark.parametrize( + "int_dtype, fp_dtype", + [ + (np.int8, np.float32), + (np.int16, np.float32), + (np.int32, np.float32), + (np.int64, np.float32), + (np.uint8, np.float32), + (np.uint16, np.float32), + (np.uint32, np.float32), + (np.uint64, np.float32), + (np.int32, np.float16), + (np.int32, np.float64), + ], +) +def test_detection_output(int_dtype, fp_dtype): + attributes = { + "keep_top_k": np.array([64], dtype=int_dtype), + "nms_threshold": fp_dtype(0.645), + } + + box_logits = ng.parameter([4, 8], fp_dtype, "box_logits") + class_preds = ng.parameter([4, 170], fp_dtype, "class_preds") + proposals = ng.parameter([4, 2, 10], fp_dtype, "proposals") + aux_class_preds = ng.parameter([4, 4], fp_dtype, "aux_class_preds") + aux_box_preds = ng.parameter([4, 8], fp_dtype, "aux_box_preds") + + node = ng.detection_output(box_logits, class_preds, proposals, attributes, aux_class_preds, aux_box_preds) + + assert node.get_type_name() == "DetectionOutput" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == [1, 1, 256, 7] + + +@pytest.mark.parametrize( + "int_dtype, fp_dtype", + [ + (np.int8, np.float32), + (np.int16, np.float32), + (np.int32, np.float32), + (np.int64, np.float32), + (np.uint8, np.float32), + (np.uint16, np.float32), + (np.uint32, np.float32), + (np.uint64, np.float32), + (np.int32, np.float16), + 
(np.int32, np.float64), + ], +) +def test_dynamic_get_attribute_value(int_dtype, fp_dtype): + attributes = { + "background_label_id": int_dtype(13), + "top_k": int_dtype(16), + "variance_encoded_in_target": True, + "keep_top_k": np.array([64, 32, 16, 8], dtype=int_dtype), + "code_type": "caffe.PriorBoxParameter.CENTER_SIZE", + "share_location": False, + "nms_threshold": fp_dtype(0.645), + "confidence_threshold": fp_dtype(0.111), + "clip_after_nms": True, + "clip_before_nms": False, + "decrease_label_id": True, + "normalized": True, + "input_height": int_dtype(86), + "input_width": int_dtype(79), + "objectness_score": fp_dtype(0.77), + } + + box_logits = ng.parameter([4, 680], fp_dtype, "box_logits") + class_preds = ng.parameter([4, 170], fp_dtype, "class_preds") + proposals = ng.parameter([4, 1, 8], fp_dtype, "proposals") + aux_class_preds = ng.parameter([4, 4], fp_dtype, "aux_class_preds") + aux_box_preds = ng.parameter([4, 680], fp_dtype, "aux_box_preds") + + node = ng.detection_output(box_logits, class_preds, proposals, attributes, aux_class_preds, aux_box_preds) + + assert node.get_background_label_id() == int_dtype(13) + assert node.get_top_k() == int_dtype(16) + assert node.get_variance_encoded_in_target() + assert np.all(np.equal(node.get_keep_top_k(), np.array([64, 32, 16, 8], dtype=int_dtype))) + assert node.get_code_type() == "caffe.PriorBoxParameter.CENTER_SIZE" + assert not node.get_share_location() + assert np.isclose(node.get_nms_threshold(), fp_dtype(0.645)) + assert np.isclose(node.get_confidence_threshold(), fp_dtype(0.111)) + assert node.get_clip_after_nms() + assert not node.get_clip_before_nms() + assert node.get_decrease_label_id() + assert node.get_normalized() + assert node.get_input_height() == int_dtype(86) + assert node.get_input_width() == int_dtype(79) + assert np.isclose(node.get_objectness_score(), fp_dtype(0.77)) diff --git a/src/bindings/python/tests_compatibility/test_ngraph/test_dyn_attributes.py 
b/src/bindings/python/tests_compatibility/test_ngraph/test_dyn_attributes.py index a945ec91ef0..34adade060e 100644 --- a/src/bindings/python/tests_compatibility/test_ngraph/test_dyn_attributes.py +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_dyn_attributes.py @@ -1,11 +1,10 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import ngraph as ng import numpy as np import pytest -import ngraph as ng - @pytest.fixture() def _proposal_node(): @@ -37,68 +36,6 @@ def test_dynamic_attributes_softmax(): assert node.get_axis() == 3 -@pytest.mark.parametrize( - "int_dtype, fp_dtype", - [ - (np.int8, np.float32), - (np.int16, np.float32), - (np.int32, np.float32), - (np.int64, np.float32), - (np.uint8, np.float32), - (np.uint16, np.float32), - (np.uint32, np.float32), - (np.uint64, np.float32), - (np.int32, np.float16), - (np.int32, np.float64), - ], -) -def test_dynamic_get_attribute_value(int_dtype, fp_dtype): - attributes = { - "num_classes": int_dtype(85), - "background_label_id": int_dtype(13), - "top_k": int_dtype(16), - "variance_encoded_in_target": True, - "keep_top_k": np.array([64, 32, 16, 8], dtype=int_dtype), - "code_type": "caffe.PriorBoxParameter.CENTER_SIZE", - "share_location": False, - "nms_threshold": fp_dtype(0.645), - "confidence_threshold": fp_dtype(0.111), - "clip_after_nms": True, - "clip_before_nms": False, - "decrease_label_id": True, - "normalized": True, - "input_height": int_dtype(86), - "input_width": int_dtype(79), - "objectness_score": fp_dtype(0.77), - } - - box_logits = ng.parameter([4, 680], fp_dtype, "box_logits") - class_preds = ng.parameter([4, 170], fp_dtype, "class_preds") - proposals = ng.parameter([4, 1, 8], fp_dtype, "proposals") - aux_class_preds = ng.parameter([4, 4], fp_dtype, "aux_class_preds") - aux_box_preds = ng.parameter([4, 680], fp_dtype, "aux_box_preds") - - node = ng.detection_output(box_logits, class_preds, proposals, attributes, aux_class_preds, aux_box_preds) - - assert 
node.get_num_classes() == int_dtype(85) - assert node.get_background_label_id() == int_dtype(13) - assert node.get_top_k() == int_dtype(16) - assert node.get_variance_encoded_in_target() - assert np.all(np.equal(node.get_keep_top_k(), np.array([64, 32, 16, 8], dtype=int_dtype))) - assert node.get_code_type() == "caffe.PriorBoxParameter.CENTER_SIZE" - assert not node.get_share_location() - assert np.isclose(node.get_nms_threshold(), fp_dtype(0.645)) - assert np.isclose(node.get_confidence_threshold(), fp_dtype(0.111)) - assert node.get_clip_after_nms() - assert not node.get_clip_before_nms() - assert node.get_decrease_label_id() - assert node.get_normalized() - assert node.get_input_height() == int_dtype(86) - assert node.get_input_width() == int_dtype(79) - assert np.isclose(node.get_objectness_score(), fp_dtype(0.77)) - assert node.get_num_classes() == int_dtype(85) - - @pytest.mark.parametrize( "int_dtype, fp_dtype", [ diff --git a/src/common/transformations/include/transformations/op_conversions/detection_output_downgrade.hpp b/src/common/transformations/include/transformations/op_conversions/detection_output_downgrade.hpp new file mode 100644 index 00000000000..2205bbef275 --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/detection_output_downgrade.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API ConvertDetectionOutput8ToDetectionOutput1; + +} // namespace pass +} // namespace ngraph + +/** + * @ingroup ie_transformation_common_api + * @brief ConvertDetectionOutput8ToDetectionOutput1 converts v8::DetectionOutput + * into v0::DetectionOutput. 
+ */ +class ngraph::pass::ConvertDetectionOutput8ToDetectionOutput1 : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertDetectionOutput8ToDetectionOutput1(); +}; diff --git a/src/common/transformations/include/transformations/op_conversions/detection_output_upgrade.hpp b/src/common/transformations/include/transformations/op_conversions/detection_output_upgrade.hpp new file mode 100644 index 00000000000..68041d7d8fa --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/detection_output_upgrade.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API ConvertDetectionOutput1ToDetectionOutput8; + +} // namespace pass +} // namespace ngraph + +/** + * @ingroup ie_transformation_common_api + * @brief ConvertDetectionOutput1ToDetectionOutput8 converts v0::DetectionOutput + * into v8::DetectionOutput. 
+ */ +class ngraph::pass::ConvertDetectionOutput1ToDetectionOutput8 : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + ConvertDetectionOutput1ToDetectionOutput8(); +}; diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index ed32b6a1ae5..da8a676f5bd 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -67,6 +67,7 @@ #include "transformations/op_conversions/convert_broadcast_to_tiles.hpp" #include "transformations/op_conversions/convert_gelu.hpp" #include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp" +#include "transformations/op_conversions/detection_output_downgrade.hpp" #include "transformations/op_conversions/batch_norm_decomposition.hpp" #include "transformations/op_conversions/einsum_decomposition.hpp" #include "transformations/op_conversions/gelu7_downgrade.hpp" @@ -176,6 +177,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr(); manager.register_pass(); manager.register_pass(); // not plugins implemented priorbox8 + manager.register_pass(); auto fq_fusions = manager.register_pass(); fq_fusions->add_matcher(); diff --git a/src/common/transformations/src/transformations/op_conversions/detection_output_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/detection_output_downgrade.cpp new file mode 100644 index 00000000000..1bb2f6734c1 --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/detection_output_downgrade.cpp @@ -0,0 +1,80 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/detection_output_downgrade.hpp" + +#include +#include +#include 
+#include +#include + +#include "itt.hpp" + +using namespace std; +using namespace ngraph; +using namespace ngraph::op::util; + +NGRAPH_RTTI_DEFINITION(pass::ConvertDetectionOutput8ToDetectionOutput1, "ConvertDetectionOutput8ToDetectionOutput1", 0); + +pass::ConvertDetectionOutput8ToDetectionOutput1::ConvertDetectionOutput8ToDetectionOutput1() { + MATCHER_SCOPE(ConvertDetectionOutput8ToDetectionOutput1); + + auto detection_output_v8_pattern = pattern::wrap_type(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto detection_output_v8_node = std::dynamic_pointer_cast(m.get_match_root()); + if (!detection_output_v8_node) + return false; + const auto& attributes_v8 = detection_output_v8_node->get_attrs(); + auto num_classes = detection_output_v8_node->compute_num_classes(attributes_v8); + + // the transformation is applicable only if the number of classes is deduced + if (num_classes.is_dynamic()) + return false; + + opset1::DetectionOutput::Attributes attributes_v1; + attributes_v1.background_label_id = attributes_v8.background_label_id; + attributes_v1.clip_after_nms = attributes_v8.clip_after_nms; + attributes_v1.clip_before_nms = attributes_v8.clip_before_nms; + attributes_v1.code_type = attributes_v8.code_type; + attributes_v1.confidence_threshold = attributes_v8.confidence_threshold; + attributes_v1.decrease_label_id = attributes_v8.decrease_label_id; + attributes_v1.input_height = attributes_v8.input_height; + attributes_v1.input_width = attributes_v8.input_width; + attributes_v1.keep_top_k = attributes_v8.keep_top_k; + attributes_v1.nms_threshold = attributes_v8.nms_threshold; + attributes_v1.normalized = attributes_v8.normalized; + attributes_v1.num_classes = num_classes.get_length(); + attributes_v1.objectness_score = attributes_v8.objectness_score; + attributes_v1.share_location = attributes_v8.share_location; + attributes_v1.top_k = attributes_v8.top_k; + attributes_v1.variance_encoded_in_target = 
attributes_v8.variance_encoded_in_target; + + std::shared_ptr detection_output_v1_node = nullptr; + if (detection_output_v8_node->get_input_size() == 3) { + detection_output_v1_node = make_shared(detection_output_v8_node->input_value(0), + detection_output_v8_node->input_value(1), + detection_output_v8_node->input_value(2), + attributes_v1); + } else if (detection_output_v8_node->get_input_size() == 5) { + detection_output_v1_node = make_shared(detection_output_v8_node->input_value(0), + detection_output_v8_node->input_value(1), + detection_output_v8_node->input_value(2), + detection_output_v8_node->input_value(3), + detection_output_v8_node->input_value(4), + attributes_v1); + } + if (!detection_output_v1_node) + return false; + + detection_output_v1_node->set_friendly_name(detection_output_v8_node->get_friendly_name()); + ngraph::copy_runtime_info(detection_output_v8_node, detection_output_v1_node); + ngraph::replace_node(detection_output_v8_node, detection_output_v1_node); + return true; + }; + + auto m = make_shared(detection_output_v8_pattern, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/op_conversions/detection_output_upgrade.cpp b/src/common/transformations/src/transformations/op_conversions/detection_output_upgrade.cpp new file mode 100644 index 00000000000..d254dc8d4a6 --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/detection_output_upgrade.cpp @@ -0,0 +1,76 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/detection_output_upgrade.hpp" + +#include +#include +#include +#include +#include + +#include "itt.hpp" + +using namespace std; +using namespace ngraph; +using namespace ngraph::op::util; + +NGRAPH_RTTI_DEFINITION(pass::ConvertDetectionOutput1ToDetectionOutput8, "ConvertDetectionOutput1ToDetectionOutput8", 0); + 
+pass::ConvertDetectionOutput1ToDetectionOutput8::ConvertDetectionOutput1ToDetectionOutput8() { + MATCHER_SCOPE(ConvertDetectionOutput1ToDetectionOutput8); + + auto detection_output_v1_pattern = pattern::wrap_type(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto detection_output_v1_node = std::dynamic_pointer_cast(m.get_match_root()); + if (!detection_output_v1_node) + return false; + + const auto& attributes_v1 = detection_output_v1_node->get_attrs(); + opset8::DetectionOutput::Attributes attributes_v8; + attributes_v8.background_label_id = attributes_v1.background_label_id; + attributes_v8.clip_after_nms = attributes_v1.clip_after_nms; + attributes_v8.clip_before_nms = attributes_v1.clip_before_nms; + attributes_v8.code_type = attributes_v1.code_type; + attributes_v8.confidence_threshold = attributes_v1.confidence_threshold; + attributes_v8.decrease_label_id = attributes_v1.decrease_label_id; + attributes_v8.input_height = attributes_v1.input_height; + attributes_v8.input_width = attributes_v1.input_width; + attributes_v8.keep_top_k = attributes_v1.keep_top_k; + attributes_v8.nms_threshold = attributes_v1.nms_threshold; + attributes_v8.normalized = attributes_v1.normalized; + attributes_v8.objectness_score = attributes_v1.objectness_score; + attributes_v8.share_location = attributes_v1.share_location; + attributes_v8.top_k = attributes_v1.top_k; + attributes_v8.variance_encoded_in_target = attributes_v1.variance_encoded_in_target; + + std::shared_ptr detection_output_v8_node = nullptr; + if (detection_output_v1_node->get_input_size() == 3) { + detection_output_v8_node = + make_shared(detection_output_v1_node->input_value(0), + detection_output_v1_node->input_value(1), + detection_output_v1_node->input_value(2), + attributes_v8); + } else if (detection_output_v1_node->get_input_size() == 5) { + detection_output_v8_node = + make_shared(detection_output_v1_node->input_value(0), + detection_output_v1_node->input_value(1), + 
detection_output_v1_node->input_value(2), + detection_output_v1_node->input_value(3), + detection_output_v1_node->input_value(4), + attributes_v8); + } + if (!detection_output_v8_node) + return false; + + detection_output_v8_node->set_friendly_name(detection_output_v1_node->get_friendly_name()); + ngraph::copy_runtime_info(detection_output_v1_node, detection_output_v8_node); + ngraph::replace_node(detection_output_v1_node, detection_output_v8_node); + return true; + }; + + auto m = make_shared(detection_output_v1_pattern, matcher_name); + register_matcher(m, callback); +} diff --git a/src/core/include/ngraph/op/detection_output.hpp b/src/core/include/ngraph/op/detection_output.hpp index 5400d5b22bf..0bda488d7fc 100644 --- a/src/core/include/ngraph/op/detection_output.hpp +++ b/src/core/include/ngraph/op/detection_output.hpp @@ -15,5 +15,9 @@ namespace v0 { using ov::op::v0::DetectionOutput; } // namespace v0 using v0::DetectionOutput; + +namespace v8 { +using ov::op::v8::DetectionOutput; +} // namespace v8 } // namespace op } // namespace ngraph diff --git a/src/core/include/ngraph/op/util/detection_output_base.hpp b/src/core/include/ngraph/op/util/detection_output_base.hpp new file mode 100644 index 00000000000..79d66ef3857 --- /dev/null +++ b/src/core/include/ngraph/op/util/detection_output_base.hpp @@ -0,0 +1,16 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ngraph/op/op.hpp" +#include "openvino/op/util/detection_output_base.hpp" + +namespace ngraph { +namespace op { +namespace util { +using ov::op::util::DetectionOutputBase; +} // namespace util +} // namespace op +} // namespace ngraph diff --git a/src/core/include/openvino/op/detection_output.hpp b/src/core/include/openvino/op/detection_output.hpp index ea88903c753..4bc2e542b27 100644 --- a/src/core/include/openvino/op/detection_output.hpp +++ b/src/core/include/openvino/op/detection_output.hpp @@ -5,34 +5,20 @@ #pragma once #include 
"openvino/op/op.hpp" +#include "openvino/op/util/detection_output_base.hpp" namespace ov { namespace op { namespace v0 { /// \brief Layer which performs non-max suppression to /// generate detection output using location and confidence predictions -class OPENVINO_API DetectionOutput : public Op { +class OPENVINO_API DetectionOutput : public op::util::DetectionOutputBase { public: - struct Attributes { + struct Attributes : public op::util::DetectionOutputBase::AttributesBase { int num_classes; - int background_label_id = 0; - int top_k = -1; - bool variance_encoded_in_target = false; - std::vector keep_top_k; - std::string code_type = std::string{"caffe.PriorBoxParameter.CORNER"}; - bool share_location = true; - float nms_threshold; - float confidence_threshold = 0; - bool clip_after_nms = false; - bool clip_before_nms = false; - bool decrease_label_id = false; - bool normalized = false; - size_t input_height = 1; - size_t input_width = 1; - float objectness_score = 0; }; - OPENVINO_OP("DetectionOutput", "opset1"); + OPENVINO_OP("DetectionOutput", "opset1", op::util::DetectionOutputBase); BWDCMP_RTTI_DECLARATION; DetectionOutput() = default; @@ -75,5 +61,55 @@ private: Attributes m_attrs; }; } // namespace v0 + +namespace v8 { +/// \brief Layer which performs non-max suppression to +/// generate detection output using location and confidence predictions +class OPENVINO_API DetectionOutput : public op::util::DetectionOutputBase { +public: + using Attributes = op::util::DetectionOutputBase::AttributesBase; + + OPENVINO_OP("DetectionOutput", "opset8", op::util::DetectionOutputBase); + + DetectionOutput() = default; + /// \brief Constructs a DetectionOutput operation + /// + /// \param box_logits Box logits + /// \param class_preds Class predictions + /// \param proposals Proposals + /// \param aux_class_preds Auxilary class predictions + /// \param aux_box_preds Auxilary box predictions + /// \param attrs Detection Output attributes + DetectionOutput(const Output& 
box_logits, + const Output& class_preds, + const Output& proposals, + const Output& aux_class_preds, + const Output& aux_box_preds, + const Attributes& attrs); + + /// \brief Constructs a DetectionOutput operation + /// + /// \param box_logits Box logits + /// \param class_preds Class predictions + /// \param proposals Proposals + /// \param attrs Detection Output attributes + DetectionOutput(const Output& box_logits, + const Output& class_preds, + const Output& proposals, + const Attributes& attrs); + + void validate_and_infer_types() override; + + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + + const Attributes& get_attrs() const { + return m_attrs; + } + bool visit_attributes(AttributeVisitor& visitor) override; + +private: + Attributes m_attrs; +}; +} // namespace v8 } // namespace op } // namespace ov diff --git a/src/core/include/openvino/op/util/detection_output_base.hpp b/src/core/include/openvino/op/util/detection_output_base.hpp new file mode 100644 index 00000000000..4a8030ae891 --- /dev/null +++ b/src/core/include/openvino/op/util/detection_output_base.hpp @@ -0,0 +1,45 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/op/op.hpp" + +namespace ov { +namespace op { +namespace util { +/// \brief DetectionOutputBase basic class for DetectionOutput v0 and v8 +class OPENVINO_API DetectionOutputBase : public Op { +public: + struct AttributesBase { + int background_label_id = 0; + int top_k = -1; + bool variance_encoded_in_target = false; + std::vector keep_top_k; + std::string code_type = std::string{"caffe.PriorBoxParameter.CORNER"}; + bool share_location = true; + float nms_threshold; + float confidence_threshold = 0; + bool clip_after_nms = false; + bool clip_before_nms = false; + bool decrease_label_id = false; + bool normalized = false; + size_t input_height = 1; + size_t input_width = 1; + float objectness_score = 0; + }; + + 
OPENVINO_OP("DetectionOutputBase", "util"); + DetectionOutputBase() = default; + DetectionOutputBase(const OutputVector& args); + + void validate_and_infer_types_base(const AttributesBase& attrs, Dimension num_classes); + + bool visit_attributes_base(AttributeVisitor& visitor, AttributesBase& attrs); + + Dimension compute_num_classes(const AttributesBase& attrs); +}; +} // namespace util +} // namespace op +} // namespace ov diff --git a/src/core/include/openvino/opsets/opset8_tbl.hpp b/src/core/include/openvino/opsets/opset8_tbl.hpp index a11dd507302..4f626f4fb91 100644 --- a/src/core/include/openvino/opsets/opset8_tbl.hpp +++ b/src/core/include/openvino/opsets/opset8_tbl.hpp @@ -31,7 +31,6 @@ _OPENVINO_OP_REG(Cosh, ov::op::v0) _OPENVINO_OP_REG(CumSum, ov::op::v0) _OPENVINO_OP_REG(DeformablePSROIPooling, ov::op::v1) _OPENVINO_OP_REG(DepthToSpace, ov::op::v0) -_OPENVINO_OP_REG(DetectionOutput, ov::op::v0) _OPENVINO_OP_REG(Divide, ov::op::v1) _OPENVINO_OP_REG(Elu, ov::op::v0) _OPENVINO_OP_REG(Erf, ov::op::v0) @@ -177,6 +176,7 @@ _OPENVINO_OP_REG(GatherND, ov::op::v8) _OPENVINO_OP_REG(AdaptiveAvgPool, ov::op::v8) _OPENVINO_OP_REG(AdaptiveMaxPool, ov::op::v8) _OPENVINO_OP_REG(DeformableConvolution, ov::op::v8) +_OPENVINO_OP_REG(DetectionOutput, ov::op::v8) _OPENVINO_OP_REG(I420toBGR, ov::op::v8) _OPENVINO_OP_REG(I420toRGB, ov::op::v8) _OPENVINO_OP_REG(MatrixNms, ov::op::v8) diff --git a/src/core/reference/include/ngraph/runtime/reference/detection_output.hpp b/src/core/reference/include/ngraph/runtime/reference/detection_output.hpp index ec137360c1c..cf27b56f1cf 100644 --- a/src/core/reference/include/ngraph/runtime/reference/detection_output.hpp +++ b/src/core/reference/include/ngraph/runtime/reference/detection_output.hpp @@ -10,6 +10,7 @@ #include #include "ngraph/op/detection_output.hpp" +#include "ngraph/op/util/detection_output_base.hpp" #include "ngraph/shape.hpp" namespace ngraph { @@ -28,7 +29,7 @@ private: }; using LabelBBox = std::map>; - 
ngraph::op::DetectionOutputAttrs attrs; + ngraph::op::util::DetectionOutputBase::AttributesBase attrs; size_t numImages; size_t priorSize; size_t numPriors; @@ -37,6 +38,7 @@ private: size_t offset; size_t numResults; size_t outTotalSize; + size_t numClasses; void GetLocPredictions(const dataType* locData, std::vector& locations) { locations.resize(numImages); @@ -64,12 +66,12 @@ private: for (size_t i = 0; i < numImages; ++i) { std::map>& labelScores = confPreds[i]; for (size_t p = 0; p < numPriors; ++p) { - int startIdx = p * attrs.num_classes; - for (int c = 0; c < attrs.num_classes; ++c) { + int startIdx = p * numClasses; + for (int c = 0; c < numClasses; ++c) { labelScores[c].push_back(confData[startIdx + c]); } } - confData += numPriors * attrs.num_classes; + confData += numPriors * numClasses; } } @@ -80,18 +82,18 @@ private: for (size_t i = 0; i < numImages; ++i) { std::map>& labelScores = confPreds[i]; for (size_t p = 0; p < numPriors; ++p) { - int startIdx = p * attrs.num_classes; + int startIdx = p * numClasses; if (armConfData[p * 2 + 1] < attrs.objectness_score) { - for (int c = 0; c < attrs.num_classes; ++c) { + for (int c = 0; c < numClasses; ++c) { c == attrs.background_label_id ? labelScores[c].push_back(1) : labelScores[c].push_back(0); } } else { - for (int c = 0; c < attrs.num_classes; ++c) { + for (int c = 0; c < numClasses; ++c) { labelScores[c].push_back(confData[startIdx + c]); } } } - confData += numPriors * attrs.num_classes; + confData += numPriors * numClasses; armConfData += numPriors * 2; } } @@ -369,7 +371,7 @@ private: for (size_t p = 0; p < numPriors; p++) { dataType conf = -1; int id = 0; - for (int c = 1; c < attrs.num_classes; c++) { + for (int c = 1; c < numClasses; c++) { if (attrs.background_label_id > -1 && c == attrs.background_label_id) continue; dataType temp = confScores.at(c)[p]; @@ -425,7 +427,25 @@ public: offset = _attrs.normalized ? 
0 : 1; numPriors = priorsShape[2] / priorSize; priorsBatchSize = priorsShape[0]; - numLocClasses = _attrs.share_location ? 1 : static_cast(_attrs.num_classes); + numClasses = _attrs.num_classes; + numLocClasses = _attrs.share_location ? 1 : numClasses; + numResults = outShape[2]; + outTotalSize = shape_size(outShape); + } + + referenceDetectionOutput(const ngraph::op::util::DetectionOutputBase::AttributesBase& _attrs, + const ngraph::Shape& locShape, + const ngraph::Shape& classPredShape, + const ngraph::Shape& priorsShape, + const ngraph::Shape& outShape) + : attrs(_attrs) { + numImages = locShape[0]; + priorSize = _attrs.normalized ? 4 : 5; + offset = _attrs.normalized ? 0 : 1; + numPriors = priorsShape[2] / priorSize; + priorsBatchSize = priorsShape[0]; + numClasses = classPredShape[1] / numPriors; + numLocClasses = _attrs.share_location ? 1 : numClasses; numResults = outShape[2]; outTotalSize = shape_size(outShape); } @@ -469,7 +489,7 @@ public: int numDet = 0; if (!attrs.decrease_label_id) { // Caffe style - for (int c = 0; c < attrs.num_classes; ++c) { + for (int c = 0; c < numClasses; ++c) { if (c == attrs.background_label_id) { continue; } diff --git a/src/core/src/op/detection_output.cpp b/src/core/src/op/detection_output.cpp index f98d7438c66..27936f16bf6 100644 --- a/src/core/src/op/detection_output.cpp +++ b/src/core/src/op/detection_output.cpp @@ -8,15 +8,15 @@ using namespace std; +// ------------------------------ V0 ------------------------------ BWDCMP_RTTI_DEFINITION(ov::op::v0::DetectionOutput); - ov::op::v0::DetectionOutput::DetectionOutput(const Output& box_logits, const Output& class_preds, const Output& proposals, const Output& aux_class_preds, const Output& aux_box_preds, const Attributes& attrs) - : Op({box_logits, class_preds, proposals, aux_class_preds, aux_box_preds}), + : DetectionOutputBase({box_logits, class_preds, proposals, aux_class_preds, aux_box_preds}), m_attrs(attrs) { constructor_validate_and_infer_types(); } @@ -25,7 +25,7 @@ 
ov::op::v0::DetectionOutput::DetectionOutput(const Output& box_logits, const Output& class_preds, const Output& proposals, const Attributes& attrs) - : Op({box_logits, class_preds, proposals}), + : DetectionOutputBase({box_logits, class_preds, proposals}), m_attrs(attrs) { constructor_validate_and_infer_types(); } @@ -33,180 +33,7 @@ ov::op::v0::DetectionOutput::DetectionOutput(const Output& box_logits, void ov::op::v0::DetectionOutput::validate_and_infer_types() { NGRAPH_OP_SCOPE(v0_DetectionOutput_validate_and_infer_types); NODE_VALIDATION_CHECK(this, m_attrs.num_classes > 0, "Number of classes must be greater than zero"); - - NODE_VALIDATION_CHECK(this, m_attrs.keep_top_k.size() > 0, "keep_top_k attribute must be provided"); - - NODE_VALIDATION_CHECK(this, - m_attrs.code_type == "caffe.PriorBoxParameter.CORNER" || - m_attrs.code_type == "caffe.PriorBoxParameter.CENTER_SIZE", - "code_type must be either \"caffe.PriorBoxParameter.CORNER\" or " - "\"caffe.PriorBoxParameter.CENTER_SIZE\""); - - auto box_logits_et = get_input_element_type(0); - NODE_VALIDATION_CHECK(this, - box_logits_et.is_real(), - "Box logits' data type must be floating point. Got " + box_logits_et.get_type_name()); - auto class_preds_et = get_input_element_type(1); - NODE_VALIDATION_CHECK(this, - class_preds_et == box_logits_et, - "Class predictions' data type must be the same as box logits type (" + - box_logits_et.get_type_name() + "). Got " + class_preds_et.get_type_name()); - auto proposals_et = get_input_element_type(2); - NODE_VALIDATION_CHECK(this, - proposals_et.is_real(), - "Proposals' data type must be floating point. Got " + proposals_et.get_type_name()); - - const ov::PartialShape& box_logits_pshape = get_input_partial_shape(0); - const ov::PartialShape& class_preds_pshape = get_input_partial_shape(1); - const ov::PartialShape& proposals_pshape = get_input_partial_shape(2); - - int num_loc_classes = m_attrs.share_location ? 
1 : m_attrs.num_classes; - int prior_box_size = m_attrs.normalized ? 4 : 5; - - Dimension num_images = Dimension::dynamic(); - Dimension num_prior_boxes = Dimension::dynamic(); - if (box_logits_pshape.rank().is_static()) { - NODE_VALIDATION_CHECK( - this, - box_logits_pshape.rank().get_length() == 2, - "Box logits rank must be 2. Got " + std::to_string(box_logits_pshape.rank().get_length())); - num_images = box_logits_pshape[0]; - if (box_logits_pshape[1].is_static()) { - NODE_VALIDATION_CHECK(this, - (box_logits_pshape[1].get_length() % (num_loc_classes * 4)) == 0, - "Box logits' second dimension must be a multiply of num_loc_classes * 4 (" + - std::to_string(num_loc_classes * 4) + "). Current value is: ", - box_logits_pshape[1].get_length(), - "."); - num_prior_boxes = box_logits_pshape[1].get_length() / (num_loc_classes * 4); - } - } - if (class_preds_pshape.rank().is_static()) { - NODE_VALIDATION_CHECK( - this, - class_preds_pshape.rank().get_length() == 2, - "Class predictions rank must be 2. Got " + std::to_string(class_preds_pshape.rank().get_length())); - if (num_images.is_dynamic() && class_preds_pshape[0].is_static()) { - num_images = class_preds_pshape[0]; - } else { - NODE_VALIDATION_CHECK(this, - class_preds_pshape[0].compatible(num_images), - "Class predictions' first dimension is not compatible with batch size."); - } - if (class_preds_pshape[1].is_static()) { - if (num_prior_boxes.is_dynamic()) { - NODE_VALIDATION_CHECK(this, - class_preds_pshape[1].get_length() % m_attrs.num_classes == 0, - "Class predictions' second dimension must be a multiply of num_classes (" + - std::to_string(m_attrs.num_classes) + "). 
Current value is: ", - class_preds_pshape[1].get_length(), - "."); - num_prior_boxes = class_preds_pshape[1].get_length() / m_attrs.num_classes; - } else { - int num_prior_boxes_val = num_prior_boxes.get_length(); - NODE_VALIDATION_CHECK(this, - class_preds_pshape[1].get_length() == num_prior_boxes_val * m_attrs.num_classes, - "Class predictions' second dimension must be equal to num_prior_boxes * " - "num_classes (" + - std::to_string(num_prior_boxes_val * m_attrs.num_classes) + - "). Current value is: ", - class_preds_pshape[1].get_length(), - "."); - } - } - } - if (proposals_pshape.rank().is_static()) { - NODE_VALIDATION_CHECK(this, - proposals_pshape.rank().get_length() == 3, - "Proposals rank must be 3. Got " + std::to_string(proposals_pshape.rank().get_length())); - if (num_images.is_static() && proposals_pshape[0].is_static()) { - int64_t proposals_1st_dim = proposals_pshape[0].get_length(); - int64_t num_images_val = num_images.get_length(); - NODE_VALIDATION_CHECK(this, - proposals_1st_dim == 1 || proposals_1st_dim == num_images_val, - "Proposals' first dimension is must be equal to either batch size (" + - std::to_string(num_images_val) + - ") or 1. Got: " + std::to_string(proposals_1st_dim) + "."); - } - if (proposals_pshape[1].is_static()) { - size_t proposals_expected_2nd_dim = m_attrs.variance_encoded_in_target ? 1 : 2; - NODE_VALIDATION_CHECK(this, - proposals_pshape[1].compatible(proposals_expected_2nd_dim), - "Proposals' second dimension is mismatched. Current value is: ", - proposals_pshape[1].get_length(), - ", expected: ", - proposals_expected_2nd_dim, - "."); - } - if (proposals_pshape[2].is_static()) { - if (num_prior_boxes.is_dynamic()) { - NODE_VALIDATION_CHECK(this, - proposals_pshape[2].get_length() % prior_box_size == 0, - "Proposals' third dimension must be a multiply of prior_box_size (" + - std::to_string(prior_box_size) + "). 
Current value is: ", - proposals_pshape[2].get_length(), - "."); - num_prior_boxes = proposals_pshape[2].get_length() / prior_box_size; - } else { - int num_prior_boxes_val = num_prior_boxes.get_length(); - NODE_VALIDATION_CHECK(this, - proposals_pshape[2].get_length() == num_prior_boxes_val * prior_box_size, - "Proposals' third dimension must be equal to num_prior_boxes " - "* prior_box_size (" + - std::to_string(num_prior_boxes_val * prior_box_size) + - "). Current value is: ", - proposals_pshape[2].get_length(), - "."); - } - } - } - - if (get_input_size() > 3) { - auto aux_class_preds_et = get_input_element_type(3); - NODE_VALIDATION_CHECK(this, - aux_class_preds_et == class_preds_et, - "Additional class predictions' data type must be the same as class " - "predictions data type (" + - class_preds_et.get_type_name() + "). Got " + aux_class_preds_et.get_type_name()); - auto aux_box_preds_et = get_input_element_type(4); - NODE_VALIDATION_CHECK(this, - aux_box_preds_et == box_logits_et, - "Additional box predictions' data type must be the same as box logits data type (" + - box_logits_et.get_type_name() + "). Got " + aux_box_preds_et.get_type_name()); - - const ov::PartialShape& aux_class_preds_pshape = get_input_partial_shape(3); - const ov::PartialShape& aux_box_preds_pshape = get_input_partial_shape(4); - if (aux_class_preds_pshape.rank().is_static()) { - NODE_VALIDATION_CHECK(this, - aux_class_preds_pshape[0].compatible(num_images), - "Additional class predictions' first dimension must be " - "compatible with batch size."); - if (num_prior_boxes.is_static()) { - int num_prior_boxes_val = num_prior_boxes.get_length(); - NODE_VALIDATION_CHECK(this, - aux_class_preds_pshape[1].get_length() == num_prior_boxes_val * 2, - "Additional class predictions' second dimension must be equal to " - "num_prior_boxes * 2 (" + - std::to_string(num_prior_boxes_val * 2) + "). 
Got " + - std::to_string(aux_class_preds_pshape[1].get_length()) + "."); - } - } - NODE_VALIDATION_CHECK(this, - aux_box_preds_pshape.compatible(box_logits_pshape), - "Additional box predictions' shape must be compatible with box logits shape."); - } - - std::vector output_shape{1, 1}; - if (m_attrs.keep_top_k[0] > 0) { - output_shape.push_back(num_images * m_attrs.keep_top_k[0]); - } else if (m_attrs.top_k > 0) { - output_shape.push_back(num_images * m_attrs.top_k * m_attrs.num_classes); - } else { - output_shape.push_back(num_images * num_prior_boxes * m_attrs.num_classes); - } - output_shape.emplace_back(7); - - set_output_type(0, box_logits_et, output_shape); + validate_and_infer_types_base(m_attrs, m_attrs.num_classes); } shared_ptr ov::op::v0::DetectionOutput::clone_with_new_inputs(const OutputVector& new_args) const { @@ -232,20 +59,58 @@ shared_ptr ov::op::v0::DetectionOutput::clone_with_new_inputs(const Ou bool ov::op::v0::DetectionOutput::visit_attributes(AttributeVisitor& visitor) { NGRAPH_OP_SCOPE(v0_DetectionOutput_visit_attributes); visitor.on_attribute("num_classes", m_attrs.num_classes); - visitor.on_attribute("background_label_id", m_attrs.background_label_id); - visitor.on_attribute("top_k", m_attrs.top_k); - visitor.on_attribute("variance_encoded_in_target", m_attrs.variance_encoded_in_target); - visitor.on_attribute("keep_top_k", m_attrs.keep_top_k); - visitor.on_attribute("code_type", m_attrs.code_type); - visitor.on_attribute("share_location", m_attrs.share_location); - visitor.on_attribute("nms_threshold", m_attrs.nms_threshold); - visitor.on_attribute("confidence_threshold", m_attrs.confidence_threshold); - visitor.on_attribute("clip_after_nms", m_attrs.clip_after_nms); - visitor.on_attribute("clip_before_nms", m_attrs.clip_before_nms); - visitor.on_attribute("decrease_label_id", m_attrs.decrease_label_id); - visitor.on_attribute("normalized", m_attrs.normalized); - visitor.on_attribute("input_height", m_attrs.input_height); - 
visitor.on_attribute("input_width", m_attrs.input_width); - visitor.on_attribute("objectness_score", m_attrs.objectness_score); + visit_attributes_base(visitor, m_attrs); + return true; +} + +// ------------------------------ V8 ------------------------------ +ov::op::v8::DetectionOutput::DetectionOutput(const Output& box_logits, + const Output& class_preds, + const Output& proposals, + const Output& aux_class_preds, + const Output& aux_box_preds, + const Attributes& attrs) + : DetectionOutputBase({box_logits, class_preds, proposals, aux_class_preds, aux_box_preds}), + m_attrs(attrs) { + constructor_validate_and_infer_types(); +} + +ov::op::v8::DetectionOutput::DetectionOutput(const Output& box_logits, + const Output& class_preds, + const Output& proposals, + const Attributes& attrs) + : DetectionOutputBase({box_logits, class_preds, proposals}), + m_attrs(attrs) { + constructor_validate_and_infer_types(); +} + +void ov::op::v8::DetectionOutput::validate_and_infer_types() { + NGRAPH_OP_SCOPE(v8_DetectionOutput_validate_and_infer_types); + validate_and_infer_types_base(m_attrs, Dimension::dynamic()); +} + +shared_ptr ov::op::v8::DetectionOutput::clone_with_new_inputs(const OutputVector& new_args) const { + NGRAPH_OP_SCOPE(v8_DetectionOutput_clone_with_new_inputs); + check_new_args_count(this, new_args); + + auto num_args = new_args.size(); + + NODE_VALIDATION_CHECK(this, num_args == 3 || num_args == 5, "DetectionOutput accepts 3 or 5 inputs."); + + if (num_args == 3) { + return make_shared(new_args.at(0), new_args.at(1), new_args.at(2), m_attrs); + } else { + return make_shared(new_args.at(0), + new_args.at(1), + new_args.at(2), + new_args.at(3), + new_args.at(4), + m_attrs); + } +} + +bool ov::op::v8::DetectionOutput::visit_attributes(AttributeVisitor& visitor) { + NGRAPH_OP_SCOPE(v8_DetectionOutput_visit_attributes); + visit_attributes_base(visitor, m_attrs); + return true; +} diff --git a/src/core/src/op/util/detection_output_base.cpp 
b/src/core/src/op/util/detection_output_base.cpp new file mode 100644 index 00000000000..c8e2462c012 --- /dev/null +++ b/src/core/src/op/util/detection_output_base.cpp @@ -0,0 +1,340 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph/op/util/detection_output_base.hpp" + +#include + +#include "ngraph/op/concat.hpp" +#include "ngraph/op/constant.hpp" +#include "ngraph/op/squeeze.hpp" +#include "ngraph/runtime/host_tensor.hpp" +#include "ngraph/shape.hpp" + +using namespace std; +using namespace ov::op::util; + +DetectionOutputBase::DetectionOutputBase(const ov::OutputVector& args) : Op(args) {} + +ov::Dimension DetectionOutputBase::compute_num_classes(const AttributesBase& attrs) { + Dimension num_classes = Dimension::dynamic(); + + NODE_VALIDATION_CHECK(this, + 3 <= get_input_size() && get_input_size() <= 5, + "A number of arguments must be greater than or equal to 3 and less than or equal to 5. Got " + + std::to_string(get_input_size())); + + const ov::PartialShape& box_logits_pshape = get_input_partial_shape(0); + const ov::PartialShape& class_preds_pshape = get_input_partial_shape(1); + const ov::PartialShape& proposals_pshape = get_input_partial_shape(2); + ov::PartialShape ad_class_preds_shape = ov::PartialShape::dynamic(); + ov::PartialShape ad_box_preds_shape = ov::PartialShape::dynamic(); + + if (box_logits_pshape.rank().is_static()) { + NODE_VALIDATION_CHECK( + this, + box_logits_pshape.rank().get_length() == 2, + "Box logits rank must be 2. Got " + std::to_string(box_logits_pshape.rank().get_length())); + } + if (class_preds_pshape.rank().is_static()) { + NODE_VALIDATION_CHECK( + this, + class_preds_pshape.rank().get_length() == 2, + "Class predictions rank must be 2. Got " + std::to_string(class_preds_pshape.rank().get_length())); + } + if (proposals_pshape.rank().is_static()) { + NODE_VALIDATION_CHECK(this, + proposals_pshape.rank().get_length() == 3, + "Proposals rank must be 3. 
Got " + std::to_string(proposals_pshape.rank().get_length())); + } + if (get_input_size() >= 4) { + ad_class_preds_shape = get_input_partial_shape(3); + if (ad_class_preds_shape.rank().is_static()) { + NODE_VALIDATION_CHECK(this, + ad_class_preds_shape.rank().get_length() == 2, + "Additional class predictions rank must be 2. Got " + + std::to_string(ad_class_preds_shape.rank().get_length())); + } + } + if (get_input_size() == 5) { + ad_box_preds_shape = get_input_partial_shape(4); + if (ad_box_preds_shape.rank().is_static()) { + NODE_VALIDATION_CHECK(this, + ad_box_preds_shape.rank().get_length() == 2, + "Additional box predictions rank must be 2. Got " + + std::to_string(ad_box_preds_shape.rank().get_length())); + } + } + + int prior_box_size = attrs.normalized ? 4 : 5; + Dimension num_prior_boxes = Dimension::dynamic(); + + // try to deduce a number of prior boxes + if (num_prior_boxes.is_dynamic() && proposals_pshape.rank().is_static() && proposals_pshape[2].is_static()) { + NODE_VALIDATION_CHECK(this, + proposals_pshape[2].get_length() % prior_box_size == 0, + "Proposals' third dimension must be a multiply of prior_box_size (" + + std::to_string(prior_box_size) + "). Current value is: ", + proposals_pshape[2].get_length(), + "."); + num_prior_boxes = proposals_pshape[2].get_length() / prior_box_size; + NODE_VALIDATION_CHECK( + this, + num_prior_boxes.get_length() > 0, + "A number of prior boxes must be greater zero. Got: " + std::to_string(num_prior_boxes.get_length())); + } + if (num_prior_boxes.is_dynamic() && ad_class_preds_shape.rank().is_static() && + ad_class_preds_shape[1].is_static()) { + NODE_VALIDATION_CHECK( + this, + ad_class_preds_shape[1].get_length() % 2 == 0, + "Additional class predictions second dimension must be a multiply of 2. 
Current value is: ", + ad_class_preds_shape[1].get_length(), + "."); + num_prior_boxes = ad_class_preds_shape[1].get_length() / 2; + NODE_VALIDATION_CHECK( + this, + num_prior_boxes.get_length() > 0, + "A number of prior boxes must be greater zero. Got: " + std::to_string(num_prior_boxes.get_length())); + } + + // try to deduce a number of classes + if (num_classes.is_dynamic() && num_prior_boxes.is_static() && class_preds_pshape.rank().is_static() && + class_preds_pshape[1].is_static()) { + NODE_VALIDATION_CHECK(this, + class_preds_pshape[1].get_length() % num_prior_boxes.get_length() == 0, + "Class predictions second dimension must be a multiply of num_prior_boxes (" + + std::to_string(num_prior_boxes.get_length()) + "). Current value is: ", + class_preds_pshape[1].get_length(), + "."); + num_classes = class_preds_pshape[1].get_length() / num_prior_boxes.get_length(); + } + if (num_classes.is_dynamic() && num_prior_boxes.is_static() && box_logits_pshape.rank().is_static() && + box_logits_pshape[1].is_static() && !attrs.share_location) { + NODE_VALIDATION_CHECK(this, + box_logits_pshape[1].get_length() % (num_prior_boxes.get_length() * 4) == 0, + "Box logits second dimension must be a multiply of num_prior_boxes * 4 (" + + std::to_string(num_prior_boxes.get_length() * 4) + "). Current value is: ", + box_logits_pshape[1].get_length(), + "."); + num_classes = box_logits_pshape[1].get_length() / (num_prior_boxes.get_length() * 4); + } + if (num_classes.is_dynamic() && num_prior_boxes.is_static() && ad_box_preds_shape.is_static() && + ad_box_preds_shape[1].is_static() && !attrs.share_location) { + NODE_VALIDATION_CHECK( + this, + ad_box_preds_shape[1].get_length() % (num_prior_boxes.get_length() * 4) == 0, + "Additional box predictions second dimension must be a multiply of num_prior_boxes * 4 (" + + std::to_string(num_prior_boxes.get_length() * 4) + "). 
Current value is: ", + ad_box_preds_shape[1].get_length(), + "."); + num_classes = ad_box_preds_shape[1].get_length() / (num_prior_boxes.get_length() * 4); + } + + return num_classes; +} + +void DetectionOutputBase::validate_and_infer_types_base(const DetectionOutputBase::AttributesBase& attrs, + ov::Dimension num_classes) { + NODE_VALIDATION_CHECK(this, attrs.keep_top_k.size() > 0, "keep_top_k attribute must be provided"); + + NODE_VALIDATION_CHECK( + this, + attrs.code_type == "caffe.PriorBoxParameter.CORNER" || attrs.code_type == "caffe.PriorBoxParameter.CENTER_SIZE", + "code_type must be either \"caffe.PriorBoxParameter.CORNER\" or " + "\"caffe.PriorBoxParameter.CENTER_SIZE\""); + + auto box_logits_et = get_input_element_type(0); + NODE_VALIDATION_CHECK(this, + box_logits_et.is_real(), + "Box logits' data type must be floating point. Got " + box_logits_et.get_type_name()); + auto class_preds_et = get_input_element_type(1); + NODE_VALIDATION_CHECK(this, + class_preds_et == box_logits_et, + "Class predictions' data type must be the same as box logits type (" + + box_logits_et.get_type_name() + "). Got " + class_preds_et.get_type_name()); + auto proposals_et = get_input_element_type(2); + NODE_VALIDATION_CHECK(this, + proposals_et.is_real(), + "Proposals' data type must be floating point. Got " + proposals_et.get_type_name()); + + const ov::PartialShape& box_logits_pshape = get_input_partial_shape(0); + const ov::PartialShape& class_preds_pshape = get_input_partial_shape(1); + const ov::PartialShape& proposals_pshape = get_input_partial_shape(2); + + // deduce a number of classes for DetectionOutput-8 + if (num_classes.is_dynamic()) { + num_classes = compute_num_classes(attrs); + } + + Dimension num_loc_classes = attrs.share_location ? 1 : num_classes; + int prior_box_size = attrs.normalized ? 
4 : 5; + + Dimension num_images = Dimension::dynamic(); + Dimension num_prior_boxes = Dimension::dynamic(); + if (box_logits_pshape.rank().is_static()) { + NODE_VALIDATION_CHECK( + this, + box_logits_pshape.rank().get_length() == 2, + "Box logits rank must be 2. Got " + std::to_string(box_logits_pshape.rank().get_length())); + num_images = box_logits_pshape[0]; + if (box_logits_pshape[1].is_static() && num_loc_classes.is_static()) { + NODE_VALIDATION_CHECK(this, + (box_logits_pshape[1].get_length() % (num_loc_classes.get_length() * 4)) == 0, + "Box logits' second dimension must be a multiply of num_loc_classes * 4 (" + + std::to_string(num_loc_classes.get_length() * 4) + "). Current value is: ", + box_logits_pshape[1].get_length(), + "."); + num_prior_boxes = box_logits_pshape[1].get_length() / (num_loc_classes.get_length() * 4); + } + } + if (class_preds_pshape.rank().is_static()) { + NODE_VALIDATION_CHECK( + this, + class_preds_pshape.rank().get_length() == 2, + "Class predictions rank must be 2. Got " + std::to_string(class_preds_pshape.rank().get_length())); + if (num_images.is_dynamic() && class_preds_pshape[0].is_static()) { + num_images = class_preds_pshape[0]; + } else { + NODE_VALIDATION_CHECK(this, + class_preds_pshape[0].compatible(num_images), + "Class predictions' first dimension is not compatible with batch size."); + } + if (class_preds_pshape[1].is_static()) { + if (num_prior_boxes.is_dynamic() && num_classes.is_static()) { + NODE_VALIDATION_CHECK(this, + class_preds_pshape[1].get_length() % num_classes.get_length() == 0, + "Class predictions' second dimension must be a multiply of num_classes (" + + std::to_string(num_classes.get_length()) + "). 
Current value is: ", + class_preds_pshape[1].get_length(), + "."); + num_prior_boxes = class_preds_pshape[1].get_length() / num_classes.get_length(); + } else if (num_classes.is_static()) { + int num_prior_boxes_val = num_prior_boxes.get_length(); + NODE_VALIDATION_CHECK( + this, + class_preds_pshape[1].get_length() == num_prior_boxes_val * num_classes.get_length(), + "Class predictions' second dimension must be equal to num_prior_boxes * " + "num_classes (" + + std::to_string(num_prior_boxes_val * num_classes.get_length()) + "). Current value is: ", + class_preds_pshape[1].get_length(), + "."); + } + } + } + if (proposals_pshape.rank().is_static()) { + NODE_VALIDATION_CHECK(this, + proposals_pshape.rank().get_length() == 3, + "Proposals rank must be 3. Got " + std::to_string(proposals_pshape.rank().get_length())); + if (num_images.is_static() && proposals_pshape[0].is_static()) { + int64_t proposals_1st_dim = proposals_pshape[0].get_length(); + int64_t num_images_val = num_images.get_length(); + NODE_VALIDATION_CHECK(this, + proposals_1st_dim == 1 || proposals_1st_dim == num_images_val, + "Proposals' first dimension is must be equal to either batch size (" + + std::to_string(num_images_val) + + ") or 1. Got: " + std::to_string(proposals_1st_dim) + "."); + } + if (proposals_pshape[1].is_static()) { + size_t proposals_expected_2nd_dim = attrs.variance_encoded_in_target ? 1 : 2; + NODE_VALIDATION_CHECK(this, + proposals_pshape[1].compatible(proposals_expected_2nd_dim), + "Proposals' second dimension is mismatched. Current value is: ", + proposals_pshape[1].get_length(), + ", expected: ", + proposals_expected_2nd_dim, + "."); + } + if (proposals_pshape[2].is_static()) { + if (num_prior_boxes.is_dynamic()) { + NODE_VALIDATION_CHECK(this, + proposals_pshape[2].get_length() % prior_box_size == 0, + "Proposals' third dimension must be a multiply of prior_box_size (" + + std::to_string(prior_box_size) + "). 
Current value is: ", + proposals_pshape[2].get_length(), + "."); + num_prior_boxes = proposals_pshape[2].get_length() / prior_box_size; + } else { + int num_prior_boxes_val = num_prior_boxes.get_length(); + NODE_VALIDATION_CHECK(this, + proposals_pshape[2].get_length() == num_prior_boxes_val * prior_box_size, + "Proposals' third dimension must be equal to num_prior_boxes " + "* prior_box_size (" + + std::to_string(num_prior_boxes_val * prior_box_size) + + "). Current value is: ", + proposals_pshape[2].get_length(), + "."); + } + } + } + + if (get_input_size() > 3) { + auto aux_class_preds_et = get_input_element_type(3); + NODE_VALIDATION_CHECK(this, + aux_class_preds_et == class_preds_et, + "Additional class predictions' data type must be the same as class " + "predictions data type (" + + class_preds_et.get_type_name() + "). Got " + aux_class_preds_et.get_type_name()); + auto aux_box_preds_et = get_input_element_type(4); + NODE_VALIDATION_CHECK(this, + aux_box_preds_et == box_logits_et, + "Additional box predictions' data type must be the same as box logits data type (" + + box_logits_et.get_type_name() + "). Got " + aux_box_preds_et.get_type_name()); + + const ov::PartialShape& aux_class_preds_pshape = get_input_partial_shape(3); + const ov::PartialShape& aux_box_preds_pshape = get_input_partial_shape(4); + if (aux_class_preds_pshape.rank().is_static()) { + NODE_VALIDATION_CHECK(this, + aux_class_preds_pshape[0].compatible(num_images), + "Additional class predictions' first dimension must be " + "compatible with batch size."); + if (num_prior_boxes.is_static()) { + int num_prior_boxes_val = num_prior_boxes.get_length(); + NODE_VALIDATION_CHECK(this, + aux_class_preds_pshape[1].get_length() == num_prior_boxes_val * 2, + "Additional class predictions' second dimension must be equal to " + "num_prior_boxes * 2 (" + + std::to_string(num_prior_boxes_val * 2) + "). 
Got " + + std::to_string(aux_class_preds_pshape[1].get_length()) + "."); + } + } + NODE_VALIDATION_CHECK(this, + aux_box_preds_pshape.compatible(box_logits_pshape), + "Additional box predictions' shape must be compatible with box logits shape."); + } + + std::vector output_shape{1, 1}; + if (attrs.keep_top_k[0] > 0) { + output_shape.push_back(num_images * attrs.keep_top_k[0]); + } else if (attrs.top_k > 0 && num_classes.is_static()) { + output_shape.push_back(num_images * attrs.top_k * num_classes); + } else if (num_classes.is_static()) { + output_shape.push_back(num_images * num_prior_boxes * num_classes); + } else { + output_shape.push_back(Dimension::dynamic()); + } + output_shape.emplace_back(7); + + set_output_type(0, box_logits_et, output_shape); +} + +bool ov::op::util::DetectionOutputBase::visit_attributes_base(AttributeVisitor& visitor, + DetectionOutputBase::AttributesBase& attrs) { + visitor.on_attribute("background_label_id", attrs.background_label_id); + visitor.on_attribute("top_k", attrs.top_k); + visitor.on_attribute("variance_encoded_in_target", attrs.variance_encoded_in_target); + visitor.on_attribute("keep_top_k", attrs.keep_top_k); + visitor.on_attribute("code_type", attrs.code_type); + visitor.on_attribute("share_location", attrs.share_location); + visitor.on_attribute("nms_threshold", attrs.nms_threshold); + visitor.on_attribute("confidence_threshold", attrs.confidence_threshold); + visitor.on_attribute("clip_after_nms", attrs.clip_after_nms); + visitor.on_attribute("clip_before_nms", attrs.clip_before_nms); + visitor.on_attribute("decrease_label_id", attrs.decrease_label_id); + visitor.on_attribute("normalized", attrs.normalized); + visitor.on_attribute("input_height", attrs.input_height); + visitor.on_attribute("input_width", attrs.input_width); + visitor.on_attribute("objectness_score", attrs.objectness_score); + return true; +} diff --git a/src/core/tests/runtime/interpreter/evaluates_map.cpp 
b/src/core/tests/runtime/interpreter/evaluates_map.cpp index b83769c5757..5fd24338f67 100644 --- a/src/core/tests/runtime/interpreter/evaluates_map.cpp +++ b/src/core/tests/runtime/interpreter/evaluates_map.cpp @@ -1438,6 +1438,37 @@ bool evaluate(const shared_ptr& op, using T = typename element_type_traits::value_type; runtime::reference::referenceDetectionOutput refDetOut(op->get_attrs(), op->get_input_shape(0), + op->get_input_shape(1), + op->get_input_shape(2), + op->get_output_shape(0)); + if (op->get_input_size() == 3) { + refDetOut.run(inputs[0]->get_data_ptr(), + inputs[1]->get_data_ptr(), + inputs[2]->get_data_ptr(), + nullptr, + nullptr, + outputs[0]->get_data_ptr()); + } else if (op->get_input_size() == 5) { + refDetOut.run(inputs[0]->get_data_ptr(), + inputs[1]->get_data_ptr(), + inputs[2]->get_data_ptr(), + inputs[3]->get_data_ptr(), + inputs[4]->get_data_ptr(), + outputs[0]->get_data_ptr()); + } else { + throw ngraph_error("DetectionOutput layer supports only 3 or 5 inputs"); + } + return true; +} + +template +bool evaluate(const shared_ptr& op, + const HostTensorVector& outputs, + const HostTensorVector& inputs) { + using T = typename element_type_traits::value_type; + runtime::reference::referenceDetectionOutput refDetOut(op->get_attrs(), + op->get_input_shape(0), + op->get_input_shape(1), op->get_input_shape(2), op->get_output_shape(0)); if (op->get_input_size() == 3) { diff --git a/src/core/tests/runtime/interpreter/opset_int_tbl.hpp b/src/core/tests/runtime/interpreter/opset_int_tbl.hpp index e100660e9a9..fe1230d79d4 100644 --- a/src/core/tests/runtime/interpreter/opset_int_tbl.hpp +++ b/src/core/tests/runtime/interpreter/opset_int_tbl.hpp @@ -106,6 +106,7 @@ NGRAPH_OP(MulticlassNms, op::v8) NGRAPH_OP(DeformableConvolution, ngraph::op::v8) NGRAPH_OP(If, ngraph::op::v8) NGRAPH_OP(GatherND, op::v8) +NGRAPH_OP(DetectionOutput, op::v8) NGRAPH_OP(Sigmoid, op::v0) NGRAPH_OP(Tanh, op::v0) diff --git a/src/core/tests/type_prop/detection_output.cpp 
b/src/core/tests/type_prop/detection_output.cpp index c4cbad1d266..ed7b83ae859 100644 --- a/src/core/tests/type_prop/detection_output.cpp +++ b/src/core/tests/type_prop/detection_output.cpp @@ -13,6 +13,7 @@ using namespace std; using namespace ngraph; +// ------------------------------ V0 ------------------------------ std::shared_ptr create_detection_output(const PartialShape& box_logits_shape, const PartialShape& class_preds_shape, const PartialShape& proposals_shape, @@ -656,3 +657,225 @@ TEST(type_prop_layers, detection_output_invalid_aux_box_preds) { } } } + +// ------------------------------ V8 ------------------------------ +namespace { +std::shared_ptr create_detection_output_v8(const PartialShape& box_logits_shape, + const PartialShape& class_preds_shape, + const PartialShape& proposals_shape, + const op::v8::DetectionOutput::Attributes& attrs, + element::Type input_type) { + auto box_logits = make_shared(input_type, box_logits_shape); + auto class_preds = make_shared(input_type, class_preds_shape); + auto proposals = make_shared(input_type, proposals_shape); + return make_shared(box_logits, class_preds, proposals, attrs); +} + +std::shared_ptr create_detection_output2_v8(const PartialShape& box_logits_shape, + const PartialShape& class_preds_shape, + const PartialShape& proposals_shape, + const PartialShape& aux_class_preds_shape, + const PartialShape& aux_box_preds_shape, + const op::v8::DetectionOutput::Attributes& attrs, + element::Type input_type) { + auto box_logits = make_shared(input_type, box_logits_shape); + auto class_preds = make_shared(input_type, class_preds_shape); + auto proposals = make_shared(input_type, proposals_shape); + auto aux_class_preds = make_shared(input_type, aux_class_preds_shape); + auto aux_box_preds = make_shared(input_type, aux_box_preds_shape); + return make_shared(box_logits, + class_preds, + proposals, + aux_class_preds, + aux_box_preds, + attrs); +} + +PartialShape compute_reference_output_shape(const std::vector& 
keep_top_k, + int top_k, + const Dimension& deduced_N, + const Dimension& deduced_num_classes, + const Dimension& deduced_num_prior_boxes) { + if (keep_top_k.size() > 0 && keep_top_k[0] > 0) { + return PartialShape({1, 1, deduced_N * keep_top_k[0], 7}); + } else if (top_k > 0) { + return PartialShape({1, 1, deduced_N * top_k * deduced_num_classes, 7}); + } else { + return PartialShape({1, 1, deduced_N * deduced_num_classes * deduced_num_prior_boxes, 7}); + } +} + +std::vector create_attributes_vector() { + // initialize attributes affecting shape inference + // others remain by default + std::vector result; + for (int keep_top_k : {10, -1}) { + for (int top_k : {5, -1}) { + for (bool variance_encoded_in_target : {true, false}) { + for (bool share_location : {true, false}) { + for (bool normalized : {true, false}) { + op::v8::DetectionOutput::Attributes attrs; + attrs.top_k = top_k; + attrs.keep_top_k = {keep_top_k}; + attrs.variance_encoded_in_target = variance_encoded_in_target; + attrs.share_location = share_location; + attrs.normalized = normalized; + result.push_back(attrs); + } + } + } + } + } + return result; +} +} // namespace + +TEST(type_prop_layers, detection_outputv8_all_static) { + // this case covers deducing a number of classes value + // since this value is not saved in attributes + op::v8::DetectionOutput::Attributes attrs; + + // initialize attributes affecting shape inference + // others remain by default + Dimension N = 5; + Dimension num_prior_boxes = 100; + Dimension priors_batch_size = N; + Dimension num_classes = 23; + + auto attrs_vector = create_attributes_vector(); + for (const auto& attrs : attrs_vector) { + Dimension num_loc_classes = attrs.share_location ? 1 : num_classes; + Dimension prior_box_size = attrs.normalized ? 
4 : 5; + + PartialShape box_logits_shape = {N, num_prior_boxes * num_loc_classes * 4}; + PartialShape class_preds_shape = {N, num_prior_boxes * num_classes}; + PartialShape proposals_shape = {priors_batch_size, + attrs.variance_encoded_in_target ? 1 : 2, + num_prior_boxes * prior_box_size}; + PartialShape output_shape_reference = + compute_reference_output_shape(attrs.keep_top_k, attrs.top_k, N, num_classes, num_prior_boxes); + + auto op = create_detection_output_v8(box_logits_shape, class_preds_shape, proposals_shape, attrs, element::f32); + ASSERT_EQ(op->get_output_partial_shape(0), output_shape_reference); + ASSERT_EQ(op->get_element_type(), element::f32); + } +} + +TEST(type_prop_layers, detection_outputv8_dynamic) { + op::v8::DetectionOutput::Attributes attrs; + + // initialize attributes affecting shape inference + // others remain by default + Dimension N = 13; + Dimension num_prior_boxes = 33; + Dimension priors_batch_size = 1; + Dimension num_classes = 10; + + auto attrs_vector = create_attributes_vector(); + for (const auto& attrs : attrs_vector) { + Dimension prior_box_size = attrs.normalized ? 4 : 5; + + PartialShape box_logits_shape = {N, Dimension::dynamic()}; + PartialShape class_preds_shape = {Dimension::dynamic(), num_prior_boxes * num_classes}; + PartialShape proposals_shape = {priors_batch_size, + attrs.variance_encoded_in_target ? 
1 : 2, + num_prior_boxes * prior_box_size}; + PartialShape output_shape_reference = + compute_reference_output_shape(attrs.keep_top_k, attrs.top_k, N, num_classes, num_prior_boxes); + + auto op = create_detection_output_v8(box_logits_shape, class_preds_shape, proposals_shape, attrs, element::f32); + ASSERT_EQ(op->get_output_partial_shape(0), output_shape_reference); + ASSERT_EQ(op->get_element_type(), element::f32); + } +} + +TEST(type_prop_layers, detection_outputv8_num_classes_not_deduced) { + op::v8::DetectionOutput::Attributes attrs; + + // initialize attributes affecting shape inference + // others remain by default + Dimension N = 13; + Dimension num_prior_boxes = 33; + Dimension priors_batch_size = 1; + + auto attrs_vector = create_attributes_vector(); + for (const auto& attrs : attrs_vector) { + Dimension prior_box_size = attrs.normalized ? 4 : 5; + + PartialShape box_logits_shape = {N, Dimension::dynamic()}; + PartialShape class_preds_shape = {N, Dimension::dynamic()}; + PartialShape proposals_shape = {priors_batch_size, + attrs.variance_encoded_in_target ? 
1 : 2, + num_prior_boxes * prior_box_size}; + PartialShape output_shape_reference = + compute_reference_output_shape(attrs.keep_top_k, attrs.top_k, N, Dimension::dynamic(), num_prior_boxes); + + auto op = create_detection_output_v8(box_logits_shape, class_preds_shape, proposals_shape, attrs, element::f32); + ASSERT_EQ(op->get_output_partial_shape(0), output_shape_reference); + ASSERT_EQ(op->get_element_type(), element::f32); + } +} + +TEST(type_prop_layers, detection_outputv8_num_classes_no_deduction) { + // In this case a number of classes and a number of prior boxes are not deduced + op::v8::DetectionOutput::Attributes attrs; + + // initialize attributes affecting shape inference + // others remain by default + Dimension N = 3; + Dimension priors_batch_size = N; + + auto attrs_vector = create_attributes_vector(); + for (const auto& attrs : attrs_vector) { + PartialShape box_logits_shape = {N, Dimension::dynamic()}; + PartialShape class_preds_shape = {N, Dimension::dynamic()}; + PartialShape proposals_shape = {priors_batch_size, + attrs.variance_encoded_in_target ? 
1 : 2, + Dimension::dynamic()}; + PartialShape output_shape_reference = compute_reference_output_shape(attrs.keep_top_k, + attrs.top_k, + N, + Dimension::dynamic(), + Dimension::dynamic()); + + auto op = create_detection_output_v8(box_logits_shape, class_preds_shape, proposals_shape, attrs, element::f32); + ASSERT_EQ(op->get_output_partial_shape(0), output_shape_reference); + ASSERT_EQ(op->get_element_type(), element::f32); + } +} + +TEST(type_prop_layers, detection_outputv8_dynamic2) { + // In this case a number of prior boxes is deduced using additional input + // and after that a number of classes is deduced using the second input shape + op::v8::DetectionOutput::Attributes attrs; + + // initialize attributes affecting shape inference + // others remain by default + Dimension N = 13; + Dimension num_prior_boxes = 33; + Dimension priors_batch_size = 1; + Dimension num_classes = 10; + + auto attrs_vector = create_attributes_vector(); + for (const auto& attrs : attrs_vector) { + PartialShape box_logits_shape = {N, Dimension::dynamic()}; + PartialShape class_preds_shape = {Dimension::dynamic(), num_prior_boxes * num_classes}; + PartialShape proposals_shape = {priors_batch_size, + attrs.variance_encoded_in_target ? 
1 : 2, + Dimension::dynamic()}; + PartialShape ad_class_preds_shape = {N, num_prior_boxes * 2}; + PartialShape ad_box_preds_shape = {N, Dimension::dynamic()}; + PartialShape output_shape_reference = + compute_reference_output_shape(attrs.keep_top_k, attrs.top_k, N, num_classes, num_prior_boxes); + + auto op = create_detection_output2_v8(box_logits_shape, + class_preds_shape, + proposals_shape, + ad_class_preds_shape, + ad_box_preds_shape, + attrs, + element::f32); + ASSERT_EQ(op->get_output_partial_shape(0), output_shape_reference); + ASSERT_EQ(op->get_element_type(), element::f32); + } +} diff --git a/src/core/tests/visitors/op/detection_output.cpp b/src/core/tests/visitors/op/detection_output.cpp index 488bbf423d3..91c95c2ce21 100644 --- a/src/core/tests/visitors/op/detection_output.cpp +++ b/src/core/tests/visitors/op/detection_output.cpp @@ -3,29 +3,19 @@ // #include "gtest/gtest.h" -#include "ngraph/ngraph.hpp" -#include "ngraph/op/util/attr_types.hpp" -#include "ngraph/opsets/opset1.hpp" -#include "ngraph/opsets/opset3.hpp" -#include "ngraph/opsets/opset4.hpp" -#include "ngraph/opsets/opset5.hpp" +#include "openvino/op/util/attr_types.hpp" +#include "openvino/opsets/opset8.hpp" #include "util/visitor.hpp" using namespace std; using namespace ngraph; +using namespace ov::op; +using namespace ov::op::util; using ngraph::test::NodeBuilder; using ngraph::test::ValueMap; -TEST(attributes, detection_output_op) { - NodeBuilder::get_ops().register_factory(); - const auto box_logits = make_shared(element::f32, Shape{1, 2 * 1 * 4}); - const auto class_preds = make_shared(element::f32, Shape{1, 2 * 32}); - const auto proposals = make_shared(element::f32, Shape{1, 2, 2 * 4}); - const auto aux_class_preds = make_shared(element::f32, Shape{1, 2 * 2}); - const auto aux_box_pred = make_shared(element::f32, Shape{1, 2 * 1 * 4}); - - op::DetectionOutputAttrs attrs; - attrs.num_classes = 32; +namespace { +void initialize_attributes(DetectionOutputBase::AttributesBase& attrs) { 
attrs.background_label_id = 0; attrs.top_k = 1; attrs.variance_encoded_in_target = false; @@ -41,29 +31,70 @@ TEST(attributes, detection_output_op) { attrs.input_height = 32; attrs.input_width = 32; attrs.objectness_score = 0.73f; +} +void is_equal_attrs(const DetectionOutputBase::AttributesBase& attrs1, + const DetectionOutputBase::AttributesBase& attrs2) { + EXPECT_EQ(attrs1.background_label_id, attrs2.background_label_id); + EXPECT_EQ(attrs1.top_k, attrs2.top_k); + EXPECT_EQ(attrs1.variance_encoded_in_target, attrs2.variance_encoded_in_target); + EXPECT_EQ(attrs1.keep_top_k, attrs2.keep_top_k); + EXPECT_EQ(attrs1.code_type, attrs2.code_type); + EXPECT_EQ(attrs1.share_location, attrs2.share_location); + EXPECT_EQ(attrs1.nms_threshold, attrs2.nms_threshold); + EXPECT_EQ(attrs1.confidence_threshold, attrs2.confidence_threshold); + EXPECT_EQ(attrs1.clip_after_nms, attrs2.clip_after_nms); + EXPECT_EQ(attrs1.clip_before_nms, attrs2.clip_before_nms); + EXPECT_EQ(attrs1.decrease_label_id, attrs2.decrease_label_id); + EXPECT_EQ(attrs1.normalized, attrs2.normalized); + EXPECT_EQ(attrs1.input_height, attrs2.input_height); + EXPECT_EQ(attrs1.input_width, attrs2.input_width); + EXPECT_EQ(attrs1.objectness_score, attrs2.objectness_score); +} +} // namespace + +TEST(attributes, detection_output_op) { + NodeBuilder::get_ops().register_factory(); + const auto box_logits = make_shared(element::f32, Shape{1, 2 * 1 * 4}); + const auto class_preds = make_shared(element::f32, Shape{1, 2 * 32}); + const auto proposals = make_shared(element::f32, Shape{1, 2, 2 * 4}); + const auto aux_class_preds = make_shared(element::f32, Shape{1, 2 * 2}); + const auto aux_box_pred = make_shared(element::f32, Shape{1, 2 * 1 * 4}); + + op::v0::DetectionOutput::Attributes attrs; + initialize_attributes(attrs); + attrs.num_classes = 32; auto detection_output = - make_shared(box_logits, class_preds, proposals, aux_class_preds, aux_box_pred, attrs); + make_shared(box_logits, class_preds, proposals, 
aux_class_preds, aux_box_pred, attrs); NodeBuilder builder(detection_output); - auto g_detection_output = ov::as_type_ptr(builder.create()); + auto g_detection_output = ov::as_type_ptr(builder.create()); const auto do_attrs = detection_output->get_attrs(); const auto g_do_attrs = g_detection_output->get_attrs(); EXPECT_EQ(g_do_attrs.num_classes, do_attrs.num_classes); - EXPECT_EQ(g_do_attrs.background_label_id, do_attrs.background_label_id); - EXPECT_EQ(g_do_attrs.top_k, do_attrs.top_k); - EXPECT_EQ(g_do_attrs.variance_encoded_in_target, do_attrs.variance_encoded_in_target); - EXPECT_EQ(g_do_attrs.keep_top_k, do_attrs.keep_top_k); - EXPECT_EQ(g_do_attrs.code_type, do_attrs.code_type); - EXPECT_EQ(g_do_attrs.share_location, do_attrs.share_location); - EXPECT_EQ(g_do_attrs.nms_threshold, do_attrs.nms_threshold); - EXPECT_EQ(g_do_attrs.confidence_threshold, do_attrs.confidence_threshold); - EXPECT_EQ(g_do_attrs.clip_after_nms, do_attrs.clip_after_nms); - EXPECT_EQ(g_do_attrs.clip_before_nms, do_attrs.clip_before_nms); - EXPECT_EQ(g_do_attrs.decrease_label_id, do_attrs.decrease_label_id); - EXPECT_EQ(g_do_attrs.normalized, do_attrs.normalized); - EXPECT_EQ(g_do_attrs.input_height, do_attrs.input_height); - EXPECT_EQ(g_do_attrs.input_width, do_attrs.input_width); - EXPECT_EQ(g_do_attrs.objectness_score, do_attrs.objectness_score); + is_equal_attrs(g_do_attrs, do_attrs); +} + +// ------------------------------ V8 ------------------------------ +TEST(attributes, detection_output_v8) { + NodeBuilder::get_ops().register_factory(); + const auto box_logits = make_shared(element::f32, Shape{1, 2 * 1 * 4}); + const auto class_preds = make_shared(element::f32, Shape{1, 2 * 32}); + const auto proposals = make_shared(element::f32, Shape{1, 2, 2 * 4}); + const auto aux_class_preds = make_shared(element::f32, Shape{1, 2 * 2}); + const auto aux_box_pred = make_shared(element::f32, Shape{1, 2 * 1 * 4}); + + op::v8::DetectionOutput::Attributes attrs; + initialize_attributes(attrs); + + 
auto detection_output = + make_shared<op::v8::DetectionOutput>(box_logits, class_preds, proposals, aux_class_preds, aux_box_pred, attrs); + NodeBuilder builder(detection_output); + auto g_detection_output = ov::as_type_ptr<op::v8::DetectionOutput>(builder.create()); + + const auto do_attrs = detection_output->get_attrs(); + const auto g_do_attrs = g_detection_output->get_attrs(); + + is_equal_attrs(g_do_attrs, do_attrs); }