Add transformation that aligns elementwise input ranks (#10125)

* [CPU] Add transformation that aligns elementwise input ranks

* fix tests - check also aBcd16b format

* add support for fq

* add test for sqr diff

* move to moc transformations

* fix tests

* align only for numpy autobroadcast type

* fix fetching autob from fq

* [CPU] Eltwise tests corrected & callback for CPU removed

* remove transformation callback call

* revert changes to getMKLDNNOutputMemoryFormats

* remove comment

* use single wrap_type

Co-authored-by: Vladislav Golubev <vladislav.golubev@intel.com>
This commit is contained in:
Mateusz Tabaka 2022-02-15 10:47:54 +01:00 committed by GitHub
parent 523adff17a
commit a875f6ed9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 294 additions and 16 deletions

View File

@ -0,0 +1,25 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
/**
 * @ingroup ie_transformation_common_api
 * @brief AlignEltwiseInputRanks extends the rank of each constant input of an
 * elementwise operation (or FakeQuantize) to match the operation's output rank
 * by prepending 1s to the constant shape. Applied only for NUMPY
 * auto-broadcast, where prepending unit dimensions is semantics-preserving.
 */
namespace ngraph {
namespace pass {

class TRANSFORMATIONS_API AlignEltwiseInputRanks: public MatcherPass {
public:
    NGRAPH_RTTI_DECLARATION;
    // Registers the matcher; the alignment itself happens in the callback.
    AlignEltwiseInputRanks();
};

}  // namespace pass
}  // namespace ngraph

View File

@ -0,0 +1,63 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/common_optimizations/align_eltwise_input_ranks.hpp"
#include <ngraph/opsets/opset8.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
NGRAPH_RTTI_DEFINITION(ngraph::pass::AlignEltwiseInputRanks, "AlignEltwiseInputRanks", 0);

// Matches binary elementwise ops (arithmetic/comparison/logical),
// SquaredDifference and FakeQuantize with a statically known output rank, and
// prepends 1s to the shapes of their constant inputs so every constant input
// rank equals the output rank. Runs only for NUMPY auto-broadcast, where this
// reshape is semantics-preserving.
ngraph::pass::AlignEltwiseInputRanks::AlignEltwiseInputRanks() {
    auto eltwise_pattern = pattern::wrap_type<opset8::SquaredDifference,
                                             op::util::BinaryElementwiseComparison,
                                             op::util::BinaryElementwiseLogical,
                                             op::util::BinaryElementwiseArithmetic,
                                             opset8::FakeQuantize>(pattern::has_static_rank());

    matcher_pass_callback callback = [=](pattern::Matcher& m) {
        auto node = m.get_match_root();

        // FakeQuantize is not a binary elementwise op, so its broadcast spec
        // is fetched through its own accessor rather than get_autob().
        auto fq = as_type<opset8::FakeQuantize>(node.get());
        if (fq) {
            if (fq->get_auto_broadcast() != ngraph::op::AutoBroadcastType::NUMPY) {
                return false;
            }
        } else if (node->get_autob() != ngraph::op::AutoBroadcastType::NUMPY) {
            return false;
        }

        // NormalizeL2 and Multiply can be fused to NormalizeIE.
        // NormalizeIE has an attribute called channel_shared, which is set
        // based on Multiply's constant input rank - it's true if the rank is 1.
        // So we skip extending Multiply's constant input rank here.
        if (ov::is_type<opset8::Multiply>(node)) {
            auto inputs = node->input_values();
            if (std::any_of(inputs.begin(), inputs.end(),
                            [] (const Output<Node>& input) -> bool { return ov::is_type<opset8::NormalizeL2>(input.get_node()); }))
                return false;
        }

        const auto rank = node->get_output_partial_shape(0).size();

        for (size_t i = 0; i < node->get_input_size(); i++) {
            auto const_node = as_type<op::Constant>(node->get_input_node_ptr(i));
            if (const_node == nullptr)
                continue;
            const auto& const_shape = const_node->get_shape();
            // Compare before subtracting: 'rank' and the shape size are both
            // unsigned, so "rank - const_shape.size() > 0" would underflow to
            // a huge value whenever the constant's rank exceeds the output
            // rank, and the insert below would attempt a gigantic allocation.
            if (const_shape.size() < rank) {
                const size_t diff = rank - const_shape.size();
                Shape new_shape = const_shape;
                new_shape.insert(new_shape.begin(), diff, 1);
                // Share the original constant's data buffer, only the shape changes.
                auto new_const = std::make_shared<op::Constant>(*const_node, new_shape);
                node->input(i).replace_source_output(new_const);
            }
        }

        // Always return false: inputs were rewired but no new node replaced
        // the match root, so the graph does not need re-matching from here.
        return false;
    };

    auto m = std::make_shared<pattern::Matcher>(eltwise_pattern, "AlignEltwiseInputRanks");
    this->register_matcher(m, callback);
}

View File

@ -58,6 +58,7 @@
#include <transformations/common_optimizations/nearest_neighbor_upsampling_fusion.hpp>
#include <transformations/common_optimizations/ric_fusion.hpp>
#include <transformations/common_optimizations/matmul_multiply_fusion.hpp>
#include "transformations/common_optimizations/align_eltwise_input_ranks.hpp"
NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0);
@ -180,6 +181,8 @@ bool ngraph::pass::MOCTransformations::run_on_model(const std::shared_ptr<ngraph
manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
manager.register_pass<ngraph::pass::AlignEltwiseInputRanks>();
manager.register_pass<ngraph::pass::ConstantFolding>();
manager.run_passes(f);

View File

@ -937,6 +937,8 @@ TEST_F(NGraphReaderTests, ConvertMulToEltwise) {
<layer id="14" name="broadcast1_data" precision="FP32" type="Const">
<output>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>112</dim>
<dim>1</dim>
</port>
@ -955,6 +957,8 @@ TEST_F(NGraphReaderTests, ConvertMulToEltwise) {
<dim>112</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>112</dim>
<dim>1</dim>
</port>
@ -1058,6 +1062,8 @@ TEST_F(NGraphReaderTests, ConvertAddToEltwise) {
<layer id="14" name="broadcast1_data" precision="FP32" type="Const">
<output>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>112</dim>
<dim>1</dim>
</port>
@ -1076,6 +1082,8 @@ TEST_F(NGraphReaderTests, ConvertAddToEltwise) {
<dim>112</dim>
</port>
<port id="1">
<dim>1</dim>
<dim>1</dim>
<dim>112</dim>
<dim>1</dim>
</port>
@ -1873,6 +1881,7 @@ TEST_F(NGraphReaderTests, ConvertAddToEltwise2) {
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</output>
<blobs>
@ -1892,6 +1901,7 @@ TEST_F(NGraphReaderTests, ConvertAddToEltwise2) {
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
<dim>1</dim>
</port>
</input>
<output>

View File

@ -0,0 +1,78 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ngraph/opsets/opset8.hpp>
#include <transformations/common_optimizations/align_eltwise_input_ranks.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ngraph;
using AlignEltwiseInputRanksParams = std::tuple<PartialShape, Shape, Shape, bool>;
// Parameterized fixture: combines gtest value-parameterization with
// TransformationTestsF, which supplies 'function', 'function_ref', 'manager'
// and 'comparator' used by the TEST_P body below.
class AlignEltwiseInputRanksTest
    : public testing::WithParamInterface<AlignEltwiseInputRanksParams>,
      public TransformationTestsF {
};
// Builds a graph with Add/Less/SquaredDifference/LogicalOr/FakeQuantize fed by
// constants of 'const_shape', runs AlignEltwiseInputRanks, and compares
// against a reference graph where the constants have 'expected_const_shape'.
TEST_P(AlignEltwiseInputRanksTest, FusionTest) {
    auto params = GetParam();
    const auto& input_shape = std::get<0>(params);
    auto const_shape = std::get<1>(params);
    auto expected_const_shape = std::get<2>(params);
    bool can_align = std::get<3>(params);

    {
        auto data = std::make_shared<opset8::Parameter>(element::f32, input_shape);
        auto add = std::make_shared<opset8::Add>(data, op::Constant::create(element::f32, const_shape, {3}));
        auto less = std::make_shared<opset8::Less>(data, op::Constant::create(element::f32, const_shape, {5}));
        auto sqr_diff = std::make_shared<opset8::SquaredDifference>(data, op::Constant::create(element::f32, const_shape, {5}));
        auto convert = std::make_shared<opset8::Convert>(data, element::boolean);
        auto logical_or = std::make_shared<opset8::LogicalOr>(convert, op::Constant::create(element::boolean, const_shape, {false}));
        auto low = op::Constant::create(element::f32, const_shape, {0});
        auto high = op::Constant::create(element::f32, const_shape, {20});
        auto fq = std::make_shared<opset8::FakeQuantize>(add, low, high, low, high, 256);
        // sqr_diff must be listed as a result: a node unreachable from the
        // Function's results is not part of the graph, so the pass would
        // never visit SquaredDifference and it would go untested.
        function = std::make_shared<Function>(NodeVector{less, logical_or, fq, sqr_diff}, ParameterVector{data});

        manager.register_pass<pass::AlignEltwiseInputRanks>();
    }

    // Reference graph is built only when alignment is expected; otherwise
    // TransformationTestsF compares against the untouched original.
    if (can_align) {
        auto data = std::make_shared<opset8::Parameter>(element::f32, input_shape);
        auto add = std::make_shared<opset8::Add>(data, op::Constant::create(element::f32, expected_const_shape, {3}));
        auto less = std::make_shared<opset8::Less>(data, op::Constant::create(element::f32, expected_const_shape, {5}));
        auto sqr_diff = std::make_shared<opset8::SquaredDifference>(data, op::Constant::create(element::f32, expected_const_shape, {5}));
        auto convert = std::make_shared<opset8::Convert>(data, element::boolean);
        auto logical_or = std::make_shared<opset8::LogicalOr>(convert, op::Constant::create(element::boolean, expected_const_shape, {false}));
        auto low = op::Constant::create(element::f32, expected_const_shape, {0});
        auto high = op::Constant::create(element::f32, expected_const_shape, {20});
        auto fq = std::make_shared<opset8::FakeQuantize>(add, low, high, low, high, 256);
        function_ref = std::make_shared<Function>(NodeVector{less, logical_or, fq, sqr_diff}, ParameterVector{data});
    }

    // Constant shapes are the whole point of this pass - compare values too,
    // not just the topology.
    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}
// Each tuple: {input partial shape, constant shape,
//              expected constant shape after the pass, can_align}.
// When can_align is false the expected shape is unused and the graph must
// stay unchanged.
static std::vector<AlignEltwiseInputRanksParams> params = {
    AlignEltwiseInputRanksParams(PartialShape::dynamic(3), {}, {1, 1, 1}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(3), {1}, {1, 1, 1}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(3), {1, 1}, {1, 1, 1}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(3), {3}, {1, 1, 3}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(3), {2, 3}, {1, 2, 3}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(4), {}, {1, 1, 1, 1}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(4), {3, 1, 1}, {1, 3, 1, 1}, true),
    AlignEltwiseInputRanksParams(PartialShape::dynamic(4), {3}, {1, 1, 1, 3}, true),
    AlignEltwiseInputRanksParams(Shape{1, 4, 10, 10}, {4, 1, 1}, {1, 4, 1, 1}, true),
    // negative cases
    // dynamic output rank - nothing to align against
    AlignEltwiseInputRanksParams(PartialShape::dynamic(), {2, 3, 4}, {}, false),
    // scalar or higher-rank constants against a rank-0 data input
    AlignEltwiseInputRanksParams(Shape{}, {}, {}, false),
    AlignEltwiseInputRanksParams(Shape{}, {1}, {}, false),
    AlignEltwiseInputRanksParams(Shape{}, {2, 3, 4}, {}, false),
};

INSTANTIATE_TEST_SUITE_P(TransformationTests, AlignEltwiseInputRanksTest, ::testing::ValuesIn(params));

View File

@ -434,54 +434,98 @@ std::vector<std::vector<ngraph::Shape>> inShapes_4D_1D = {
{{1, 3, 3, 3}, {3}},
};
std::vector<CPUSpecificParams> cpuParams_4D_1D = {
CPUSpecificParams({nChw16c, x}, {nChw16c}, {}, {}),
CPUSpecificParams({nhwc, x}, {nhwc}, {}, {}),
CPUSpecificParams({nchw, x}, {nchw}, {}, {})
std::vector<CPUSpecificParams> cpuParams_4D_1D_Constant_mode = {
CPUSpecificParams({nChw16c, nchw}, {nChw16c}, {}, {}),
CPUSpecificParams({nhwc, nhwc}, {nhwc}, {}, {}),
CPUSpecificParams({nchw, nchw}, {nchw}, {}, {})
};
const auto params_4D_1D = ::testing::Combine(
const auto params_4D_1D_constant_mode = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inShapes_4D_1D)),
::testing::Values(ngraph::helpers::EltwiseTypes::ADD, ngraph::helpers::EltwiseTypes::MULTIPLY),
::testing::ValuesIn(secondaryInputTypes),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
::testing::ValuesIn(opTypes),
::testing::ValuesIn(netType),
::testing::Values(ElementType::f32),
::testing::Values(ElementType::f32),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_1D)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_1D_Constant_mode)),
::testing::Values(emptyFusingSpec));
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_1D, EltwiseLayerCPUTest, params_4D_1D, EltwiseLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_1D_Constant, EltwiseLayerCPUTest, params_4D_1D_constant_mode, EltwiseLayerCPUTest::getTestCaseName);
std::vector<CPUSpecificParams> cpuParams_4D_1D_Parameter_mode = {
CPUSpecificParams({nChw16c, x}, {nChw16c}, {}, {}),
CPUSpecificParams({nhwc, x}, {nhwc}, {}, {}),
CPUSpecificParams({nchw, x}, {nchw}, {}, {})
};
const auto params_4D_1D_parameter_mode = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inShapes_4D_1D)),
::testing::Values(ngraph::helpers::EltwiseTypes::ADD, ngraph::helpers::EltwiseTypes::MULTIPLY),
::testing::Values(ngraph::helpers::InputLayerType::PARAMETER),
::testing::ValuesIn(opTypes),
::testing::ValuesIn(netType),
::testing::Values(ElementType::f32),
::testing::Values(ElementType::f32),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D_1D_Parameter_mode)),
::testing::Values(emptyFusingSpec));
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_4D_1D_Parameter, EltwiseLayerCPUTest, params_4D_1D_parameter_mode, EltwiseLayerCPUTest::getTestCaseName);
std::vector<std::vector<ngraph::Shape>> inShapes_5D_1D = {
{{2, 17, 5, 4, 10}, {10}},
{{1, 3, 3, 3, 3}, {3}},
};
std::vector<CPUSpecificParams> cpuParams_5D_1D = {
CPUSpecificParams({nCdhw16c, x}, {nCdhw16c}, {}, {}),
CPUSpecificParams({ndhwc, x}, {ndhwc}, {}, {}),
CPUSpecificParams({ncdhw, x}, {ncdhw}, {}, {})
std::vector<CPUSpecificParams> cpuParams_5D_1D_constant = {
CPUSpecificParams({nCdhw16c, ncdhw}, {nCdhw16c}, {}, {}),
CPUSpecificParams({ndhwc, ndhwc}, {ndhwc}, {}, {}),
CPUSpecificParams({ncdhw, ncdhw}, {ncdhw}, {}, {})
};
const auto params_5D_1D = ::testing::Combine(
const auto params_5D_1D_constant = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inShapes_5D_1D)),
::testing::Values(ngraph::helpers::EltwiseTypes::ADD, ngraph::helpers::EltwiseTypes::MULTIPLY),
::testing::ValuesIn(secondaryInputTypes),
::testing::Values(ngraph::helpers::InputLayerType::CONSTANT),
::testing::ValuesIn(opTypes),
::testing::ValuesIn(netType),
::testing::Values(ElementType::f32),
::testing::Values(ElementType::f32),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_1D)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_1D_constant)),
::testing::Values(emptyFusingSpec));
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_1D, EltwiseLayerCPUTest, params_5D_1D, EltwiseLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_1D_Constant, EltwiseLayerCPUTest, params_5D_1D_constant, EltwiseLayerCPUTest::getTestCaseName);
std::vector<CPUSpecificParams> cpuParams_5D_1D_parameter = {
CPUSpecificParams({nCdhw16c, x}, {nCdhw16c}, {}, {}),
CPUSpecificParams({ndhwc, x}, {ndhwc}, {}, {}),
CPUSpecificParams({ncdhw, x}, {ncdhw}, {}, {})
};
const auto params_5D_1D_parameter = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(static_shapes_to_test_representation(inShapes_5D_1D)),
::testing::Values(ngraph::helpers::EltwiseTypes::ADD, ngraph::helpers::EltwiseTypes::MULTIPLY),
::testing::Values(ngraph::helpers::InputLayerType::PARAMETER),
::testing::ValuesIn(opTypes),
::testing::ValuesIn(netType),
::testing::Values(ElementType::f32),
::testing::Values(ElementType::f32),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(additional_config)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_1D_parameter)),
::testing::Values(emptyFusingSpec));
INSTANTIATE_TEST_SUITE_P(smoke_CompareWithRefs_5D_1D_Parameter, EltwiseLayerCPUTest, params_5D_1D_parameter, EltwiseLayerCPUTest::getTestCaseName);
std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypesBinDyn = {

View File

@ -0,0 +1,55 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils/cpu_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include <ngraph/opsets/opset8.hpp>
using namespace ngraph;
namespace SubgraphTestsDefinitions {
// Builds Conv -> (ReduceMean -> Reshape -> MVN -> Reshape -> Multiply -> Add
// -> Sigmoid) -> Multiply and verifies after execution that the fused
// "Subgraph" node(s) keep a channel-blocked output layout (aBcd8b/aBcd16b).
class SubgraphWithBlockedFormat : virtual public LayerTestsUtils::LayerTestsCommon {
protected:
    void SetUp() override {
        targetDevice = CommonTestUtils::DEVICE_CPU;

        const auto prec = element::f32;
        auto input = std::make_shared<opset8::Parameter>(prec, Shape{1, 32, 64, 32});
        auto filters = builder::makeConstant(prec, Shape{32, 32, 1, 1}, std::vector<float>{}, true);
        auto conv = std::make_shared<opset8::Convolution>(input, filters, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        auto reduce_mean = std::make_shared<opset8::ReduceMean>(conv, opset8::Constant::create(element::i32, Shape{2}, {2, 3}), true);
        auto pre_mvn_reshape = std::make_shared<opset8::Reshape>(reduce_mean, opset8::Constant::create(element::i32, Shape{3}, {0, 16, -1}), true);
        auto mvn = std::make_shared<opset8::MVN>(pre_mvn_reshape, opset8::Constant::create(element::i32, Shape{1}, {2}),
                false, 0.1, op::MVNEpsMode::INSIDE_SQRT);
        // Restore the pre-reshape shape, taken dynamically from ReduceMean's output.
        auto post_mvn_reshape = std::make_shared<opset8::Reshape>(mvn, std::make_shared<opset8::ShapeOf>(reduce_mean), false);
        auto scale = std::make_shared<opset8::Multiply>(post_mvn_reshape, builder::makeConstant(prec, Shape{32, 1, 1}, std::vector<float>{}, true));
        auto shift = std::make_shared<opset8::Add>(scale, builder::makeConstant(prec, Shape{32, 1, 1}, std::vector<float>{}, true));
        auto sigmoid = std::make_shared<opset8::Sigmoid>(shift);
        auto gated = std::make_shared<opset8::Multiply>(conv, sigmoid);

        function = std::make_shared<Function>(gated, ParameterVector{input});
    }

    void TearDown() override {
        auto exec_graph = executableNetwork.GetExecGraphInfo().getFunction();
        int subgraph_count = 0;
        for (const auto& op : exec_graph->get_ordered_ops()) {
            const auto layer_type = op->get_rt_info().at(ExecGraphInfoSerialization::LAYER_TYPE).as<std::string>();
            if (layer_type != "Subgraph")
                continue;
            subgraph_count++;
            const auto output_layout = op->get_rt_info().at(ExecGraphInfoSerialization::OUTPUT_LAYOUTS).as<std::string>();
            // Blocking factor depends on the ISA (AVX2 -> 8, AVX-512 -> 16).
            ASSERT_TRUE(output_layout == "aBcd8b" || output_layout == "aBcd16b");
        }
        // At least one fused Subgraph node must be present in the exec graph.
        ASSERT_GT(subgraph_count, 0);
    }
};
// Runs the subgraph and compares against reference; layout checks are
// performed in the fixture's TearDown after execution.
TEST_F(SubgraphWithBlockedFormat, smoke_CompareWithRefs) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()

    Run();
}
} // namespace SubgraphTestsDefinitions