From 56581dbe2eb5794c0e78a43dea54dd88e7b2b5bb Mon Sep 17 00:00:00 2001 From: Nadezhda Ageeva Date: Mon, 17 Jan 2022 14:16:23 +0300 Subject: [PATCH] [GNA] Support new kaldi irs (#9474) * Support new kaldi IRs (generated in NHWC layout) * Update tests with activation and fq * Cleanup * Fix reordering FQ and MaxPool and problem with overflow * Fix win * Update src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.hpp Co-authored-by: Elizaveta Lobanova * Update src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.cpp Co-authored-by: Elizaveta Lobanova * Update inference-engine/tests/unit/gna/ngraph/transformations/gna_unfuse_reshape_and_transpose.cpp Co-authored-by: Elizaveta Lobanova * Code review Co-authored-by: Elizaveta Lobanova --- .../gna_unfuse_reshape_and_transpose.cpp | 225 ++++++++++++++++++ src/plugins/intel_gna/gna_graph_compiler.cpp | 34 ++- src/plugins/intel_gna/gna_graph_compiler.hpp | 4 +- src/plugins/intel_gna/gna_graph_patterns.hpp | 77 ++++-- src/plugins/intel_gna/gna_plugin.cpp | 9 +- .../layers/gna_convolution_layer.cpp | 11 +- .../layers/gna_convolution_layer.hpp | 4 +- .../intel_gna/layers/gna_layer_info.hpp | 18 ++ .../intel_gna/optimizer/gna_pass_manager.cpp | 13 +- .../decompose_2d_convolution.cpp | 4 +- .../transformations/remove_extra_reshapes.cpp | 10 +- ...lit_convolution_with_large_buffer_size.cpp | 3 +- .../unfuse_reshape_and_transpose.cpp | 189 +++++++++++++++ .../unfuse_reshape_and_transpose.hpp | 72 ++++++ .../gna/pass_tests/fq_maxpool_reordering.cpp | 68 ++++-- .../remove_permutations_NHWC_to_NCHW_pass.cpp | 35 ++- 16 files changed, 697 insertions(+), 79 deletions(-) create mode 100644 inference-engine/tests/unit/gna/ngraph/transformations/gna_unfuse_reshape_and_transpose.cpp create mode 100644 src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.cpp create mode 100644 src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.hpp diff --git 
a/inference-engine/tests/unit/gna/ngraph/transformations/gna_unfuse_reshape_and_transpose.cpp b/inference-engine/tests/unit/gna/ngraph/transformations/gna_unfuse_reshape_and_transpose.cpp new file mode 100644 index 00000000000..26486dd563e --- /dev/null +++ b/inference-engine/tests/unit/gna/ngraph/transformations/gna_unfuse_reshape_and_transpose.cpp @@ -0,0 +1,225 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "transformations/unfuse_reshape_and_transpose.hpp" + +#include "common_test_utils/ngraph_test_utils.hpp" +#include +#include +#include +#include +#include + +namespace testing { +namespace { + +class IActivationFactory { +public: + virtual ~IActivationFactory() = default; + virtual std::shared_ptr createNode(const ngraph::Output& in) = 0; +}; + +template +class ActivationFactory : public IActivationFactory { +public: + ActivationFactory() = default; + std::shared_ptr createNode(const ngraph::Output& operation_before) override { + return std::make_shared(operation_before); + } +private: + ActivationFactory(const ActivationFactory&) = delete; + ActivationFactory& operator=(const ActivationFactory& ) = delete; +}; + +template <> +class ActivationFactory : public IActivationFactory { +public: + ActivationFactory(const double min, const double max) : min_(min), max_(max) {} + std::shared_ptr createNode(const ngraph::Output& operation_before) override { + return std::make_shared(operation_before, min_, max_); + } +private: + ActivationFactory(const ActivationFactory&) = delete; + ActivationFactory& operator=(const ActivationFactory& ) = delete; +private: + const double min_; + const double max_; +}; + +using ActivationFactoryPtr = std::shared_ptr; + +template +ActivationFactoryPtr createActivationFactory(Args&& ... 
args) { + return std::make_shared>(std::forward(args) ...); +} + +static std::shared_ptr createFunction(const ngraph::Shape& conv_input_shape, + const ngraph::Shape& conv_filter_shape, + bool with_bias, + bool with_pool, + ActivationFactoryPtr activation_factory, + bool with_fq, + bool single_reshape_before, + bool single_reshape_after) { + size_t total_in = std::accumulate(std::begin(conv_input_shape), std::end(conv_input_shape), 1, std::multiplies()); + auto input = std::make_shared(ngraph::element::f32, ngraph::Shape{1, total_in}); + std::shared_ptr last_node, last_const; + auto add_fake_quantize = [&](const std::shared_ptr& node) { + auto input_low = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1}); + auto input_high = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {5}); + auto output_low = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0}); + auto output_high = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {10}); + return std::make_shared(node, input_low, input_high, output_low, output_high, 11); + }; + if (single_reshape_before) { + auto reshape_in_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, conv_input_shape); + auto reshape_in = std::make_shared(input, reshape_in_const, false); + last_node = reshape_in; + } else { + auto reshape_in_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, + ngraph::Shape{conv_input_shape[0], conv_input_shape[2], conv_input_shape[3], conv_input_shape[1]}); + auto reshape_in = std::make_shared(input, reshape_in_const, false); + auto transpose_in_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{0, 3, 1, 2}); + auto transpose_in = std::make_shared(reshape_in, transpose_in_const); + last_node = transpose_in; + } + auto conv_weights = ngraph::opset8::Constant::create(ngraph::element::f32, conv_filter_shape, {1}); + 
last_const = conv_weights; + if (with_fq) { + auto conv_input_fq = add_fake_quantize(last_node); + last_node = conv_input_fq; + auto conv_weights_fq = add_fake_quantize(conv_weights); + last_const = conv_weights_fq; + } + auto conv = std::make_shared(last_node, + last_const, + ngraph::Strides{1, 1}, + ngraph::CoordinateDiff{0, 0}, + ngraph::CoordinateDiff{0, 0}, + ngraph::Strides{1, 1}); + last_node = conv; + auto conv_output_shape = conv->get_output_shape(0); + size_t total_out = std::accumulate(std::begin(conv_output_shape), std::end(conv_output_shape), 1, std::multiplies()); + if (with_bias) { + auto add_const = ngraph::opset8::Constant::create(ngraph::element::f32, ngraph::Shape{1, conv_output_shape.at(1), 1, 1}, {1}); + auto add = std::make_shared(conv, add_const); + last_node = add; + } + if (with_fq) { + auto conv_bias_fq = add_fake_quantize(last_node); + last_node = conv_bias_fq; + } + if (with_pool) { + auto pool = std::make_shared(last_node, + ngraph::Strides{1, 1}, ngraph::Shape{0, 0}, ngraph::Shape{0, 0}, ngraph::Shape{1, 1}); + last_node = pool; + } + if (activation_factory) { + if (with_fq) { + auto act_fq_in = add_fake_quantize(last_node); + last_node = act_fq_in; + } + auto act = activation_factory->createNode(last_node); + last_node = act; + if (with_fq) { + auto act_fq_out = add_fake_quantize(last_node); + last_node = act_fq_out; + } + } + auto reshape_out_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{1, total_out}); + if (!single_reshape_after) { + auto transpose_out_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{0, 2, 3, 1}); + auto transpose_out = std::make_shared(last_node, transpose_out_const); + last_node = transpose_out; + } + auto reshape_out = std::make_shared(last_node, reshape_out_const, false); + + auto result = std::make_shared(reshape_out); + auto func = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input}); + + 
return func; +} + +typedef std::tuple< + std::tuple, + bool, // with bias + bool, // with pooling + ActivationFactoryPtr, // with activation + bool // with fq +> UnfuseReshapeAndTransposeParams; + +class UnfuseReshapeAndTransposeTestSuiteFixture: public CommonTestUtils::TestsCommon, + public ::testing::WithParamInterface { +public: + void SetUp() override; +public: + std::shared_ptr function, reference_function; +}; + +void UnfuseReshapeAndTransposeTestSuiteFixture::SetUp() { + std::tuple conv_data; + bool with_bias; + bool with_pool; + bool with_fq; + ActivationFactoryPtr af; + std::tie(conv_data, with_bias, with_pool, af, with_fq) = this->GetParam(); + ngraph::Shape conv_input_shape; + ngraph::Shape conv_filter_shape; + bool replace_before; + bool replace_after; + std::tie(conv_input_shape, conv_filter_shape, replace_before, replace_after) = conv_data; + function = createFunction(conv_input_shape, conv_filter_shape, with_bias, with_pool, af, with_fq, true, true); + reference_function = createFunction(conv_input_shape, conv_filter_shape, with_bias, with_pool, af, with_fq, !replace_before, !replace_after); +} + +void execute_test(std::shared_ptr function, + std::shared_ptr reference_function) { + ngraph::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.run_passes(function); + const FunctionsComparator func_comparator = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); + const FunctionsComparator::Result result = func_comparator(function, reference_function); + ASSERT_TRUE(result.valid) << result.message; +} + +TEST_P(UnfuseReshapeAndTransposeTestSuiteFixture, CompareFunctions) { + execute_test(function, reference_function); +} + +const std::vector activationFactories = { + nullptr, + createActivationFactory(), + createActivationFactory(), + createActivationFactory(), + createActivationFactory(), + createActivationFactory(), + createActivationFactory(), + 
createActivationFactory(), + createActivationFactory(0.1, 0.2) +}; + +INSTANTIATE_TEST_SUITE_P(UnfuseReshapeAndTransposeTestSuite, UnfuseReshapeAndTransposeTestSuiteFixture, + ::testing::Combine( + ::testing::ValuesIn( + std::vector>{ + {ngraph::Shape{1, 1, 1, 168}, ngraph::Shape{12, 1, 1, 8}, true, false}, + {ngraph::Shape{1, 1, 1, 640}, ngraph::Shape{256, 1, 1, 512}, true, false}, + {ngraph::Shape{1, 1, 1, 1024}, ngraph::Shape{256, 1, 1, 512}, true, false}, + {ngraph::Shape{1, 1, 33, 32}, ngraph::Shape{128, 1, 33, 9}, true, false}, + {ngraph::Shape{1, 1, 11, 13}, ngraph::Shape{128, 1, 11, 9}, true, false}, + {ngraph::Shape{1, 1, 33, 23}, ngraph::Shape{128, 1, 11, 5}, true, false}, + {ngraph::Shape{1, 1, 33, 32}, ngraph::Shape{1, 1, 33, 9}, true, true}, + {ngraph::Shape{1, 1, 1, 1024}, ngraph::Shape{256, 1, 1, 1024}, true, true}, + {ngraph::Shape{1, 1, 33, 32}, ngraph::Shape{1, 1, 33, 9}, true, true}}), + ::testing::ValuesIn(std::vector{true, false}), // with bias + ::testing::ValuesIn(std::vector{true, false}), // with max pool + ::testing::ValuesIn(activationFactories), // with activation + ::testing::ValuesIn(std::vector{true, false}))); // with fq + +} // namespace +} // namespace testing diff --git a/src/plugins/intel_gna/gna_graph_compiler.cpp b/src/plugins/intel_gna/gna_graph_compiler.cpp index fbc81329edf..9cbe9fe3760 100644 --- a/src/plugins/intel_gna/gna_graph_compiler.cpp +++ b/src/plugins/intel_gna/gna_graph_compiler.cpp @@ -266,13 +266,21 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) std::swap(convolution._dilation_x, convolution._dilation_y); } + auto in_kernel_w = convolution._kernel_x; + auto in_kernel_h = convolution._kernel_y; + bool transpose_h_w = false; + // Map 2d convolution to 1d if it's possible - if (GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, convolution._kernel_x, convolution._stride_x)) { + if (GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, in_channels, + 
convolution._kernel_y, convolution._kernel_x, + convolution._stride_y, convolution._stride_x)) { + transpose_h_w = (in_height == convolution._kernel_y); in_width *= in_height; in_height = 1; out_width *= out_height; out_height = 1; - convolution._stride_x *= (convolution._stride_y * convolution._kernel_x); + convolution._stride_x *= transpose_h_w ? (convolution._stride_y * convolution._kernel_y) : + (convolution._stride_y * convolution._kernel_x); convolution._kernel_x *= convolution._kernel_y; convolution._kernel_y = 1; } @@ -304,19 +312,20 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) in_height != 1) { // TensorFlow default layout is NHWC // OpenVino Default layout is NCHW - // GNA Convolution input is NHCW + // GNA Convolution input is NHCW (old) or NHWC (new) // When layer layout is in NHWC it means that is was created by PassManager return finalizeConvolution2DPrimitive(layer, in_batch, in_channels, in_height, in_width, out_batch, out_channels, out_height, out_width); THROW_GNA_LAYER_EXCEPTION(layer) << "Convolution 2D is not supported on GNA 1.0 library"; } finalizeConvolution1DPrimitive(layer, in_batch, in_channels, in_width, - out_batch, out_channels, out_width); + out_batch, out_channels, out_width, in_kernel_w, in_kernel_h, transpose_h_w); } void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerPtr layer, uint32_t in_batch, uint32_t in_channels, uint32_t in_width, - uint32_t out_batch, uint32_t out_channels, uint32_t out_width) { + uint32_t out_batch, uint32_t out_channels, uint32_t out_width, + uint32_t in_kernel_w, uint32_t in_kernel_h, bool transpose_h_w) { auto& convolution = dynamic_cast(*layer.get()); printConvolutionLayer(convolution); @@ -429,7 +438,10 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP ptr_weights, ptr_biases); - if (inputs->getLayout() == Layout::NHWC) { + // Keep both variants of kaldi models working: + // Old one has layout which is 
different from NHWC + // New one has layout NHWC, but it is mapped from 2d by H + if (inputs->getLayout() == Layout::NHWC && !transpose_h_w) { currentComponent.orientation_in = kDnnInterleavedOrientation; currentComponent.orientation_out = kDnnInterleavedOrientation; } @@ -447,7 +459,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP // TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know how kaldi will handle that if (!dnn->do_rotate_input) { - if (inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) { + if ((inputs->getLayout() != Layout::NHWC || transpose_h_w) && LayerInfo(connectedInputLayer).isInput()) { // Kaldi features are opposite orientation dnn->do_rotate_input = true; dnn->num_rotate_rows = effectiveStride; @@ -459,12 +471,16 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP connectOutput(layer, ptr_outputs, num_data_bytes_out); + // Transpose H with W or C with HW + auto A = transpose_h_w ? in_kernel_h : in_channels; + auto B = transpose_h_w ? 
in_kernel_w : convolution._kernel[X_AXIS]; + std::vector transposedWeights; for (uint32_t k = 0; k < convolution._out_depth; k++) { uint8_t * ptr_filt_current = convolution._weights->cbuffer().as() + - k * in_channels * convolution._kernel[X_AXIS] * convolution.precision.size(); - auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), in_channels, convolution._kernel[X_AXIS]); + k * A * B * convolution.precision.size(); + auto transposedPart = transposeMatrix(ptr_filt_current, convolution.precision.size(), A, B); transposedWeights.insert(transposedWeights.end(), transposedPart.begin(), transposedPart.end()); } if (transposedWeights.size() != convolution._weights->byteSize()) { diff --git a/src/plugins/intel_gna/gna_graph_compiler.hpp b/src/plugins/intel_gna/gna_graph_compiler.hpp index 5331b1b6a6d..6871ee8489d 100644 --- a/src/plugins/intel_gna/gna_graph_compiler.hpp +++ b/src/plugins/intel_gna/gna_graph_compiler.hpp @@ -128,8 +128,8 @@ public: void finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerPtr, uint32_t in_batch, uint32_t in_channels, uint32_t in_width, - uint32_t out_batch, uint32_t out_channels, uint32_t out_width); - + uint32_t out_batch, uint32_t out_channels, uint32_t out_width, + uint32_t in_kernel_x, uint32_t in_kernel_y, bool transpose); void finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerPtr, uint32_t in_batch, uint32_t in_channels, uint32_t in_height, uint32_t in_width, uint32_t out_batch, uint32_t out_channels, uint32_t out_height, uint32_t out_width); diff --git a/src/plugins/intel_gna/gna_graph_patterns.hpp b/src/plugins/intel_gna/gna_graph_patterns.hpp index f9d6e55c49e..ace2b528f14 100644 --- a/src/plugins/intel_gna/gna_graph_patterns.hpp +++ b/src/plugins/intel_gna/gna_graph_patterns.hpp @@ -89,7 +89,8 @@ inline std::pair Fin auto next = getInputTo(layer->outData.front()).begin()->second; // Permute is inserted before Reshape by MO in NHWC models, so we need to find either permute, or reshape, 
or output - while (!LayerInfo(next).isPermute() && !LayerInfo(next).isOutput() && next->outData.size() == 1) { + while (!LayerInfo(next).isPermute() && !LayerInfo(next).isPermuteViaReshape() && + !LayerInfo(next).isOutput() && next->outData.size() == 1) { if (LayerInfo(next).isNonFunctional() && !IsReshapeFrom4dTo3d(next) && !IsReshapeFrom3dTo4d(next)) { break; } @@ -111,14 +112,27 @@ inline std::pair Fin if (next->outData.size() != 1) { return std::make_pair(nullptr, nullptr); } - // Check if reshape is expected for this pattern: - // the next layer has the both, height and width dimensions > 1 - auto in_dim_size = next->insData[0].lock()->getDims().size(); - IE_ASSERT(in_dim_size == 3 || in_dim_size == 4); - size_t height = in_dim_size == 3 ? 1 : GetDataDimSize(next->insData[0].lock(), InferenceEngine::DataDimName::H); - size_t width = GetDataDimSize(next->insData[0].lock(), InferenceEngine::DataDimName::W); - if (next->outData[0]->getDims().size() < 3 || height != 1 || width != 1) { - return std::make_pair(nullptr, nullptr); + auto input_dims = next->insData[0].lock()->getDims(); + auto in_dims_size = input_dims.size(); + auto output_dims = next->outData[0]->getDims(); + auto out_dims_size = output_dims.size(); + if (in_dims_size == 4 && out_dims_size == 4) { + if (!LayerInfo(next).isPermuteViaReshape() || + (input_dims[0] != output_dims[0]) || // N + (input_dims[1] != output_dims[3]) || // C + (input_dims[2] != output_dims[1]) || // H + (input_dims[3] != output_dims[2])) { // W + return std::make_pair(nullptr, nullptr); + } + } else { + // Check if reshape is expected for this pattern: + // the next layer has the both, height and width dimensions > 1 + IE_ASSERT(in_dims_size == 3 || in_dims_size == 4); + size_t height = in_dims_size == 3 ? 
1 : GetDataDimSize(next->insData[0].lock(), InferenceEngine::DataDimName::H); + size_t width = GetDataDimSize(next->insData[0].lock(), InferenceEngine::DataDimName::W); + if (out_dims_size < 3 || height != 1 || width != 1) { + return std::make_pair(nullptr, nullptr); + } } } else { return std::make_pair(nullptr, nullptr); @@ -127,7 +141,8 @@ inline std::pair Fin // Permute is inserted after Reshape by MO in NHWC models, so we need to find either permute, or reshape, or input auto parent = InferenceEngine::CNNNetPrevLayer(layer); auto prev = parent; - while (!LayerInfo(prev).isPermute() && !LayerInfo(prev).isInput() && InferenceEngine::CNNNetHasPrevLayer(prev.get())) { + while (!LayerInfo(prev).isPermute() && !LayerInfo(prev).isPermuteViaReshape() && + !LayerInfo(prev).isInput() && InferenceEngine::CNNNetHasPrevLayer(prev.get())) { if (LayerInfo(prev).isNonFunctional() && !IsReshapeFrom4dTo3d(prev) && !IsReshapeFrom3dTo4d(prev)) { break; } @@ -142,19 +157,35 @@ inline std::pair Fin order != std::vector{0, 2, 1} /* NWC to NCW */) { return std::make_pair(nullptr, nullptr); } - } else if (LayerInfo(prev).isReshape()) { - if (parent->outData.size() != 1 || InferenceEngine::getInputTo(parent->outData[0]).size() != 1) { - return std::make_pair(nullptr, nullptr); - } - // Check if reshape is expected for this pattern: - // the previous layer has number of channels > 1 and one of height/width dimensions is also > 1 - size_t out_dims_size = parent->outData[0]->getDims().size(); - IE_ASSERT(out_dims_size == 3 || out_dims_size == 4); - size_t channels = GetDataDimSize(parent->outData[0], out_dims_size - 1); - size_t height = out_dims_size == 3 ? 
1 : GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::H); - size_t width = GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::W); - if (parent->insData[0].lock()->getDims().size() < 3 || channels != 1 && (height != 1 || width != 1)) { - return std::make_pair(nullptr, nullptr); + } else if (LayerInfo(prev).isReshape()) { + auto input_dims = prev->insData[0].lock()->getDims(); + auto in_dims_size = input_dims.size(); + auto output_dims = prev->outData[0]->getDims(); + auto out_dims_size = output_dims.size(); + + if (in_dims_size == 4 && out_dims_size == 4) { + if (!LayerInfo(prev).isPermuteViaReshape() || + (input_dims[0] != output_dims[0]) || // N + (input_dims[1] != output_dims[2]) || // H + (input_dims[2] != output_dims[3]) || // W + (input_dims[3] != output_dims[1])) { // C + return std::make_pair(nullptr, nullptr); + } + } else { + if (parent->outData.size() != 1 || InferenceEngine::getInputTo(parent->outData[0]).size() != 1) { + return std::make_pair(nullptr, nullptr); + } + // Check if reshape is expected for this pattern: + // the previous layer has number of channels > 1 and one of height/width dimensions is also > 1 + in_dims_size = parent->insData[0].lock()->getDims().size(); + out_dims_size = parent->outData[0]->getDims().size(); + IE_ASSERT(out_dims_size == 3 || out_dims_size == 4); + size_t channels = GetDataDimSize(parent->outData[0], out_dims_size - 1); + size_t height = out_dims_size == 3 ? 
1 : GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::H); + size_t width = GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::W); + if (in_dims_size < 3 || channels != 1 && (height != 1 || width != 1)) { + return std::make_pair(nullptr, nullptr); + } } } else { return std::make_pair(nullptr, nullptr); diff --git a/src/plugins/intel_gna/gna_plugin.cpp b/src/plugins/intel_gna/gna_plugin.cpp index 094b8c90dbd..f121ab47ea0 100644 --- a/src/plugins/intel_gna/gna_plugin.cpp +++ b/src/plugins/intel_gna/gna_plugin.cpp @@ -79,6 +79,7 @@ #include "transformations/decompose_mvn.hpp" #include "transformations/substitute_softsign.hpp" #include "transformations/convert_precision.hpp" +#include "transformations/unfuse_reshape_and_transpose.hpp" #include @@ -698,6 +699,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -792,7 +796,10 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { passes->registerPass(); - passes->registerPass(); + if (!isNgraphPassesUsed) { + passes->registerPass(); + } + passes->registerPass(); passes->registerPass(); diff --git a/src/plugins/intel_gna/layers/gna_convolution_layer.cpp b/src/plugins/intel_gna/layers/gna_convolution_layer.cpp index d90e330d1e3..e5f79906903 100644 --- a/src/plugins/intel_gna/layers/gna_convolution_layer.cpp +++ b/src/plugins/intel_gna/layers/gna_convolution_layer.cpp @@ -16,8 +16,13 @@ namespace GNAPluginNS { namespace GNAConvolutionLayer { -bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) { - return inHeight > 1 && inWidth > 1 && inWidth == kernelWidth && strideWidth == 1; + +bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t in_channels, 
+ const uint32_t kernelHeight, const uint32_t kernelWidth, + const uint32_t strideHeight, const uint32_t strideWidth) { + return ((inHeight > 1 && inWidth > 1) && + ((inWidth == kernelWidth && strideWidth == 1) || + (inHeight == kernelHeight && strideHeight == 1 && in_channels == 1))); } // 3D input or 2D kernel @@ -39,7 +44,7 @@ double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) { const auto inHeight = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::H); const auto inWidth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::W); if (isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) && - !isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) { + !isMappableFrom2DTo1D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x, conv._stride_y, conv._stride_x)) { const auto kernelSize = conv._kernel_x * conv._kernel_y; auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize, [](const KRT& l, const KRT::first_type& r) {return l.first > r; }); diff --git a/src/plugins/intel_gna/layers/gna_convolution_layer.hpp b/src/plugins/intel_gna/layers/gna_convolution_layer.hpp index c603a801255..654aa3d78ad 100644 --- a/src/plugins/intel_gna/layers/gna_convolution_layer.hpp +++ b/src/plugins/intel_gna/layers/gna_convolution_layer.hpp @@ -10,7 +10,9 @@ namespace GNAPluginNS { namespace GNAConvolutionLayer { -bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth); +bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inChannels, + const uint32_t kernelHeight, const uint32_t kernelWidth, + const uint32_t strideHeight, const uint32_t strideWidth); // 3D input or 2D kernel bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth, diff --git a/src/plugins/intel_gna/layers/gna_layer_info.hpp 
b/src/plugins/intel_gna/layers/gna_layer_info.hpp index 1d744eabc17..eb93522ce24 100644 --- a/src/plugins/intel_gna/layers/gna_layer_info.hpp +++ b/src/plugins/intel_gna/layers/gna_layer_info.hpp @@ -271,6 +271,24 @@ class LayerInfo { bool isPermute() const noexcept { return isOfType("permute"); } + bool isPermuteViaReshape() const { + if (!isOfType("reshape")) return false; + + auto input_dims = layer->insData[0].lock()->getDims(); + auto output_dims = layer->outData[0]->getDims(); + + if (input_dims.size() != output_dims.size()) { + return false; + } + + input_dims.erase(std::remove(input_dims.begin(), input_dims.end(), 1), input_dims.end()); + output_dims.erase(std::remove(output_dims.begin(), output_dims.end(), 1), output_dims.end()); + + if (input_dims != output_dims) { + return false; + } + return true; + } // @brief this not only mathematically trivial, has some WA for kaldi case bool isTrivialPermute() const { if (!isPermute()) return false; diff --git a/src/plugins/intel_gna/optimizer/gna_pass_manager.cpp b/src/plugins/intel_gna/optimizer/gna_pass_manager.cpp index e053e36144d..9f051379bcf 100644 --- a/src/plugins/intel_gna/optimizer/gna_pass_manager.cpp +++ b/src/plugins/intel_gna/optimizer/gna_pass_manager.cpp @@ -633,11 +633,11 @@ void RemovePermutationsNHWCToNCHWPass::run() { if (prev == nullptr || next == nullptr) continue; - if (LayerInfo(prev).isPermute()) { + if (LayerInfo(prev).isPermute() || LayerInfo(prev).isPermuteViaReshape()) { permutations_to_remove.insert(prev); } - if (LayerInfo(next).isPermute()) { + if (LayerInfo(next).isPermute() || LayerInfo(next).isPermuteViaReshape()) { permutations_to_remove.insert(next); } @@ -699,7 +699,8 @@ void RemovePermutationsNHWCToNCHWPass::run() { }; propogateNHWCOrderRecursive(current_layer); - if (LayerInfo(pattern_start).isPermute() && !getInputTo(pattern_start->outData.front()).empty()) { + if ((LayerInfo(pattern_start).isPermute() || LayerInfo(pattern_start).isPermuteViaReshape()) && + 
!getInputTo(pattern_start->outData.front()).empty()) { auto layer_before_permute = CNNNetPrevLayer(pattern_start); DataPtr output = nullptr; for (auto before_output : layer_before_permute->outData) { @@ -2017,11 +2018,11 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { }; auto allowFQFuse = [](CNNLayerPtr layer) -> bool { - auto doNotSkup = [](CNNLayerPtr layer) { + auto doNotSkip = [](CNNLayerPtr layer) { return false; }; - if (CNNNetGetAllNextLayersSkipCertain(layer, -1, doNotSkup).empty()) { + if (CNNNetGetAllNextLayersSkipCertain(layer, -1, doNotSkip).empty()) { return false; } @@ -2142,7 +2143,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { // Before FQ layer is removed, the previous functional layer has to be updated with its quantization data auto prevFuncLayer = CNNNetPrevLayerSkipCertain(*fqLayer, 0, [](CNNLayerPtr layer) { - return LayerInfo(layer).isNonFunctional(); + return LayerInfo(layer).isNonFunctional() || LayerInfo(layer).isPooling(); }); auto quantParamsPrevLayer = InferenceEngine::getInjectedData(prevFuncLayer); quantParamsPrevLayer->_dst_quant.SetLevels(fqLevels); diff --git a/src/plugins/intel_gna/transformations/decompose_2d_convolution.cpp b/src/plugins/intel_gna/transformations/decompose_2d_convolution.cpp index 735d44bd58d..8d9efaa7a52 100644 --- a/src/plugins/intel_gna/transformations/decompose_2d_convolution.cpp +++ b/src/plugins/intel_gna/transformations/decompose_2d_convolution.cpp @@ -128,7 +128,9 @@ static bool ShouldDecompose(GraphData& graph_data, const ConvData& conv_data) { // GNA supported features or handled otherwise - there is no need to decompose such convolution if (graph_data.conv_count == 1 && (((conv_data.input_height == 1 || conv_data.input_width == 1) && conv_data.filter_dilation_width == 1 && conv_data.filter_dilation_height == 1) || - GNAConvolutionLayer::isMappableFrom2DTo1D(conv_data.input_height, conv_data.input_width, conv_data.filter_width, conv_data.filter_stride_width))) + 
GNAConvolutionLayer::isMappableFrom2DTo1D(conv_data.input_height, conv_data.input_width, conv_data.input_channel_count, + conv_data.filter_height, conv_data.filter_width, + conv_data.filter_stride_height, conv_data.filter_stride_width))) return false; return true; diff --git a/src/plugins/intel_gna/transformations/remove_extra_reshapes.cpp b/src/plugins/intel_gna/transformations/remove_extra_reshapes.cpp index e1cfdefa311..bca303e49c8 100644 --- a/src/plugins/intel_gna/transformations/remove_extra_reshapes.cpp +++ b/src/plugins/intel_gna/transformations/remove_extra_reshapes.cpp @@ -8,6 +8,7 @@ #include #include +#include using namespace GNAPluginNS; @@ -15,16 +16,15 @@ NGRAPH_RTTI_DEFINITION(RemoveExtraReshapes, "RemoveExtraReshapes", 0); RemoveExtraReshapes::RemoveExtraReshapes() { MATCHER_SCOPE(RemoveExtraReshapes); - const auto reshape = ngraph::pattern::wrap_type(); + const auto reshape = ngraph::pattern::wrap_type( + [](const ngraph::Output& value) { + return (value.get_node_shared_ptr()->get_input_shape(0) == value.get_node_shared_ptr()->get_output_shape(0)); + }); const auto pooling = ngraph::pattern::wrap_type({reshape}); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { const auto& pattern_map = m.get_pattern_value_map(); const auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr(); - if (reshape_node->get_input_shape(0) != reshape_node->get_output_shape(0)) { - return false; - } - ngraph::replace_output_update_name(reshape_node->output(0), reshape_node->input_value(0)); return true; }; diff --git a/src/plugins/intel_gna/transformations/split_convolution_with_large_buffer_size.cpp b/src/plugins/intel_gna/transformations/split_convolution_with_large_buffer_size.cpp index d36519e54c5..eeec55ff32f 100644 --- a/src/plugins/intel_gna/transformations/split_convolution_with_large_buffer_size.cpp +++ b/src/plugins/intel_gna/transformations/split_convolution_with_large_buffer_size.cpp @@ -33,9 +33,10 @@ static bool 
shouldSplitCnn(const ngraph::Output& node) { uint32_t height = input.at(2); auto kH = filters.at(2); auto kW = filters.at(3); + auto sH = convolution->get_strides().at(0); auto sW = convolution->get_strides().at(1); if (GNAConvolutionLayer::isConv2D(height, width, in_channels, kH, kW) && - !GNAConvolutionLayer::isMappableFrom2DTo1D(height, width, kW, sW)) { + !GNAConvolutionLayer::isMappableFrom2DTo1D(height, width, in_channels, kH, kW, sH, sW)) { return false; } } diff --git a/src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.cpp b/src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.cpp new file mode 100644 index 00000000000..dc323bfc0de --- /dev/null +++ b/src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.cpp @@ -0,0 +1,189 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "transformations/unfuse_reshape_and_transpose.hpp" +#include "transformations/utils/utils.hpp" +#include "transformations/utils/transformation_helper.hpp" +#include +#include +#include +#include + + +using namespace GNAPluginNS; + +NGRAPH_RTTI_DEFINITION(Unfuse2dto4dReshapeAndTranspose, "Unfuse2dto4dReshapeAndTranspose", 0); + +Unfuse2dto4dReshapeAndTranspose::Unfuse2dto4dReshapeAndTranspose() { + MATCHER_SCOPE(Unfuse2dto4dReshapeAndTranspose); + auto is_required_reshape = [](const ngraph::Output& value) { + auto input_shape = value.get_node_shared_ptr()->get_input_shape(0); + auto output_shape = value.get_node_shared_ptr()->get_output_shape(0); + return ((input_shape.size() == 2) && (output_shape.size() == 4) && + ((output_shape.at(1) == 1) || (output_shape.at(2)*output_shape.at(3) == 1))); + }; + const auto reshape = ngraph::pattern::wrap_type(is_required_reshape); + auto fq = ngraph::pattern::wrap_type({reshape, + ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}, + consumers_and_rank(1, 4)); + const auto conv = 
ngraph::pattern::wrap_type({std::make_shared(ngraph::OutputVector{reshape, fq}), + ngraph::pattern::any_input()}); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + const auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr(); + auto consumers = reshape_node->output(0).get_target_inputs(); + + auto N = reshape_node->get_output_shape(0)[0]; + auto C = reshape_node->get_output_shape(0)[1]; + auto H = reshape_node->get_output_shape(0)[2]; + auto W = reshape_node->get_output_shape(0)[3]; + + // Create reshape NxW => NxHxWxC (C or HxW is equal to 1) + auto data = reshape_node->input_value(0); + auto reshape_nhwc_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, ngraph::Shape{N, H, W, C}); + auto reshape_nhwc = register_new_node(data, reshape_nhwc_const, false); + reshape_nhwc->set_friendly_name(reshape_node->get_friendly_name() + "/Reshape"); + + // Create transpose NxHxWxC => NxCxHxW + auto transpose_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 3, 1, 2}); + auto transpose = register_new_node(reshape_nhwc, transpose_const); + transpose->set_friendly_name(reshape_node->get_friendly_name()); + + ngraph::copy_runtime_info(reshape_node, {reshape_nhwc, transpose}); // copy from the matched node, not the pattern placeholder + for (auto consumer : consumers) { + consumer.replace_source_output(transpose); + } + + return true; + }; + + auto m = std::make_shared(conv, matcher_name); + this->register_matcher(m, callback); +} + +NGRAPH_RTTI_DEFINITION(Unfuse4dto2dReshapeAndTranspose, "Unfuse4dto2dReshapeAndTranspose", 0); + +Unfuse4dto2dReshapeAndTranspose::Unfuse4dto2dReshapeAndTranspose() { + MATCHER_SCOPE(Unfuse4dto2dReshapeAndTranspose); + auto is_required_reshape = [](const ngraph::Output& value) { + auto input_shape = value.get_node_shared_ptr()->get_input_shape(0); + auto output_shape = value.get_node_shared_ptr()->get_output_shape(0); + return ((input_shape.size() == 4) &&
(output_shape.size() == 2) && + ((input_shape.at(1) == 1) || (input_shape.at(2)*input_shape.at(3) == 1))); + }; + // Convolution + auto conv = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), ngraph::pattern::any_input()}, + consumers_and_rank(1, 4)); + auto fq_conv = ngraph::pattern::wrap_type({conv, + ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}, + consumers_and_rank(1, 4)); + // Bias + auto bias = ngraph::pattern::wrap_type({conv, ngraph::pattern::any_input()}, + consumers_and_rank(1, 4)); + auto fq_bias = ngraph::pattern::wrap_type({bias, + ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}, + consumers_and_rank(1, 4)); + // Max Pooling + auto max_pool_conv = ngraph::pattern::wrap_type({conv}, + consumers_and_rank(1, 4)); + auto max_pool_fq_conv = ngraph::pattern::wrap_type({fq_conv}, + consumers_and_rank(1, 4)); + auto max_pool_bias = ngraph::pattern::wrap_type({bias}, + consumers_and_rank(1, 4)); + auto max_pool_fq_bias = ngraph::pattern::wrap_type({fq_bias}, + consumers_and_rank(1, 4)); + // Activation + auto fq_fq_conv = ngraph::pattern::wrap_type({fq_conv, + ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}, + consumers_and_rank(1, 4)); + auto fq_fq_bias = ngraph::pattern::wrap_type({fq_bias, + ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}, + consumers_and_rank(1, 4)); + auto act_conv = ngraph::pattern::wrap_type({conv}, + consumers_and_rank(1, 4)); + auto act_bias = ngraph::pattern::wrap_type({bias}, + consumers_and_rank(1, 4)); + auto act_max_pool_conv = ngraph::pattern::wrap_type({max_pool_conv}, + consumers_and_rank(1, 4)); + auto act_max_pool_bias = ngraph::pattern::wrap_type({max_pool_bias}, + consumers_and_rank(1, 4)); + auto act_fq_fq_conv = 
ngraph::pattern::wrap_type({fq_fq_conv}, + consumers_and_rank(1, 4)); + auto act_fq_fq_bias = ngraph::pattern::wrap_type({fq_fq_bias}, + consumers_and_rank(1, 4)); + auto fq_max_pool_fq_conv = ngraph::pattern::wrap_type({max_pool_fq_conv, + ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}, + consumers_and_rank(1, 4)); + auto act_fq_max_pool_fq_conv = ngraph::pattern::wrap_type({fq_max_pool_fq_conv}, + consumers_and_rank(1, 4)); + auto fq_max_pool_fq_bias = ngraph::pattern::wrap_type({max_pool_fq_bias, + ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}, + consumers_and_rank(1, 4)); + auto act_fq_max_pool_fq_bias = ngraph::pattern::wrap_type({fq_max_pool_fq_bias}, + consumers_and_rank(1, 4)); + auto fq_act_fq_fq_conv = ngraph::pattern::wrap_type({act_fq_fq_conv, + ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}, + consumers_and_rank(1, 4)); + auto fq_act_fq_fq_bias = ngraph::pattern::wrap_type({act_fq_fq_bias, + ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}, + consumers_and_rank(1, 4)); + auto fq_act_fq_max_pool_fq_conv = ngraph::pattern::wrap_type({act_fq_max_pool_fq_conv, + ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}, + consumers_and_rank(1, 4)); + auto fq_act_fq_max_pool_fq_bias = ngraph::pattern::wrap_type({act_fq_max_pool_fq_bias, + ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}, + consumers_and_rank(1, 4)); + auto root_reshape = + std::make_shared(ngraph::OutputVector{conv, bias, max_pool_conv, max_pool_fq_conv, max_pool_bias, max_pool_fq_bias, + fq_conv, fq_bias, act_conv, act_bias, act_max_pool_conv, act_max_pool_bias, + 
fq_act_fq_fq_conv, fq_act_fq_fq_bias, fq_act_fq_max_pool_fq_conv, fq_act_fq_max_pool_fq_bias}); + const auto reshape = ngraph::pattern::wrap_type({root_reshape, ngraph::pattern::any_input()}, is_required_reshape); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + const auto& pattern_map = m.get_pattern_value_map(); + const auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr(); + auto consumers = reshape_node->output(0).get_target_inputs(); + + auto N = reshape_node->get_input_shape(0)[0]; + auto W = reshape_node->get_input_shape(0)[1]*reshape_node->get_input_shape(0)[2]*reshape_node->get_input_shape(0)[3]; + + // Create transpose NxCxHxW => NxHxWxC + auto data = reshape_node->input_value(0); + auto transpose_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{4}, {0, 2, 3, 1}); + auto transpose = register_new_node(data, transpose_const); + transpose->set_friendly_name(reshape_node->get_friendly_name() + "/Transpose"); + + // Create reshape NxHxWxC => NxW (C or HxW is equal to 1) + auto reshape_nw_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{2}, ngraph::Shape{N, W}); + auto reshape_nw = register_new_node(transpose, reshape_nw_const, false); + reshape_nw->set_friendly_name(reshape_node->get_friendly_name()); + + ngraph::copy_runtime_info(reshape_node, {transpose, reshape_nw}); + for (auto consumer : consumers) { + consumer.replace_source_output(reshape_nw); + } + + return true; + }; + + auto m = std::make_shared(reshape, matcher_name); + this->register_matcher(m, callback); +} \ No newline at end of file diff --git a/src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.hpp b/src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.hpp new file mode 100644 index 00000000000..53dc6f66ec3 --- /dev/null +++ b/src/plugins/intel_gna/transformations/unfuse_reshape_and_transpose.hpp @@ -0,0 +1,72 @@ +// Copyright (C) 2022 Intel Corporation +// 
SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace GNAPluginNS { + +/** + * @brief Replace 2d->4d reshape with a Reshape + Transpose pair (before Convolution) + * Before: + * [N, HW] + * | + * Reshape + * | + * [N, C, H, W] + * | + * Convolution + * + * After (TransposeSinking friendly): + * [N, HW] + * | + * Reshape + * | + * [N, H, W, C] + * | + * Transpose + * | + * [N, C, H, W] + * | + * Convolution + */ +class Unfuse2dto4dReshapeAndTranspose : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + Unfuse2dto4dReshapeAndTranspose(); +}; + +/** + * @brief Replace 4d->2d reshape with a Transpose + Reshape pair (after Convolution) + * Before: + * Convolution (optionally + bias/pooling/activation) + * | + * [N, C, H, W] + * | + * Reshape + * | + * [N, HW] + * + * After (TransposeSinking friendly): + * Convolution + * | + * [N, C, H, W] + * | + * Transpose + * | + * [N, H, W, C] + * | + * Reshape + * | + * [N, HW] + * + */ +class Unfuse4dto2dReshapeAndTranspose : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + Unfuse4dto2dReshapeAndTranspose(); +}; + +} // namespace GNAPluginNS \ No newline at end of file diff --git a/src/tests/functional/plugin/gna/pass_tests/fq_maxpool_reordering.cpp b/src/tests/functional/plugin/gna/pass_tests/fq_maxpool_reordering.cpp index af6d0c1a19b..1700f48da75 100644 --- a/src/tests/functional/plugin/gna/pass_tests/fq_maxpool_reordering.cpp +++ b/src/tests/functional/plugin/gna/pass_tests/fq_maxpool_reordering.cpp @@ -24,16 +24,20 @@ typedef std::tuple< std::string, // Target Device std::map, // Configuration std::vector, // Input Shape - std::pair, // Input Min and Max - size_t // Levels + std::pair, // Input Min and Max (before conv) + std::pair, // Input Min and Max (after conv) + size_t, // Levels + bool // Reshape between FQ and Pooling > fqMaxpoolReorderingParams; namespace LayerTestsDefinitions { class FQMaxpoolReordering : public testing::WithParamInterface, public
LayerTestsUtils::LayerTestsCommon { - float inputDataMin = 0.0f; - float inputDataMax = 0.0f; + float inputDataMin1 = 0.0f; + float inputDataMax1 = 0.0f; + float inputDataMin2 = 0.0f; + float inputDataMax2 = 0.0f; float inputDataResolution = 1.0f; public: @@ -42,9 +46,11 @@ public: std::string targetDevice; std::map configuration; std::vector inputShape; - std::pair inputMinMax; + std::pair inputMinMax1; + std::pair inputMinMax2; size_t levels = 0; - std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = obj.param; + bool reshape = false; + std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax1, inputMinMax2, levels, reshape) = obj.param; std::ostringstream result; result << "netPRC=" << netPrecision.name() << "_"; @@ -53,14 +59,16 @@ public: result << "_configItem=" << configItem.first << "_" << configItem.second; } result << "_inputShape=" << CommonTestUtils::vec2str(inputShape); - result << "_inputMinMax=(" << inputMinMax.first << ".." << inputMinMax.second << ")"; + result << "_inputMinMax1=(" << inputMinMax1.first << ".." << inputMinMax1.second << ")"; + result << "_inputMinMax2=(" << inputMinMax2.first << ".." 
<< inputMinMax2.second << ")"; result << "_levels=" << levels; + result << "_reshape=" << reshape; return result.str(); } InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override { - return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, 1 / inputDataResolution); + return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax1 - inputDataMin1, inputDataMin1, 1 / inputDataResolution); } protected: @@ -68,23 +76,28 @@ protected: InferenceEngine::Precision netPrecision; std::vector inputShape; - std::pair inputMinMax; + std::pair inputMinMax1; + std::pair inputMinMax2; size_t levels = 0; - std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = this->GetParam(); + bool reshape = false; + std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax1, inputMinMax2, levels, reshape) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - std::tie(inputDataMin, inputDataMax) = inputMinMax; - auto inputLowNode = ngraph::builder::makeConstant(ngPrc, {1}, { inputDataMin }); - auto inputHighNode = ngraph::builder::makeConstant(ngPrc, {1}, { inputDataMax }); + std::tie(inputDataMin1, inputDataMax1) = inputMinMax1; + std::tie(inputDataMin2, inputDataMax2) = inputMinMax2; + auto inputLowNode1 = ngraph::builder::makeConstant(ngPrc, {1}, { inputDataMin1 }); + auto inputHighNode1 = ngraph::builder::makeConstant(ngPrc, {1}, { inputDataMax1 }); + auto inputLowNode2 = ngraph::builder::makeConstant(ngPrc, {1}, { inputDataMin2 }); + auto inputHighNode2 = ngraph::builder::makeConstant(ngPrc, {1}, { inputDataMax2 }); auto inputVector = ngraph::builder::makeParams(ngPrc, {inputShape}); auto inputFQ = std::make_shared(inputVector[0], - inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels); + inputLowNode1, inputHighNode1, inputLowNode1, inputHighNode1, levels); auto filterWeightsNode = 
ngraph::builder::makeConstant(ngPrc, {8, inputShape[1], 1, 8}, { 1.0f }); - auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector{ 1 }, std::vector{inputDataMin}); - auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector{ 1 }, std::vector{inputDataMax}); + auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector{ 1 }, std::vector{inputDataMin1}); + auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector{ 1 }, std::vector{inputDataMax1}); auto convWeightsFQNode = std::make_shared(filterWeightsNode, convLowNode, convHighNode, convLowNode, convHighNode, levels); auto convWeightsFQ = std::dynamic_pointer_cast(convWeightsFQNode); @@ -97,9 +110,20 @@ protected: auto add = std::make_shared(conv, biasesWeightsNode); auto convFQNode = std::make_shared(add, - inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels); + inputLowNode2, inputHighNode2, inputLowNode2, inputHighNode2, levels); - auto maxpool = ngraph::builder::makePooling(convFQNode, {1, 2}, {0, 0}, {0, 0}, {1, 2}, ngraph::op::RoundingType::FLOOR, + std::shared_ptr node_before_pooling = convFQNode; + if (reshape) { + const auto& shape = conv->get_output_shape(0); + size_t total = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + auto reshapeConst1 = ngraph::builder::makeConstant(ngraph::element::i64, std::vector{ 2 }, ngraph::Shape{1, total}); + auto reshapeNode1 = std::make_shared(convFQNode, reshapeConst1, false); + auto reshapeConst2 = ngraph::builder::makeConstant(ngraph::element::i64, std::vector{ 4 }, shape); + auto reshapeNode2 = std::make_shared(reshapeNode1, reshapeConst2, false); + node_before_pooling = reshapeNode2; + } + + auto maxpool = ngraph::builder::makePooling(node_before_pooling, {1, 2}, {0, 0}, {0, 0}, {1, 2}, ngraph::op::RoundingType::FLOOR, ngraph::op::PadType::VALID, false, ngraph::helpers::PoolingTypes::MAX); ngraph::ResultVector results{ 
std::make_shared(maxpool)}; @@ -130,7 +154,9 @@ const std::vector> inputShape = { const std::vector> inputMinMax = { {-0.5, 0.5}, {-2, 2}, - {-8, 8} + {-8, 8}, + {-5, 5}, + {-17.5, 17.5}, }; const std::vector levels = { @@ -144,6 +170,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_fq_maxpool_reordering, FQMaxpoolReordering, ::testing::ValuesIn(configs), ::testing::ValuesIn(inputShape), ::testing::ValuesIn(inputMinMax), - ::testing::ValuesIn(levels)), + ::testing::ValuesIn(inputMinMax), + ::testing::ValuesIn(levels), + ::testing::ValuesIn(std::vector{true, false})), FQMaxpoolReordering::getTestCaseName); } // namespace LayerTestsDefinitions diff --git a/src/tests/functional/plugin/gna/pass_tests/remove_permutations_NHWC_to_NCHW_pass.cpp b/src/tests/functional/plugin/gna/pass_tests/remove_permutations_NHWC_to_NCHW_pass.cpp index 379cbffdf01..ad33f5e511b 100644 --- a/src/tests/functional/plugin/gna/pass_tests/remove_permutations_NHWC_to_NCHW_pass.cpp +++ b/src/tests/functional/plugin/gna/pass_tests/remove_permutations_NHWC_to_NCHW_pass.cpp @@ -18,6 +18,7 @@ #include "ngraph_functions/builders.hpp" #include "ngraph_functions/pass/convert_prc.hpp" +#include "transformations/common_optimizations/transpose_to_reshape.hpp" typedef std::tuple< InferenceEngine::Precision, // Network Precision @@ -31,7 +32,8 @@ typedef std::tuple< std::string, // Target Device std::map, // Configuration std::vector, // Input shape - bool // additional bool parameter + bool, // additional bool parameter + bool // transpose to reshape > removePermutationsAddParamPassParams; namespace LayerTestsDefinitions { @@ -106,7 +108,8 @@ class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface< std::map configuration; std::vector inputShape; bool output1D; - std::tie(netPrecision, targetDevice, configuration, inputShape, output1D) = obj.param; + bool transpose_to_reshape; + std::tie(netPrecision, targetDevice, configuration, inputShape, output1D, transpose_to_reshape) = obj.param; 
std::ostringstream result; result << "netPRC=" << netPrecision.name() << "_"; @@ -116,6 +119,7 @@ class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface< } result << "_IS=" << CommonTestUtils::vec2str(inputShape); result << "_1d_out=" << output1D; + result << "_transpose2reshape=" << transpose_to_reshape; return result.str(); } @@ -133,7 +137,8 @@ class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface< InferenceEngine::Precision netPrecision; std::vector inputShape; bool output1D; - std::tie(netPrecision, targetDevice, configuration, inputShape, output1D) = this->GetParam(); + bool transpose_to_reshape; + std::tie(netPrecision, targetDevice, configuration, inputShape, output1D, transpose_to_reshape) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); size_t shape_size = inputShape.size(); @@ -158,6 +163,11 @@ class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface< ngraph::ResultVector results{ std::make_shared(reshape2) }; function = std::make_shared(results, params, "RemovePermutationsTest"); + if (transpose_to_reshape) { + ngraph::pass::Manager manager; + manager.register_pass(); + manager.run_passes(function); + } } }; @@ -212,7 +222,8 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface< std::map configuration; std::vector inputShape; bool withActivation; - std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation) = obj.param; + bool transpose_to_reshape; + std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation, transpose_to_reshape) = obj.param; std::ostringstream result; result << "netPRC=" << netPrecision.name() << "_"; @@ -222,6 +233,7 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface< } result << "_IS=" << CommonTestUtils::vec2str(inputShape); result << "_withActivation=" << withActivation; + result << "_transpose2reshape=" << 
transpose_to_reshape; return result.str(); } @@ -255,7 +267,8 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface< InferenceEngine::Precision netPrecision; std::vector inputShape; bool withActivation; - std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation) = this->GetParam(); + bool transpose_to_reshape; + std::tie(netPrecision, targetDevice, configuration, inputShape, withActivation, transpose_to_reshape) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); size_t shape_size = inputShape.size(); @@ -280,6 +293,12 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface< ngraph::ResultVector results{ std::make_shared(reshape2) }; function = std::make_shared(results, params, "RemovePermutationsWithPoolAndActTest"); + + if (transpose_to_reshape) { + ngraph::pass::Manager manager; + manager.register_pass(); + manager.run_passes(function); + } } }; @@ -512,7 +531,8 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface{false, true})), // with 1d output of convolution + ::testing::ValuesIn(std::vector{false, true}), // with 1d output of convolution + ::testing::ValuesIn(std::vector{false, true})),// transpose to reshape RemovePermutationsNHWCToNCHWPassTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_PermutationPass, RemovePermutationsNHWCToNCHWPassNoReshapesTest, @@ -529,7 +549,8 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface{false, true})), // with activation + ::testing::ValuesIn(std::vector{false, true}), // with activation + ::testing::ValuesIn(std::vector{false, true})),// transpose to reshape RemovePermutationsWithPoolAndActTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_PermutationPass, RemovePermutationsWithTwoConvTest,