diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
index 76e2f81940e..541bd142c3e 100644
--- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
+++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp
@@ -453,6 +453,12 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
     size_t num_data_bytes_in = (num_inputs + num_input_padding) * inputs->getPrecision().size();
 
     auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
+    // Skip FakeQuantize and ScaleShift between Convolution and Input
+    if (LayerInfo(connectedInputLayer).isFakeQuantize()) {
+        connectedInputLayer = CNNNetPrevLayerSkipCertain(connectedInputLayer, 0, [](CNNLayerPtr l) {
+            return LayerInfo(l).isScaleShift();
+        });
+    }
 
     // TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know how kaldi will handle that
     if (!dnn->do_rotate_input) {
@@ -626,6 +632,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
         ptr_weights,
         ptr_biases);
 
+    currentComponent.num_bytes_per_input = inputs->getPrecision().size();
     currentComponent.num_bytes_per_output = outputs->getPrecision().size();
 
     if (inputs->getLayout() == Layout::NHWC) {
diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp
index df0a71fc1ce..a1f7e003dc7 100644
--- a/inference-engine/src/gna_plugin/gna_plugin.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -56,6 +56,8 @@
 #include 
 #include 
 
+#include "transformations/remove_extra_reshapes.hpp"
+
 #if GNA_LIB_VER == 2
 #include 
@@ -663,6 +665,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
         manager.register_pass();
         manager.register_pass();
         manager.register_pass();
+        manager.register_pass<RemoveExtraReshapes>();
         // UnrollTI should be the last transformation in the transformation pipeline
         manager.register_pass();
diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
index 0a7a4a44e02..4c40692d239 100644
--- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
+++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp
@@ -371,19 +371,21 @@ namespace {
 void ReorderMaxPoolPass::run() {
     // detecting following pattern
-    // conv->relu->maxpooling
-    // changing it to conv->maxpooling->relu
+    // conv->activation->maxpooling
+    // changing it to conv->maxpooling->activation
     for (auto & l : *pLayers) {
         auto pool = LayerInfo(l);
         if (!pool.isMaxPooling()) continue;
 
         // don't reorder if pooling is 2D for CNN2D
         auto pooling = dynamic_cast<PoolingLayer*>(l.get());
-        if (pooling == nullptr || (is2D(pooling->_kernel) || is2D(pooling->_stride))) continue;
+        // todo: return the check for stride after it'll be fixed in MO for Kaldi models
+        if (pooling == nullptr || (is2D(pooling->_kernel))) continue;
 
         // checking prev layer type
-        auto activation = LayerInfo(CNNNetPrevLayer(l));
-        if (!activation.isActivation()) continue;
+        auto actLayer = CNNNetPrevLayer(l);
+        auto activation = LayerInfo(actLayer);
+        if (!activation.isActivation() || actLayer->insData.size() > 1) continue;
 
         // if activation came from convolution
         auto convolution = LayerInfo(CNNNetPrevLayer(static_cast<InferenceEngine::CNNLayerPtr>(activation)));
diff --git a/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.cpp b/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.cpp
new file mode 100644
index 00000000000..cbb4cb625d0
--- /dev/null
+++ b/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.cpp
@@ -0,0 +1,31 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/remove_extra_reshapes.hpp"
+
+#include <ngraph/opsets/opset7.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>
+
+using namespace GNAPluginNS;
+
+NGRAPH_RTTI_DEFINITION(RemoveExtraReshapes, "RemoveExtraReshapes", 0);
+
+RemoveExtraReshapes::RemoveExtraReshapes() {
+    const auto reshape = ngraph::pattern::wrap_type<ngraph::opset7::Reshape>();
+    const auto pooling = ngraph::pattern::wrap_type<ngraph::opset7::MaxPool>({reshape});
+
+    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
+        const auto& pattern_map = m.get_pattern_value_map();
+        const auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
+        if (reshape_node->get_input_shape(0) != reshape_node->get_output_shape(0)) {
+            return false;
+        }
+
+        ngraph::replace_output_update_name(reshape_node->output(0), reshape_node->input_value(0));
+        return true;
+    };
+
+    auto m = std::make_shared<ngraph::pattern::Matcher>(pooling, "RemoveExtraReshapes");
+    this->register_matcher(m, callback);
+}
diff --git a/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.hpp b/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.hpp
new file mode 100644
index 00000000000..4f189abdba5
--- /dev/null
+++ b/inference-engine/src/gna_plugin/transformations/remove_extra_reshapes.hpp
@@ -0,0 +1,20 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace GNAPluginNS {
+
+/**
+ * @brief Removes reshapes before MaxPool which do nothing. Such reshapes can be a result of conversion from IR10 to IR7.
+ */
+class RemoveExtraReshapes : public ngraph::pass::MatcherPass {
+public:
+  NGRAPH_RTTI_DECLARATION;
+  RemoveExtraReshapes();
+};
+
+} // namespace GNAPluginNS
\ No newline at end of file
diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/fq_maxpool_reordering.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/fq_maxpool_reordering.cpp
new file mode 100644
index 00000000000..316df2ca9d7
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/gna/pass_tests/fq_maxpool_reordering.cpp
@@ -0,0 +1,148 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+#include <map>
+
+#include <ie_core.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ngraph_functions/builders.hpp"
+
+#include "ngraph_functions/pass/convert_prc.hpp"
+
+typedef std::tuple<
+    InferenceEngine::Precision,         // Network Precision
+    std::string,                        // Target Device
+    std::map<std::string, std::string>, // Configuration
+    std::vector<size_t>,                // Input Shape
+    std::pair<float, float>,            // Input Min and Max
+    size_t                              // Levels
+> fqMaxpoolReorderingParams;
+
+namespace LayerTestsDefinitions {
+
+class FQMaxpoolReordering : public testing::WithParamInterface<fqMaxpoolReorderingParams>,
+    public LayerTestsUtils::LayerTestsCommon {
+    float inputDataMin = 0.0f;
+    float inputDataMax = 0.0f;
+    float inputDataResolution = 1.0f;
+
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<fqMaxpoolReorderingParams> obj) {
+        InferenceEngine::Precision netPrecision;
+        std::string targetDevice;
+        std::map<std::string, std::string> configuration;
+        std::vector<size_t> inputShape;
+        std::pair<float, float> inputMinMax;
+        size_t levels = 0;
+        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = obj.param;
+        std::ostringstream result;
+        result << "netPRC=" << netPrecision.name() << "_";
+        result << "targetDevice=" << targetDevice << "_";
+        for (auto const& configItem : configuration) {
+            result << "_configItem=" << configItem.first << "_" << configItem.second;
+        }
+        result << "_inputShape=" << CommonTestUtils::vec2str(inputShape);
+        result << "_inputMinMax=(" << inputMinMax.first << ".." << inputMinMax.second << ")";
+        result << "_levels=" << levels;
+
+        return result.str();
+    }
+
+    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const {
+        return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, 1 / inputDataResolution);
+    }
+
+protected:
+    void SetUp() override {
+        InferenceEngine::Precision netPrecision;
+
+        std::vector<size_t> inputShape;
+        std::pair<float, float> inputMinMax;
+        size_t levels = 0;
+        std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = this->GetParam();
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+        // Store the tested range so GenerateInput and the weight FQ below use it instead of the default 0.0f
+        inputDataMin = inputMinMax.first;
+        inputDataMax = inputMinMax.second;
+
+        auto inputLowNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputMinMax.first });
+        auto inputHighNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputMinMax.second });
+
+        auto inputVector = ngraph::builder::makeParams(ngPrc, {inputShape});
+
+        auto inputFQ = std::make_shared<ngraph::opset7::FakeQuantize>(inputVector[0],
+            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);
+
+        auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {8, inputShape[1], 1, 8}, { 1.0f });
+        auto convLowNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin});
+        auto convHighNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMax});
+        auto convWeightsFQNode = std::make_shared<ngraph::opset7::FakeQuantize>(filterWeightsNode,
+            convLowNode, convHighNode, convLowNode, convHighNode, levels);
+        auto convWeightsFQ = std::dynamic_pointer_cast<ngraph::opset7::FakeQuantize>(convWeightsFQNode);
+
+        auto conv = std::make_shared<ngraph::opset7::Convolution>(inputFQ, convWeightsFQ, std::vector<size_t>{ 1, 1 },
+                                                                  std::vector<ptrdiff_t>{ 0, 0 }, std::vector<ptrdiff_t>{ 0, 0 },
+                                                                  std::vector<size_t>{ 1, 1 },
+                                                                  ngraph::op::PadType::VALID);
+        auto biasesWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {}, std::vector<float>{ 0.0f });
+        auto add = std::make_shared<ngraph::opset7::Add>(conv, biasesWeightsNode);
+
+        auto convFQNode = std::make_shared<ngraph::opset7::FakeQuantize>(add,
+            inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);
+
+        auto maxpool = ngraph::builder::makePooling(convFQNode, {1, 2}, {0, 0}, {0, 0}, {1, 2}, ngraph::op::RoundingType::FLOOR,
+                                                    ngraph::op::PadType::VALID, false, ngraph::helpers::PoolingTypes::MAX);
+
+        ngraph::ResultVector results{ std::make_shared<ngraph::opset7::Result>(maxpool)};
+        function = std::make_shared<ngraph::Function>(results, inputVector, "FQMaxPoolReorder");
+    }
+};
+
+TEST_P(FQMaxpoolReordering, CompareWithRefImpl) {
+    Run();
+};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32,
+    InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::map<std::string, std::string>> configs = {
+    {
+        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
+    }
+};
+
+const std::vector<std::vector<size_t>> inputShape = {
+    {1, 1, 1, 1024},
+    {1, 8, 1, 168},
+};
+
+const std::vector<std::pair<float, float>> inputMinMax = {
+    {-0.5, 0.5},
+    {-2, 2},
+    {-8, 8}
+};
+
+const std::vector<size_t> levels = {
+    65535,
+};
+
+INSTANTIATE_TEST_CASE_P(smoke_fq_maxpool_reordering, FQMaxpoolReordering,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::ValuesIn(configs),
+        ::testing::ValuesIn(inputShape),
+        ::testing::ValuesIn(inputMinMax),
+        ::testing::ValuesIn(levels)),
+    FQMaxpoolReordering::getTestCaseName);
+} // namespace LayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/fq_conv_fq_affine.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/fq_conv_fq_affine.cpp
index 28f414ee11d..e48d4ad12c0 100644
--- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/fq_conv_fq_affine.cpp
+++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/fq_conv_fq_affine.cpp
@@ -46,10 +46,13 @@ const auto convParams = ::testing::Combine(
     ::testing::ValuesIn(outputChannels)
 );
 
+const std::vector<bool> permute = {false, true};
+
 INSTANTIATE_TEST_CASE_P(smoke_FqConvFqAffineTest, FqConvFqAffineTest,
                         ::testing::Combine(
                             fqParams,
                             convParams,
+                            ::testing::ValuesIn(permute),
                             ::testing::ValuesIn(netPrecisions),
                             ::testing::ValuesIn(inputShapes),
                             ::testing::Values(CommonTestUtils::DEVICE_GNA),
diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/fq_conv_fq_affine.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/fq_conv_fq_affine.hpp
index a8ca812b749..30c014dd498 100644
--- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/fq_conv_fq_affine.hpp
+++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/fq_conv_fq_affine.hpp
@@ -30,6 +30,7 @@ typedef std::tuple<
 typedef std::tuple<
     FqSpecificParams,
     ConvParams,
+    bool,                           // Permute after convolution
     InferenceEngine::Precision,     // Net precision
     InferenceEngine::SizeVector,    // Input shapes
     LayerTestsUtils::TargetDevice,  // Device name
diff --git a/inference-engine/tests/functional/shared_test_classes/src/subgraph/fq_conv_fq_affine.cpp b/inference-engine/tests/functional/shared_test_classes/src/subgraph/fq_conv_fq_affine.cpp
index cd4370d9661..6255b41db01 100644
--- a/inference-engine/tests/functional/shared_test_classes/src/subgraph/fq_conv_fq_affine.cpp
+++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/fq_conv_fq_affine.cpp
@@ -9,11 +9,12 @@ namespace SubgraphTestsDefinitions {
 std::string FqConvFqAffineTest::getTestCaseName(testing::TestParamInfo<FqConvFqAffineTestParamsSet> obj) {
     FqSpecificParams fqParams;
     ConvParams convParams;
+    bool permute;
     InferenceEngine::Precision netPrecision;
     InferenceEngine::SizeVector inputShapes;
     std::string targetDevice;
     std::map<std::string, std::string> config;
-    std::tie(fqParams, convParams, netPrecision, inputShapes, targetDevice, config) = obj.param;
+    std::tie(fqParams, convParams, permute, netPrecision, inputShapes, targetDevice, config) = obj.param;
 
     std::vector<size_t> levels;
     std::vector<float> inputArg;
@@ -39,17 +40,19 @@ std::string FqConvFqAffineTest::getTestCaseName(testing::TestParamInfo
 void FqConvFqAffineTest::SetUp() {
     FqSpecificParams fqParams;
     ConvParams convParams;
+    bool permute;
     std::vector<size_t> inputShape;
     std::map<std::string, std::string> config;
     auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
-    std::tie(fqParams, convParams, netPrecision, inputShape, targetDevice, config) = this->GetParam();
+    std::tie(fqParams, convParams, permute, netPrecision, inputShape, targetDevice, config) = this->GetParam();
     configuration.insert(config.begin(), config.end());
 
     std::vector<size_t> levels;
@@ -100,8 +103,19 @@ void FqConvFqAffineTest::SetUp() {
     auto heightAfterConv = (convInputShape[2] - kernelShape[0]) / strides[0] + 1;
     std::vector<size_t> outFormShapes = {1, outputChannels * widthAfterConv * heightAfterConv };
 
+    ngraph::Output<ngraph::Node> nodeBeforeReshape;
+    if (permute) {
+        auto permuteOrder = std::make_shared<ngraph::opset1::Constant>(ngraph::element::i64,
+                                                                       ngraph::Shape{4},
+                                                                       ngraph::Shape{{0, 3, 2, 1}});
+        auto transpose = std::make_shared<ngraph::opset1::Transpose>(add, permuteOrder);
+        nodeBeforeReshape = transpose;
+    } else {
+        nodeBeforeReshape = add;
+    }
+
     auto reshapePattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
-    auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(add, reshapePattern2, false);
+    auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(nodeBeforeReshape, reshapePattern2, false);
 
     auto matMulWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {outFormShapes[1], outFormShapes[1]}, { 1.0f });
     auto matMulLowNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin});