From 7877287301748c0cd6f93d2813a714fa37824b11 Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Wed, 7 Apr 2021 11:02:43 +0300 Subject: [PATCH] [LPT] Split & VariadicSplit support (#4195) * [LPT] Split support in ConcatTransformation * [LPT] fixing functional problems after enabling Split/VariadicSplit transformations * [LPT] added test case for StridedSliceTransformation, enabled tests with split * [LPT] ConcatTransformation refactoring * ConcatTransformation: added axis check * [LPT] Added foldDequantizationConstant to NetworkHelper & SplitTransformation refactoring * [LPT] Subgraph: returned and refactored quantizationPerChannel * [LPT] foldDequantizationConstant refactoring * [LPT] SplitTransformation refactoring * [LPT] hbonets fix & ConcatTrasnformation refactoring --- .../include/low_precision/concat.hpp | 10 + .../low_precision/concat_multi_channels.hpp | 9 +- .../include/low_precision/network_helper.hpp | 6 + .../include/low_precision/split.hpp | 9 - .../include/low_precision/variadic_split.hpp | 5 - .../src/concat.cpp | 180 ++++++++-------- .../src/concat_multi_channels.cpp | 192 ++++++------------ .../src/network_helper.cpp | 25 +++ .../src/split.cpp | 149 +++++--------- .../src/strided_slice.cpp | 18 +- .../src/subgraph.cpp | 32 +-- .../src/transformer.cpp | 2 + .../src/variadic_split.cpp | 20 -- .../concat_transformation.cpp | 75 ++++++- .../concat_with_split_transformation.cpp | 37 +++- .../split_transformation.cpp | 129 ++++-------- .../strided_slice_transformation.cpp | 43 ++++ .../variadic_split_transformation.cpp | 68 +++---- .../concat_with_split_transformation.cpp | 3 +- .../concat_with_split_transformation.cpp | 3 +- .../lpt_ngraph_functions/concat_function.hpp | 3 +- .../src/concat_function.cpp | 12 +- 22 files changed, 521 insertions(+), 509 deletions(-) diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp index 8ed8dfde55c..e381fd5d0a0 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp @@ -35,6 +35,7 @@ protected: ngraph::pass::low_precision::Subgraph& subgraph, std::function layer, + std::shared_ptr child, const std::string originalLayerName, std::vector& dequantizationsToConcatenate)> getLayerDequantizationCallback) const; @@ -42,6 +43,15 @@ protected: const TransformationContext& context, const std::vector>& quantizationOperations); + void fillDequantizationNodes( + const std::vector& layerDequantizations, + const std::shared_ptr layer, + NodeVector& convertNodes, + NodeVector& subtractNodes, + NodeVector& multiplyNodes) const; + + std::shared_ptr concatenateDeqNodes(NodeVector& nodes) const; + private: size_t getMinQuantizationLevels( const DataPrecision& dataPrecision, diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp index 06515d0d72e..48c0a0ef9ea 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp @@ -27,12 +27,9 @@ public: bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; private: + // Go through the parent elements of the layer and fill 
dequantization collection + // with Dq operations that should be inserted before the layer. void fillDequantization( - std::shared_ptr layer, - std::unordered_map& dequantizationByFakeQuantize, - std::vector& dequantizationsToConcatenate) const; - - void fillQuantization( const std::shared_ptr layer, const std::unordered_map& dequantizationByFakeQuantize, std::vector& dequantization) const; @@ -46,8 +43,6 @@ private: const FakeQuantizeDequantization& dequantization, const size_t sourceOutputIdx); - static FakeQuantizeDequantization broadcastDequantiationConstant(const FakeQuantizeDequantization& deq); - bool isMultiChannel(const std::vector>& concatLayers) const noexcept; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp index f113f749c68..f9665f9a886 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp @@ -50,6 +50,12 @@ public: template static std::shared_ptr setOutDataPrecision(std::shared_ptr operation, const element::Type& precision); + // applies constant folding of operation to constant and returns the specified output + static std::shared_ptr foldDequantizationConstant( + const std::shared_ptr& foldingConstant, + const std::shared_ptr& operation, + const size_t outIdx = 0); + static size_t getOutputChannelsCount(std::shared_ptr layer, bool isOnWeights = false); static std::vector> getParentsRecursivelyExceptTypes( diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp index c7a41cd25c7..5a9fbc48ce7 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp @@ -24,15 +24,6 @@ public: TransformationContext& context, std::vector> lastNodes, std::shared_ptr originalNode) const; -protected: - ngraph::Shape getConstSplitShape( - const std::vector& constSplitLengths, - const ngraph::Shape& constShape, const size_t axis, - const size_t idx) const; - virtual std::vector getConstSplitLengths( - const OutputVector& inputs, - const ngraph::Shape& constShape, - const size_t outputSize) const; }; } // namespace low_precision } // namespace pass diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp index c9fdf76998a..e7cab0c527c 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp @@ -17,11 +17,6 @@ class TRANSFORMATIONS_API VariadicSplitTransformation : public SplitTransformati public: VariadicSplitTransformation(const Params& params); void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; -protected: - std::vector getConstSplitLengths( - const OutputVector& inputs, - const ngraph::Shape& constShape, - const size_t outputSize) const override; }; } // namespace low_precision } // namespace pass diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp 
b/inference-engine/src/low_precision_transformations/src/concat.cpp index 20c7d4f2cc7..02de081ec03 100644 --- a/inference-engine/src/low_precision_transformations/src/concat.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat.cpp @@ -201,6 +201,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat auto dequantizationValuesCallback = [&]( std::shared_ptr layer, + std::shared_ptr child, const std::string originalLayerName, std::vector& dequantizationsToConcatenate) { dequantizationsToConcatenate.push_back(dequantization); @@ -234,15 +235,97 @@ bool ConcatTransformation::isPrecisionPreserved(std::shared_ptr) const noe bool ConcatTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const { std::shared_ptr concat = as_type_ptr(layer); - return concat && concat->get_axis() == 1ul; + if (concat == nullptr) { + return false; + } + + const auto axis = concat->get_axis(); + const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, concat->get_output_partial_shape(0).rank()); + return normalizedAxis == 1ul; } +void ConcatTransformation::fillDequantizationNodes( + const std::vector& layerDequantizations, + const std::shared_ptr layer, + NodeVector& convertNodes, + NodeVector& subtractNodes, + NodeVector& multiplyNodes) const { + if (layerDequantizations.size() > 1ul) { + auto broadcastElementWiseConst = []( + // FakeQuantize constant shape must be broadcastable to the shape on data. + std::shared_ptr operation, + const ngraph::Shape targetShape) -> std::shared_ptr { + auto targetShapeConst = std::make_shared( + element::i64, ngraph::Shape{ targetShape.size() }, + targetShape); + + auto broadcast = ngraph::pass::low_precision::fold( + operation, + targetShapeConst, + ngraph::op::AutoBroadcastType::NUMPY); + + return broadcast; + }; + + bool allDequantizationShiftAreZero = true; + bool allDequantizationMultiplyAreZero = true; + for (const auto& dequantization : layerDequantizations) { + if (dequantization.subtract != nullptr) { + allDequantizationShiftAreZero = false; + } + if (dequantization.multiply != nullptr) { + allDequantizationMultiplyAreZero = false; + } + } + + for (size_t i = 0; i < layerDequantizations.size(); ++i) { + const auto& dequantization = layerDequantizations[i]; + const ngraph::element::Type precision = deqPrecision; + ngraph::Shape targetShape(layer->get_input_shape(i).size(), 1ul); + targetShape[1] = layer->get_input_shape(i)[1]; + + if (dequantization.convert != nullptr) { + convertNodes.push_back(dequantization.convert); + } + + if (!allDequantizationShiftAreZero) { + subtractNodes.push_back(dequantization.subtract == nullptr ? + std::make_shared(precision, targetShape, std::vector({ 0.f })) : + broadcastElementWiseConst(dequantization.subtractConstant, targetShape)); + } + + if (!allDequantizationMultiplyAreZero) { + multiplyNodes.push_back(dequantization.multiply == nullptr ? 
+ std::make_shared(precision, targetShape, std::vector({ 1.0f })) : + broadcastElementWiseConst(dequantization.multiplyConstant, targetShape)); + } + } + } else { + // TODO: check constant shapes here - has to be scalar + if (layerDequantizations[0].convert != nullptr) { + convertNodes.push_back(layerDequantizations[0].convert); + } + + if (layerDequantizations[0].subtract != nullptr) { + subtractNodes.push_back(layerDequantizations[0].subtract->input_value(1).get_node_shared_ptr()); + } + + if (layerDequantizations[0].multiply != nullptr) { + multiplyNodes.push_back(layerDequantizations[0].multiply->input_value(1).get_node_shared_ptr()); + } + } +} + +std::shared_ptr ConcatTransformation::concatenateDeqNodes(NodeVector& nodes) const { + return nodes.size() == 1ul ? nodes[0] : fold(nodes, 1); +} void ConcatTransformation::addDequantizationLayers( TransformationContext& context, ngraph::pass::low_precision::Subgraph& subgraph, std::function layer, + std::shared_ptr child, const std::string originalLayerName, std::vector& dequantizationsToConcatenate)> getLayerDequantizationCallback) const { std::unordered_map outputs; @@ -269,95 +352,28 @@ void ConcatTransformation::addDequantizationLayers( ngraph::Node& child = *childInput.get_node(); if (subgraph.layers.find(child.get_friendly_name()) == subgraph.layers.end()) { + std::shared_ptr source = layer; + const std::shared_ptr destination = child.shared_from_this(); + if (layerDequantizations.size() == 0ul) { // fill layerDequantizations collection - getLayerDequantizationCallback(layer, layer->get_friendly_name(), layerDequantizations); + getLayerDequantizationCallback(source, destination, source->get_friendly_name(), layerDequantizations); } - std::shared_ptr source = layer->shared_from_this(); { - std::vector> convertNodes; - std::vector> subtractNodes; - std::vector> multiplyNodes; + NodeVector convertNodes; + NodeVector subtractNodes; + NodeVector multiplyNodes; // forming nodes for concatenation - if (layerDequantizations.size() > 1ul) { - auto broadcastElementWiseConst = []( - // FakeQuantize constant shape must be broadcastable to the shape on data. - std::shared_ptr operation, - const ngraph::Shape targetShape) -> std::shared_ptr { - auto targetShapeConst = std::make_shared( - element::i64, ngraph::Shape{ targetShape.size() }, - targetShape); - - auto broadcast = ngraph::pass::low_precision::fold( - operation, - targetShapeConst, - ngraph::op::AutoBroadcastType::NUMPY); - - return broadcast; - }; - - bool allDequantizationShiftAreZero = true; - bool allDequantizationMultiplyAreZero = true; - for (FakeQuantizeDequantization dequantization : layerDequantizations) { - if (dequantization.subtract != nullptr) { - allDequantizationShiftAreZero = false; - } - if (dequantization.multiply != nullptr) { - allDequantizationMultiplyAreZero = false; - } - } - - for (size_t i = 0; i < layerDequantizations.size(); ++i) { - const auto& dequantization = layerDequantizations[i]; - - if (dequantization.convert != nullptr) { - convertNodes.push_back(dequantization.convert); - } - - const ngraph::element::Type precision = deqPrecision; - ngraph::Shape targetShape(layer->get_input_shape(i).size(), 1ul); - targetShape[1] = layer->get_input_shape(i)[1]; - - if (!allDequantizationShiftAreZero) { - subtractNodes.push_back(dequantization.subtract == nullptr ? 
- std::make_shared(precision, targetShape, std::vector({ 0.f })) : - broadcastElementWiseConst( - as_type_ptr(dequantization.subtract->input_value(1).get_node_shared_ptr()), - targetShape)); - } - - if (!allDequantizationMultiplyAreZero) { - multiplyNodes.push_back(dequantization.multiply == nullptr ? - std::make_shared(precision, targetShape, std::vector({ 1.0f })) : - broadcastElementWiseConst( - as_type_ptr(dequantization.multiply->input_value(1).get_node_shared_ptr()), - targetShape)); - } - } - } else { - // TODO: check constant shapes here - has to be scalar - if (layerDequantizations[0].convert != nullptr) { - convertNodes.push_back(layerDequantizations[0].convert); - } - - if (layerDequantizations[0].subtract != nullptr) { - subtractNodes.push_back(layerDequantizations[0].subtract->input_value(1).get_node_shared_ptr()); - } - - if (layerDequantizations[0].multiply != nullptr) { - multiplyNodes.push_back(layerDequantizations[0].multiply->input_value(1).get_node_shared_ptr()); - } - } + fillDequantizationNodes(layerDequantizations, layer, convertNodes, subtractNodes, multiplyNodes); // TODO: the second place (first is FQ decomposition) where dequantization operations are inserted - const std::shared_ptr destination = child.shared_from_this(); - if (!convertNodes.empty()) { const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination); std::shared_ptr convert = convertNodes[0]->clone_with_new_inputs({ destination->get_input_source_output(sourceOutputIdx) }); + insert_new_node_between(source, destination, convert); ngraph::copy_runtime_info({ layer, convert }, convert); source = convert; @@ -368,9 +384,8 @@ void ConcatTransformation::addDequantizationLayers( const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination); std::shared_ptr subtract = std::make_shared( destination->get_input_source_output(sourceOutputIdx), - NetworkHelper::toScalarIfPossible(subtractNodes.size() == 1ul ? - subtractNodes[0] : - ngraph::pass::low_precision::fold(subtractNodes, 1))); + NetworkHelper::toScalarIfPossible(concatenateDeqNodes(subtractNodes))); + insert_new_node_between(source, destination, subtract); ngraph::copy_runtime_info({ layer, subtract }, subtract); source = subtract; @@ -381,10 +396,9 @@ void ConcatTransformation::addDequantizationLayers( std::shared_ptr multiply = std::make_shared>( DequantizationMultiply( destination->get_input_source_output(sourceOutputIdx), - NetworkHelper::toScalarIfPossible(multiplyNodes.size() == 1ul ? 
- multiplyNodes[0] : - ngraph::pass::low_precision::fold(multiplyNodes, 1))), + NetworkHelper::toScalarIfPossible(concatenateDeqNodes(multiplyNodes))), layerDequantizations[0].multiply->get_output_element_type(0)); + insert_new_node_between(source, destination, multiply); ngraph::copy_runtime_info({ layer, multiply }, multiply); source = multiply; diff --git a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp index 932e771c4ce..62d958d22b4 100644 --- a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp @@ -137,6 +137,7 @@ bool ConcatMultiChannelsTransformation::transform(TransformationContext& context auto dequantizationValuesCallback = [&]( std::shared_ptr layer, + std::shared_ptr child, const std::string originalLayerName, std::vector& dequantizationsToConcatenate) { if (layer->get_friendly_name() != originalLayerName) { @@ -157,6 +158,15 @@ bool ConcatMultiChannelsTransformation::transform(TransformationContext& context layer, dequantizations, dequantizationsToConcatenate); + + if (!is_type(layer)) { + // for intermediate layers we should get Dq operations to be inserted between layer and child + assert(dequantizationsToConcatenate.size() == 1ul); + const size_t sourceOutputIdx = NetworkHelper::getParentOutputIndex(layer, child); + if (layer->get_input_shape(0)[1] != layer->get_output_shape(sourceOutputIdx)[1]) { + dequantizationsToConcatenate[0] = getFoldedDequantization(layer, dequantizationsToConcatenate[0], sourceOutputIdx); + } + } }; addDequantizationLayers(context, subgraph, dequantizationValuesCallback); @@ -185,137 +195,66 @@ bool ConcatMultiChannelsTransformation::isPrecisionPreserved(std::shared_ptr layer, - std::unordered_map& dequantizationByFakeQuantize, - std::vector& dequantizationsToConcatenate) const { - std::shared_ptr currentFakeQuantize = ngraph::as_type_ptr(layer); - if (currentFakeQuantize) { - const auto it = dequantizationByFakeQuantize.find(currentFakeQuantize->get_friendly_name()); - if (it == dequantizationByFakeQuantize.end()) { - THROW_IE_LPT_EXCEPTION(*currentFakeQuantize) << "dequantization scale values are not found"; - } - const FakeQuantizeDequantization& fakeQuantizeDequantization = it->second; - dequantizationsToConcatenate.push_back(broadcastDequantiationConstant(fakeQuantizeDequantization)); - } else { - fillQuantization(layer, dequantizationByFakeQuantize, dequantizationsToConcatenate); - } -} - -void ConcatMultiChannelsTransformation::fillQuantization( const std::shared_ptr layer, const std::unordered_map& dequantizationByFakeQuantize, std::vector& dequantization) const { - for (size_t i = 0; i < layer->get_input_size(); ++i) { - std::shared_ptr parent = layer->get_input_node_shared_ptr(i); + const auto fillDqByFakeQuantize = [&](const std::shared_ptr& fq) { + const auto it = dequantizationByFakeQuantize.find(fq->get_friendly_name()); + if (it == dequantizationByFakeQuantize.end()) { + THROW_IE_LPT_EXCEPTION(*fq) << "dequantization scale values are not found"; + } - std::shared_ptr fakeQuantize = ngraph::as_type_ptr(parent); - if (fakeQuantize) { - const auto it = dequantizationByFakeQuantize.find(fakeQuantize->get_friendly_name()); - if (it == dequantizationByFakeQuantize.end()) { - THROW_IE_LPT_EXCEPTION(*fakeQuantize) << "dequantization scale values are not found"; + const FakeQuantizeDequantization& 
fakeQuantizeDequantization = it->second; + dequantization.push_back(fakeQuantizeDequantization); + }; + + if (is_type(layer)) { + fillDqByFakeQuantize(layer); + } else { + for (size_t i = 0; i < layer->get_input_size(); ++i) { + std::shared_ptr parent = layer->get_input_node_shared_ptr(i); + if (as_type_ptr(parent)) { + continue; } - const FakeQuantizeDequantization& fakeQuantizeDequantization = it->second; - dequantization.push_back(broadcastDequantiationConstant(fakeQuantizeDequantization)); - } else { - std::shared_ptr concat = ngraph::as_type_ptr(parent); - if (concat) { - std::vector dequantizationToConcatenate; - fillQuantization(concat, dequantizationByFakeQuantize, dequantizationToConcatenate); - - // add concatenated dequantization operations to dequantization collection - dequantization.push_back(getConcatenatedDequantization(concat, dequantizationToConcatenate)); + const auto fakeQuantize = ngraph::as_type_ptr(parent); + if (fakeQuantize) { + fillDqByFakeQuantize(fakeQuantize); } else { - std::shared_ptr stridedSlice = ngraph::as_type_ptr(parent); - if (stridedSlice) { - std::vector dequantizationToPropagate; - fillQuantization(stridedSlice, dequantizationByFakeQuantize, dequantizationToPropagate); + const auto concat = ngraph::as_type_ptr(parent); + if (concat) { + std::vector dequantizationToConcatenate; + fillDequantization(concat, dequantizationByFakeQuantize, dequantizationToConcatenate); - const size_t sourceOutputIdx = NetworkHelper::getParentOutputIndex(parent, layer); - // add folded dequantization operations to dequantization colection - dequantization.push_back(getFoldedDequantization(stridedSlice, dequantizationToPropagate[0], sourceOutputIdx)); + // add concatenated dequantization operations to dequantization collection + dequantization.push_back(getConcatenatedDequantization(concat, dequantizationToConcatenate)); } else { - fillQuantization(parent, dequantizationByFakeQuantize, dequantization); + const size_t sourceOutputIdx = NetworkHelper::getParentOutputIndex(parent, layer); + if (parent->get_input_shape(0)[1] != parent->get_output_shape(sourceOutputIdx)[1]) { + std::vector dequantizationToPropagate; + fillDequantization(parent, dequantizationByFakeQuantize, dequantizationToPropagate); + + // add folded dequantization operations to dequantization colection + dequantization.push_back(getFoldedDequantization(parent, dequantizationToPropagate[0], sourceOutputIdx)); + } else { + fillDequantization(parent, dequantizationByFakeQuantize, dequantization); + } } } } } } -// broadcast of dequantization constants by channels -FakeQuantizeDequantization ConcatMultiChannelsTransformation::broadcastDequantiationConstant(const FakeQuantizeDequantization& deq) { - ngraph::Shape targetShape(deq.data.get_shape().size(), 1ul); - targetShape[1] = deq.data.get_shape()[1]; - - FakeQuantizeDequantization result; - result.data = deq.data; - result.convert = deq.convert; - - const auto targetShapeConst = std::make_shared( - element::i64, ngraph::Shape{ targetShape.size() }, - targetShape); - - if (deq.subtract) { - auto broadcast = ngraph::pass::low_precision::fold( - deq.subtractConstant, - targetShapeConst, - ngraph::op::AutoBroadcastType::NUMPY); - - result.subtract = deq.subtract; - result.subtractConstant = as_type_ptr(broadcast); - } - - if (deq.multiply) { - auto broadcast = ngraph::pass::low_precision::fold( - deq.multiplyConstant, - targetShapeConst, - ngraph::op::AutoBroadcastType::NUMPY); - - result.multiply = deq.multiply; - result.multiplyConstant = as_type_ptr(broadcast); 
- } - - return result; -} - FakeQuantizeDequantization ConcatMultiChannelsTransformation::getConcatenatedDequantization( const std::shared_ptr concat, const std::vector& dequantization) const { - bool allDequantizationShiftAreZero = true; - bool allDequantizationMultiplyAreZero = true; - for (const auto& deq : dequantization) { - if (deq.subtract != nullptr) { - allDequantizationShiftAreZero = false; - } - if (deq.multiply != nullptr) { - allDequantizationMultiplyAreZero = false; - } - } - NodeVector convertNodes; - NodeVector subNodes; - NodeVector mulNodes; - //preparing to concatenate dequantization nodes - for (const auto& deq : dequantization) { - ngraph::Shape targetShape(deq.data.get_shape().size(), 1ul); - targetShape[1] = deq.data.get_shape()[1]; + NodeVector subtractNodes; + NodeVector multiplyNodes; - if (deq.convert != nullptr) { - convertNodes.push_back(deq.convert); - } - if (!allDequantizationShiftAreZero) { - subNodes.push_back(deq.subtract == nullptr ? - std::make_shared(deqPrecision, targetShape, std::vector({ 0.f })) : - deq.subtractConstant); - } - if (!allDequantizationMultiplyAreZero) { - mulNodes.push_back(deq.multiply == nullptr ? - std::make_shared(deqPrecision, targetShape, std::vector({ 1.0f })) : - deq.multiplyConstant); - } - } + // forming nodes for concatenation + fillDequantizationNodes(dequantization, concat, convertNodes, subtractNodes, multiplyNodes); std::shared_ptr parent = concat; std::shared_ptr convert; @@ -326,20 +265,16 @@ FakeQuantizeDequantization ConcatMultiChannelsTransformation::getConcatenatedDeq std::shared_ptr subtract; std::shared_ptr subConst; - if (!subNodes.empty()) { - subConst = as_type_ptr( - subNodes.size() == 1ul ? subNodes[0] : fold(subNodes, 1ul)); - + if (!subtractNodes.empty()) { + subConst = as_type_ptr(concatenateDeqNodes(subtractNodes)); subtract = std::make_shared(parent, subConst); parent = subtract; } std::shared_ptr multiply; std::shared_ptr mulConst; - if (!mulNodes.empty()) { - mulConst = as_type_ptr( - mulNodes.size() == 1ul ? 
mulNodes[0] : fold(mulNodes, 1ul)); - + if (!multiplyNodes.empty()) { + mulConst = as_type_ptr(concatenateDeqNodes(multiplyNodes)); multiply = std::make_shared(parent, mulConst); } @@ -352,24 +287,19 @@ FakeQuantizeDequantization ConcatMultiChannelsTransformation::getFoldedDequantiz const size_t sourceOutputIdx) { OutputVector inputs = operation->input_values(); OutputVector outputs(operation->get_output_size()); + Output data = operation->output(sourceOutputIdx); std::shared_ptr parent = operation; std::shared_ptr convert; if (dequantization.convert) { - convert = as_type_ptr(dequantization.convert->clone_with_new_inputs({ parent })); + convert = as_type_ptr(dequantization.convert->clone_with_new_inputs({ data })); parent = convert; } std::shared_ptr subtract; std::shared_ptr subConst; if (dequantization.subtract) { - inputs[0] = dequantization.subtractConstant; - const auto op = operation->clone_with_new_inputs(inputs); - - // constant folding of subtract constant - op->constant_fold(outputs, inputs); - - subConst = as_type_ptr(outputs[sourceOutputIdx].get_node_shared_ptr()); + subConst = NetworkHelper::foldDequantizationConstant(dequantization.subtractConstant, operation, sourceOutputIdx); subtract = std::make_shared(parent, subConst); parent = subtract; } @@ -377,17 +307,11 @@ FakeQuantizeDequantization ConcatMultiChannelsTransformation::getFoldedDequantiz std::shared_ptr multiply; std::shared_ptr mulConst; if (dequantization.multiply) { - inputs[0] = dequantization.multiplyConstant; - const auto op = operation->clone_with_new_inputs(inputs); - - // constant folding of multiply constant - op->constant_fold(outputs, inputs); - - mulConst = as_type_ptr(outputs[sourceOutputIdx].get_node_shared_ptr()); + mulConst = NetworkHelper::foldDequantizationConstant(dequantization.multiplyConstant, operation, sourceOutputIdx); multiply = std::make_shared(parent, mulConst); } - return FakeQuantizeDequantization(operation->output(sourceOutputIdx), convert, subtract, nullptr, subConst, multiply, mulConst); + return FakeQuantizeDequantization(data, convert, subtract, nullptr, subConst, multiply, mulConst); } } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp index 6afdaa070a3..258c65f91d2 100644 --- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp +++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp @@ -87,6 +87,31 @@ bool NetworkHelper::isConstantPath(const std::shared_ptr& op) { return true; } +std::shared_ptr NetworkHelper::foldDequantizationConstant( + const std::shared_ptr& foldingConstant, + const std::shared_ptr& operation, + const size_t outIdx) { + OutputVector inputs = operation->input_values(); + OutputVector outputs(operation->get_output_size()); + + if (shape_size(foldingConstant->get_shape()) == 1ul) { + return toScalar(foldingConstant); + } else { + inputs[0] = foldingConstant; + const auto op = operation->clone_with_new_inputs(inputs); + + // constant folding of constant + op->constant_fold(outputs, inputs); + + const auto result = as_type_ptr(outputs[outIdx].get_node_shared_ptr()); + if (result == nullptr) { + THROW_IE_LPT_EXCEPTION(*result) << "result of constant folding is not constant"; + } + + return result; + } +} + size_t NetworkHelper::getOutputChannelsCount(std::shared_ptr layer, bool isOnWeights) { if (layer->outputs().size() == 0) { THROW_TRANSFORMATION_EXCEPTION << "Layer " << 
layer->get_friendly_name() << " doesn't have output tensors"; diff --git a/inference-engine/src/low_precision_transformations/src/split.cpp b/inference-engine/src/low_precision_transformations/src/split.cpp index a14867fc8e4..5f0dbaebef0 100644 --- a/inference-engine/src/low_precision_transformations/src/split.cpp +++ b/inference-engine/src/low_precision_transformations/src/split.cpp @@ -5,6 +5,7 @@ #include "low_precision/split.hpp" #include "ngraph/node.hpp" #include "low_precision/network_helper.hpp" +#include "low_precision/common/dequantization_op.hpp" namespace ngraph { namespace pass { @@ -22,81 +23,68 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt return false; } - const std::shared_ptr split = NetworkHelper::separateInStandaloneBranch(m.get_match_root()); - auto dequantization = NetworkHelper::getDequantization(split); + const auto split = NetworkHelper::separateInStandaloneBranch(m.get_match_root()); + const auto dequantization = NetworkHelper::getDequantization(split); - OutputVector inputs(split->get_input_size()); - for (size_t i = 0; i < split->get_input_size(); ++i) { - inputs[i] = split->get_input_node_shared_ptr(i); - } + OutputVector inputs = split->input_values(); + inputs[0] = dequantization.data; - const size_t dequantizationIndex = NetworkHelper::getChildInputIndex(dequantization.multiply, split); - inputs[dequantizationIndex] = dequantization.data; - - std::shared_ptr newSplit = split->clone_with_new_inputs(inputs); + const auto newSplit = split->clone_with_new_inputs(inputs); newSplit->set_friendly_name(split->get_friendly_name()); + ngraph::copy_runtime_info(split, newSplit); - const ngraph::Shape subConstShape = dequantization.subtract ? - dequantization.subtract->get_input_node_shared_ptr(1)->get_shape() : Shape{}; - std::vector subValues = dequantization.subtract ? as_type_ptr( - dequantization.subtract->get_input_node_shared_ptr(1))->cast_vector() : std::vector(); + const int64_t axis = as_type_ptr(split->get_input_node_shared_ptr(1))->cast_vector()[0]; + const size_t normalizedAxis = normalize_axis(split->get_friendly_name(), axis, split->get_input_partial_shape(0).rank()); + const size_t outputSize = newSplit->get_output_size(); - const ngraph::Shape mulConstShape = dequantization.multiply->get_input_node_shared_ptr(1)->get_shape(); - std::vector mulValues = as_type_ptr( - dequantization.multiply->get_input_node_shared_ptr(1))->cast_vector(); + const auto splitConstant = [&](const std::shared_ptr operation) { + // if batch is absent in constant shape - add batch + const auto normalizedConstant = NetworkHelper::normalizeDequantizationShape(operation); + const auto constantShape = normalizedConstant->get_shape(); - int64_t SplitedAxis = as_type_ptr(split->get_input_node_shared_ptr(1))->cast_vector()[0]; - size_t axis = SplitedAxis > 0 ? 
SplitedAxis : split->get_input_shape(0).size() + SplitedAxis; - size_t outputSize = newSplit->get_output_size(); - - const auto subSplitLengths = getConstSplitLengths(inputs, subConstShape, outputSize); - const auto mulSplitLengths = getConstSplitLengths(inputs, mulConstShape, outputSize); - - std::vector> lastNodes(outputSize); - ngraph::OutputVector replacement; - for (size_t i = 0; i < outputSize; ++i) { - Output previous = newSplit->output(i); - - if (dequantization.convert != nullptr) { - const std::shared_ptr convert = - dequantization.convert->clone_with_new_inputs({ newSplit->output(i) }); - previous = convert; - } - - if (dequantization.subtract != nullptr) { - std::shared_ptr subConst; - if (!subSplitLengths.empty()) { - const auto newSubConstShape = getConstSplitShape(subSplitLengths, subConstShape, axis, i); - - std::vector newSubValues( - subValues.begin() + subSplitLengths[i], - subValues.begin() + subSplitLengths[i + 1]); - - subConst = as_type_ptr(std::make_shared( - dequantization.subtract->get_input_element_type(1), - newSubConstShape, - newSubValues)); - } else { - subConst = as_type_ptr(dequantization.subtract->get_input_node_shared_ptr(1)->clone_with_new_inputs({})); - } - const std::shared_ptr subtract = std::make_shared(previous, subConst); - previous = subtract; - } - - std::shared_ptr mulConst; - if (!mulSplitLengths.empty()) { - const auto newMulConstShape = getConstSplitShape(mulSplitLengths, mulConstShape, axis, i); - - std::vector newMulValues( - mulValues.begin() + mulSplitLengths[i], - mulValues.begin() + mulSplitLengths[i + 1]); - - mulConst = as_type_ptr(std::make_shared( - dequantization.multiply->get_input_element_type(1), newMulConstShape, newMulValues)); + OutputVector results(outputSize); + if ((shape_size(constantShape) == 1ul) || (constantShape[normalizedAxis] == 1ul)) { + std::for_each(results.begin(), results.end(), [&](Output& elem) { elem = normalizedConstant->clone_with_new_inputs({}); }); } else { - mulConst = as_type_ptr(dequantization.multiply->get_input_node_shared_ptr(1)->clone_with_new_inputs({})); + // prepare new inputs for constant folding + OutputVector inputs = newSplit->input_values(); + inputs[0] = normalizedConstant; + const auto foldSplit = newSplit->clone_with_new_inputs(inputs); + + // fold and fill results + foldSplit->constant_fold(results, inputs); } - const std::shared_ptr multiply = std::make_shared(previous, mulConst); + + for (auto& result : results) { + result = NetworkHelper::toScalarIfPossible(result.get_node_shared_ptr()); + } + + return results; + }; + + // get splited dequantization constants + OutputVector splitedSub = dequantization.subtract ? 
splitConstant(dequantization.subtract) : OutputVector{}; + OutputVector splitedMul = splitConstant(dequantization.multiply); + + NodeVector lastNodes; + OutputVector replacement; + for (size_t i = 0; i < outputSize; ++i) { + Output parent = newSplit->output(i); + + if (dequantization.convert) { + const auto convert = dequantization.convert->clone_with_new_inputs({ newSplit->output(i) }); + copy_runtime_info({ newSplit, convert }, convert); + parent = convert; + } + + if (dequantization.subtract) { + const auto subtract = std::make_shared(parent, splitedSub[i]); + copy_runtime_info({ newSplit, subtract }, subtract); + parent = subtract; + } + + const auto multiply = std::make_shared(parent, splitedMul[i]); + copy_runtime_info({ newSplit, multiply }, multiply); lastNodes.push_back(multiply); replacement.push_back(multiply); @@ -107,33 +95,6 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt return true; } -std::vector SplitTransformation::getConstSplitLengths( - const OutputVector& inputs, - const ngraph::Shape& constShape, - const size_t outputSize) const { - int64_t axis = as_type_ptr(inputs[1].get_node_shared_ptr())->cast_vector()[0]; - size_t splitedAxis = axis > 0 ? axis : inputs[0].get_shape().size() + axis; - - if ((!constShape.empty()) && (constShape[splitedAxis] != 1)) { - std::vector result(outputSize + 1); - result[0] = 0; - for (size_t i = 1; i < result.size(); ++i) { - result[i] = result[i - 1] + constShape[splitedAxis] / outputSize; - } - return result; - } else { - return std::vector(); - } -} - -ngraph::Shape SplitTransformation::getConstSplitShape( - const std::vector& constSplitLengths, - const ngraph::Shape& constShape, const size_t axis, - const size_t idx) const { - Shape result(constShape); - result[axis] = constSplitLengths[idx + 1] - constSplitLengths[idx]; - return result; -} void SplitTransformation::updateOutputs( TransformationContext& context, diff --git a/inference-engine/src/low_precision_transformations/src/strided_slice.cpp b/inference-engine/src/low_precision_transformations/src/strided_slice.cpp index e7b7a796566..a269e392302 100644 --- a/inference-engine/src/low_precision_transformations/src/strided_slice.cpp +++ b/inference-engine/src/low_precision_transformations/src/strided_slice.cpp @@ -23,7 +23,7 @@ std::shared_ptr stridedSliceDeqConstant( //} const auto stridedSliceShape = strSlice->get_input_shape(0); - const auto constantShape = constant->get_shape(); + auto constantShape = constant->get_shape(); if (stridedSliceShape.size() != constantShape.size()) { ngraph::Shape newConstantShape; if (ngraph::shape_size(constantShape) == 1) { @@ -37,6 +37,7 @@ std::shared_ptr stridedSliceDeqConstant( newConstantShape.insert(newConstantShape.begin(), stridedSliceShape[0]); } } + constantShape = newConstantShape; const auto newConstant = fold( constant, @@ -45,13 +46,24 @@ std::shared_ptr stridedSliceDeqConstant( } const auto stridedSlice = as_type_ptr(strSlice); + + auto beginMask = stridedSlice->get_begin_mask(); + auto endMask = stridedSlice->get_end_mask(); + for (size_t i = 0; i < constantShape.size(); ++i) { + // don't slice constant if current dimension is 1 + if (constantShape[i] == 1ul) { + beginMask[i] = 1ul; + endMask[i] = 1ul; + } + } + const auto result = fold( constant, stridedSlice->get_input_node_shared_ptr(1), stridedSlice->get_input_node_shared_ptr(2), stridedSlice->get_input_node_shared_ptr(3), - stridedSlice->get_begin_mask(), - stridedSlice->get_end_mask(), + beginMask, + endMask, 
stridedSlice->get_new_axis_mask(), stridedSlice->get_shrink_axis_mask(), stridedSlice->get_ellipsis_mask()); diff --git a/inference-engine/src/low_precision_transformations/src/subgraph.cpp b/inference-engine/src/low_precision_transformations/src/subgraph.cpp index c7f1caf56dd..7638fcb0714 100644 --- a/inference-engine/src/low_precision_transformations/src/subgraph.cpp +++ b/inference-engine/src/low_precision_transformations/src/subgraph.cpp @@ -22,16 +22,15 @@ namespace ngraph { namespace pass { namespace low_precision { -bool isQuantizationPerChannel(const std::shared_ptr& node) { - if (node->outputs().size() > 1ul) { - return false; - } - - //WA to support StridedSlice in ConcatTransformation - if (ngraph::is_type(node)) { +bool operationIsSupportedInConcat(const std::shared_ptr& node) { + // list of operations, which change channels, but supported in ConcatTransformation + if (ngraph::is_type(node) || + ngraph::is_type(node) || + ngraph::is_type(node)) { return true; } + // operations, which change channels, usually don't support in ConcatTransformation const auto inputs = node->input_values(); for (const auto& input : inputs) { if (ngraph::is_type(input.get_node())) { @@ -82,7 +81,7 @@ bool Subgraph::fillSubgraphForQuantization( if (fakeQuantizeChild != nullptr) { // } else { - if (layerTransformationsManager->isPrecisionPreserved(child) && isQuantizationPerChannel(child)) { + if (layerTransformationsManager->isPrecisionPreserved(child) && operationIsSupportedInConcat(child)) { if (!fillSubgraphForIntermediate(child, handledLayers)) { return false; } @@ -104,7 +103,7 @@ bool Subgraph::atLeastOneIsIntermediate(const std::shared_ptr& nod return true; } - if (!layerTransformationsManager->isPrecisionPreserved(child) || !isQuantizationPerChannel(child)) { + if (!layerTransformationsManager->isPrecisionPreserved(child) || !operationIsSupportedInConcat(child)) { // child branch is out of subgraph continue; } @@ -144,10 +143,6 @@ bool Subgraph::fill(const std::shared_ptr& layer, std::unordered_s return false; } } else { - // WA: issue #46906 - if (parent->get_output_size() != 1ul) { - return false; - } const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(parent, 0, true); const std::shared_ptr fakeQuantizeParent = dequantization.empty() ? 
ngraph::as_type_ptr(parent) : @@ -161,7 +156,7 @@ bool Subgraph::fill(const std::shared_ptr& layer, std::unordered_s if (constant != nullptr) { // } else { - if (layerTransformationsManager->isPrecisionPreserved(parent) && isQuantizationPerChannel(parent)) { + if (layerTransformationsManager->isPrecisionPreserved(parent) && operationIsSupportedInConcat(parent)) { if (!fillSubgraphForIntermediate(parent, handledLayers)) { return false; } @@ -197,7 +192,7 @@ bool Subgraph::fill(const std::shared_ptr& layer, std::unordered_s const std::shared_ptr fakeQuantizeChild = ngraph::as_type_ptr(child); if (fakeQuantizeChild != nullptr) { // - } else if (layerTransformationsManager->isPrecisionPreserved(child) && isQuantizationPerChannel(child)) { + } else if (layerTransformationsManager->isPrecisionPreserved(child) && operationIsSupportedInConcat(child)) { if (!fillSubgraphForIntermediate(child, handledLayers)) { return false; } @@ -221,6 +216,13 @@ bool Subgraph::empty() const { } bool Subgraph::fillSubgraphForConcat(const std::shared_ptr& concat, std::unordered_set& handledLayers) { + const auto axis = concat->get_axis(); + const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, concat->get_output_partial_shape(0).rank()); + // supported only per-channel concat + if (normalizedAxis != 1ul) { + return false; + } + concatLayers.push_back(concat); handledLayers.insert(concat->get_friendly_name()); layers.emplace(concat->get_friendly_name(), concat); diff --git a/inference-engine/src/low_precision_transformations/src/transformer.cpp b/inference-engine/src/low_precision_transformations/src/transformer.cpp index 205cd77e930..d66263bdf07 100644 --- a/inference-engine/src/low_precision_transformations/src/transformer.cpp +++ b/inference-engine/src/low_precision_transformations/src/transformer.cpp @@ -229,9 +229,11 @@ LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const add(params). add(params). add(params). + add(params). add(params). add(params). add(params). + add(params). addCleanup(params). addCleanup(params). diff --git a/inference-engine/src/low_precision_transformations/src/variadic_split.cpp b/inference-engine/src/low_precision_transformations/src/variadic_split.cpp index ccc8e72634d..685219f2773 100644 --- a/inference-engine/src/low_precision_transformations/src/variadic_split.cpp +++ b/inference-engine/src/low_precision_transformations/src/variadic_split.cpp @@ -20,26 +20,6 @@ void VariadicSplitTransformation::registerMatcherIn(GraphRewrite& pass, Transfor make_op_label() })); } -std::vector VariadicSplitTransformation::getConstSplitLengths( - const OutputVector& inputs, - const ngraph::Shape& constShape, - const size_t outputSize) const { - std::vector lengths = as_type_ptr(inputs[2].get_node_shared_ptr())->cast_vector(); - - int64_t axis = as_type_ptr(inputs[1].get_node_shared_ptr())->cast_vector()[0]; - size_t splitedAxis = axis > 0 ? 
axis : inputs[0].get_shape().size() + axis; - - if ((!constShape.empty()) && (constShape[splitedAxis] != 1)) { - std::vector result(outputSize + 1); - result[0] = 0; - for (size_t i = 1; i < result.size(); ++i) { - result[i] = result[i - 1] + lengths[i - 1]; - } - return result; - } else { - return std::vector(); - } -} } // namespace low_precision } // namespace pass } // namespace ngraph diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp index 4f58c1f3573..148358f9409 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp @@ -74,6 +74,7 @@ class ConcatTransformationTestValues { public: ngraph::pass::low_precision::LayerTransformation::Params params; bool multiChannels; + std::int64_t axis; ConcatTransformationActualValues actual; ConcatTransformationResultValues result; }; @@ -114,7 +115,8 @@ public: testValues.actual.convert2, testValues.actual.dequantization2, ngraph::element::undefined, - {}); + {}, + testValues.axis); SimpleLowPrecisionTransformer transform; if (testValues.multiChannels) { @@ -146,7 +148,8 @@ public: testValues.result.convert2, testValues.result.dequantization2, testValues.result.precisionAfterOperation, - testValues.result.dequantizationAfter); + testValues.result.dequantizationAfter, + testValues.axis); } static std::string getTestCaseName(testing::TestParamInfo obj) { @@ -158,6 +161,7 @@ public: result << LayerTransformation::getTestCaseNameByParams(precision, shape, testValues.params) << "_" << (testValues.multiChannels ? 
"multiChannels_" : "notMultiChannels_") << + "axis_" << testValues.axis << "_" << testValues.actual << "_" << testValues.result << "_"; return result.str(); @@ -180,6 +184,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, + 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -201,6 +206,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, + 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, { ngraph::element::u8 }, @@ -232,6 +238,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, + 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, { ngraph::element::u8 }, @@ -263,6 +270,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, + 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -290,6 +298,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, + 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -317,6 +326,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, + 1, { { 256ul, {{1}, {1}, {1}, {1}}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -340,6 +350,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, + 1, { { 256ul, {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -363,6 +374,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, + 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -386,6 +398,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, + 1, { { 256ul, {{1}, {1}, {1}, {1}}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -409,6 +422,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, + 1, { { 256ul, @@ -450,6 +464,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, + 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -477,6 +492,7 @@ const std::vector testValues = { { LayerTransformation::createParamsI8I8(), false, + 1, { { 256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, {}, @@ -500,6 +516,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, + 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -523,6 +540,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, + 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -546,6 +564,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, + 1, { { 256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, {}, @@ -569,6 +588,7 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, + 1, { { 256ul, {}, {-1.28f}, {1.27f}, {0.f}, {2.3007815f} }, {}, @@ -588,10 +608,61 @@ const std::vector testValues = { { ngraph::element::f32, { 128 }, { 0.0302619f } } } }, + // U8: concat multi channels with subtract, negative axis + { + LayerTransformation::createParamsU8I8(), + true, + -3, + { + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, + {}, + {}, + { 256ul, {}, {1.275f}, {2.55f}, {1.275f}, {2.55f} }, + {}, + {} + }, + { + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + { 256ul, {}, {1.275f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, + {}, + {}, + ngraph::element::u8, + { + ngraph::element::f32, + {{ 0.f, 0.f, 0.f, -255.f, -255.f, -255.f }}, + {{ 0.01f, 0.01f, 0.01f, 0.005f, 0.005f, 0.005f }} + } + } + }, + // U8: concat 
multi channels with subtract, not supported axis + { + LayerTransformation::createParamsU8I8(), + true, + 0, + { + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, + {}, + {}, + { 256ul, {}, {1.275f}, {2.55f}, {1.275f}, {2.55f} }, + {}, + {} + }, + { + { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, + {}, + {}, + { 256ul, {}, {1.275f}, {2.55f}, {1.275f}, {2.55f} }, + {}, + {} + }, + }, // not update precisions { LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), false, + 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_split_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_split_transformation.cpp index dbbe4b35f11..76b137d7468 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_split_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_split_transformation.cpp @@ -217,6 +217,40 @@ const std::vector testValues = { { ngraph::element::f32, {}, { 0.005f } } } }, + // U8: concat multi channels with per-channel quantization + { + { 1, 6, 10, 10 }, + LayerTransformation::createParamsU8I8(), + true, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {2.55f / 2.f} }, + { + 256ul, + ngraph::Shape({ 1, 6, 1, 1 }), + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {255.f, 25.5f, 2.55f, 25.5f, 255.f, 2.55f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {255.f, 25.5f, 2.55f, 25.5f, 255.f, 2.55f} + } + }, + { + { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {255.f}}, + { + 256ul, + ngraph::Shape({ 1, 6, 1, 1 }), + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {255.f, 25.5f, 2.55f, 25.5f, 255.f, 2.55f}, + {0.f}, + {255.f} + }, + ngraph::element::u8, + {{}, {}, {}}, + {{}, {}, {}}, + ngraph::element::u8, + { ngraph::element::f32, {}, {{ 0.005f, 0.005f, 0.005f, 1.f, 0.1f, 0.01f }} }, + { ngraph::element::f32, {}, {{ 0.1f, 1.f, 0.01f }} } + } + }, // I8: concat multi channels { { 1, 6, 10, 10 }, @@ -259,9 +293,8 @@ const std::vector testValues = { }, }; -// TODO: Split/VariadicSplit operations are not supported in ConcatTransformation INSTANTIATE_TEST_CASE_P( - DISABLED_smoke_LPT, + smoke_LPT, ConcatWithSplitTransformation, ::testing::Combine( ::testing::ValuesIn(precisions), diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/split_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/split_transformation.cpp index bc4e5580b4b..9f04c18a580 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/split_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/split_transformation.cpp @@ -160,21 +160,30 @@ const std::vector testValues = { {}, ngraph::element::u8, { - { - {ngraph::element::f32}, - {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, - { - {ngraph::element::f32}, - {{2.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{22.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, - { - {ngraph::element::f32}, - {{3.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{33.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, + {{ngraph::element::f32}, {1.f}, {11.f}}, + {{ngraph::element::f32}, {2.f}, {22.f}}, + {{ngraph::element::f32}, {3.f}, {33.f}}, + } + } + }, + // U8 per channel quantization with different values (constants without batch) + { + ngraph::Shape({ 1, 3, 16, 16 }), std::int64_t{-3}, size_t{3}, 
+ LayerTransformation::createParamsU8I8(), + { + ngraph::element::u8, + {{ngraph::element::f32}, + {{1.f, 2.f, 3.f}, ngraph::element::f32, {3, 1, 1}}, + {{11.f, 22.f, 33.f}, ngraph::element::f32, {3, 1, 1}}} + }, + { + ngraph::element::u8, + {}, + ngraph::element::u8, + { + {{ngraph::element::f32}, {1.f}, {11.f}}, + {{ngraph::element::f32}, {2.f}, {22.f}}, + {{ngraph::element::f32}, {3.f}, {33.f}}, } } }, @@ -193,21 +202,9 @@ const std::vector testValues = { {}, ngraph::element::i8, { - { - {ngraph::element::f32}, - {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, - { - {ngraph::element::f32}, - {{2.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{22.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, - { - {ngraph::element::f32}, - {{3.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{33.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, + {{ngraph::element::f32}, {1.f}, {11.f}}, + {{ngraph::element::f32}, {2.f}, {22.f}}, + {{ngraph::element::f32}, {3.f}, {33.f}}, } } }, @@ -226,21 +223,9 @@ const std::vector testValues = { {}, ngraph::element::u8, { - { - {ngraph::element::f32}, - {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, - { - {ngraph::element::f32}, - {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, - { - {ngraph::element::f32}, - {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, + {{ngraph::element::f32}, {1.f}, {11.f}}, + {{ngraph::element::f32}, {1.f}, {11.f}}, + {{ngraph::element::f32}, {1.f}, {11.f}}, } } }, @@ -259,21 +244,9 @@ const std::vector testValues = { {}, ngraph::element::i8, { - { - {ngraph::element::f32}, - {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, - { - {ngraph::element::f32}, - {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, - { - {ngraph::element::f32}, - {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, + {{ngraph::element::f32}, {1.f}, {11.f}}, + {{ngraph::element::f32}, {1.f}, {11.f}}, + {{ngraph::element::f32}, {1.f}, {11.f}} } } }, @@ -358,21 +331,9 @@ const std::vector testValues = { {}, ngraph::element::u8, { - { - {ngraph::element::f32}, - {}, - {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, - { - {ngraph::element::f32}, - {}, - {{22.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, - { - {ngraph::element::f32}, - {}, - {{33.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, + {{ngraph::element::f32}, {}, {11.f}}, + {{ngraph::element::f32}, {}, {22.f}}, + {{ngraph::element::f32}, {}, {33.f}}, } } }, @@ -391,21 +352,9 @@ const std::vector testValues = { {}, ngraph::element::i8, { - { - {ngraph::element::f32}, - {}, - {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, - { - {ngraph::element::f32}, - {}, - {{22.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, - { - {ngraph::element::f32}, - {}, - {{33.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, + {{ngraph::element::f32}, {}, {11.f}}, + {{ngraph::element::f32}, {}, {22.f}}, + {{ngraph::element::f32}, {}, {33.f}}, } } }, diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/strided_slice_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/strided_slice_transformation.cpp index 8f570484c81..8b16ce99d75 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/strided_slice_transformation.cpp +++ 
b/inference-engine/tests/functional/inference_engine/lp_transformations/strided_slice_transformation.cpp @@ -150,6 +150,17 @@ StridedSliceTransformationTestValues::LayerParams specialDimensionSlice = { {} }; +StridedSliceTransformationTestValues::LayerParams specialDimensionEndSlice = { + { 0, 0, 20, 0 }, + { 1, 3, 24, 24 }, + { 1, 1, 1, 1 }, + { 1, 1, 0, 1 }, + { 1, 1, 0, 1 }, + {}, + {}, + {} +}; + const std::vector stridedSliceTransformationTestValues = { // U8: channel slice, per-tensor quantization { @@ -311,6 +322,38 @@ const std::vector stridedSliceTransformati {{ngraph::element::f32}, {{ 32.f, 64.f, 32.f }}, {{ 0.1f, 0.01f, 1.f }}} } }, + // I8: special dimension end slice, per-channel quantization with different values + { + ngraph::Shape{1, 3, 24, 24}, + LayerTransformation::createParamsI8I8(), + specialDimensionEndSlice, + { + ngraph::element::i8, + {{ngraph::element::f32}, {{ 32.f, 64.f, 32.f }}, {{ 0.1f, 0.01f, 1.f }}} + }, + { + ngraph::element::i8, + {}, + ngraph::element::i8, + {{ngraph::element::f32}, {{ 32.f, 64.f, 32.f }}, {{ 0.1f, 0.01f, 1.f }}} + } + }, + // I8: special dimension end slice, per-tensor quantization with different values + { + ngraph::Shape{1, 3, 24, 24}, + LayerTransformation::createParamsI8I8(), + specialDimensionEndSlice, + { + ngraph::element::i8, + {{ngraph::element::f32}, { 32.f }, { 0.1f }} + }, + { + ngraph::element::i8, + {}, + ngraph::element::i8, + {{ngraph::element::f32}, { 32.f }, { 0.1f }} + } + }, // I8: channel slice, quantization by special dimension { ngraph::Shape{1, 3, 4, 4}, diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/variadic_split_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/variadic_split_transformation.cpp index d77e80b21d7..990de4d98d7 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/variadic_split_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/variadic_split_transformation.cpp @@ -177,11 +177,31 @@ const std::vector testValues = { {{1.f, 2.f}, ngraph::element::f32, {1, 2, 1, 1}}, {{11.f, 22.f}, ngraph::element::f32, {1, 2, 1, 1}} }, + {{ngraph::element::f32}, {3.f}, {33.f}} + } + } + }, + // U8 per channel quantization with different values (constants without batch) + { + ngraph::Shape({ 1, 3, 16, 16 }), std::int64_t{ -3 }, std::vector{ 2, 1 }, + LayerTransformation::createParamsU8I8(), + { + ngraph::element::u8, + {{ngraph::element::f32}, + {{1.f, 2.f, 3.f}, ngraph::element::f32, {3, 1, 1}}, + {{11.f, 22.f, 33.f}, ngraph::element::f32, {3, 1, 1}}} + }, + { + ngraph::element::u8, + {}, + ngraph::element::u8, + { { {ngraph::element::f32}, - {{3.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{33.f}, ngraph::element::f32, {1, 1, 1, 1}} - } + {{1.f, 2.f}, ngraph::element::f32, {1, 2, 1, 1}}, + {{11.f, 22.f}, ngraph::element::f32, {1, 2, 1, 1}} + }, + {{ngraph::element::f32}, {3.f}, {33.f}} } } }, @@ -205,11 +225,7 @@ const std::vector testValues = { {{1.f, 2.f}, ngraph::element::f32, {1, 2, 1, 1}}, {{11.f, 22.f}, ngraph::element::f32, {1, 2, 1, 1}} }, - { - {ngraph::element::f32}, - {{3.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{33.f}, ngraph::element::f32, {1, 1, 1, 1}} - } + {{ngraph::element::f32}, {3.f}, {33.f}} } } }, @@ -228,16 +244,8 @@ const std::vector testValues = { {}, ngraph::element::u8, { - { - {ngraph::element::f32}, - {{1.f, 1.f}, ngraph::element::f32, {1, 2, 1, 1}}, - {{11.f, 11.f}, ngraph::element::f32, {1, 2, 1, 1}} - }, - { - {ngraph::element::f32}, 
- {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} - } + {{ngraph::element::f32}, {1.f}, {11.f}}, + {{ngraph::element::f32}, {1.f}, {11.f}} } } }, @@ -256,16 +264,8 @@ const std::vector testValues = { {}, ngraph::element::i8, { - { - {ngraph::element::f32}, - {{1.f, 1.f}, ngraph::element::f32, {1, 2, 1, 1}}, - {{11.f, 11.f}, ngraph::element::f32, {1, 2, 1, 1}} - }, - { - {ngraph::element::f32}, - {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} - } + {{ngraph::element::f32}, {1.f}, {11.f}}, + {{ngraph::element::f32}, {1.f}, {11.f}} } } }, @@ -322,21 +322,13 @@ const std::vector testValues = { {}, ngraph::element::i8, { - { - {ngraph::element::f32}, - {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} - }, + {{ngraph::element::f32}, {1.f}, {11.f}}, { {ngraph::element::f32}, {{2.f, 3.f}, ngraph::element::f32, {1, 2, 1, 1}}, {{22.f, 33.f}, ngraph::element::f32, {1, 2, 1, 1}} }, - { - {ngraph::element::f32}, - {{4.f}, ngraph::element::f32, {1, 1, 1, 1}}, - {{44.f}, ngraph::element::f32, {1, 1, 1, 1}} - } + {{ngraph::element::f32}, {4.f}, {44.f}} } } }, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp index f035f132e01..4c9d43c124f 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp @@ -45,8 +45,7 @@ const std::vector testValues = { } }; -// TODO: Split/VariadicSplit operations are not supported in ConcatTransformation -INSTANTIATE_TEST_CASE_P(DISABLED_smoke_LPT, ConcatWithSplitTransformation, +INSTANTIATE_TEST_CASE_P(smoke_LPT, ConcatWithSplitTransformation, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(ngraph::Shape({ 1, 6, 10, 10 })), diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp index c2e16a810cd..cf789286f7f 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp @@ -45,8 +45,7 @@ const std::vector testValues = { } }; -// TODO: Split/VariadicSplit operations are not supported in ConcatTransformation -INSTANTIATE_TEST_CASE_P(DISABLED_smoke_LPT, ConcatWithSplitTransformation, +INSTANTIATE_TEST_CASE_P(smoke_LPT, ConcatWithSplitTransformation, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(ngraph::Shape({ 1, 6, 10, 10 })), diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp index e69fd113be5..95da3db91ef 100644 --- 
a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp @@ -114,7 +114,8 @@ public: const DequantizationOperations::Convert& convert2, const DequantizationOperations& dequantization2, const ngraph::element::Type precisionAfterOperation, - const DequantizationOperations& dequantizationAfter); + const DequantizationOperations& dequantizationAfter, + const std::int64_t& axis); static std::shared_ptr getReferenceWithNeighbors( const ngraph::element::Type precision, diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp index 2d9bb24453a..8b251a4d9be 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp @@ -752,7 +752,8 @@ std::shared_ptr ConcatFunction::get( const DequantizationOperations::Convert& convert2, const DequantizationOperations& dequantization2, const ngraph::element::Type precisionAfterOperation, - const DequantizationOperations& dequantizationAfter) { + const DequantizationOperations& dequantizationAfter, + const std::int64_t& axis) { const auto input1 = std::make_shared(inputPrecision, inputShape); input1->set_friendly_name("input1"); @@ -775,7 +776,7 @@ std::shared_ptr ConcatFunction::get( parent2 = makeDequantization(parent2, dequantization2); } - const std::shared_ptr concat = std::make_shared(ngraph::OutputVector{ parent1, parent2 }, 1); + const std::shared_ptr concat = std::make_shared(ngraph::OutputVector{ parent1, parent2 }, axis); auto& rtInfo = concat->get_rt_info(); rtInfo["Variant::std::string"] = std::make_shared>("concat"); @@ -989,6 +990,13 @@ std::shared_ptr ConcatFunction::getReferenceWithSplitedInterme input2->set_friendly_name("input2"); const auto fakeQuantize2 = makeFakeQuantizeTypeRelaxed(input2, precision, fqOnData2); + replace_node( + fakeQuantize2->get_input_node_shared_ptr(3), + ngraph::pass::low_precision::NetworkHelper::toScalarIfPossible(fakeQuantize2->get_input_node_shared_ptr(3))); + replace_node( + fakeQuantize2->get_input_node_shared_ptr(4), + ngraph::pass::low_precision::NetworkHelper::toScalarIfPossible(fakeQuantize2->get_input_node_shared_ptr(4))); + fakeQuantize2->set_friendly_name("fakeQuantize2"); low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(fakeQuantize2, precisionAfterOperation); const auto deqBefore2 = makeDequantization(fakeQuantize2, dequantizationBefore1);
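
Editor's note (not part of the patch): the VariadicSplit test expectations above encode the behavior this change is verifying — per-channel dequantization constants are partitioned along the split axis according to the split lengths (e.g. subtract {1, 2, 3} with lengths {2, 1} becomes {1, 2} and {3}), a per-tensor constant is reused unchanged on every output, and, as the updated expected values and the toScalarIfPossible calls suggest, one-element results are written as scalars. The standalone C++ sketch below only illustrates that partitioning on plain vectors; it is hypothetical, is not the transformation's actual implementation, and uses no ngraph API.

// Hypothetical illustration of how a per-channel dequantization constant
// is divided among VariadicSplit outputs, mirroring the test values above.
#include <cassert>
#include <iostream>
#include <vector>

// Splits `deqConstant` along the channel axis into chunks of the given
// lengths; a per-tensor (single-element) constant is reused for every output.
std::vector<std::vector<float>> splitDeqConstant(
    const std::vector<float>& deqConstant,
    const std::vector<size_t>& splitLengths) {
    std::vector<std::vector<float>> result;
    if (deqConstant.size() == 1) {
        // per-tensor case: each output gets the same scalar constant
        result.assign(splitLengths.size(), deqConstant);
        return result;
    }
    size_t offset = 0;
    for (const size_t len : splitLengths) {
        assert(offset + len <= deqConstant.size());
        result.emplace_back(deqConstant.begin() + offset,
                            deqConstant.begin() + offset + len);
        offset += len;
    }
    return result;
}

int main() {
    // mirrors the test case: subtract constant {1, 2, 3}, split lengths {2, 1}
    const auto chunks = splitDeqConstant({1.f, 2.f, 3.f}, {2, 1});
    for (const auto& chunk : chunks) {  // prints "1 2" and then "3"
        for (const float v : chunk) std::cout << v << ' ';
        std::cout << '\n';
    }
    return 0;
}

The same slicing applied to the multiply constant {11, 22, 33} yields {11, 22} and {33}, matching the expected dequantizations on the two VariadicSplit outputs in the test above.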