diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp index e381fd5d0a0..8ed8dfde55c 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp @@ -35,7 +35,6 @@ protected: ngraph::pass::low_precision::Subgraph& subgraph, std::function layer, - std::shared_ptr child, const std::string originalLayerName, std::vector& dequantizationsToConcatenate)> getLayerDequantizationCallback) const; @@ -43,15 +42,6 @@ protected: const TransformationContext& context, const std::vector>& quantizationOperations); - void fillDequantizationNodes( - const std::vector& layerDequantizations, - const std::shared_ptr layer, - NodeVector& convertNodes, - NodeVector& subtractNodes, - NodeVector& multiplyNodes) const; - - std::shared_ptr concatenateDeqNodes(NodeVector& nodes) const; - private: size_t getMinQuantizationLevels( const DataPrecision& dataPrecision, diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp index 48c0a0ef9ea..06515d0d72e 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp @@ -27,9 +27,12 @@ public: bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; private: - // Go through the parent elements of the layer and fill dequantization collection - // with Dq operations that should be inserted before the layer. 
void fillDequantization( + std::shared_ptr layer, + std::unordered_map& dequantizationByFakeQuantize, + std::vector& dequantizationsToConcatenate) const; + + void fillQuantization( const std::shared_ptr layer, const std::unordered_map& dequantizationByFakeQuantize, std::vector& dequantization) const; @@ -43,6 +46,8 @@ private: const FakeQuantizeDequantization& dequantization, const size_t sourceOutputIdx); + static FakeQuantizeDequantization broadcastDequantiationConstant(const FakeQuantizeDequantization& deq); + bool isMultiChannel(const std::vector>& concatLayers) const noexcept; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp index f9665f9a886..f113f749c68 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp @@ -50,12 +50,6 @@ public: template static std::shared_ptr setOutDataPrecision(std::shared_ptr operation, const element::Type& precision); - // applies constant folding of operation to constant and returns the specified output - static std::shared_ptr foldDequantizationConstant( - const std::shared_ptr& foldingConstant, - const std::shared_ptr& operation, - const size_t outIdx = 0); - static size_t getOutputChannelsCount(std::shared_ptr layer, bool isOnWeights = false); static std::vector> getParentsRecursivelyExceptTypes( diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp index 5a9fbc48ce7..c7a41cd25c7 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp @@ -24,6 +24,15 @@ public: TransformationContext& context, std::vector> lastNodes, std::shared_ptr originalNode) const; +protected: + ngraph::Shape getConstSplitShape( + const std::vector& constSplitLengths, + const ngraph::Shape& constShape, const size_t axis, + const size_t idx) const; + virtual std::vector getConstSplitLengths( + const OutputVector& inputs, + const ngraph::Shape& constShape, + const size_t outputSize) const; }; } // namespace low_precision } // namespace pass diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp index e7cab0c527c..c9fdf76998a 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp @@ -17,6 +17,11 @@ class TRANSFORMATIONS_API VariadicSplitTransformation : public SplitTransformati public: VariadicSplitTransformation(const Params& params); void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; +protected: + std::vector getConstSplitLengths( + const OutputVector& inputs, + const ngraph::Shape& constShape, + const size_t outputSize) const override; }; } // namespace low_precision } // namespace pass diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp b/inference-engine/src/low_precision_transformations/src/concat.cpp index 02de081ec03..20c7d4f2cc7 100644 --- 
a/inference-engine/src/low_precision_transformations/src/concat.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat.cpp @@ -201,7 +201,6 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat auto dequantizationValuesCallback = [&]( std::shared_ptr layer, - std::shared_ptr child, const std::string originalLayerName, std::vector& dequantizationsToConcatenate) { dequantizationsToConcatenate.push_back(dequantization); @@ -235,97 +234,15 @@ bool ConcatTransformation::isPrecisionPreserved(std::shared_ptr) const noe bool ConcatTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const { std::shared_ptr concat = as_type_ptr(layer); - if (concat == nullptr) { - return false; - } - - const auto axis = concat->get_axis(); - const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, concat->get_output_partial_shape(0).rank()); - return normalizedAxis == 1ul; + return concat && concat->get_axis() == 1ul; } -void ConcatTransformation::fillDequantizationNodes( - const std::vector& layerDequantizations, - const std::shared_ptr layer, - NodeVector& convertNodes, - NodeVector& subtractNodes, - NodeVector& multiplyNodes) const { - if (layerDequantizations.size() > 1ul) { - auto broadcastElementWiseConst = []( - // FakeQuantize constant shape must be broadcastable to the shape on data. - std::shared_ptr operation, - const ngraph::Shape targetShape) -> std::shared_ptr { - auto targetShapeConst = std::make_shared( - element::i64, ngraph::Shape{ targetShape.size() }, - targetShape); - - auto broadcast = ngraph::pass::low_precision::fold( - operation, - targetShapeConst, - ngraph::op::AutoBroadcastType::NUMPY); - - return broadcast; - }; - - bool allDequantizationShiftAreZero = true; - bool allDequantizationMultiplyAreZero = true; - for (const auto& dequantization : layerDequantizations) { - if (dequantization.subtract != nullptr) { - allDequantizationShiftAreZero = false; - } - if (dequantization.multiply != nullptr) { - allDequantizationMultiplyAreZero = false; - } - } - - for (size_t i = 0; i < layerDequantizations.size(); ++i) { - const auto& dequantization = layerDequantizations[i]; - const ngraph::element::Type precision = deqPrecision; - ngraph::Shape targetShape(layer->get_input_shape(i).size(), 1ul); - targetShape[1] = layer->get_input_shape(i)[1]; - - if (dequantization.convert != nullptr) { - convertNodes.push_back(dequantization.convert); - } - - if (!allDequantizationShiftAreZero) { - subtractNodes.push_back(dequantization.subtract == nullptr ? - std::make_shared(precision, targetShape, std::vector({ 0.f })) : - broadcastElementWiseConst(dequantization.subtractConstant, targetShape)); - } - - if (!allDequantizationMultiplyAreZero) { - multiplyNodes.push_back(dequantization.multiply == nullptr ? 
- std::make_shared(precision, targetShape, std::vector({ 1.0f })) : - broadcastElementWiseConst(dequantization.multiplyConstant, targetShape)); - } - } - } else { - // TODO: check constant shapes here - has to be scalar - if (layerDequantizations[0].convert != nullptr) { - convertNodes.push_back(layerDequantizations[0].convert); - } - - if (layerDequantizations[0].subtract != nullptr) { - subtractNodes.push_back(layerDequantizations[0].subtract->input_value(1).get_node_shared_ptr()); - } - - if (layerDequantizations[0].multiply != nullptr) { - multiplyNodes.push_back(layerDequantizations[0].multiply->input_value(1).get_node_shared_ptr()); - } - } -} - -std::shared_ptr ConcatTransformation::concatenateDeqNodes(NodeVector& nodes) const { - return nodes.size() == 1ul ? nodes[0] : fold(nodes, 1); -} void ConcatTransformation::addDequantizationLayers( TransformationContext& context, ngraph::pass::low_precision::Subgraph& subgraph, std::function layer, - std::shared_ptr child, const std::string originalLayerName, std::vector& dequantizationsToConcatenate)> getLayerDequantizationCallback) const { std::unordered_map outputs; @@ -352,28 +269,95 @@ void ConcatTransformation::addDequantizationLayers( ngraph::Node& child = *childInput.get_node(); if (subgraph.layers.find(child.get_friendly_name()) == subgraph.layers.end()) { - std::shared_ptr source = layer; - const std::shared_ptr destination = child.shared_from_this(); - if (layerDequantizations.size() == 0ul) { // fill layerDequantizations collection - getLayerDequantizationCallback(source, destination, source->get_friendly_name(), layerDequantizations); + getLayerDequantizationCallback(layer, layer->get_friendly_name(), layerDequantizations); } + std::shared_ptr source = layer->shared_from_this(); { - NodeVector convertNodes; - NodeVector subtractNodes; - NodeVector multiplyNodes; + std::vector> convertNodes; + std::vector> subtractNodes; + std::vector> multiplyNodes; // forming nodes for concatenation - fillDequantizationNodes(layerDequantizations, layer, convertNodes, subtractNodes, multiplyNodes); + if (layerDequantizations.size() > 1ul) { + auto broadcastElementWiseConst = []( + // FakeQuantize constant shape must be broadcastable to the shape on data. + std::shared_ptr operation, + const ngraph::Shape targetShape) -> std::shared_ptr { + auto targetShapeConst = std::make_shared( + element::i64, ngraph::Shape{ targetShape.size() }, + targetShape); + + auto broadcast = ngraph::pass::low_precision::fold( + operation, + targetShapeConst, + ngraph::op::AutoBroadcastType::NUMPY); + + return broadcast; + }; + + bool allDequantizationShiftAreZero = true; + bool allDequantizationMultiplyAreZero = true; + for (FakeQuantizeDequantization dequantization : layerDequantizations) { + if (dequantization.subtract != nullptr) { + allDequantizationShiftAreZero = false; + } + if (dequantization.multiply != nullptr) { + allDequantizationMultiplyAreZero = false; + } + } + + for (size_t i = 0; i < layerDequantizations.size(); ++i) { + const auto& dequantization = layerDequantizations[i]; + + if (dequantization.convert != nullptr) { + convertNodes.push_back(dequantization.convert); + } + + const ngraph::element::Type precision = deqPrecision; + ngraph::Shape targetShape(layer->get_input_shape(i).size(), 1ul); + targetShape[1] = layer->get_input_shape(i)[1]; + + if (!allDequantizationShiftAreZero) { + subtractNodes.push_back(dequantization.subtract == nullptr ? 
+ std::make_shared(precision, targetShape, std::vector({ 0.f })) : + broadcastElementWiseConst( + as_type_ptr(dequantization.subtract->input_value(1).get_node_shared_ptr()), + targetShape)); + } + + if (!allDequantizationMultiplyAreZero) { + multiplyNodes.push_back(dequantization.multiply == nullptr ? + std::make_shared(precision, targetShape, std::vector({ 1.0f })) : + broadcastElementWiseConst( + as_type_ptr(dequantization.multiply->input_value(1).get_node_shared_ptr()), + targetShape)); + } + } + } else { + // TODO: check constant shapes here - has to be scalar + if (layerDequantizations[0].convert != nullptr) { + convertNodes.push_back(layerDequantizations[0].convert); + } + + if (layerDequantizations[0].subtract != nullptr) { + subtractNodes.push_back(layerDequantizations[0].subtract->input_value(1).get_node_shared_ptr()); + } + + if (layerDequantizations[0].multiply != nullptr) { + multiplyNodes.push_back(layerDequantizations[0].multiply->input_value(1).get_node_shared_ptr()); + } + } // TODO: the second place (first is FQ decomposition) where dequantization operations are inserted + const std::shared_ptr destination = child.shared_from_this(); + if (!convertNodes.empty()) { const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination); std::shared_ptr convert = convertNodes[0]->clone_with_new_inputs({ destination->get_input_source_output(sourceOutputIdx) }); - insert_new_node_between(source, destination, convert); ngraph::copy_runtime_info({ layer, convert }, convert); source = convert; @@ -384,8 +368,9 @@ void ConcatTransformation::addDequantizationLayers( const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination); std::shared_ptr subtract = std::make_shared( destination->get_input_source_output(sourceOutputIdx), - NetworkHelper::toScalarIfPossible(concatenateDeqNodes(subtractNodes))); - + NetworkHelper::toScalarIfPossible(subtractNodes.size() == 1ul ? + subtractNodes[0] : + ngraph::pass::low_precision::fold(subtractNodes, 1))); insert_new_node_between(source, destination, subtract); ngraph::copy_runtime_info({ layer, subtract }, subtract); source = subtract; @@ -396,9 +381,10 @@ void ConcatTransformation::addDequantizationLayers( std::shared_ptr multiply = std::make_shared>( DequantizationMultiply( destination->get_input_source_output(sourceOutputIdx), - NetworkHelper::toScalarIfPossible(concatenateDeqNodes(multiplyNodes))), + NetworkHelper::toScalarIfPossible(multiplyNodes.size() == 1ul ? 
+ multiplyNodes[0] : + ngraph::pass::low_precision::fold(multiplyNodes, 1))), layerDequantizations[0].multiply->get_output_element_type(0)); - insert_new_node_between(source, destination, multiply); ngraph::copy_runtime_info({ layer, multiply }, multiply); source = multiply; diff --git a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp index 62d958d22b4..932e771c4ce 100644 --- a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp +++ b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp @@ -137,7 +137,6 @@ bool ConcatMultiChannelsTransformation::transform(TransformationContext& context auto dequantizationValuesCallback = [&]( std::shared_ptr layer, - std::shared_ptr child, const std::string originalLayerName, std::vector& dequantizationsToConcatenate) { if (layer->get_friendly_name() != originalLayerName) { @@ -158,15 +157,6 @@ bool ConcatMultiChannelsTransformation::transform(TransformationContext& context layer, dequantizations, dequantizationsToConcatenate); - - if (!is_type(layer)) { - // for intermediate layers we should get Dq operations to be inserted between layer and child - assert(dequantizationsToConcatenate.size() == 1ul); - const size_t sourceOutputIdx = NetworkHelper::getParentOutputIndex(layer, child); - if (layer->get_input_shape(0)[1] != layer->get_output_shape(sourceOutputIdx)[1]) { - dequantizationsToConcatenate[0] = getFoldedDequantization(layer, dequantizationsToConcatenate[0], sourceOutputIdx); - } - } }; addDequantizationLayers(context, subgraph, dequantizationValuesCallback); @@ -195,66 +185,137 @@ bool ConcatMultiChannelsTransformation::isPrecisionPreserved(std::shared_ptr layer, + std::unordered_map& dequantizationByFakeQuantize, + std::vector& dequantizationsToConcatenate) const { + std::shared_ptr currentFakeQuantize = ngraph::as_type_ptr(layer); + if (currentFakeQuantize) { + const auto it = dequantizationByFakeQuantize.find(currentFakeQuantize->get_friendly_name()); + if (it == dequantizationByFakeQuantize.end()) { + THROW_IE_LPT_EXCEPTION(*currentFakeQuantize) << "dequantization scale values are not found"; + } + const FakeQuantizeDequantization& fakeQuantizeDequantization = it->second; + dequantizationsToConcatenate.push_back(broadcastDequantiationConstant(fakeQuantizeDequantization)); + } else { + fillQuantization(layer, dequantizationByFakeQuantize, dequantizationsToConcatenate); + } +} + +void ConcatMultiChannelsTransformation::fillQuantization( const std::shared_ptr layer, const std::unordered_map& dequantizationByFakeQuantize, std::vector& dequantization) const { - const auto fillDqByFakeQuantize = [&](const std::shared_ptr& fq) { - const auto it = dequantizationByFakeQuantize.find(fq->get_friendly_name()); - if (it == dequantizationByFakeQuantize.end()) { - THROW_IE_LPT_EXCEPTION(*fq) << "dequantization scale values are not found"; - } + for (size_t i = 0; i < layer->get_input_size(); ++i) { + std::shared_ptr parent = layer->get_input_node_shared_ptr(i); - const FakeQuantizeDequantization& fakeQuantizeDequantization = it->second; - dequantization.push_back(fakeQuantizeDequantization); - }; - - if (is_type(layer)) { - fillDqByFakeQuantize(layer); - } else { - for (size_t i = 0; i < layer->get_input_size(); ++i) { - std::shared_ptr parent = layer->get_input_node_shared_ptr(i); - if (as_type_ptr(parent)) { - continue; + std::shared_ptr fakeQuantize = ngraph::as_type_ptr(parent); + if 
(fakeQuantize) { + const auto it = dequantizationByFakeQuantize.find(fakeQuantize->get_friendly_name()); + if (it == dequantizationByFakeQuantize.end()) { + THROW_IE_LPT_EXCEPTION(*fakeQuantize) << "dequantization scale values are not found"; } - const auto fakeQuantize = ngraph::as_type_ptr(parent); - if (fakeQuantize) { - fillDqByFakeQuantize(fakeQuantize); + const FakeQuantizeDequantization& fakeQuantizeDequantization = it->second; + dequantization.push_back(broadcastDequantiationConstant(fakeQuantizeDequantization)); + } else { + std::shared_ptr concat = ngraph::as_type_ptr(parent); + if (concat) { + std::vector dequantizationToConcatenate; + fillQuantization(concat, dequantizationByFakeQuantize, dequantizationToConcatenate); + + // add concatenated dequantization operations to dequantization collection + dequantization.push_back(getConcatenatedDequantization(concat, dequantizationToConcatenate)); } else { - const auto concat = ngraph::as_type_ptr(parent); - if (concat) { - std::vector dequantizationToConcatenate; - fillDequantization(concat, dequantizationByFakeQuantize, dequantizationToConcatenate); + std::shared_ptr stridedSlice = ngraph::as_type_ptr(parent); + if (stridedSlice) { + std::vector dequantizationToPropagate; + fillQuantization(stridedSlice, dequantizationByFakeQuantize, dequantizationToPropagate); - // add concatenated dequantization operations to dequantization collection - dequantization.push_back(getConcatenatedDequantization(concat, dequantizationToConcatenate)); - } else { const size_t sourceOutputIdx = NetworkHelper::getParentOutputIndex(parent, layer); - if (parent->get_input_shape(0)[1] != parent->get_output_shape(sourceOutputIdx)[1]) { - std::vector dequantizationToPropagate; - fillDequantization(parent, dequantizationByFakeQuantize, dequantizationToPropagate); - - // add folded dequantization operations to dequantization colection - dequantization.push_back(getFoldedDequantization(parent, dequantizationToPropagate[0], sourceOutputIdx)); - } else { - fillDequantization(parent, dequantizationByFakeQuantize, dequantization); - } + // add folded dequantization operations to dequantization colection + dequantization.push_back(getFoldedDequantization(stridedSlice, dequantizationToPropagate[0], sourceOutputIdx)); + } else { + fillQuantization(parent, dequantizationByFakeQuantize, dequantization); } } } } } +// broadcast of dequantization constants by channels +FakeQuantizeDequantization ConcatMultiChannelsTransformation::broadcastDequantiationConstant(const FakeQuantizeDequantization& deq) { + ngraph::Shape targetShape(deq.data.get_shape().size(), 1ul); + targetShape[1] = deq.data.get_shape()[1]; + + FakeQuantizeDequantization result; + result.data = deq.data; + result.convert = deq.convert; + + const auto targetShapeConst = std::make_shared( + element::i64, ngraph::Shape{ targetShape.size() }, + targetShape); + + if (deq.subtract) { + auto broadcast = ngraph::pass::low_precision::fold( + deq.subtractConstant, + targetShapeConst, + ngraph::op::AutoBroadcastType::NUMPY); + + result.subtract = deq.subtract; + result.subtractConstant = as_type_ptr(broadcast); + } + + if (deq.multiply) { + auto broadcast = ngraph::pass::low_precision::fold( + deq.multiplyConstant, + targetShapeConst, + ngraph::op::AutoBroadcastType::NUMPY); + + result.multiply = deq.multiply; + result.multiplyConstant = as_type_ptr(broadcast); + } + + return result; +} + FakeQuantizeDequantization ConcatMultiChannelsTransformation::getConcatenatedDequantization( const std::shared_ptr concat, const 
std::vector& dequantization) const { - NodeVector convertNodes; - NodeVector subtractNodes; - NodeVector multiplyNodes; + bool allDequantizationShiftAreZero = true; + bool allDequantizationMultiplyAreZero = true; + for (const auto& deq : dequantization) { + if (deq.subtract != nullptr) { + allDequantizationShiftAreZero = false; + } + if (deq.multiply != nullptr) { + allDequantizationMultiplyAreZero = false; + } + } - // forming nodes for concatenation - fillDequantizationNodes(dequantization, concat, convertNodes, subtractNodes, multiplyNodes); + NodeVector convertNodes; + NodeVector subNodes; + NodeVector mulNodes; + //preparing to concatenate dequantization nodes + for (const auto& deq : dequantization) { + ngraph::Shape targetShape(deq.data.get_shape().size(), 1ul); + targetShape[1] = deq.data.get_shape()[1]; + + if (deq.convert != nullptr) { + convertNodes.push_back(deq.convert); + } + if (!allDequantizationShiftAreZero) { + subNodes.push_back(deq.subtract == nullptr ? + std::make_shared(deqPrecision, targetShape, std::vector({ 0.f })) : + deq.subtractConstant); + } + if (!allDequantizationMultiplyAreZero) { + mulNodes.push_back(deq.multiply == nullptr ? + std::make_shared(deqPrecision, targetShape, std::vector({ 1.0f })) : + deq.multiplyConstant); + } + } std::shared_ptr parent = concat; std::shared_ptr convert; @@ -265,16 +326,20 @@ FakeQuantizeDequantization ConcatMultiChannelsTransformation::getConcatenatedDeq std::shared_ptr subtract; std::shared_ptr subConst; - if (!subtractNodes.empty()) { - subConst = as_type_ptr(concatenateDeqNodes(subtractNodes)); + if (!subNodes.empty()) { + subConst = as_type_ptr( + subNodes.size() == 1ul ? subNodes[0] : fold(subNodes, 1ul)); + subtract = std::make_shared(parent, subConst); parent = subtract; } std::shared_ptr multiply; std::shared_ptr mulConst; - if (!multiplyNodes.empty()) { - mulConst = as_type_ptr(concatenateDeqNodes(multiplyNodes)); + if (!mulNodes.empty()) { + mulConst = as_type_ptr( + mulNodes.size() == 1ul ? 
mulNodes[0] : fold(mulNodes, 1ul)); + multiply = std::make_shared(parent, mulConst); } @@ -287,19 +352,24 @@ FakeQuantizeDequantization ConcatMultiChannelsTransformation::getFoldedDequantiz const size_t sourceOutputIdx) { OutputVector inputs = operation->input_values(); OutputVector outputs(operation->get_output_size()); - Output data = operation->output(sourceOutputIdx); std::shared_ptr parent = operation; std::shared_ptr convert; if (dequantization.convert) { - convert = as_type_ptr(dequantization.convert->clone_with_new_inputs({ data })); + convert = as_type_ptr(dequantization.convert->clone_with_new_inputs({ parent })); parent = convert; } std::shared_ptr subtract; std::shared_ptr subConst; if (dequantization.subtract) { - subConst = NetworkHelper::foldDequantizationConstant(dequantization.subtractConstant, operation, sourceOutputIdx); + inputs[0] = dequantization.subtractConstant; + const auto op = operation->clone_with_new_inputs(inputs); + + // constant folding of subtract constant + op->constant_fold(outputs, inputs); + + subConst = as_type_ptr(outputs[sourceOutputIdx].get_node_shared_ptr()); subtract = std::make_shared(parent, subConst); parent = subtract; } @@ -307,11 +377,17 @@ FakeQuantizeDequantization ConcatMultiChannelsTransformation::getFoldedDequantiz std::shared_ptr multiply; std::shared_ptr mulConst; if (dequantization.multiply) { - mulConst = NetworkHelper::foldDequantizationConstant(dequantization.multiplyConstant, operation, sourceOutputIdx); + inputs[0] = dequantization.multiplyConstant; + const auto op = operation->clone_with_new_inputs(inputs); + + // constant folding of multiply constant + op->constant_fold(outputs, inputs); + + mulConst = as_type_ptr(outputs[sourceOutputIdx].get_node_shared_ptr()); multiply = std::make_shared(parent, mulConst); } - return FakeQuantizeDequantization(data, convert, subtract, nullptr, subConst, multiply, mulConst); + return FakeQuantizeDequantization(operation->output(sourceOutputIdx), convert, subtract, nullptr, subConst, multiply, mulConst); } } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp index 258c65f91d2..6afdaa070a3 100644 --- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp +++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp @@ -87,31 +87,6 @@ bool NetworkHelper::isConstantPath(const std::shared_ptr& op) { return true; } -std::shared_ptr NetworkHelper::foldDequantizationConstant( - const std::shared_ptr& foldingConstant, - const std::shared_ptr& operation, - const size_t outIdx) { - OutputVector inputs = operation->input_values(); - OutputVector outputs(operation->get_output_size()); - - if (shape_size(foldingConstant->get_shape()) == 1ul) { - return toScalar(foldingConstant); - } else { - inputs[0] = foldingConstant; - const auto op = operation->clone_with_new_inputs(inputs); - - // constant folding of constant - op->constant_fold(outputs, inputs); - - const auto result = as_type_ptr(outputs[outIdx].get_node_shared_ptr()); - if (result == nullptr) { - THROW_IE_LPT_EXCEPTION(*result) << "result of constant folding is not constant"; - } - - return result; - } -} - size_t NetworkHelper::getOutputChannelsCount(std::shared_ptr layer, bool isOnWeights) { if (layer->outputs().size() == 0) { THROW_TRANSFORMATION_EXCEPTION << "Layer " << layer->get_friendly_name() << " doesn't have output tensors"; diff --git 
a/inference-engine/src/low_precision_transformations/src/split.cpp b/inference-engine/src/low_precision_transformations/src/split.cpp index 5f0dbaebef0..a14867fc8e4 100644 --- a/inference-engine/src/low_precision_transformations/src/split.cpp +++ b/inference-engine/src/low_precision_transformations/src/split.cpp @@ -5,7 +5,6 @@ #include "low_precision/split.hpp" #include "ngraph/node.hpp" #include "low_precision/network_helper.hpp" -#include "low_precision/common/dequantization_op.hpp" namespace ngraph { namespace pass { @@ -23,68 +22,81 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt return false; } - const auto split = NetworkHelper::separateInStandaloneBranch(m.get_match_root()); - const auto dequantization = NetworkHelper::getDequantization(split); + const std::shared_ptr split = NetworkHelper::separateInStandaloneBranch(m.get_match_root()); + auto dequantization = NetworkHelper::getDequantization(split); - OutputVector inputs = split->input_values(); - inputs[0] = dequantization.data; + OutputVector inputs(split->get_input_size()); + for (size_t i = 0; i < split->get_input_size(); ++i) { + inputs[i] = split->get_input_node_shared_ptr(i); + } - const auto newSplit = split->clone_with_new_inputs(inputs); + const size_t dequantizationIndex = NetworkHelper::getChildInputIndex(dequantization.multiply, split); + inputs[dequantizationIndex] = dequantization.data; + + std::shared_ptr newSplit = split->clone_with_new_inputs(inputs); newSplit->set_friendly_name(split->get_friendly_name()); - ngraph::copy_runtime_info(split, newSplit); - const int64_t axis = as_type_ptr(split->get_input_node_shared_ptr(1))->cast_vector()[0]; - const size_t normalizedAxis = normalize_axis(split->get_friendly_name(), axis, split->get_input_partial_shape(0).rank()); - const size_t outputSize = newSplit->get_output_size(); + const ngraph::Shape subConstShape = dequantization.subtract ? + dequantization.subtract->get_input_node_shared_ptr(1)->get_shape() : Shape{}; + std::vector subValues = dequantization.subtract ? as_type_ptr( + dequantization.subtract->get_input_node_shared_ptr(1))->cast_vector() : std::vector(); - const auto splitConstant = [&](const std::shared_ptr operation) { - // if batch is absent in constant shape - add batch - const auto normalizedConstant = NetworkHelper::normalizeDequantizationShape(operation); - const auto constantShape = normalizedConstant->get_shape(); + const ngraph::Shape mulConstShape = dequantization.multiply->get_input_node_shared_ptr(1)->get_shape(); + std::vector mulValues = as_type_ptr( + dequantization.multiply->get_input_node_shared_ptr(1))->cast_vector(); - OutputVector results(outputSize); - if ((shape_size(constantShape) == 1ul) || (constantShape[normalizedAxis] == 1ul)) { - std::for_each(results.begin(), results.end(), [&](Output& elem) { elem = normalizedConstant->clone_with_new_inputs({}); }); - } else { - // prepare new inputs for constant folding - OutputVector inputs = newSplit->input_values(); - inputs[0] = normalizedConstant; - const auto foldSplit = newSplit->clone_with_new_inputs(inputs); + int64_t SplitedAxis = as_type_ptr(split->get_input_node_shared_ptr(1))->cast_vector()[0]; + size_t axis = SplitedAxis > 0 ? 
SplitedAxis : split->get_input_shape(0).size() + SplitedAxis; + size_t outputSize = newSplit->get_output_size(); - // fold and fill results - foldSplit->constant_fold(results, inputs); - } + const auto subSplitLengths = getConstSplitLengths(inputs, subConstShape, outputSize); + const auto mulSplitLengths = getConstSplitLengths(inputs, mulConstShape, outputSize); - for (auto& result : results) { - result = NetworkHelper::toScalarIfPossible(result.get_node_shared_ptr()); - } - - return results; - }; - - // get splited dequantization constants - OutputVector splitedSub = dequantization.subtract ? splitConstant(dequantization.subtract) : OutputVector{}; - OutputVector splitedMul = splitConstant(dequantization.multiply); - - NodeVector lastNodes; - OutputVector replacement; + std::vector> lastNodes(outputSize); + ngraph::OutputVector replacement; for (size_t i = 0; i < outputSize; ++i) { - Output parent = newSplit->output(i); + Output previous = newSplit->output(i); - if (dequantization.convert) { - const auto convert = dequantization.convert->clone_with_new_inputs({ newSplit->output(i) }); - copy_runtime_info({ newSplit, convert }, convert); - parent = convert; + if (dequantization.convert != nullptr) { + const std::shared_ptr convert = + dequantization.convert->clone_with_new_inputs({ newSplit->output(i) }); + previous = convert; } - if (dequantization.subtract) { - const auto subtract = std::make_shared(parent, splitedSub[i]); - copy_runtime_info({ newSplit, subtract }, subtract); - parent = subtract; + if (dequantization.subtract != nullptr) { + std::shared_ptr subConst; + if (!subSplitLengths.empty()) { + const auto newSubConstShape = getConstSplitShape(subSplitLengths, subConstShape, axis, i); + + std::vector newSubValues( + subValues.begin() + subSplitLengths[i], + subValues.begin() + subSplitLengths[i + 1]); + + subConst = as_type_ptr(std::make_shared( + dequantization.subtract->get_input_element_type(1), + newSubConstShape, + newSubValues)); + } else { + subConst = as_type_ptr(dequantization.subtract->get_input_node_shared_ptr(1)->clone_with_new_inputs({})); + } + const std::shared_ptr subtract = std::make_shared(previous, subConst); + previous = subtract; } - const auto multiply = std::make_shared(parent, splitedMul[i]); - copy_runtime_info({ newSplit, multiply }, multiply); + std::shared_ptr mulConst; + if (!mulSplitLengths.empty()) { + const auto newMulConstShape = getConstSplitShape(mulSplitLengths, mulConstShape, axis, i); + + std::vector newMulValues( + mulValues.begin() + mulSplitLengths[i], + mulValues.begin() + mulSplitLengths[i + 1]); + + mulConst = as_type_ptr(std::make_shared( + dequantization.multiply->get_input_element_type(1), newMulConstShape, newMulValues)); + } else { + mulConst = as_type_ptr(dequantization.multiply->get_input_node_shared_ptr(1)->clone_with_new_inputs({})); + } + const std::shared_ptr multiply = std::make_shared(previous, mulConst); lastNodes.push_back(multiply); replacement.push_back(multiply); @@ -95,6 +107,33 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt return true; } +std::vector SplitTransformation::getConstSplitLengths( + const OutputVector& inputs, + const ngraph::Shape& constShape, + const size_t outputSize) const { + int64_t axis = as_type_ptr(inputs[1].get_node_shared_ptr())->cast_vector()[0]; + size_t splitedAxis = axis > 0 ? 
axis : inputs[0].get_shape().size() + axis; + + if ((!constShape.empty()) && (constShape[splitedAxis] != 1)) { + std::vector result(outputSize + 1); + result[0] = 0; + for (size_t i = 1; i < result.size(); ++i) { + result[i] = result[i - 1] + constShape[splitedAxis] / outputSize; + } + return result; + } else { + return std::vector(); + } +} + +ngraph::Shape SplitTransformation::getConstSplitShape( + const std::vector& constSplitLengths, + const ngraph::Shape& constShape, const size_t axis, + const size_t idx) const { + Shape result(constShape); + result[axis] = constSplitLengths[idx + 1] - constSplitLengths[idx]; + return result; +} void SplitTransformation::updateOutputs( TransformationContext& context, diff --git a/inference-engine/src/low_precision_transformations/src/strided_slice.cpp b/inference-engine/src/low_precision_transformations/src/strided_slice.cpp index a269e392302..e7b7a796566 100644 --- a/inference-engine/src/low_precision_transformations/src/strided_slice.cpp +++ b/inference-engine/src/low_precision_transformations/src/strided_slice.cpp @@ -23,7 +23,7 @@ std::shared_ptr stridedSliceDeqConstant( //} const auto stridedSliceShape = strSlice->get_input_shape(0); - auto constantShape = constant->get_shape(); + const auto constantShape = constant->get_shape(); if (stridedSliceShape.size() != constantShape.size()) { ngraph::Shape newConstantShape; if (ngraph::shape_size(constantShape) == 1) { @@ -37,7 +37,6 @@ std::shared_ptr stridedSliceDeqConstant( newConstantShape.insert(newConstantShape.begin(), stridedSliceShape[0]); } } - constantShape = newConstantShape; const auto newConstant = fold( constant, @@ -46,24 +45,13 @@ std::shared_ptr stridedSliceDeqConstant( } const auto stridedSlice = as_type_ptr(strSlice); - - auto beginMask = stridedSlice->get_begin_mask(); - auto endMask = stridedSlice->get_end_mask(); - for (size_t i = 0; i < constantShape.size(); ++i) { - // don't slice constant if current dimension is 1 - if (constantShape[i] == 1ul) { - beginMask[i] = 1ul; - endMask[i] = 1ul; - } - } - const auto result = fold( constant, stridedSlice->get_input_node_shared_ptr(1), stridedSlice->get_input_node_shared_ptr(2), stridedSlice->get_input_node_shared_ptr(3), - beginMask, - endMask, + stridedSlice->get_begin_mask(), + stridedSlice->get_end_mask(), stridedSlice->get_new_axis_mask(), stridedSlice->get_shrink_axis_mask(), stridedSlice->get_ellipsis_mask()); diff --git a/inference-engine/src/low_precision_transformations/src/subgraph.cpp b/inference-engine/src/low_precision_transformations/src/subgraph.cpp index 7638fcb0714..c7f1caf56dd 100644 --- a/inference-engine/src/low_precision_transformations/src/subgraph.cpp +++ b/inference-engine/src/low_precision_transformations/src/subgraph.cpp @@ -22,15 +22,16 @@ namespace ngraph { namespace pass { namespace low_precision { -bool operationIsSupportedInConcat(const std::shared_ptr& node) { - // list of operations, which change channels, but supported in ConcatTransformation - if (ngraph::is_type(node) || - ngraph::is_type(node) || - ngraph::is_type(node)) { +bool isQuantizationPerChannel(const std::shared_ptr& node) { + if (node->outputs().size() > 1ul) { + return false; + } + + //WA to support StridedSlice in ConcatTransformation + if (ngraph::is_type(node)) { return true; } - // operations, which change channels, usually don't support in ConcatTransformation const auto inputs = node->input_values(); for (const auto& input : inputs) { if (ngraph::is_type(input.get_node())) { @@ -81,7 +82,7 @@ bool 
Subgraph::fillSubgraphForQuantization( if (fakeQuantizeChild != nullptr) { // } else { - if (layerTransformationsManager->isPrecisionPreserved(child) && operationIsSupportedInConcat(child)) { + if (layerTransformationsManager->isPrecisionPreserved(child) && isQuantizationPerChannel(child)) { if (!fillSubgraphForIntermediate(child, handledLayers)) { return false; } @@ -103,7 +104,7 @@ bool Subgraph::atLeastOneIsIntermediate(const std::shared_ptr& nod return true; } - if (!layerTransformationsManager->isPrecisionPreserved(child) || !operationIsSupportedInConcat(child)) { + if (!layerTransformationsManager->isPrecisionPreserved(child) || !isQuantizationPerChannel(child)) { // child branch is out of subgraph continue; } @@ -143,6 +144,10 @@ bool Subgraph::fill(const std::shared_ptr& layer, std::unordered_s return false; } } else { + // WA: issue #46906 + if (parent->get_output_size() != 1ul) { + return false; + } const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(parent, 0, true); const std::shared_ptr fakeQuantizeParent = dequantization.empty() ? ngraph::as_type_ptr(parent) : @@ -156,7 +161,7 @@ bool Subgraph::fill(const std::shared_ptr& layer, std::unordered_s if (constant != nullptr) { // } else { - if (layerTransformationsManager->isPrecisionPreserved(parent) && operationIsSupportedInConcat(parent)) { + if (layerTransformationsManager->isPrecisionPreserved(parent) && isQuantizationPerChannel(parent)) { if (!fillSubgraphForIntermediate(parent, handledLayers)) { return false; } @@ -192,7 +197,7 @@ bool Subgraph::fill(const std::shared_ptr& layer, std::unordered_s const std::shared_ptr fakeQuantizeChild = ngraph::as_type_ptr(child); if (fakeQuantizeChild != nullptr) { // - } else if (layerTransformationsManager->isPrecisionPreserved(child) && operationIsSupportedInConcat(child)) { + } else if (layerTransformationsManager->isPrecisionPreserved(child) && isQuantizationPerChannel(child)) { if (!fillSubgraphForIntermediate(child, handledLayers)) { return false; } @@ -216,13 +221,6 @@ bool Subgraph::empty() const { } bool Subgraph::fillSubgraphForConcat(const std::shared_ptr& concat, std::unordered_set& handledLayers) { - const auto axis = concat->get_axis(); - const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, concat->get_output_partial_shape(0).rank()); - // supported only per-channel concat - if (normalizedAxis != 1ul) { - return false; - } - concatLayers.push_back(concat); handledLayers.insert(concat->get_friendly_name()); layers.emplace(concat->get_friendly_name(), concat); diff --git a/inference-engine/src/low_precision_transformations/src/transformer.cpp b/inference-engine/src/low_precision_transformations/src/transformer.cpp index d66263bdf07..205cd77e930 100644 --- a/inference-engine/src/low_precision_transformations/src/transformer.cpp +++ b/inference-engine/src/low_precision_transformations/src/transformer.cpp @@ -229,11 +229,9 @@ LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const add(params). add(params). add(params). - add(params). add(params). add(params). add(params). - add(params). addCleanup(params). addCleanup(params). 
diff --git a/inference-engine/src/low_precision_transformations/src/variadic_split.cpp b/inference-engine/src/low_precision_transformations/src/variadic_split.cpp index 685219f2773..ccc8e72634d 100644 --- a/inference-engine/src/low_precision_transformations/src/variadic_split.cpp +++ b/inference-engine/src/low_precision_transformations/src/variadic_split.cpp @@ -20,6 +20,26 @@ void VariadicSplitTransformation::registerMatcherIn(GraphRewrite& pass, Transfor make_op_label() })); } +std::vector VariadicSplitTransformation::getConstSplitLengths( + const OutputVector& inputs, + const ngraph::Shape& constShape, + const size_t outputSize) const { + std::vector lengths = as_type_ptr(inputs[2].get_node_shared_ptr())->cast_vector(); + + int64_t axis = as_type_ptr(inputs[1].get_node_shared_ptr())->cast_vector()[0]; + size_t splitedAxis = axis > 0 ? axis : inputs[0].get_shape().size() + axis; + + if ((!constShape.empty()) && (constShape[splitedAxis] != 1)) { + std::vector result(outputSize + 1); + result[0] = 0; + for (size_t i = 1; i < result.size(); ++i) { + result[i] = result[i - 1] + lengths[i - 1]; + } + return result; + } else { + return std::vector(); + } +} } // namespace low_precision } // namespace pass } // namespace ngraph diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp index 148358f9409..4f58c1f3573 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp @@ -74,7 +74,6 @@ class ConcatTransformationTestValues { public: ngraph::pass::low_precision::LayerTransformation::Params params; bool multiChannels; - std::int64_t axis; ConcatTransformationActualValues actual; ConcatTransformationResultValues result; }; @@ -115,8 +114,7 @@ public: testValues.actual.convert2, testValues.actual.dequantization2, ngraph::element::undefined, - {}, - testValues.axis); + {}); SimpleLowPrecisionTransformer transform; if (testValues.multiChannels) { @@ -148,8 +146,7 @@ public: testValues.result.convert2, testValues.result.dequantization2, testValues.result.precisionAfterOperation, - testValues.result.dequantizationAfter, - testValues.axis); + testValues.result.dequantizationAfter); } static std::string getTestCaseName(testing::TestParamInfo obj) { @@ -161,7 +158,6 @@ public: result << LayerTransformation::getTestCaseNameByParams(precision, shape, testValues.params) << "_" << (testValues.multiChannels ? 
"multiChannels_" : "notMultiChannels_") << - "axis_" << testValues.axis << "_" << testValues.actual << "_" << testValues.result << "_"; return result.str(); @@ -184,7 +180,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, - 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -206,7 +201,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, - 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, { ngraph::element::u8 }, @@ -238,7 +232,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, - 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} }, { ngraph::element::u8 }, @@ -270,7 +263,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, - 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -298,7 +290,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, - 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -326,7 +317,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, - 1, { { 256ul, {{1}, {1}, {1}, {1}}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -350,7 +340,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, - 1, { { 256ul, {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -374,7 +363,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, - 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -398,7 +386,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, - 1, { { 256ul, {{1}, {1}, {1}, {1}}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -422,7 +409,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, - 1, { { 256ul, @@ -464,7 +450,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, - 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -492,7 +477,6 @@ const std::vector testValues = { { LayerTransformation::createParamsI8I8(), false, - 1, { { 256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, {}, @@ -516,7 +500,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, - 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -540,7 +523,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), true, - 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, @@ -564,7 +546,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, - 1, { { 256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f} }, {}, @@ -588,7 +569,6 @@ const std::vector testValues = { { LayerTransformation::createParamsU8I8(), false, - 1, { { 256ul, {}, {-1.28f}, {1.27f}, {0.f}, {2.3007815f} }, {}, @@ -608,61 +588,10 @@ const std::vector testValues = { { ngraph::element::f32, { 128 }, { 0.0302619f } } } }, - // U8: concat multi channels with subtract, negative axis - { - LayerTransformation::createParamsU8I8(), - true, - -3, - { - { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, - {}, - {}, - { 256ul, {}, {1.275f}, {2.55f}, {1.275f}, {2.55f} }, - {}, - {} - }, - { - { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, - {}, - {}, - { 256ul, {}, {1.275f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8 }, - {}, - {}, - ngraph::element::u8, - { - ngraph::element::f32, - {{ 0.f, 0.f, 0.f, -255.f, -255.f, -255.f }}, - {{ 0.01f, 0.01f, 0.01f, 0.005f, 0.005f, 0.005f }} - } - } - }, - // U8: concat 
multi channels with subtract, not supported axis - { - LayerTransformation::createParamsU8I8(), - true, - 0, - { - { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, - {}, - {}, - { 256ul, {}, {1.275f}, {2.55f}, {1.275f}, {2.55f} }, - {}, - {} - }, - { - { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, - {}, - {}, - { 256ul, {}, {1.275f}, {2.55f}, {1.275f}, {2.55f} }, - {}, - {} - }, - }, // not update precisions { LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), false, - 1, { { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} }, {}, diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_split_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_split_transformation.cpp index 76b137d7468..dbbe4b35f11 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_split_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_with_split_transformation.cpp @@ -217,40 +217,6 @@ const std::vector testValues = { { ngraph::element::f32, {}, { 0.005f } } } }, - // U8: concat multi channels with per-channel quantization - { - { 1, 6, 10, 10 }, - LayerTransformation::createParamsU8I8(), - true, - { - { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {2.55f / 2.f} }, - { - 256ul, - ngraph::Shape({ 1, 6, 1, 1 }), - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {255.f, 25.5f, 2.55f, 25.5f, 255.f, 2.55f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {255.f, 25.5f, 2.55f, 25.5f, 255.f, 2.55f} - } - }, - { - { 256ul, ngraph::Shape({}), {0.f}, {2.55f / 2.f}, {0.f}, {255.f}}, - { - 256ul, - ngraph::Shape({ 1, 6, 1, 1 }), - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {255.f, 25.5f, 2.55f, 25.5f, 255.f, 2.55f}, - {0.f}, - {255.f} - }, - ngraph::element::u8, - {{}, {}, {}}, - {{}, {}, {}}, - ngraph::element::u8, - { ngraph::element::f32, {}, {{ 0.005f, 0.005f, 0.005f, 1.f, 0.1f, 0.01f }} }, - { ngraph::element::f32, {}, {{ 0.1f, 1.f, 0.01f }} } - } - }, // I8: concat multi channels { { 1, 6, 10, 10 }, @@ -293,8 +259,9 @@ const std::vector testValues = { }, }; +// TODO: Split/VariadicSplit operations are not supported in ConcatTransformation INSTANTIATE_TEST_CASE_P( - smoke_LPT, + DISABLED_smoke_LPT, ConcatWithSplitTransformation, ::testing::Combine( ::testing::ValuesIn(precisions), diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/split_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/split_transformation.cpp index 9f04c18a580..bc4e5580b4b 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/split_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/split_transformation.cpp @@ -160,30 +160,21 @@ const std::vector testValues = { {}, ngraph::element::u8, { - {{ngraph::element::f32}, {1.f}, {11.f}}, - {{ngraph::element::f32}, {2.f}, {22.f}}, - {{ngraph::element::f32}, {3.f}, {33.f}}, - } - } - }, - // U8 per channel quantization with different values (constants without batch) - { - ngraph::Shape({ 1, 3, 16, 16 }), std::int64_t{-3}, size_t{3}, - LayerTransformation::createParamsU8I8(), - { - ngraph::element::u8, - {{ngraph::element::f32}, - {{1.f, 2.f, 3.f}, ngraph::element::f32, {3, 1, 1}}, - {{11.f, 22.f, 33.f}, ngraph::element::f32, {3, 1, 1}}} - }, - { - ngraph::element::u8, - {}, - ngraph::element::u8, - { - {{ngraph::element::f32}, {1.f}, {11.f}}, - {{ngraph::element::f32}, {2.f}, {22.f}}, - {{ngraph::element::f32}, 
{3.f}, {33.f}}, + { + {ngraph::element::f32}, + {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, + { + {ngraph::element::f32}, + {{2.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{22.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, + { + {ngraph::element::f32}, + {{3.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{33.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, } } }, @@ -202,9 +193,21 @@ const std::vector testValues = { {}, ngraph::element::i8, { - {{ngraph::element::f32}, {1.f}, {11.f}}, - {{ngraph::element::f32}, {2.f}, {22.f}}, - {{ngraph::element::f32}, {3.f}, {33.f}}, + { + {ngraph::element::f32}, + {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, + { + {ngraph::element::f32}, + {{2.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{22.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, + { + {ngraph::element::f32}, + {{3.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{33.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, } } }, @@ -223,9 +226,21 @@ const std::vector testValues = { {}, ngraph::element::u8, { - {{ngraph::element::f32}, {1.f}, {11.f}}, - {{ngraph::element::f32}, {1.f}, {11.f}}, - {{ngraph::element::f32}, {1.f}, {11.f}}, + { + {ngraph::element::f32}, + {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, + { + {ngraph::element::f32}, + {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, + { + {ngraph::element::f32}, + {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, } } }, @@ -244,9 +259,21 @@ const std::vector testValues = { {}, ngraph::element::i8, { - {{ngraph::element::f32}, {1.f}, {11.f}}, - {{ngraph::element::f32}, {1.f}, {11.f}}, - {{ngraph::element::f32}, {1.f}, {11.f}} + { + {ngraph::element::f32}, + {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, + { + {ngraph::element::f32}, + {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, + { + {ngraph::element::f32}, + {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, } } }, @@ -331,9 +358,21 @@ const std::vector testValues = { {}, ngraph::element::u8, { - {{ngraph::element::f32}, {}, {11.f}}, - {{ngraph::element::f32}, {}, {22.f}}, - {{ngraph::element::f32}, {}, {33.f}}, + { + {ngraph::element::f32}, + {}, + {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, + { + {ngraph::element::f32}, + {}, + {{22.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, + { + {ngraph::element::f32}, + {}, + {{33.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, } } }, @@ -352,9 +391,21 @@ const std::vector testValues = { {}, ngraph::element::i8, { - {{ngraph::element::f32}, {}, {11.f}}, - {{ngraph::element::f32}, {}, {22.f}}, - {{ngraph::element::f32}, {}, {33.f}}, + { + {ngraph::element::f32}, + {}, + {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, + { + {ngraph::element::f32}, + {}, + {{22.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, + { + {ngraph::element::f32}, + {}, + {{33.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, } } }, diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/strided_slice_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/strided_slice_transformation.cpp index 8b16ce99d75..8f570484c81 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/strided_slice_transformation.cpp +++ 
b/inference-engine/tests/functional/inference_engine/lp_transformations/strided_slice_transformation.cpp @@ -150,17 +150,6 @@ StridedSliceTransformationTestValues::LayerParams specialDimensionSlice = { {} }; -StridedSliceTransformationTestValues::LayerParams specialDimensionEndSlice = { - { 0, 0, 20, 0 }, - { 1, 3, 24, 24 }, - { 1, 1, 1, 1 }, - { 1, 1, 0, 1 }, - { 1, 1, 0, 1 }, - {}, - {}, - {} -}; - const std::vector stridedSliceTransformationTestValues = { // U8: channel slice, per-tensor quantization { @@ -322,38 +311,6 @@ const std::vector stridedSliceTransformati {{ngraph::element::f32}, {{ 32.f, 64.f, 32.f }}, {{ 0.1f, 0.01f, 1.f }}} } }, - // I8: special dimension end slice, per-channel quantization with different values - { - ngraph::Shape{1, 3, 24, 24}, - LayerTransformation::createParamsI8I8(), - specialDimensionEndSlice, - { - ngraph::element::i8, - {{ngraph::element::f32}, {{ 32.f, 64.f, 32.f }}, {{ 0.1f, 0.01f, 1.f }}} - }, - { - ngraph::element::i8, - {}, - ngraph::element::i8, - {{ngraph::element::f32}, {{ 32.f, 64.f, 32.f }}, {{ 0.1f, 0.01f, 1.f }}} - } - }, - // I8: special dimension end slice, per-tensor quantization with different values - { - ngraph::Shape{1, 3, 24, 24}, - LayerTransformation::createParamsI8I8(), - specialDimensionEndSlice, - { - ngraph::element::i8, - {{ngraph::element::f32}, { 32.f }, { 0.1f }} - }, - { - ngraph::element::i8, - {}, - ngraph::element::i8, - {{ngraph::element::f32}, { 32.f }, { 0.1f }} - } - }, // I8: channel slice, quantization by special dimension { ngraph::Shape{1, 3, 4, 4}, diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/variadic_split_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/variadic_split_transformation.cpp index 990de4d98d7..d77e80b21d7 100644 --- a/inference-engine/tests/functional/inference_engine/lp_transformations/variadic_split_transformation.cpp +++ b/inference-engine/tests/functional/inference_engine/lp_transformations/variadic_split_transformation.cpp @@ -177,31 +177,11 @@ const std::vector testValues = { {{1.f, 2.f}, ngraph::element::f32, {1, 2, 1, 1}}, {{11.f, 22.f}, ngraph::element::f32, {1, 2, 1, 1}} }, - {{ngraph::element::f32}, {3.f}, {33.f}} - } - } - }, - // U8 per channel quantization with different values (constants without batch) - { - ngraph::Shape({ 1, 3, 16, 16 }), std::int64_t{ -3 }, std::vector{ 2, 1 }, - LayerTransformation::createParamsU8I8(), - { - ngraph::element::u8, - {{ngraph::element::f32}, - {{1.f, 2.f, 3.f}, ngraph::element::f32, {3, 1, 1}}, - {{11.f, 22.f, 33.f}, ngraph::element::f32, {3, 1, 1}}} - }, - { - ngraph::element::u8, - {}, - ngraph::element::u8, - { { {ngraph::element::f32}, - {{1.f, 2.f}, ngraph::element::f32, {1, 2, 1, 1}}, - {{11.f, 22.f}, ngraph::element::f32, {1, 2, 1, 1}} - }, - {{ngraph::element::f32}, {3.f}, {33.f}} + {{3.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{33.f}, ngraph::element::f32, {1, 1, 1, 1}} + } } } }, @@ -225,7 +205,11 @@ const std::vector testValues = { {{1.f, 2.f}, ngraph::element::f32, {1, 2, 1, 1}}, {{11.f, 22.f}, ngraph::element::f32, {1, 2, 1, 1}} }, - {{ngraph::element::f32}, {3.f}, {33.f}} + { + {ngraph::element::f32}, + {{3.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{33.f}, ngraph::element::f32, {1, 1, 1, 1}} + } } } }, @@ -244,8 +228,16 @@ const std::vector testValues = { {}, ngraph::element::u8, { - {{ngraph::element::f32}, {1.f}, {11.f}}, - {{ngraph::element::f32}, {1.f}, {11.f}} + { + {ngraph::element::f32}, + {{1.f, 1.f}, ngraph::element::f32, {1, 2, 1, 1}}, + 
{{11.f, 11.f}, ngraph::element::f32, {1, 2, 1, 1}} + }, + { + {ngraph::element::f32}, + {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} + } } } }, @@ -264,8 +256,16 @@ const std::vector testValues = { {}, ngraph::element::i8, { - {{ngraph::element::f32}, {1.f}, {11.f}}, - {{ngraph::element::f32}, {1.f}, {11.f}} + { + {ngraph::element::f32}, + {{1.f, 1.f}, ngraph::element::f32, {1, 2, 1, 1}}, + {{11.f, 11.f}, ngraph::element::f32, {1, 2, 1, 1}} + }, + { + {ngraph::element::f32}, + {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} + } } } }, @@ -322,13 +322,21 @@ const std::vector testValues = { {}, ngraph::element::i8, { - {{ngraph::element::f32}, {1.f}, {11.f}}, + { + {ngraph::element::f32}, + {{1.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{11.f}, ngraph::element::f32, {1, 1, 1, 1}} + }, { {ngraph::element::f32}, {{2.f, 3.f}, ngraph::element::f32, {1, 2, 1, 1}}, {{22.f, 33.f}, ngraph::element::f32, {1, 2, 1, 1}} }, - {{ngraph::element::f32}, {4.f}, {44.f}} + { + {ngraph::element::f32}, + {{4.f}, ngraph::element::f32, {1, 1, 1, 1}}, + {{44.f}, ngraph::element::f32, {1, 1, 1, 1}} + } } } }, diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp index 4c9d43c124f..f035f132e01 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp @@ -45,7 +45,8 @@ const std::vector testValues = { } }; -INSTANTIATE_TEST_CASE_P(smoke_LPT, ConcatWithSplitTransformation, +// TODO: Split/VariadicSplit operations are not supported in ConcatTransformation +INSTANTIATE_TEST_CASE_P(DISABLED_smoke_LPT, ConcatWithSplitTransformation, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(ngraph::Shape({ 1, 6, 10, 10 })), diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp index cf789286f7f..c2e16a810cd 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp @@ -45,7 +45,8 @@ const std::vector testValues = { } }; -INSTANTIATE_TEST_CASE_P(smoke_LPT, ConcatWithSplitTransformation, +// TODO: Split/VariadicSplit operations are not supported in ConcatTransformation +INSTANTIATE_TEST_CASE_P(DISABLED_smoke_LPT, ConcatWithSplitTransformation, ::testing::Combine( ::testing::ValuesIn(netPrecisions), ::testing::Values(ngraph::Shape({ 1, 6, 10, 10 })), diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp index 95da3db91ef..e69fd113be5 100644 --- 
a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/concat_function.hpp @@ -114,8 +114,7 @@ public: const DequantizationOperations::Convert& convert2, const DequantizationOperations& dequantization2, const ngraph::element::Type precisionAfterOperation, - const DequantizationOperations& dequantizationAfter, - const std::int64_t& axis); + const DequantizationOperations& dequantizationAfter); static std::shared_ptr getReferenceWithNeighbors( const ngraph::element::Type precision, diff --git a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp index 8b251a4d9be..2d9bb24453a 100644 --- a/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp +++ b/inference-engine/tests/ngraph_helpers/lpt_ngraph_functions/src/concat_function.cpp @@ -752,8 +752,7 @@ std::shared_ptr ConcatFunction::get( const DequantizationOperations::Convert& convert2, const DequantizationOperations& dequantization2, const ngraph::element::Type precisionAfterOperation, - const DequantizationOperations& dequantizationAfter, - const std::int64_t& axis) { + const DequantizationOperations& dequantizationAfter) { const auto input1 = std::make_shared(inputPrecision, inputShape); input1->set_friendly_name("input1"); @@ -776,7 +775,7 @@ std::shared_ptr ConcatFunction::get( parent2 = makeDequantization(parent2, dequantization2); } - const std::shared_ptr concat = std::make_shared(ngraph::OutputVector{ parent1, parent2 }, axis); + const std::shared_ptr concat = std::make_shared(ngraph::OutputVector{ parent1, parent2 }, 1); auto& rtInfo = concat->get_rt_info(); rtInfo["Variant::std::string"] = std::make_shared>("concat"); @@ -990,13 +989,6 @@ std::shared_ptr ConcatFunction::getReferenceWithSplitedInterme input2->set_friendly_name("input2"); const auto fakeQuantize2 = makeFakeQuantizeTypeRelaxed(input2, precision, fqOnData2); - replace_node( - fakeQuantize2->get_input_node_shared_ptr(3), - ngraph::pass::low_precision::NetworkHelper::toScalarIfPossible(fakeQuantize2->get_input_node_shared_ptr(3))); - replace_node( - fakeQuantize2->get_input_node_shared_ptr(4), - ngraph::pass::low_precision::NetworkHelper::toScalarIfPossible(fakeQuantize2->get_input_node_shared_ptr(4))); - fakeQuantize2->set_friendly_name("fakeQuantize2"); low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(fakeQuantize2, precisionAfterOperation); const auto deqBefore2 = makeDequantization(fakeQuantize2, dequantizationBefore1);
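
// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the index arithmetic behind the
// new getConstSplitLengths/getConstSplitShape helpers added to
// SplitTransformation above. It models only the simple per-channel case the
// patch targets (dequantization constant of shape {1, C, 1, 1}, split axis 1,
// C divisible by the number of Split outputs); VariadicSplit would take the
// split lengths from its third input instead of dividing evenly. All names
// below (constSplitLengths, constSplitShape, main) are local to this example
// and are not OpenVINO/ngraph APIs.
#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

// Cumulative channel boundaries {0, C/n, 2*C/n, ..., C} used to cut the
// flattened constant values into one slice per Split output.
std::vector<size_t> constSplitLengths(size_t channels, size_t outputs) {
    assert(channels % outputs == 0);
    std::vector<size_t> lengths(outputs + 1, 0);
    for (size_t i = 1; i < lengths.size(); ++i) {
        lengths[i] = lengths[i - 1] + channels / outputs;
    }
    return lengths;
}

// Shape of the i-th constant slice: identical to the original constant shape
// except along the split axis.
std::vector<size_t> constSplitShape(const std::vector<size_t>& constShape,
                                    const std::vector<size_t>& lengths,
                                    size_t axis, size_t idx) {
    std::vector<size_t> result(constShape);
    result[axis] = lengths[idx + 1] - lengths[idx];
    return result;
}

int main() {
    // Hypothetical per-channel multiply constant for 6 channels, split into
    // 3 outputs along axis 1 (comparable to the {1, 6, 10, 10} test shapes
    // used in the functional tests above).
    const std::vector<float> mulValues = {0.01f, 0.02f, 0.03f, 0.1f, 0.2f, 0.3f};
    const std::vector<size_t> constShape = {1, 6, 1, 1};
    const size_t axis = 1;
    const size_t outputs = 3;

    const auto lengths = constSplitLengths(constShape[axis], outputs);
    for (size_t i = 0; i < outputs; ++i) {
        const auto shape = constSplitShape(constShape, lengths, axis, i);
        const std::vector<float> slice(mulValues.begin() + lengths[i],
                                       mulValues.begin() + lengths[i + 1]);
        std::cout << "output " << i << ": constant shape {1, " << shape[axis]
                  << ", 1, 1}, values:";
        for (float v : slice) {
            std::cout << ' ' << v;
        }
        std::cout << '\n';
    }
    return 0;
}
// In the patch itself these slices become the constants of the
// DequantizationSubtract/DequantizationMultiply nodes attached to each Split
// output; this sketch only reproduces the boundary and shape computation.
// ---------------------------------------------------------------------------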