From 3f6a026ae98c2f1b1ae61e88c7139db51e8328e4 Mon Sep 17 00:00:00 2001 From: Egor Duplensky Date: Tue, 14 Dec 2021 19:44:38 +0300 Subject: [PATCH] [CPU] Enable matmul deconv bin postops (#8009) --- .../mkldnn_plugin/mkldnn_graph_optimizer.cpp | 45 ++-- .../mkldnn_plugin/mkldnn_graph_optimizer.h | 2 +- .../src/mkldnn_plugin/mkldnn_node.cpp | 35 ++- .../src/mkldnn_plugin/mkldnn_node.h | 14 +- .../convert_matmul_to_fc.cpp | 6 +- .../convert_to_cpu_specific_opset.hpp | 2 - .../reshape_fully_connected.cpp | 114 ---------- .../reshape_fully_connected.hpp | 25 --- .../mkldnn_plugin/nodes/mkldnn_conv_node.cpp | 69 +++--- .../mkldnn_plugin/nodes/mkldnn_conv_node.h | 3 +- .../nodes/mkldnn_deconv_node.cpp | 23 +- .../nodes/mkldnn_eltwise_node.cpp | 209 +++++++++++------- .../mkldnn_plugin/nodes/mkldnn_eltwise_node.h | 16 +- .../nodes/mkldnn_fake_quantize_node.cpp | 148 +++++++------ .../nodes/mkldnn_fake_quantize_node.h | 18 +- .../nodes/mkldnn_fullyconnected_node.cpp | 56 ++--- .../nodes/mkldnn_fullyconnected_node.h | 9 +- .../nodes/mkldnn_matmul_node.cpp | 135 ++++++++--- .../mkldnn_plugin/nodes/mkldnn_matmul_node.h | 14 +- .../nodes/mkldnn_pooling_node.cpp | 2 +- .../mkldnn_plugin/nodes/mkldnn_pooling_node.h | 2 +- .../src/mkldnn_plugin/utils/cpu_utils.hpp | 13 +- .../mat_mul_with_constant_transformation.cpp | 4 +- .../single_layer_tests/mat_mul.cpp | 3 +- .../plugin/cpu/single_layer_tests/mat_mul.cpp | 174 ++++++++++++--- .../cpu/subgraph_tests/src/reshape_fc.cpp | 101 --------- .../cpu/test_utils/fusing_test_utils.hpp | 103 ++++----- .../src/single_layer/mat_mul.cpp | 2 +- .../convert_matmul_test.cpp | 78 +------ 29 files changed, 714 insertions(+), 711 deletions(-) delete mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.hpp delete mode 100644 inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/reshape_fc.cpp diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index 1261df4b559..b789d1c01ff 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -59,7 +59,7 @@ MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {} void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "ApplyCommonGraphOptimizations", "FuseConvolutionAndBias"); - FuseConvolutionAndBias(graph); + FuseConvolutionMatMulAndBias(graph); graph.RemoveDroppedNodes(); OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMultiplyAndAdd"); @@ -166,37 +166,38 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap graph.RemoveDroppedEdges(); } -void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { +void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == Convolution && + auto isSuitableParentNode = [](const MKLDNNNodePtr& node) { + return (node->getType() == Convolution || node->getType() == MatMul) && node->getChildEdges().size() == 1 && node->getParentEdges().size() == 2 && node->getFusedWith().empty(); }; - auto isSuitableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { + auto isSuitableChildNode = [&](const 
MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) {
         if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2)
             return false;
 
-        auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent();
+        const auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent();
         if (biasNode->getType() != Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1)
             return false;
 
-        auto convOutDims = parentNode->getOutputShapeAtPort(0).getDims();
-        auto biasDims = getNormalizedDimsBySize(biasNode->getOutputShapeAtPort(0).getDims(),
-                                                convOutDims.size());
+        const auto parentOutDims = parentNode->getOutputShapeAtPort(0).getDims();
+        const auto biasDims = getNormalizedDimsBySize(biasNode->getOutputShapeAtPort(0).getDims(),
+                                                parentOutDims.size());
         // TODO [NM]: Legacy ConvBias fusion transformation supports both per-tensor (via explicit broadcasting) and per-channel cases.
         // Most of the real models contain per-channel bias, so we need to re-evaluate the need to support the per-tensor variant.
-        if (convOutDims.size() != biasDims.size() || biasDims.size() < 2)
+        if (parentOutDims.size() != biasDims.size() || biasDims.size() < 2)
             return false;
 
-        if (biasDims[0] != 1 || !dimsEqualStrong(biasDims[1], convOutDims[1]))
+        const auto channelAxis = parentNode->getFusingAxis();
+        if (!dimsEqualStrong(biasDims[channelAxis], parentOutDims[channelAxis]))
             return false;
 
-        for (int i = 2; i < biasDims.size(); i++) {
-            if (biasDims[i] != 1)
+        for (int i = 0; i < biasDims.size(); i++) {
+            if (biasDims[i] != 1 && i != channelAxis)
                 return false;
         }
 
@@ -262,13 +263,13 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) {
                 graph.RemoveEdge(remEdge);
             }
 
-            auto parentEltwise = parentNode;
+            const auto& parentEltwise = parentNode;
             MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size()));
-            auto &graphEdges = graph.GetEdges();
+            auto& graphEdges = graph.GetEdges();
             graphEdges.push_back(newEdge);
             parent->addEdge(newEdge);
 
-            auto partialShape = { parentEltwise->outputShapes[0].toPartialShape()[1] };
+            auto partialShape = { parentEltwise->outputShapes[0].toPartialShape()[parentEltwise->getFusingAxis()] };
             parent->outputShapes[inNum] = Shape(partialShape);
             parentEltwise->inputShapes.push_back(parent->outputShapes[0]);
         }
@@ -627,7 +628,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
     }
 }
 
-static bool BF16QuantizeNodeFusing(MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
+/**
+ * @todo FQ fusing was disabled for BF16 output since oneDNN primitives lack support
+ * for bf16 depthwise postops.
+ * This is no longer the case: after the migration to oneDNN 2.3, FQ is fused as
+ * multiple binary post ops.
+ * This check can already be removed for FC fusing, but should be kept for Convolution,
+ * which still uses legacy depthwise postops for performance reasons.
+ */ +static bool BF16QuantizeNodeFusing(const MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) { return childNode->getType() == FakeQuantize && one_of(Precision::BF16, parentNode->getOriginalOutputPrecisionAtPort(0), @@ -638,7 +647,7 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra auto& graphNodes = graph.GetNodes(); auto isSuitableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getInputShapeAtPort(0).getRank() != 3; + return node->getType() == FullyConnected && node->getChildEdges().size() == 1; }; auto parent = graphNodes.begin(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h index 0b896da6272..0be66e5ba08 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h @@ -19,7 +19,7 @@ public: void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph); private: - void FuseConvolutionAndBias(MKLDNNGraph &graph); + void FuseConvolutionMatMulAndBias(MKLDNNGraph &graph); void FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph); void FuseMultiplyAndAdd(MKLDNNGraph &graph); void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index 55703b16384..44ea9e933d2 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -4,6 +4,7 @@ #include "mkldnn_node.h" #include "dnnl_debug.h" +#include "mkldnn_edge.h" #include "mkldnn_extension_mngr.h" #include "mkldnn_itt.h" @@ -1048,6 +1049,16 @@ void MKLDNNNode::setDynamicBatchLim(int lim) { } } +void MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr) { + auto post_ops = attr.get_post_ops(); + int idx = 0; + for (int i = 0; i < post_ops.len(); i++) { + if (post_ops.kind(i) == mkldnn::primitive::kind::binary) { + primArgs.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, binaryPostOpsArgs[idx++]->GetPrimitive()}); + } + } +} + bool MKLDNNNode::isFusedWith(Type fusedNodeType) const { for (auto fusedNode : fusedWith) { if (fusedNode->type == fusedNodeType) @@ -1078,10 +1089,14 @@ Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) { } } -void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) { +void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) { IE_THROW() << "Fusing of " << this->getType() << " operation is not implemented"; } +void MKLDNNNode::appendBinPostOps(mkldnn::post_ops& ops, const std::vector& binaryShape, std::vector& binaryPostOpsMem) { + IE_THROW() << "Binary fusing of " << this->getType() << " operation is not implemented"; +} + std::vector MKLDNNNode::getInputPrecisions() const { std::vector inputPrecisions; for (size_t i = 0; i < getParentEdges().size(); i++) { @@ -1205,6 +1220,9 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const { size_t fusingPort = 0; + // @todo graph optimizer can provide parentNode as nullptr. Should be avoided + const size_t channelAxis = parentNode ? parentNode->getFusingAxis() : MKLDNNNode::getFusingAxis(); + for (size_t i = (parentNode == nullptr ? 
1 : 0); i < getParentEdges().size(); i++) {
         MKLDNNNode *node = getParentEdgesAtPort(i)[0]->getParent().get();
         if (node == nullptr) {
@@ -1225,7 +1243,8 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
         if (i == fusingPort)
             continue;
         auto& weightShape = getInputShapeAtPort(i).getDims();
-        if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, true))
+        if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 ||
+            !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, channelAxis, true))
             return false;
     }
     return true;
@@ -1246,6 +1265,9 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
            || isConvertablePowerStatic();
 }
 
+// @todo shifts for Subtract and scales for Divide are replaced with
+// Add (with opposite sign) and Multiply (with inverse value) for legacy depthwise post ops
+// This can be avoided after depthwise post ops are gone
 std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts(const MKLDNNNode *parentNode) const {
     std::vector<float> scales, shifts;
@@ -1408,10 +1430,11 @@ bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const {
         }
         return ret;
     } else if (node->getType() == Eltwise) {
-        return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
-                      EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
-                      EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) ||
-               node->canBePerformedAsScaleShift(this);
+        return one_of(node->getAlgorithm(),
+                      EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
+                      EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
+                      EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) ||
+               node->canBePerformedAsScaleShift(this);
     }
     return false;
 }
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
index 3e0448f0db6..aee4f876806 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
@@ -204,6 +204,12 @@ public:
 
     bool isConstant();
 
+    virtual size_t getFusingAxis() const {
+        return 1;
+    }
+
+    void appendPostOpArgs(const mkldnn::primitive_attr& attr);
+
     bool isFusedWith(Type type) const;
 
     void addFusedNode(const MKLDNNNodePtr &fusingNode) {
@@ -594,8 +600,10 @@ protected:
      * Seed node should call this routine and pass its post operations list as parameter.
* @param ops List of fused post operations */ - virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false); - virtual AttrPtr initPrimitiveAttr() const { return nullptr; } + virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, int align = -1); + virtual void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector& binaryPostOpsMem); + + virtual std::shared_ptr initPrimitiveAttr() { return nullptr; } typedef std::function GetPrimitiveMemoryFormatFunc; @@ -636,7 +644,7 @@ protected: std::vector internalBlobMemory; std::vector supportedPrimitiveDescriptors; std::unordered_map primArgs; - std::vector binaryPostOpsArgs; + std::vector binaryPostOpsArgs; MKLDNNPrimitive prim; std::vector descs; diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc.cpp index d43953d46c6..b3ff0ef1d9e 100644 --- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc.cpp +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_matmul_to_fc.cpp @@ -36,8 +36,9 @@ MKLDNNPlugin::ConvertMatMulToFC::ConvertMatMulToFC() { auto rank_a = shape_a.rank().get_length(); auto rank_b = shape_b.rank().get_length(); - // Transformation to FC is not supported for 1D second input - if (rank_b == 1) { + // Transformation to FC is not supported for 1D inputs + if (rank_a == 1 || rank_b == 1 || + rank_a > 3 || rank_b > 3) { return false; } @@ -47,7 +48,6 @@ MKLDNNPlugin::ConvertMatMulToFC::ConvertMatMulToFC() { std::count_if(shape_b.begin(), shape_b.end(), [](ngraph::Dimension x) { return x != 1; }) > 2) { return false; } - /* * get_aligned_shapes function align two input shapes to have the same size and * the same batch dimensions (last two dimensions are not comparable). 
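For context, the following standalone sketch (not part of the patch) shows the oneDNN 2.x binary post-op mechanism that the new appendBinPostOps()/appendPostOpArgs() helpers build on, written against the plain dnnl C++ API (the plugin's mkldnn namespace is an alias of it). The matmul sizes and the single fused per-channel Add are illustrative assumptions only: the post-op tensor is described with a broadcastable per-channel shape when the primitive is created, and its backing memory is supplied at execution time under DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, which is what appendPostOpArgs() does for every binary entry in the attr.

#include <unordered_map>
#include "dnnl.hpp"

int main() {
    dnnl::engine eng(dnnl::engine::kind::cpu, 0);
    dnnl::stream strm(eng);

    const dnnl::memory::dim M = 4, K = 8, N = 16;
    dnnl::memory::desc src_md({M, K}, dnnl::memory::data_type::f32, dnnl::memory::format_tag::ab);
    dnnl::memory::desc wei_md({K, N}, dnnl::memory::data_type::f32, dnnl::memory::format_tag::ab);
    dnnl::memory::desc dst_md({M, N}, dnnl::memory::data_type::f32, dnnl::memory::format_tag::ab);
    // Per-channel shape: every dim is 1 except the fusing axis (the last dim of a 2D MatMul output).
    dnnl::memory::desc shift_md({1, N}, dnnl::memory::data_type::f32, dnnl::memory::format_tag::ab);

    // Describe the fused Add at primitive creation time, as appendBinPostOps() does.
    dnnl::post_ops ops;
    ops.append_binary(dnnl::algorithm::binary_add, shift_md);
    dnnl::primitive_attr attr;
    attr.set_post_ops(ops);

    dnnl::matmul::desc op_desc(src_md, wei_md, dst_md);
    dnnl::matmul::primitive_desc pd(op_desc, attr, eng);
    dnnl::matmul prim(pd);

    dnnl::memory src_mem(src_md, eng), wei_mem(wei_md, eng), dst_mem(dst_md, eng);
    dnnl::memory shift_mem(shift_md, eng);  // per-channel shifts (left uninitialized in this sketch)

    // Pass the post-op tensor at execution time; its index must match the
    // post-op position inside the attr (0 here), mirroring appendPostOpArgs().
    prim.execute(strm, {{DNNL_ARG_SRC, src_mem},
                        {DNNL_ARG_WEIGHTS, wei_mem},
                        {DNNL_ARG_DST, dst_mem},
                        {DNNL_ARG_ATTR_MULTIPLE_POST_OP(0) | DNNL_ARG_SRC_1, shift_mem}});
    strm.wait();
    return 0;
}

Legacy depthwise and quantization post ops keep their data in flat float buffers instead of dedicated memory objects; the patch retains them for Convolution and Deconvolution, where they are still faster, and switches FullyConnected and MatMul to the binary form shown above.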
diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp index ff901fbafc0..078fb75c14d 100644 --- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp @@ -7,7 +7,6 @@ #include "ngraph/op/fake_quantize.hpp" #include "ngraph/pass/manager.hpp" #include "reshape_fc_fusion.hpp" -#include "reshape_fully_connected.hpp" #include "align_matmul_input_ranks.hpp" #include "reshape_prelu.hpp" #include "convert_broadcast_to_tiles.hpp" @@ -29,7 +28,6 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr &nGraphF manager.register_pass(); manager.register_pass(); manager.register_pass(); - manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp deleted file mode 100644 index 2446e7694a8..00000000000 --- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.cpp +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright (C) 2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "reshape_fully_connected.hpp" -#include "op/fully_connected.hpp" -#include -#include -#include -#include -#include -#include -#include - -NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ReshapeFullyConnected, "ReshapeFullyConnected", 0); - -MKLDNNPlugin::ReshapeFullyConnected::ReshapeFullyConnected() { - ngraph::OutputVector twoInputs = { - ngraph::pattern::any_input(ngraph::pattern::has_static_rank()), ngraph::pattern::any_input(ngraph::pattern::has_static_shape())}; - ngraph::OutputVector threeInputs = { - ngraph::pattern::any_input(ngraph::pattern::has_static_rank()), ngraph::pattern::any_input(ngraph::pattern::has_static_shape()), - ngraph::pattern::any_input()}; - auto fcTwoInputs = ngraph::pattern::wrap_type(twoInputs, ngraph::pattern::has_static_rank()); - auto fcThreeInputs = ngraph::pattern::wrap_type(threeInputs, ngraph::pattern::has_static_rank()); - const auto fcTwoOrThreeInputs = std::make_shared(ngraph::OutputVector{fcTwoInputs, fcThreeInputs}); - - ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { - auto fc = std::dynamic_pointer_cast(m.get_match_root()); - if (!fc || transformation_callback(fc)) { - return false; - } - - auto fc_input_shape = fc->get_input_partial_shape(0); - auto input_rank = fc_input_shape.rank().get_length(); - auto output_shape = fc->get_output_partial_shape(0); - - if (input_rank == 2 || input_rank == 0) { - return false; - } - - ngraph::NodeVector new_ops; - int64_t K = *(fc->get_input_shape(1).rbegin()); // requested 2nd input with static shape in the matcher - auto reshape = std::make_shared( - fc->input_value(0), ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{2}, std::vector{-1, K}), false); - if (reshape->get_output_partial_shape(0).rank().is_dynamic()) - return false; - new_ops.push_back(reshape); - - reshape->set_friendly_name(fc->get_friendly_name() + "/Reshape"); - - // Calculate output shape for new FullyConnected layer - // [I, K] * [O, K] = [I, O] - auto I = reshape->get_output_partial_shape(0)[0]; - auto O = fc->get_input_partial_shape(1)[0]; - ngraph::PartialShape output_shape_new{I, O}; - - 
std::shared_ptr fc_new; - if (fc->get_input_size() == 2) { - fc_new = std::make_shared(reshape, - fc->input_value(1), - output_shape_new.rank(), - fc->get_output_type()); - } else if (fc->get_input_size() == 3) { - fc_new = std::make_shared(reshape, - fc->input_value(1), - fc->input_value(2), - output_shape_new.rank(), - fc->get_output_type()); - } else { - return false; - } - new_ops.push_back(fc_new); - - if (output_shape != output_shape_new) { - auto I_idxs = std::vector(input_rank - 1); - std::iota(I_idxs.begin(), I_idxs.end(), 0); - auto A_input_shape = ngraph::op::util::make_try_fold(fc->input_value(0)); - auto B_input_shape = ngraph::op::util::make_try_fold(fc->input_value(1)); - auto I_node = ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_node(A_input_shape, {I_idxs}); - auto O_node = ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_node(B_input_shape, {0}); - ngraph::OutputVector output_shape_dims{I_node, O_node}; - - const auto original_rank = fc->get_output_rank(); - NGRAPH_CHECK(original_rank.is_static()); - if (input_rank < original_rank.get_length()) { - const size_t const_shape_value = original_rank.get_length() - input_rank; - output_shape_dims.insert( - output_shape_dims.begin(), ngraph::opset1::Constant::create(I_node->get_element_type(), { const_shape_value }, { 1 })); - } - - auto reshape_output_shape = ngraph::op::util::make_try_fold(output_shape_dims, 0); - auto reshape_output = std::make_shared(fc_new, reshape_output_shape, false); - new_ops.push_back(A_input_shape); - new_ops.push_back(B_input_shape); - new_ops.push_back(I_node); - new_ops.push_back(O_node); - new_ops.push_back(reshape_output_shape); - new_ops.push_back(reshape_output); - reshape_output->set_friendly_name(fc->get_friendly_name()); - fc_new->set_friendly_name(fc->get_friendly_name() + "/FC"); - ngraph::copy_runtime_info(fc, new_ops); - ngraph::replace_node(fc, reshape_output); - } else { - fc_new->set_friendly_name(fc->get_friendly_name()); - ngraph::copy_runtime_info(fc, new_ops); - ngraph::replace_node(fc, fc_new); - } - - return true; - }; - - auto m = std::make_shared(fcTwoOrThreeInputs, "ReshapeFullyConnected"); - this->register_matcher(m, callback); -} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.hpp deleted file mode 100644 index 162427de5de..00000000000 --- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/reshape_fully_connected.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -/* - * Description: - * ReshapeFullyConnected transformation detects FullyConnected operations - * and for each operation where input shape is greater than 2 inserts Reshape - * operations before and after FullyConnected operation. This transformation is - * required because of IE restrictions. 
- */ - -namespace MKLDNNPlugin { - -class ReshapeFullyConnected: public ngraph::pass::MatcherPass { -public: - NGRAPH_RTTI_DECLARATION; - ReshapeFullyConnected(); -}; - -} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index 6132525193e..e2f01e85cef 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -330,48 +330,42 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { } } -void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false, bool initAsBinary = false) { - bool initBinaryMemory = initWeights; +void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false) { mkldnn::post_ops ops; + bool useLegacyPostOps = true; // @todo remove after issue with performance of binary post ops fixed + + auto getBinPostOpShape = [&](){ + const auto outShape = getOutputShapeAtPort(0).getStaticDims(); + const auto outShapeRank = getOutputShapeAtPort(0).getRank(); + const auto chIdx = getFusingAxis(); + std::vector binaryShape(outShapeRank, 1); + binaryShape[chIdx] = outShape[chIdx]; + return binaryShape; + }; for (auto &node : fusedWith) { if (node->getType() == Split || node->getType() == Concatenation) continue; - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode) { + if (auto* eltwiseNode = dynamic_cast(node.get())) { if (eltwiseNode->isSpecialConvolutionAddFusing()) { ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision)); } else { - constexpr int align = 16; - eltwiseNode->appendPostOps(ops, dims, align, initAsBinary, initBinaryMemory); - if (initBinaryMemory) { - if (eltwiseNode->scalesMemory) - binaryPostOpsArgs.push_back(eltwiseNode->scalesMemory->GetPrimitive()); - if (eltwiseNode->shiftsMemory) - binaryPostOpsArgs.push_back(eltwiseNode->shiftsMemory->GetPrimitive()); + if (useLegacyPostOps || eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) { + constexpr int align = 16; + eltwiseNode->appendPostOps(ops, dims, align); + } else { + eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs); } } continue; } - auto* fakeQuantizeNode = dynamic_cast(node.get()); - if (fakeQuantizeNode) { - constexpr int align = -1; - fakeQuantizeNode->appendPostOps(ops, dims, align, initAsBinary, initBinaryMemory); - if (initBinaryMemory) { - if (fakeQuantizeNode->cropHighMemory) - binaryPostOpsArgs.push_back(fakeQuantizeNode->cropHighMemory->GetPrimitive()); - if (fakeQuantizeNode->cropLowMemory) - binaryPostOpsArgs.push_back(fakeQuantizeNode->cropLowMemory->GetPrimitive()); - if (fakeQuantizeNode->inputScaleMemory) - binaryPostOpsArgs.push_back(fakeQuantizeNode->inputScaleMemory->GetPrimitive()); - if (fakeQuantizeNode->inputShiftMemory) - binaryPostOpsArgs.push_back(fakeQuantizeNode->inputShiftMemory->GetPrimitive()); - if (fakeQuantizeNode->outputScaleMemory) - binaryPostOpsArgs.push_back(fakeQuantizeNode->outputScaleMemory->GetPrimitive()); - if (fakeQuantizeNode->outputShiftMemory) - binaryPostOpsArgs.push_back(fakeQuantizeNode->outputShiftMemory->GetPrimitive()); + if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { + if (useLegacyPostOps) { + fakeQuantizeNode->appendPostOps(ops, dims); + } else { + fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs); } continue; } @@ -416,7 +410,6 
@@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { // attr[1] - binary mkldnn::primitive_attr attrs[1]; setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims()); -// setPostOps(attrs[1], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), false, true); bool containJitImpl = false; @@ -630,7 +623,6 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) { // attr[1] - binary mkldnn::primitive_attr attrs[1]; setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims()); -// setPostOps(attrs[1], false, true); auto rightConfig = selectedPD->getConfig(); size_t selected_count = 0; @@ -926,13 +918,8 @@ void MKLDNNConvolutionNode::prepareParams() { auto initPrimitiveAttr = [&]() { mkldnn::primitive_attr attr; addZeroPoints(attr); + setPostOps(attr, outMemoryDesc->getShape().getStaticDims(), true); - // todo: [AV] delete "false" to use binary mechanism - if (false && getSelectedPrimitiveDescriptor()->getImplementationType() == jit_gemm) { - setPostOps(attr, outMemoryDesc->getShape().getStaticDims(), true, true); - } else { - setPostOps(attr, outMemoryDesc->getShape().getStaticDims(), true); - } return std::make_shared(std::move(attr)); }; @@ -991,14 +978,8 @@ void MKLDNNConvolutionNode::prepareParams() { if (withBiases) { primArgs[DNNL_ARG_BIAS] = getBias(); } -// todo: [AV] uncomment to use binary mechanism -// auto post_ops = attr.get_post_ops(); -// int idx = 0; -// for (int i = 0; i < post_ops.len(); i++) { -// if (post_ops.kind(i) == mkldnn::primitive::kind::binary) { -// primArgs.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, binaryPostOpsArgs[idx++]}); -// } -// } + + appendPostOpArgs(*pAttrLocal); } void MKLDNNConvolutionNode::executeDynamicImpl(dnnl::stream strm) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h index 39ef625f503..dcdd18092d5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h @@ -69,7 +69,7 @@ private: void executeDynamicImpl(mkldnn::stream strm) override; void addZeroPoints(mkldnn::primitive_attr& attr) const; - void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights, bool initAsBinary); + void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights); void filterSupportedDescriptors(); bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const; bool isNspcAvailable() const; @@ -122,4 +122,3 @@ private: }; } // namespace MKLDNNPlugin - diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index 1cb58a478ec..6a2c6332e38 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -157,9 +157,6 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const { return false; } - // todo: [antonvor] added these checks to fix performance problems - if (kernel.size() == 3) - return false; if (!withGroups && stride.back() > 3) return false; if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) { @@ -271,17 +268,25 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) { mkldnn::post_ops ops; + auto getBinPostOpShape = [&](){ + const auto outShape = 
getOutputShapeAtPort(0).getStaticDims(); + const auto outShapeRank = getOutputShapeAtPort(0).getRank(); + const auto chIdx = getFusingAxis(); + std::vector binaryShape(outShapeRank, 1); + binaryShape[chIdx] = outShape[chIdx]; + return binaryShape; + }; + for (auto &node : fusedWith) { - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode) { + if (auto* eltwiseNode = dynamic_cast(node.get())) { // TODO [DS]: change to shape from memory constexpr int align = 16; + // use legacy depthwise since backprop convolution does not support binary post ops eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align); continue; } - auto* fakeQuantizeNode = dynamic_cast(node.get()); - if (fakeQuantizeNode) { - fakeQuantizeNode->appendPostOps(ops); + if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { + fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs); continue; } IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented"; @@ -358,6 +363,8 @@ void MKLDNNDeconvolutionNode::createPrimitive() { auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); primArgs = {{DNNL_ARG_DIFF_DST, src}, {DNNL_ARG_WEIGHTS, weights}, {DNNL_ARG_DIFF_SRC, dst}}; } + + appendPostOpArgs(attr); } void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp index 353a022faf9..f87e32c1476 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp @@ -7,6 +7,7 @@ #include #include +#include "cpu_types.h" #include "utils/bfloat16.hpp" #include #include @@ -31,6 +32,7 @@ #include "ngraph_transformations/op/leaky_relu.hpp" #include "ngraph_transformations/op/swish_cpu.hpp" +#include #include #include #include @@ -791,18 +793,41 @@ private: } }; +MKLDNNEltwiseNode::BroadcastingPolicy MKLDNNEltwiseNode::determineBroadcastingPolicy(const std::shared_ptr& op) { + const auto const1 = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(0)); + const auto const2 = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(1)); + int constPort = -1; + if (const2) { + constPort = 1; + } else if (const1) { + constPort = 0; + } else { + return Undefined; + } + + auto const_shape = op->get_input_shape(constPort); + if (ngraph::shape_size(const_shape) == 1) + return PerTensor; + else + return PerChannel; +} + const std::map MKLDNNEltwiseNode::initializers = { {ngraph::op::v1::Add::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { node.algorithm = EltwiseAdd; + node.broadcastingPolicy = determineBroadcastingPolicy(op); }}, {ngraph::op::v1::Subtract::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { node.algorithm = EltwiseSubtract; + node.broadcastingPolicy = determineBroadcastingPolicy(op); }}, {ngraph::op::v1::Multiply::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { node.algorithm = EltwiseMultiply; + node.broadcastingPolicy = determineBroadcastingPolicy(op); }}, {ngraph::op::v1::Divide::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { node.algorithm = EltwiseDivide; + node.broadcastingPolicy = determineBroadcastingPolicy(op); }}, {ngraph::op::v0::SquaredDifference::get_type_info_static(), [](const std::shared_ptr& 
op, MKLDNNEltwiseNode& node) { node.algorithm = EltwiseSquaredDifference; @@ -828,6 +853,7 @@ const std::map M node.alpha = powerStatic->get_power(); node.beta = powerStatic->get_scale(); node.gamma = powerStatic->get_shift(); + node.broadcastingPolicy = PerTensor; }}, {ngraph::op::v1::Equal::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { node.algorithm = EltwiseEqual; @@ -954,6 +980,7 @@ const std::map M }}, {ngraph::op::v0::PRelu::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { node.algorithm = EltwisePrelu; + node.broadcastingPolicy = determineBroadcastingPolicy(op); }}, {ngraph::op::v0::Erf::get_type_info_static(), [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { node.algorithm = EltwiseErf; @@ -984,7 +1011,7 @@ bool MKLDNNEltwiseNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { + MKLDNNNode(op, eng, cache), broadcastingPolicy(Undefined) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -1713,106 +1740,124 @@ void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) { getInputShapeAtPort(0) == getInputShapeAtPort(1); if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) { std::tie(scales, shifts) = getScalesAndShifts(parentNode.get()); + if ((parentNode->getType() == FullyConnected || parentNode->getType() == MatMul) && one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract, + EltwiseMultiply, EltwiseDivide, EltwiseMulAdd, EltwisePowerStatic, EltwisePrelu)) { + std::tie(scales, shifts) = getScalesAndShifts(parentNode.get()); + } } MKLDNNNode::fuseInto(parentNode); } -void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) { +void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) { const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' "; if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) { switch (getMKLDNNAlgorithm()) { - case mkldnn::algorithm::eltwise_relu: - case mkldnn::algorithm::eltwise_tanh: - case mkldnn::algorithm::eltwise_elu: - case mkldnn::algorithm::eltwise_square: - case mkldnn::algorithm::eltwise_abs: - case mkldnn::algorithm::eltwise_sqrt: - case mkldnn::algorithm::eltwise_linear: - case mkldnn::algorithm::eltwise_bounded_relu: - case mkldnn::algorithm::eltwise_soft_relu: - case mkldnn::algorithm::eltwise_logistic: - case mkldnn::algorithm::eltwise_exp: - case mkldnn::algorithm::eltwise_gelu_erf: - case mkldnn::algorithm::eltwise_gelu_tanh: - case mkldnn::algorithm::eltwise_clip: - case mkldnn::algorithm::eltwise_swish: - case mkldnn::algorithm::eltwise_hardswish: - case mkldnn::algorithm::eltwise_mish: - case mkldnn::algorithm::eltwise_hsigmoid: - case mkldnn::algorithm::eltwise_round_half_to_even: - case mkldnn::algorithm::eltwise_round_half_away_from_zero: - ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta()); - break; - default: IE_THROW() << errorPrefix << "as post operation is not supported"; + case mkldnn::algorithm::eltwise_relu: + case mkldnn::algorithm::eltwise_tanh: + case mkldnn::algorithm::eltwise_elu: + case mkldnn::algorithm::eltwise_square: + case mkldnn::algorithm::eltwise_abs: + case mkldnn::algorithm::eltwise_sqrt: + case mkldnn::algorithm::eltwise_linear: + case mkldnn::algorithm::eltwise_bounded_relu: 
+ case mkldnn::algorithm::eltwise_soft_relu: + case mkldnn::algorithm::eltwise_logistic: + case mkldnn::algorithm::eltwise_exp: + case mkldnn::algorithm::eltwise_gelu_erf: + case mkldnn::algorithm::eltwise_gelu_tanh: + case mkldnn::algorithm::eltwise_clip: + case mkldnn::algorithm::eltwise_swish: + case mkldnn::algorithm::eltwise_hardswish: + case mkldnn::algorithm::eltwise_mish: + case mkldnn::algorithm::eltwise_hsigmoid: + case mkldnn::algorithm::eltwise_round_half_to_even: + case mkldnn::algorithm::eltwise_round_half_away_from_zero: + ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta()); + break; + default: IE_THROW() << errorPrefix << "as post operation is not supported"; } } else { - const size_t chIdx = postOpDims.size() > 1 ? 1 : 0; + const size_t chIdx = postOpDims.size() > 1 ? getFusingAxis() : 0; scalesBuffer = makeAlignedBuffer(postOpDims[chIdx], scales, align); if (getAlgorithm() != EltwisePrelu) { shiftsBuffer = makeAlignedBuffer(postOpDims[chIdx], shifts, align); } - if (initAsBinary) { - auto appendBinary = [&](const mkldnn::algorithm alg, MKLDNNMemoryPtr &memPtr, const std::vector &data) { - if (data.empty()) - IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated"; - - std::vector binaryDims(postOpDims.size(), 1); - binaryDims[chIdx] = postOpDims[chIdx]; - - DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryDims)); - ops.append_binary(alg, memoryDesc.getDnnlDesc()); - - if (initBinaryMemory) { - memPtr.reset(new MKLDNNMemory(getEngine())); - memPtr->Create(memoryDesc, &data[0]); - } - }; - switch (getAlgorithm()) { - case EltwiseAdd: - case EltwiseSubtract: - appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer); - break; - case EltwiseMultiply: - case EltwiseDivide: - appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer); - break; - case EltwiseMulAdd: - case EltwisePowerStatic: - appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer); - appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer); - break; - case EltwisePrelu: - appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scalesBuffer); - break; - default: - IE_THROW() << errorPrefix << "as post operation is not supported"; - } - } else { - switch (getAlgorithm()) { - case EltwiseAdd: - case EltwiseSubtract: - case EltwiseMultiply: - case EltwiseDivide: - case EltwiseMulAdd: - case EltwisePowerStatic: - if (scalesBuffer.empty() || shiftsBuffer.empty()) - IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated"; - ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scalesBuffer[0], &shiftsBuffer[0]); - break; - case EltwisePrelu: - if (scalesBuffer.empty()) - IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated"; - ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scalesBuffer[0], nullptr); - break; - default: - IE_THROW() << errorPrefix << "as post operation is not supported"; - } + /* @todo legacy depthwise post ops are kept for now + * for performance reasons + */ + switch (getAlgorithm()) { + case EltwiseAdd: + case EltwiseSubtract: + case EltwiseMultiply: + case EltwiseDivide: + case EltwiseMulAdd: + case EltwisePowerStatic: + if (scales.empty() || shifts.empty()) + IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated"; + ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scalesBuffer[0], &shiftsBuffer[0]); + break; + case EltwisePrelu: + if (scales.empty()) + 
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated"; + ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scalesBuffer[0], nullptr); + break; + default: + IE_THROW() << errorPrefix << "as post operation is not supported"; } } } +void MKLDNNEltwiseNode::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector& binaryPostOpsMem) { + const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' as binary post op "; + VectorDims broadcastBinaryShape(postOpDims.size(), 1); + + auto appendBinary = [&](const mkldnn::algorithm alg, MKLDNNMemoryPtr &memPtr, const std::vector &data) { + if (data.empty()) + IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated"; + if (broadcastingPolicy == Undefined) + IE_THROW() << errorPrefix << "cannot be performed since policy is Undefined"; + + DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, broadcastingPolicy == PerTensor ? Shape(broadcastBinaryShape) : Shape(postOpDims)); + + ops.append_binary(alg, memoryDesc.getDnnlDesc()); + + if (!memPtr) { + memPtr.reset(new MKLDNNMemory(getEngine())); + memPtr->Create(memoryDesc, &data[0]); + + binaryPostOpsMem.push_back(memPtr); + } + }; + + switch (getAlgorithm()) { + case EltwiseAdd: + case EltwiseSubtract: + appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts); + break; + case EltwiseDivide: + case EltwiseMultiply: + appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales); + break; + case EltwiseMulAdd: + appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales); + appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts); + break; + case EltwisePowerStatic: + if (beta != 1.0f) // Multiply if has scales + appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales); + if (gamma != 0.0f) // Add only if has shifts + appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts); + break; + case EltwisePrelu: + appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scales); + break; + default: + IE_THROW() << errorPrefix << "as post operation is not supported"; + } +} + bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const { auto isSuitableNode = [this](const MKLDNNEltwiseNode* node) { // [WA] Since execution precision change from I32 to FP32 for Divide operation may lead to incorrect results diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h index 5471a14b2c9..b5e7768b52a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h @@ -75,7 +75,8 @@ public: bool created() const override; bool canBeInPlace() const override; bool canFuse(const MKLDNNNodePtr& node) const override; - void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false) override; + void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1) override; + void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& binaryPostOpsMem) override; void fuseInto(MKLDNNNodePtr& parentNode) override; InferenceEngine::Precision getRuntimePrecision() const override; @@ -97,8 +98,17 @@ public: void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); } + enum BroadcastingPolicy { + PerChannel, + PerTensor, + Undefined, + }; + + BroadcastingPolicy 
getBroadcastingPolicy() const { return broadcastingPolicy; } + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: struct EltwiseExecutor { EltwiseExecutor(size_t batch) : batchDimIdx(batch) {} @@ -130,6 +140,8 @@ private: size_t fullWorkAmount = 0; }; + BroadcastingPolicy broadcastingPolicy; + mkldnn::algorithm mkldnnAlgorithm = mkldnn::algorithm::undef; static const int optimalTensorRank = 6; @@ -157,6 +169,8 @@ private: using Initializer = std::function&, MKLDNNEltwiseNode& node)>; static const std::map initializers; + static BroadcastingPolicy determineBroadcastingPolicy(const std::shared_ptr& op); + void executeOptimized6D(const std::unique_ptr &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs, const VectorDims &dims_out) const; void executeOptimizedGeneric(const std::unique_ptr &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp index 319a41528e6..3597719521b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp @@ -860,7 +860,15 @@ bool MKLDNNFakeQuantizeNode::isSupportedOperation(const std::shared_ptr 1 || not_unit_axis > 1) { + + /* @todo + * Channel axis 2 is added for 3D MatMul (most common one). + * FQ for non-1 channel fallbacks to reference implementation. + * Expected to be fused for 3D MatMul + * Long term idea: restore limitation for channel axis 1 and + * support fusing of unfolded FQ (see FakeQuantizeDecomposition transformation) + */ + if (count_not_unit_axis > 1 || !one_of(not_unit_axis, 1, 2)) { errorMessage = "Supports only per-tensor and per-channel quantizations"; return false; } @@ -1057,6 +1065,13 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr 1 ? 1 : 0]; + const auto realAxisSize = dims[dims.size() > 1 ? 
1 : 0]; const auto axisPaddedSize = rnd_up(realAxisSize, bufferAlignment); if (!isPostOpDataInitialized) { binarizationThresholds.resize(axisPaddedSize, 0); @@ -1671,73 +1684,76 @@ void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDi std::fill(binarizationThresholds.begin() + realAxisSize, binarizationThresholds.end(), 0); } } - - ops.append_binarization(mkldnn::algorithm::binarization_depthwise, (const float*)&binarizationThresholds[0], (const float*)&binarizationOutputMask[0]); - - if (!isInputLowBroadcasted && !isOutputHighBroadcasted) { - isPostOpDataInitialized = true; - } } else { - if (!isPostOpDataInitialized) { - if (cropLow.size() > 1) - cropLow.resize(rnd_up(cropLow.size(), bufferAlignment), 0); - if (cropHigh.size() > 1) - cropHigh.resize(rnd_up(cropHigh.size(), bufferAlignment), 0); - if (inputScale.size() > 1) - inputScale.resize(rnd_up(inputScale.size(), bufferAlignment), 0); - if (inputShift.size() > 1) - inputShift.resize(rnd_up(inputShift.size(), bufferAlignment), 0); - if (outputScale.size() > 1) - outputScale.resize(rnd_up(outputScale.size(), bufferAlignment), 0); - if (outputShift.size() > 1) - outputShift.resize(rnd_up(outputShift.size(), bufferAlignment), 0); + if (cropLow.size() > 1) + cropLow.resize(rnd_up(cropLow.size(), bufferAlignment), 0); + if (cropHigh.size() > 1) + cropHigh.resize(rnd_up(cropHigh.size(), bufferAlignment), 0); + if (inputScale.size() > 1) + inputScale.resize(rnd_up(inputScale.size(), bufferAlignment), 0); + if (inputShift.size() > 1) + inputShift.resize(rnd_up(inputShift.size(), bufferAlignment), 0); + if (outputScale.size() > 1) + outputScale.resize(rnd_up(outputScale.size(), bufferAlignment), 0); + if (outputShift.size() > 1) + outputShift.resize(rnd_up(outputShift.size(), bufferAlignment), 0); - cropLowData.set(cropLow.size(), 1 << 1, &cropLow[0]); - cropHighData.set(cropHigh.size(), 1 << 1, &cropHigh[0]); - inputScaleData.set(inputScale.size(), 1 << 1, &inputScale[0]); - inputShiftData.set(inputShift.size(), 1 << 1, &inputShift[0]); - outputScaleData.set(outputScale.size(), 1 << 1, &outputScale[0]); - outputShiftData.set(outputShift.size(), 1 << 1, &outputShift[0]); - } + cropLowData.set(cropLow.size(), 1 << 1, &cropLow[0]); + cropHighData.set(cropHigh.size(), 1 << 1, &cropHigh[0]); + inputScaleData.set(inputScale.size(), 1 << 1, &inputScale[0]); + inputShiftData.set(inputShift.size(), 1 << 1, &inputShift[0]); + outputScaleData.set(outputScale.size(), 1 << 1, &outputScale[0]); + outputShiftData.set(outputShift.size(), 1 << 1, &outputShift[0]); + } + isPostOpDataInitialized = true; +} + +void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) { + initializePostOpData(postOpDims, align); + + if (getAlgorithm() == FQBinarization) { + ops.append_binarization(mkldnn::algorithm::binarization_depthwise, (const float*)&binarizationThresholds[0], (const float*)&binarizationOutputMask[0]); + } else { mkldnn::algorithm alg = getAlgorithm() == FQCommon ? mkldnn::algorithm::quantization_quantize_dequantize : mkldnn::algorithm::quantization_quantize; - - if (initAsBinary) { - auto appendBinary = [&](const mkldnn::algorithm alg, const size_t dataSize, MKLDNNMemoryPtr &memPtr, const void *data) { - const auto rank = getOutputShapeAtPort(0).getRank(); - auto chIdx = rank > 1 ? 
1 : 0; - - std::vector binaryShape(rank, 1); - binaryShape[chIdx] = dataSize; - - DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryShape)); - ops.append_binary(alg, memoryDesc.getDnnlDesc()); - - if (initBinaryMemory) { - memPtr.reset(new MKLDNNMemory(getEngine())); - memPtr->Create(memoryDesc, data); - } - }; - - appendBinary(mkldnn::algorithm::binary_min, cropHighSize, cropHighMemory, &cropHighData.shifts_[0]); - appendBinary(mkldnn::algorithm::binary_max, cropLowSize, cropLowMemory, &cropLowData.shifts_[0]); - appendBinary(mkldnn::algorithm::binary_mul, inputScaleSize, inputScaleMemory, &inputScaleData.scales_[0]); - appendBinary(mkldnn::algorithm::binary_add, inputShiftSize, inputShiftMemory, &inputShiftData.shifts_[0]); - if (alg == mkldnn::algorithm::quantization_quantize_dequantize) { - ops.append_eltwise(1.0f, mkldnn::algorithm::eltwise_round_half_to_even, 0, 0); - } - appendBinary(mkldnn::algorithm::binary_mul, outputScaleSize, outputScaleMemory, &outputScaleData.scales_[0]); - appendBinary(mkldnn::algorithm::binary_add, outputShiftSize, outputShiftMemory, &outputShiftData.shifts_[0]); - - } else { - ops.append_quantization(alg, &cropLowData, &cropHighData, &inputScaleData, &inputShiftData, &outputScaleData, &outputShiftData); - } - - isPostOpDataInitialized = true; + ops.append_quantization(alg, &cropLowData, &cropHighData, &inputScaleData, &inputShiftData, &outputScaleData, &outputShiftData); } } +void MKLDNNFakeQuantizeNode::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector& binaryPostOpsMem) { + static const size_t bufferAlignment = 1; + + initializePostOpData(postOpDims, bufferAlignment); + + VectorDims broadcastBinaryShape(postOpDims.size(), 1); + + auto appendBinary = [&](const mkldnn::algorithm alg, const size_t dataSize, MKLDNNMemoryPtr &memPtr, const void *data) { + DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, dataSize == 1 ? Shape(broadcastBinaryShape) : Shape(postOpDims)); + ops.append_binary(alg, memoryDesc.getDnnlDesc()); + + if (!memPtr) { + memPtr.reset(new MKLDNNMemory(getEngine())); + memPtr->Create(memoryDesc, data); + + binaryPostOpsMem.push_back(memPtr); + } + }; + + mkldnn::algorithm alg = getAlgorithm() == FQCommon ? 
mkldnn::algorithm::quantization_quantize_dequantize : + mkldnn::algorithm::quantization_quantize; + + appendBinary(mkldnn::algorithm::binary_min, cropHighSize, cropHighMemory, &cropHighData.shifts_[0]); + appendBinary(mkldnn::algorithm::binary_max, cropLowSize, cropLowMemory, &cropLowData.shifts_[0]); + appendBinary(mkldnn::algorithm::binary_mul, inputScaleSize, inputScaleMemory, &inputScaleData.scales_[0]); + appendBinary(mkldnn::algorithm::binary_add, inputShiftSize, inputShiftMemory, &inputShiftData.shifts_[0]); + if (alg == mkldnn::algorithm::quantization_quantize_dequantize) { + ops.append_eltwise(1.0f, mkldnn::algorithm::eltwise_round_half_to_even, 0, 0); + } + appendBinary(mkldnn::algorithm::binary_mul, outputScaleSize, outputScaleMemory, &outputScaleData.scales_[0]); + appendBinary(mkldnn::algorithm::binary_add, outputShiftSize, outputShiftMemory, &outputShiftData.shifts_[0]); +} + MKLDNNFakeQuantizeNode::FakeQuantizeJitExecutor::FakeQuantizeJitExecutor(const jit_quantize_params &_jqp) { bool isBinarization = _jqp.op_type == FQBinarization; if (mayiuse(cpu::x64::avx512_common)) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h index af75677ab82..a56b94fdf40 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.h @@ -121,11 +121,22 @@ public: InferenceEngine::Precision getInputPrecision() const { return inputPrecision; } InferenceEngine::Precision getOutputPrecision() const { return outputPrecision; } - void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims = {}, int align = -1, bool initAsBinary = false, - bool initBinaryMemory = false) override; + // MKLDNN quantization_injectors assumes that quantization data memory is always aligned on 16 + // by length of AVX512 vector register which is also enough for AVX2 and SSE42 implementations. + // Otherwise it can lead to buffer over-read and performance penalties due to denormals. 
+ void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims = {}, int align = 16) override; + void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector& binaryPostOpsMem) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + enum BroadcastingPolicy { + PerChannel, // all FQ operations are per channel + PerTensor, // all FQ operations are per tensor + Mixed, // some per channel, some per tensor + }; + + BroadcastingPolicy getBroadcastingPolicy() const { return broadcastingPolicy; } + MKLDNNMemoryPtr cropLowMemory; MKLDNNMemoryPtr cropHighMemory; MKLDNNMemoryPtr inputScaleMemory; @@ -149,6 +160,7 @@ private: void init() override; std::vector getDataFormats() const; + void initializePostOpData(const VectorDims &postOpDims, const size_t bufferAlignment); void executeReference(); void executeBinarization(const std::unique_ptr &pKernel) const; void executeQuantization(const std::unique_ptr &pKernel) const; @@ -195,6 +207,8 @@ private: InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32; std::string errorPrefix; + + BroadcastingPolicy broadcastingPolicy; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp index 68bad2f2f92..8eaea33af95 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp @@ -147,13 +147,7 @@ void MKLDNNFullyConnectedNode::createPrimitive() { else primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getParentEdgeAt(WEIGHTS_ID)->getMemory().GetPrimitive()}, {DNNL_ARG_DST, dst}}; - auto post_ops = attr->get_post_ops(); - int idx = 0; - for (int i = 0; i < post_ops.len(); i++) { - if (post_ops.kind(i) == mkldnn::primitive::kind::binary) { - primArgs.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, binaryPostOpsArgs[idx++]}); - } - } + appendPostOpArgs(*attr); } void MKLDNNFullyConnectedNode::execute(mkldnn::stream strm) { @@ -183,42 +177,32 @@ bool MKLDNNFullyConnectedNode::canFuse(const MKLDNNNodePtr& node) const { return canFuseSimpleOperation(node); } -void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false, bool initAsBinary = false) { - bool initBinaryMemory = initWeights; +void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) { mkldnn::post_ops ops; + auto getBinPostOpShape = [&](){ + const size_t binaryShapeRank = getOutputShapeAtPort(0).getRank() == 3 ? 
2 : getOutputShapeAtPort(0).getRank(); + VectorDims binaryShape(binaryShapeRank, 1); + const size_t channelAxis = getFusingAxis(); + // always use 1 as channelAxis for binary Shape, since oneDNN primitive is actually always 2D + binaryShape[1] = getOutputShapeAtPort(0).getStaticDims()[channelAxis]; + + return binaryShape; + }; + for (auto &node : fusedWith) { - auto* fakeQuantizeNode = dynamic_cast(node.get()); - if (fakeQuantizeNode) { - // no need to fill post ops dims for fq, make sense only for bin fq - fakeQuantizeNode->appendPostOps(ops, VectorDims{}, -1, initAsBinary, initBinaryMemory); - if (initBinaryMemory) { - if (fakeQuantizeNode->cropHighMemory) - binaryPostOpsArgs.push_back(fakeQuantizeNode->cropHighMemory->GetPrimitive()); - if (fakeQuantizeNode->cropLowMemory) - binaryPostOpsArgs.push_back(fakeQuantizeNode->cropLowMemory->GetPrimitive()); - if (fakeQuantizeNode->inputScaleMemory) - binaryPostOpsArgs.push_back(fakeQuantizeNode->inputScaleMemory->GetPrimitive()); - if (fakeQuantizeNode->inputShiftMemory) - binaryPostOpsArgs.push_back(fakeQuantizeNode->inputShiftMemory->GetPrimitive()); - if (fakeQuantizeNode->outputScaleMemory) - binaryPostOpsArgs.push_back(fakeQuantizeNode->outputScaleMemory->GetPrimitive()); - if (fakeQuantizeNode->outputShiftMemory) - binaryPostOpsArgs.push_back(fakeQuantizeNode->outputShiftMemory->GetPrimitive()); - } + if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { + fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs); continue; } - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode) { + if (auto* eltwiseNode = dynamic_cast(node.get())) { // TODO [DS]: change to shape from memory constexpr int align = -1; - eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align, initAsBinary, initBinaryMemory); - if (initBinaryMemory) { - if (eltwiseNode->scalesMemory) - binaryPostOpsArgs.push_back(eltwiseNode->scalesMemory->GetPrimitive()); - if (eltwiseNode->shiftsMemory) - binaryPostOpsArgs.push_back(eltwiseNode->shiftsMemory->GetPrimitive()); + if (eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) { + eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align); + } else { + eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs); } continue; } @@ -280,7 +264,7 @@ const std::vector& MKLDNNFullyConnectedNode::getPrimitivesPriori MKLDNNNode::AttrPtr MKLDNNFullyConnectedNode::initPrimitiveAttr() { auto attr = std::make_shared(mkldnn::primitive_attr()); - setPostOps(*attr, true, true); + setPostOps(*attr); return attr; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h index 6749c9451c0..c8394bb1afd 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h @@ -26,6 +26,10 @@ public: return false; } + size_t getFusingAxis() const override { + return getOutputShapeAtPort(0).getRank() == 3 ? 
2 : 1; + } + const std::vector& getPrimitivesPriority() override; void createDescriptor(const std::vector& inputDesc, const std::vector& outputDesc) override; @@ -43,8 +47,7 @@ public: static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; -protected: - AttrPtr initPrimitiveAttr(); + std::shared_ptr initPrimitiveAttr() override; private: void createDescriptorInternal(const mkldnn::memory::desc &inputDesc, @@ -54,7 +57,7 @@ private: InferenceEngine::SizeVector biasesDims; std::vector PostOpsIntBlobMemory; - void setPostOps(mkldnn::primitive_attr &attr, bool initWeights, bool initAsBinary); + void setPostOps(mkldnn::primitive_attr &attr, bool initWeights); bool withBiases = false; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp index e9ebde02cff..944f65ff5f0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp @@ -17,6 +17,7 @@ #include "common/cpu_memcpy.h" #include #include "memory_desc/dnnl_blocked_memory_desc.h" +#include "nodes/mkldnn_fake_quantize_node.h" #include "utils/general_utils.h" #include "memory_desc/cpu_memory_desc_utils.h" #include "mkldnn_extension_utils.h" @@ -54,31 +55,65 @@ bool MKLDNNMatMulNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { + MKLDNNNode(op, eng, cache), withBiases(false) { std::string errorMessage; + errorPrefix = "MatMul node with name '" + getName() + "'"; + if (!isSupportedOperation(op, errorMessage)) IE_THROW(NotImplemented) << errorMessage; - errorPrefix = "MatMul node with name '" + getName() + "'"; - const auto matMul = std::dynamic_pointer_cast(op); + if (!matMul) { + IE_THROW(NotImplemented) << "Operation with name " << op->get_friendly_name() << ":" << op->get_type_name() << + " is not an instance of MatMul from opset1"; + } + transposeIn[0] = matMul->get_transpose_a(); transposeIn[1] = matMul->get_transpose_b(); } bool MKLDNNMatMulNode::canFuse(const MKLDNNNodePtr& node) const { - return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, - EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, - EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu); + // per channel binary post op for rank > 2D is supported only by oneDNN reference implementation because of unusual MatMul channel axis (issue 6669) + if (getOutputShapeAtPort(0).getRank() > 2) { + if (const auto* eltwiseNode = dynamic_cast(node.get())) { + if (one_of(eltwiseNode->getAlgorithm(), + EltwiseAdd, EltwiseMultiply, EltwiseSubtract, EltwiseDivide, EltwisePrelu, EltwiseMulAdd, EltwisePowerStatic) && + eltwiseNode->getBroadcastingPolicy() != MKLDNNEltwiseNode::PerTensor) { + return false; + } + } else if (const auto* fakeQuantizeNode = dynamic_cast(node.get())) { + if (fakeQuantizeNode->getBroadcastingPolicy() != MKLDNNFakeQuantizeNode::PerTensor) { + return false; + } + } + } + + return canFuseSimpleOperation(node); } -void MKLDNNMatMulNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims& dims, bool initWeights = false) const { +void MKLDNNMatMulNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims& dims, bool initWeights = false) { mkldnn::post_ops ops; - for (auto &node : fusedWith) { + auto getBinPostOpShape = [&](){ + const auto 
outShapeRank = dims.size(); + const auto chIdx = getFusingAxis(); + std::vector binaryShape(outShapeRank, 1); + binaryShape[chIdx] = dims[chIdx]; + return binaryShape; + }; + + for (const auto &node : fusedWith) { if (auto* eltwiseNode = dynamic_cast(node.get())) { - eltwiseNode->appendPostOps(ops, dims); + // TODO [DS]: change to shape from memory + if (eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) { + eltwiseNode->appendPostOps(ops, dims); + } else { + eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs); + } + continue; + } else if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { + fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs); continue; } @@ -88,8 +123,7 @@ void MKLDNNMatMulNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims attr.set_post_ops(ops); } - -MKLDNNNode::AttrPtr MKLDNNMatMulNode::initPrimitiveAttr(const VectorDims &dims) const { +MKLDNNNode::AttrPtr MKLDNNMatMulNode::initPrimitiveAttr(const VectorDims &dims) { auto attr = std::make_shared(mkldnn::primitive_attr()); setPostOps(*attr, dims, true); @@ -97,7 +131,7 @@ MKLDNNNode::AttrPtr MKLDNNMatMulNode::initPrimitiveAttr(const VectorDims &dims) return attr; } -MKLDNNNode::AttrPtr MKLDNNMatMulNode::initPrimitiveAttr() const { +MKLDNNNode::AttrPtr MKLDNNMatMulNode::initPrimitiveAttr() { auto dummyShape = MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)); return initPrimitiveAttr(dummyShape.getStaticDims()); } @@ -131,12 +165,29 @@ static VectorDims getStridesAndModifyShape(Shape& shape, const bool transpose) { return strides; } +mkldnn::memory::desc MKLDNNMatMulNode::getBiasDescFrom(const DnnlMemoryDescCPtr outMemDesc) { + // oneDNN matmul requires shape for bias desc to be the same rank + VectorDims biasDims(outMemDesc->getShape().getRank(), 1); + const auto outDims = outMemDesc->getShape().getStaticDims(); + const auto chIdx = getFusingAxis(); + biasDims[chIdx] = outDims[chIdx]; + const auto bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(2)); + + return mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(biasDims), bdt, memory::format_tag::any); +} + void MKLDNNMatMulNode::getSupportedDescriptors() { - if (getParentEdges().size() != 2) + if (getParentEdges().size() != getOriginalInputsNumber()) IE_THROW() << errorPrefix << " has incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges for layer " << getName(); + withBiases = getOriginalInputsNumber() == 3; + + auto canBeExecutedInInt8 = [](const Precision firstInput, const Precision secondInput) { + return one_of(firstInput, Precision::U8, Precision::I8) && secondInput == Precision::I8; + }; + auto firstInPortPrec = getOriginalInputPrecisionAtPort(0); auto secondInPortPrec = getOriginalInputPrecisionAtPort(1); auto outPortPrec = getOriginalOutputPrecisionAtPort(0); @@ -154,6 +205,9 @@ void MKLDNNMatMulNode::getSupportedDescriptors() { outPortPrec = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0); } + if (!canBeExecutedInInt8(firstInPortPrec, secondInPortPrec) && one_of(outPortPrec, Precision::U8, Precision::I8)) + outPortPrec = Precision::FP32; // INT output is not supported for non-INT inputs + const auto& inputShape0 = getInputShapeAtPort(0); const auto& inputShape1 = getInputShapeAtPort(1); const auto& outputShape = getOutputShapeAtPort(0); @@ -206,12 +260,19 @@ void MKLDNNMatMulNode::getSupportedDescriptors() { void 
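/* Reviewer illustration, not part of the patch: for MatMul the fusing axis is the last
 * output dimension, so both getBiasDescFrom() above and the binary post-op shape keep
 * the full output rank with every dimension equal to 1 except that axis. A minimal
 * sketch, assuming output dims {2, 32, 60}:
 *   VectorDims binDims(3, 1);   // {1, 1, 1}
 *   binDims[2] = 60;            // {1, 1, 60} - per-channel operand / bias dims
 * A per-channel bias of 60 elements is therefore exposed to oneDNN as a {1, 1, 60} desc.
 */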
MKLDNNMatMulNode::createDescriptor(const std::vector& inputDesc, const std::vector& outputDesc) { - MKLDNNDescriptor desc{ - std::make_shared(inDataDesc[0]->getDnnlDesc(), - inDataDesc[1]->getDnnlDesc(), - outDataDesc->getDnnlDesc())}; + std::shared_ptr matmul_desc; + if (withBiases) { + matmul_desc.reset(new matmul::desc(inDataDesc[0]->getDnnlDesc(), + inDataDesc[1]->getDnnlDesc(), + getBiasDescFrom(outDataDesc), + outDataDesc->getDnnlDesc())); + } else { + matmul_desc.reset(new matmul::desc(inDataDesc[0]->getDnnlDesc(), + inDataDesc[1]->getDnnlDesc(), + outDataDesc->getDnnlDesc())); + } - descs.push_back(desc); + descs.emplace_back(matmul_desc); } void MKLDNNMatMulNode::initSupportedPrimitiveDescriptors() { @@ -262,9 +323,13 @@ void MKLDNNMatMulNode::createPrimitive() { MemoryDescPtr MKLDNNMatMulNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1): primitive_desc_it.src_desc(idx); - return std::make_shared( - MKLDNNExtensionUtils::DataTypeToIEPrecision(static_cast(desc.data.data_type)), - getInputShapeAtPort(idx)); /* provide initial shapes, so hide transpose effect */ + + if (idx < 2) // inputs + return std::make_shared( + MKLDNNExtensionUtils::DataTypeToIEPrecision(static_cast(desc.data.data_type)), + getInputShapeAtPort(idx)); /* provide initial shapes, so hide transpose effect */ + else // bias + return MKLDNNExtensionUtils::makeDescriptor(desc); } bool MKLDNNMatMulNode::created() const { @@ -300,10 +365,7 @@ void MKLDNNMatMulNode::prepareParams() { AttrPtr attr; if (isDynamicNode()) { - if (!pAttr) { - pAttr = initPrimitiveAttr(src0MemPtr->getStaticDims()); - } - attr = pAttr; + attr = initPrimitiveAttr(dstMemPtr->getStaticDims()); const auto& src0Desc = src0MemPtr->getDesc(); const auto& src1Desc = src1MemPtr->getDesc(); @@ -323,13 +385,22 @@ void MKLDNNMatMulNode::prepareParams() { auto dstDnnlDesc = dstMemPtr->GetDescWithType(); - MKLDNNDescriptor desc{ - std::make_shared(src0TransposedDesc->getDnnlDesc(), - src1TransposedDesc->getDnnlDesc(), - dstDnnlDesc->getDnnlDesc())}; + std::shared_ptr matmul_desc; - matmul::primitive_desc prim_desc; + if (withBiases) { + matmul_desc.reset(new mkldnn::matmul::desc{src0TransposedDesc->getDnnlDesc(), + src1TransposedDesc->getDnnlDesc(), + getBiasDescFrom(dstDnnlDesc), + dstDnnlDesc->getDnnlDesc()}); + } else { + matmul_desc.reset(new mkldnn::matmul::desc(src0TransposedDesc->getDnnlDesc(), + src1TransposedDesc->getDnnlDesc(), + dstDnnlDesc->getDnnlDesc())); + } + + MKLDNNDescriptor desc(matmul_desc); primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), *attr); + matmul::primitive_desc prim_desc; while (static_cast(itpd)) { impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); @@ -347,6 +418,10 @@ void MKLDNNMatMulNode::prepareParams() { primArgs[DNNL_ARG_SRC_0] = src0MemPtr->GetPrimitive(); primArgs[DNNL_ARG_WEIGHTS_0] = src1MemPtr->GetPrimitive(); primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive(); + if (withBiases) + primArgs[DNNL_ARG_BIAS] = getParentEdgeAt(2)->getMemoryPtr()->GetPrimitive(); + + appendPostOpArgs(*attr); } void MKLDNNMatMulNode::executeDynamicImpl(dnnl::stream strm) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h index 9c9489df237..ab3abd3a35c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h @@ -32,6 
+32,10 @@ public: return getOriginalInputsNumber(); } + size_t getFusingAxis() const override { + return getOutputShapeAtPort(0).getRank() - 1; + } + void prepareParams() override; void executeDynamicImpl(mkldnn::stream strm) override; @@ -39,11 +43,15 @@ public: const std::vector& getPrimitivesPriority() override; protected: - AttrPtr initPrimitiveAttr() const override; - AttrPtr initPrimitiveAttr(const VectorDims& dims) const; + AttrPtr initPrimitiveAttr() override; + AttrPtr initPrimitiveAttr(const VectorDims& dims); private: - void setPostOps(mkldnn::primitive_attr &attr, const VectorDims& dims, bool initWeights) const; + mkldnn::memory::desc getBiasDescFrom(const DnnlMemoryDescCPtr outMemDesc); + + bool withBiases; + + void setPostOps(mkldnn::primitive_attr &attr, const VectorDims& dims, bool initWeights); std::string errorPrefix; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp index 379253233ee..0a95c61d86e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp @@ -511,7 +511,7 @@ void MKLDNNPoolingNode::initDescriptor(const NodeConfig& config) { selectedPD->setConfig(rightConfig); } -MKLDNNNode::AttrPtr MKLDNNPoolingNode::initPrimitiveAttr() const { +MKLDNNNode::AttrPtr MKLDNNPoolingNode::initPrimitiveAttr() { auto attr = std::make_shared(mkldnn::primitive_attr()); setPostOps(*attr, true); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h index f3a6fc781cc..75f726d567b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h @@ -34,7 +34,7 @@ public: static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; protected: - AttrPtr initPrimitiveAttr() const override; + AttrPtr initPrimitiveAttr() override; private: void setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) const; diff --git a/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp b/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp index e9c213f23fb..ed4e94e1516 100644 --- a/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp +++ b/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp @@ -4,6 +4,13 @@ #pragma once +#include +#include +#include + +#include "ie_common.h" +#include "ie_layouts.h" + namespace MKLDNNPlugin { /** @@ -36,7 +43,9 @@ inline std::vector getNormalizedDimsBySize(const InferenceEngine::SizeVe * flag which specify how we compare C dims if value is undefined (weak or strong) * @return true if broadcastable, false otherwise. */ -inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVector &firstInputDims, const InferenceEngine::SizeVector& secondInputDims, +inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVector &firstInputDims, + const InferenceEngine::SizeVector& secondInputDims, + size_t channelAxis, bool weakComparison = false) { bool (*dimsEqual)(size_t, size_t) = weakComparison ? 
static_cast(dimsEqualWeak) : static_cast(dimsEqualStrong); @@ -47,7 +56,7 @@ inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVect std::vector normalizedSecondInputDims = getNormalizedDimsBySize(secondInputDims, firstInputDims.size()); for (size_t i = 0; i < normalizedSecondInputDims.size(); i++) { - if ((i == 1 && !dimsEqual(normalizedSecondInputDims[i], firstInputDims[1])) || (i != 1 && normalizedSecondInputDims[i] != 1)) + if ((i == channelAxis && !dimsEqual(normalizedSecondInputDims[i], firstInputDims[i])) || (i != channelAxis && normalizedSecondInputDims[i] != 1)) return false; } return true; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp index 776be228425..b612da99596 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp @@ -51,7 +51,7 @@ std::vector testValues = { { std::vector(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } }, { 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} }, { {}, {}, {} }, - "FullyConnected", + "MatMul", "U8" }, // 4D with Dq on weights @@ -61,7 +61,7 @@ std::vector testValues = { { std::vector(4 * 2, 2.f), ngraph::element::i8, ngraph::Shape{ 2, 4 } }, {}, { ngraph::element::f32, {}, {{0.1f, 0.01}, ngraph::element::f32, ngraph::Shape{ 2, 1 }} }, - "FullyConnected", + "MatMul", "U8" }, // 3D with the same values diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/mat_mul.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/mat_mul.cpp index 3241ebef007..58b2bbde7bb 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/mat_mul.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/mat_mul.cpp @@ -11,7 +11,8 @@ using namespace LayerTestsDefinitions; namespace { const std::vector inputPrecisions = { - InferenceEngine::Precision::FP32 + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::I32, }; const std::vector shapeRelatedParams = { diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mat_mul.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mat_mul.cpp index 367886f31e2..063e13e5d7d 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mat_mul.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/mat_mul.cpp @@ -2,9 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "shared_test_classes/single_layer/mat_mul.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" +#include "ie_precision.hpp" #include "test_utils/fusing_test_utils.hpp" #include "ngraph_functions/builders.hpp" +#include using namespace ngraph; using namespace InferenceEngine; @@ -139,11 +142,10 @@ protected: const auto& inShapeA = inputDynamicShapes[0]; const auto& inShapeB = inputDynamicShapes[1]; - /* @todo - * Currently nodes are not fused thought Reshape - * Check can be deleted after this limitation is gone - */ - if (nodeType == MatMulNodeType::MatMul && 
inShapeA.size() < 4 && inShapeB.size() < 4) + // see comment in MKLDNNMatMulNode::canFuse + if (!(nodeType == MatMulNodeType::MatMul && + std::get<0>(fusingParams) && std::get<0>(fusingParams)->getFusedOpsNames().find("(PerChannel)") != std::string::npos && + std::max(inShapeA.size(), inShapeB.size()) > 2)) std::tie(postOpMgrPtr, fusedOps) = fusingParams; configuration.insert(additionalConfig.begin(), additionalConfig.end()); @@ -179,6 +181,8 @@ TEST_P(MatMulLayerCPUTest, CompareWithRefs) { namespace { /* ============= Common params ============= */ +std::map emptyAdditionalConfig; + std::vector> additionalConfig { std::map{/* empty config */}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}} @@ -196,15 +200,16 @@ std::vector filterSpecificParams() { return specificParams; } +const auto fusingBias = fusingSpecificParams{std::make_shared(std::vector{ + {[](std::shared_ptr inpNode, const element::Type& ngPrc, ParameterVector& params) { + size_t last_dim = inpNode->get_output_partial_shape(0).rbegin()->get_length(); + auto bias = builder::makeConstant(ngPrc, Shape{last_dim}, std::vector{}, true); + return std::make_shared(inpNode, bias); + }, "fusingBias"}}), {"Add"}}; + /* ============= FullyConnected ============= */ namespace fullyConnected { -const auto fusingBiasFC = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const element::Type& ngPrc, ParameterVector& params) { - auto bias = builder::makeConstant(ngPrc, Shape({inpNode->get_output_shape(0).back()}), std::vector{}, true); - return std::make_shared(inpNode, bias); - }, "fusingBiasFC"}}), {"Add"}}; - const std::vector IS2D = { {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {false, false}}, {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {true, false}}, @@ -229,26 +234,46 @@ const std::vector IS2D = { std::vector fusingParamsSet2D { emptyFusingSpec, - fusingBiasFC, + fusingBias, fusingRelu, fusingMultiplyPerChannel, - fusingPReluPerTensor + fusingScaleShift, // EltwiseMulAdd fusing + fusingPReluPerTensor, + fusingFakeQuantizePerChannelRelu, + fusingFakeQuantizePerTensorRelu, }; -const auto fullyConnectedParams2D = ::testing::Combine(::testing::ValuesIn(IS2D), - ::testing::ValuesIn(netPRCs), - ::testing::Values(ElementType::undefined), - ::testing::Values(ElementType::undefined), - ::testing::Values(helpers::InputLayerType::CONSTANT), - ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::ValuesIn(additionalConfig)); +std::vector fusingParamsSet2DBF16 { + emptyFusingSpec, + fusingBias, + fusingRelu, + fusingPReluPerTensor, +}; -const auto testParams2D = ::testing::Combine(fullyConnectedParams2D, +const auto testParams2D = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::Values(emptyAdditionalConfig)), ::testing::Values(MatMulNodeType::FullyConnected), ::testing::ValuesIn(fusingParamsSet2D), ::testing::ValuesIn(filterSpecificParams())); +const auto testParams2DBF16 = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D), + ::testing::ValuesIn(netPRCs), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig)), 
+ ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet2DBF16), + ::testing::ValuesIn(filterSpecificParams())); + INSTANTIATE_TEST_SUITE_P(smoke_FC_2D, MatMulLayerCPUTest, testParams2D, MatMulLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_BF16, MatMulLayerCPUTest, testParams2DBF16, MatMulLayerCPUTest::getTestCaseName); const std::vector IS3D = { {static_shapes_to_test_representation({{1, 32, 120}, {120, 5}}), {false, false}}, @@ -266,23 +291,46 @@ const std::vector IS3D = { std::vector fusingParamsSet3D { emptyFusingSpec, - fusingBiasFC + fusingBias, + fusingMultiplyPerChannel, + fusingFakeQuantizePerChannel, + fusingFakeQuantizePerTensorRelu, +}; + +std::vector fusingParamsSet3DBF16 { + emptyFusingSpec, + fusingBias, + fusingMultiplyPerChannel, }; const auto fullyConnectedParams3D = ::testing::Combine(::testing::ValuesIn(IS3D), - ::testing::ValuesIn(netPRCs), + ::testing::Values(ElementType::f32), ::testing::Values(ElementType::undefined), ::testing::Values(ElementType::undefined), ::testing::Values(helpers::InputLayerType::CONSTANT), ::testing::Values(CommonTestUtils::DEVICE_CPU), - ::testing::ValuesIn(additionalConfig)); + ::testing::Values(emptyAdditionalConfig)); + +const auto fullyConnectedParams3DBF16 = ::testing::Combine(::testing::ValuesIn(IS3D), + ::testing::ValuesIn(netPRCs), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::CONSTANT), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig)); const auto testParams3D = ::testing::Combine(fullyConnectedParams3D, ::testing::Values(MatMulNodeType::FullyConnected), ::testing::ValuesIn(fusingParamsSet3D), ::testing::ValuesIn(filterSpecificParams())); +const auto testParams3DBF16 = ::testing::Combine(fullyConnectedParams3DBF16, + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn(fusingParamsSet3DBF16), + ::testing::ValuesIn(filterSpecificParams())); + INSTANTIATE_TEST_SUITE_P(smoke_FC_3D, MatMulLayerCPUTest, testParams3D, MatMulLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_BF16, MatMulLayerCPUTest, testParams3DBF16, MatMulLayerCPUTest::getTestCaseName); std::vector> filterAdditionalConfig_Brgemm() { std::vector> additionalConfig = { @@ -357,7 +405,9 @@ const std::vector IS = { {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, false}}, {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, true}}, {static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, true}}, +}; +const std::vector IS_Dynamic = { { { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} {{-1, -1}, {{55, 12}, {33, 7}}}, // input 0 @@ -507,7 +557,16 @@ const std::vector IS = { std::vector matmulFusingParams { emptyFusingSpec, fusingElu, - fusingSqrt + fusingSqrt, + fusingPReluPerTensor, + fusingMultiplyPerChannel, + fusingAddPerTensor, + fusingBias, + fusingFakeQuantizePerChannel, + /* @todo FQ unfolds into FQ + Convert + Substract + Multiply after LPT, + * so Relu cannot be fused in this case. 
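* (likely because the dequantization Subtract/Multiply land between the FakeQuantize and
* the Relu, so the expected fused-ops pattern no longer matches; this is an assumption)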
Should be analysed */ + // fusingFakeQuantizePerChannelRelu, + fusingFakeQuantizePerTensorRelu, }; const auto matMulParams = ::testing::Combine(::testing::ValuesIn(IS), @@ -523,7 +582,70 @@ const auto testParams = ::testing::Combine(matMulParams, ::testing::ValuesIn(matmulFusingParams), ::testing::ValuesIn(filterSpecificParams())); -INSTANTIATE_TEST_SUITE_P(smoke_MM, MatMulLayerCPUTest, testParams, MatMulLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_MM_Static, MatMulLayerCPUTest, testParams, MatMulLayerCPUTest::getTestCaseName); + + +const auto matMulParamsDynamic = ::testing::Combine(::testing::ValuesIn(IS_Dynamic), + ::testing::ValuesIn(netPRCs), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig)); + +const auto testParamsDynamic = ::testing::Combine(matMulParamsDynamic, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::Values(emptyFusingSpec), + ::testing::ValuesIn(filterSpecificParams())); + +INSTANTIATE_TEST_SUITE_P(smoke_MM_Dynamic, MatMulLayerCPUTest, testParamsDynamic, MatMulLayerCPUTest::getTestCaseName); + + +const std::vector IS_Dynamic_Fusing = { + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1}, {{16, 12}, {33, 7}}}, // input 0 + {{-1, 33}, {{12, 33}, {7, 33}}} // input 1 + }, + {false, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, // input 0 + {{-1, 5}, {{60, 5}, {30, 5}}} // input 1 + }, + {false, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, // input 0 + {{-1, -1, -1, 25}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} // input 1 + }, + {false, false} + }, + { + { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, // input 0 + {{-1, -1, 5}, {{10, 10, 5}, {5, 5, 5}}} // input 1 + }, + {false, false} + }, +}; + +const auto matMulParamsDynamicFusing = ::testing::Combine(::testing::ValuesIn(IS_Dynamic_Fusing), + ::testing::ValuesIn(netPRCs), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(helpers::InputLayerType::PARAMETER), + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(additionalConfig)); + +const auto testParamsDynamicFusing = ::testing::Combine(matMulParamsDynamicFusing, + ::testing::Values(MatMulNodeType::MatMul), + ::testing::ValuesIn(matmulFusingParams), + ::testing::ValuesIn(filterSpecificParams())); + +INSTANTIATE_TEST_SUITE_P(smoke_MM_Dynamic_Fusing, MatMulLayerCPUTest, testParamsDynamicFusing, MatMulLayerCPUTest::getTestCaseName); } // namespace matmul diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/reshape_fc.cpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/reshape_fc.cpp deleted file mode 100644 index 40b5215bd74..00000000000 --- a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/reshape_fc.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright (C) 2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include 
"test_utils/fusing_test_utils.hpp" -#include "ngraph_functions/builders.hpp" - -using namespace ngraph; -using namespace InferenceEngine; -using namespace CPUTestUtils; - -namespace SubgraphTestsDefinitions { - -using ReshapeFCTestParams = std::tuple, // IS fully connected - bool, // transpose B - fusingSpecificParams>; - -class ReshapeFCTest : public testing::WithParamInterface, public CpuTestWithFusing, - virtual public LayerTestsUtils::LayerTestsCommon { -public: - static std::string getTestCaseName(testing::TestParamInfo obj) { - std::pair isFc; - bool transpB; - fusingSpecificParams fusingParams; - std::tie(isFc, transpB, fusingParams) = obj.param; - SizeVector isA = isFc.first; SizeVector isB = isFc.second; - - std::ostringstream result; - result << "IS_reshape=" << CommonTestUtils::vec2str(isA) << "_"; - result << "IS_fc_B=" << CommonTestUtils::vec2str(isB) << "_"; - result << "Transp_B=" << transpB; - result << CpuTestWithFusing::getTestCaseName(fusingParams); - - return result.str(); - } - -protected: - void SetUp() override { - targetDevice = CommonTestUtils::DEVICE_CPU; - std::pair isFc; - bool transpB; - fusingSpecificParams fusingParams; - std::tie(isFc, transpB, fusingParams) = this->GetParam(); - std::tie(postOpMgrPtr, fusedOps) = fusingParams; - SizeVector isReshape = isFc.first; SizeVector isB = isFc.second; - SizeVector isA(2); - isA[0] = isReshape[0]; - isA[1] = std::accumulate(isReshape.begin() + 1, isReshape.end(), size_t{1}, std::multiplies()); - if (transpB) { - std::swap(*(isB.end() - 1), *(isB.end() - 2)); - } - - auto inputParams = builder::makeParams(element::f32, {isReshape}); - auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(inputParams)); - - auto constNode = builder::makeConstant(element::i64, {isA.size()}, isA); - auto reshape = std::make_shared(paramOuts[0], constNode, true); - - auto matrixB = builder::makeConstant(element::f32, isB, {}, true); - auto matMul = builder::makeMatMul(reshape, matrixB, false, transpB); - - const auto netType = element::f32; - selectedType = makeSelectedTypeStr("jit_gemm", netType); - - function = makeNgraphFunction(netType, inputParams, matMul, "ReshapeFC"); - } -}; - -TEST_P(ReshapeFCTest, CompareWithRefs) { - SKIP_IF_CURRENT_TEST_IS_DISABLED() - - Run(); - CheckNodeOfTypeCount(executableNetwork, "Reshape", 0); - CheckPluginRelatedResults(executableNetwork, "FullyConnected"); -} - -namespace { - -const std::vector transpose = { - true, false -}; - -const std::vector> isFC = { - {{71, 128, 1, 1}, {128, 20}}, - {{1, 24, 2, 7}, {336, 16}} -}; - -std::vector fusingParamsSet { - emptyFusingSpec, - fusingAddPerChannel -}; - -const auto reshapeFCParams = ::testing::Combine(::testing::ValuesIn(isFC), - ::testing::ValuesIn(transpose), - ::testing::ValuesIn(fusingParamsSet)); - -INSTANTIATE_TEST_SUITE_P(smoke_Check, ReshapeFCTest, reshapeFCParams, ReshapeFCTest::getTestCaseName); - -} // namespace - -} // namespace SubgraphTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp index 22fd8278b4a..709269dc4ab 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp @@ -5,6 +5,7 @@ #pragma once #include "cpu_test_utils.hpp" +#include #include namespace CPUTestUtils { @@ -75,6 +76,24 @@ protected: bool checkFusingPosition = true; }; +static size_t getFusingAxis(const 
std::shared_ptr& node) { + if (std::dynamic_pointer_cast(node)) + return node->get_output_partial_shape(0).size() - 1; // last dimension + else + return 1; // second dimension +} + +static ngraph::Shape generatePerChannelShape(const std::shared_ptr& node) { + const auto shape = node->get_output_partial_shape(0); + if (shape.size() == 1) + IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only"; + ngraph::Shape perChannelShape(shape.size(), 1); + const auto channelAxis = getFusingAxis(node); + perChannelShape[channelAxis] = shape[channelAxis].get_length(); + + return perChannelShape; +} + /* FUSING PATTERNS */ const auto emptyFusingSpec = fusingSpecificParams{nullptr, {}}; @@ -120,11 +139,7 @@ const auto fusingSqrt = fusingSpecificParams{std::make_shared(std: const auto fusingPReluPerChannel = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto shape = inpNode->get_output_partial_shape(0); - if (shape.size() == 1) - IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only"; - ngraph::Shape newShape(shape.size(), 1); - newShape[1] = shape[1].get_length(); + ngraph::Shape newShape = generatePerChannelShape(inpNode); auto data = NGraphFunctions::Utils::generateVector(ngraph::shape_size(newShape)); return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::LeakyRelu, newShape, data); }, "PRelu(PerChannel)"}}), {"PRelu"}}; @@ -166,11 +181,7 @@ const auto fusingReluAdd = fusingSpecificParams{std::make_shared(s return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu); }, "Relu"}, {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto shape = inpNode->get_output_partial_shape(0); - if (shape.size() == 1) - IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only"; - ngraph::Shape newShape(shape.size(), 1); - newShape[1] = shape[1].get_length(); + ngraph::Shape newShape = generatePerChannelShape(inpNode); auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); return std::make_shared(inpNode, constNode); }, "Add(PerChannel)"}}), {"Relu", "Add"}}; @@ -180,40 +191,24 @@ const auto fusingReluScaleShift = fusingSpecificParams{std::make_shared inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto shape = inpNode->get_output_partial_shape(0); - if (shape.size() == 1) - IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only"; - ngraph::Shape newShape(shape.size(), 1); - newShape[1] = shape[1].get_length(); - auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); - return std::make_shared(inpNode, constNode); + ngraph::Shape newShape = generatePerChannelShape(inpNode); + auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); + return std::make_shared(inpNode, constNode); }, "Multiply(PerChannel)"}, {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto shape = inpNode->get_output_partial_shape(0); - if (shape.size() == 1) - IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only"; - ngraph::Shape newShape(shape.size(), 1); - newShape[1] = shape[1].get_length(); + ngraph::Shape newShape = generatePerChannelShape(inpNode); auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); return std::make_shared(inpNode, constNode); }, 
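/* Reviewer illustration, not part of the patch: generatePerChannelShape() picks the
 * channel axis via getFusingAxis() above (last dimension for MatMul, second dimension
 * otherwise) and returns a shape of ones with only that axis populated. With assumed
 * output shapes:
 *   MatMul output {2, 32, 60}   -> {1, 1, 60}
 *   Conv   output {1, 16, 7, 7} -> {1, 16, 1, 1}
 */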
"Add(PerChannel)"}}), {"Relu", "Add"}}; const auto fusingScaleShift = fusingSpecificParams{ std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { - auto shape = inpNode->get_output_partial_shape(0); - if (shape.size() == 1) - IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only"; - ngraph::Shape newShape(shape.size(), 1); - newShape[1] = shape[1].get_length(); - auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); - return std::make_shared(inpNode, constNode); + ngraph::Shape newShape = generatePerChannelShape(inpNode); + auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); + return std::make_shared(inpNode, constNode); }, "Multiply(PerChannel)"}, {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { - auto shape = inpNode->get_output_partial_shape(0); - if (shape.size() == 1) - IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only"; - ngraph::Shape newShape(shape.size(), 1); - newShape[1] = shape[1].get_length(); + ngraph::Shape newShape = generatePerChannelShape(inpNode); auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); return std::make_shared(inpNode, constNode); }, "Add(PerChannel)"}}), {"Add"} }; @@ -228,22 +223,14 @@ const auto fusingFakeQuantizePerTensor = fusingSpecificParams{ std::make_shared< const auto fusingFakeQuantizePerChannel = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ auto localPrc = inpNode->get_element_type(); - auto shape = inpNode->get_output_partial_shape(0); - if (shape.size() == 1) - IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only"; - ngraph::Shape newShape(shape.size(), 1); - newShape[1] = shape[1].get_length(); + ngraph::Shape newShape = generatePerChannelShape(inpNode); return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"}}; const auto fusingFakeQuantizePerChannelRelu = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ auto localPrc = inpNode->get_element_type(); - auto shape = inpNode->get_output_partial_shape(0); - if (shape.size() == 1) - IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only"; - ngraph::Shape newShape(shape.size(), 1); - newShape[1] = shape[1].get_length(); + ngraph::Shape newShape = generatePerChannelShape(inpNode); return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}, {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ @@ -291,60 +278,56 @@ const auto fusingSumEluFQ = fusingSpecificParams{std::make_shared( const auto fusingMultiplyPerTensor = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ ngraph::Shape secondMultInShape(1, 1); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); return std::make_shared(inpNode, secondMultInput); }, "Multiply(PerTensor)"}}), {"Multiply"}}; const auto 
fusingMultiplyPerChannel = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape secondMultInShape(inpNode->get_output_partial_shape(0).size(), 1); - secondMultInShape[1] = inpNode->get_output_partial_shape(0)[1].get_length(); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode); + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); return std::make_shared(inpNode, secondMultInput); }, "Multiply(PerChannel)"}}), {"Multiply"}}; const auto fusingAddPerTensor = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ ngraph::Shape secondMultInShape(1, 1); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); return std::make_shared(inpNode, secondMultInput); }, "Add(PerTensor)"}}), {"Add"}}; const auto fusingAddPerChannel = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape secondMultInShape(inpNode->get_output_partial_shape(0).size(), 1); - secondMultInShape[1] = inpNode->get_output_partial_shape(0)[1].get_length(); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode); + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); return std::make_shared(inpNode, secondMultInput); }, "Add(PerChannel)"}}), {"Add"}}; const auto fusingSubtractPerTensor = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ ngraph::Shape secondMultInShape(1, 1); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); return std::make_shared(inpNode, secondMultInput); }, "Subtract(PerTensor)"}}), {"Subtract"}}; const auto fusingSubtractPerChannel = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape secondMultInShape(inpNode->get_output_partial_shape(0).size(), 1); - secondMultInShape[1] = inpNode->get_output_partial_shape(0)[1].get_length(); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode); + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); return std::make_shared(inpNode, secondMultInput); }, "Subtract(PerChannel)"}}), {"Subtract"}}; const auto fusingDividePerTensor = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ ngraph::Shape secondMultInShape(1, 1); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, 
ngraph::Shape(secondMultInShape), std::vector{}, true); + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); return std::make_shared(inpNode, secondMultInput); }, "Divide(PerTensor)"}}), {"Divide"}}; const auto fusingDividePerChannel = fusingSpecificParams{std::make_shared(std::vector{ {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape secondMultInShape(inpNode->get_output_partial_shape(0).size(), 1); - secondMultInShape[1] = inpNode->get_output_partial_shape(0)[1].get_length(); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector{}, true); + ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode); + auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); return std::make_shared(inpNode, secondMultInput); }, "Divide(PerChannel)"}}), {"Divide"}}; diff --git a/inference-engine/tests/functional/shared_test_classes/src/single_layer/mat_mul.cpp b/inference-engine/tests/functional/shared_test_classes/src/single_layer/mat_mul.cpp index c75c8995205..e3a54df0812 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/single_layer/mat_mul.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/single_layer/mat_mul.cpp @@ -44,7 +44,7 @@ std::string MatMulTest::getTestCaseName(const testing::TestParamInfo #include #include -#include #include #include #include @@ -171,7 +170,6 @@ TEST(TransformationTests, ConvertMatMulToFCTest7) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.register_pass(); m.run_passes(f); ASSERT_NO_THROW(check_rt_info(f)); } @@ -179,12 +177,9 @@ TEST(TransformationTests, ConvertMatMulToFCTest7) { { auto input1 = std::make_shared(ngraph::element::f32, ngraph::Shape{3, 2, 2}); auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{3, 2}, {1}); - auto reshape_begin = std::make_shared( - input1, ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{2}, std::vector{-1, 2}), false); - auto fc = std::make_shared(reshape_begin, input2, ngraph::Rank(2)); - auto reshape_end = ngraph::op::util::reshapeTo(fc, ngraph::Shape{3, 2, 3}); + auto fc = std::make_shared(input1, input2, ngraph::Rank(2)); - f_ref = std::make_shared(ngraph::NodeVector{reshape_end}, ngraph::ParameterVector{input1}); + f_ref = std::make_shared(ngraph::NodeVector{fc}, ngraph::ParameterVector{input1}); } auto res = compare_functions(f, f_ref, true); @@ -202,7 +197,6 @@ TEST(TransformationTests, ConvertMatMulToFCTest8) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.register_pass(); m.run_passes(f); ASSERT_NO_THROW(check_rt_info(f)); } @@ -211,18 +205,14 @@ TEST(TransformationTests, ConvertMatMulToFCTest8) { auto input1 = std::make_shared(ngraph::element::f32, ngraph::PartialShape{-1, -1, 2}); auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{3, 2}, {1}); - auto reshape_begin = std::make_shared( - input1, ngraph::opset1::Constant::create(ngraph::element::i64, {2}, {-1, 2}), false); - - auto fc = std::make_shared(reshape_begin, input2, ngraph::Rank(2)); + auto fc = std::make_shared(input1, input2, ngraph::Rank(2)); auto a_shape = std::make_shared(input1); auto I = ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_node(a_shape, {0, 1}); auto O = ngraph::opset1::Constant::create(ngraph::element::i64, { 1 }, { 3 }); auto output_shape = 
std::make_shared(ngraph::OutputVector{I, O}, 0); - auto reshape_end = std::make_shared(fc, output_shape, false); - f_ref = std::make_shared(ngraph::NodeVector{reshape_end}, ngraph::ParameterVector{input1}); + f_ref = std::make_shared(ngraph::NodeVector{fc}, ngraph::ParameterVector{input1}); } auto res = compare_functions(f, f_ref, true); @@ -268,7 +258,6 @@ TEST(TransformationTests, ConvertMatMulToFCTest10) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.register_pass(); ASSERT_NO_THROW(m.run_passes(f)); } @@ -439,25 +428,22 @@ TEST(TransformationTests, ConvertMatMulToFCTest_second_input_rank_adj_1) { std::shared_ptr f(nullptr), f_ref(nullptr); { auto input1 = std::make_shared(ngraph::element::f32, ngraph::Shape{5, 2, 3}); - auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 2, 3}, {1}); + auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1, 2, 3}, {1}); auto matmul = std::make_shared(input1, input2, false, true); f = std::make_shared(ngraph::NodeVector{matmul}, ngraph::ParameterVector{input1}); ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.register_pass(); m.run_passes(f); ASSERT_NO_THROW(check_rt_info(f)); } { auto input1 = std::make_shared(ngraph::element::f32, ngraph::Shape{5, 2, 3}); - auto reshape_1 = std::make_shared(input1, ngraph::opset1::Constant::create(ngraph::element::i64, {2}, {-1, 3}), false); auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{2, 3}, {1}); - auto matmul = std::make_shared(reshape_1, input2, ngraph::Rank(2)); - auto reshape_out = std::make_shared(matmul, ngraph::opset1::Constant::create(ngraph::element::i64, {4}, {1, 5, 2, 2}), false); - f_ref = std::make_shared(ngraph::NodeVector{reshape_out}, ngraph::ParameterVector{input1}); + auto matmul = std::make_shared(input1, input2, ngraph::Rank(2)); + f_ref = std::make_shared(ngraph::NodeVector{matmul}, ngraph::ParameterVector{input1}); } auto res = compare_functions(f, f_ref, true); @@ -475,7 +461,6 @@ TEST(TransformationTests, ConvertMatMulToFCTest_second_input_rank_adj_2) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.register_pass(); m.run_passes(f); ASSERT_NO_THROW(check_rt_info(f)); } @@ -495,9 +480,9 @@ TEST(TransformationTests, ConvertMatMulToFCTest_second_input_rank_adj_3) { std::shared_ptr f(nullptr), f_ref(nullptr); { auto input1 = std::make_shared(ngraph::element::f32, ngraph::Shape{ 5, 2, 3 }); - auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 1, 1, 2, 3 }, { 1 }); + auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 1, 2, 3 }, { 1 }); auto matmul = std::make_shared(input1, weights, false, true); - auto biases = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 1, 1, 1, 2 }, { 1 }); + auto biases = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 1, 1, 2 }, { 1 }); auto add = std::make_shared(matmul, biases); f = std::make_shared(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input1 }); @@ -505,7 +490,6 @@ TEST(TransformationTests, ConvertMatMulToFCTest_second_input_rank_adj_3) { m.register_pass(); m.register_pass(); m.register_pass(); - m.register_pass(); m.run_passes(f); ASSERT_NO_THROW(check_rt_info(f)); } @@ -513,53 +497,13 @@ TEST(TransformationTests, ConvertMatMulToFCTest_second_input_rank_adj_3) { { auto input1 = std::make_shared(ngraph::element::f32, ngraph::Shape{ 5, 2, 3 }); auto reshape_before_const = 
ngraph::opset1::Constant::create(ngraph::element::i64, { 2 }, { -1, 3 }); - auto reshape_1 = std::make_shared(input1, reshape_before_const, false); auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 2, 3 }, { 1 }); auto biases = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 2 }, { 1 }); - auto matmul = std::make_shared(reshape_1, weights, biases, ngraph::Rank(2)); + auto matmul = std::make_shared(input1, weights, biases, ngraph::Rank(2)); auto reshape_after_const = ngraph::opset1::Constant::create(ngraph::element::i64, { 4 }, { 1, 5, 2, 2 }); - auto reshape_out = std::make_shared(matmul, reshape_after_const, false); - f_ref = std::make_shared(ngraph::NodeVector{ reshape_out }, ngraph::ParameterVector{ input1 }); - } - - auto res = compare_functions(f, f_ref, true); - ASSERT_TRUE(res.first) << res.second; -} - -TEST(TransformationTests, ConvertMatMulToFCTest_second_input_rank_adj_dynamic) { - std::shared_ptr f(nullptr), f_ref(nullptr); - { - auto input1 = std::make_shared(ngraph::element::f32, ngraph::PartialShape{-1, 2, 3}); - auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 2, 3}, {1}); - auto matmul = std::make_shared(input1, input2, false, true); - - f = std::make_shared(ngraph::NodeVector{matmul}, ngraph::ParameterVector{input1}); - ngraph::pass::Manager m; - m.register_pass(); - m.register_pass(); - m.register_pass(); - m.run_passes(f); - ASSERT_NO_THROW(check_rt_info(f)); - } - - { - auto input1 = std::make_shared(ngraph::element::f32, ngraph::PartialShape{-1, 2, 3}); - auto reshape_1 = std::make_shared(input1, ngraph::opset1::Constant::create(ngraph::element::i64, {2}, {-1, 3}), false); - auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{2, 3}, {1}); - auto matmul = std::make_shared(reshape_1, input2, ngraph::Rank(2)); - - auto shape_of = std::make_shared(input1); - auto gather = std::make_shared( - shape_of, ngraph::opset1::Constant::create(ngraph::element::i64, {2}, {0, 1}), ngraph::opset1::Constant::create(ngraph::element::i64, {}, {0})); - auto concat = std::make_shared(ngraph::OutputVector{ - ngraph::opset1::Constant::create(ngraph::element::i64, {1}, {1}), - gather, - ngraph::opset1::Constant::create(ngraph::element::i64, {1}, {2}), - }, 0); - auto reshape_out = std::make_shared(matmul, concat, false); - f_ref = std::make_shared(ngraph::NodeVector{reshape_out}, ngraph::ParameterVector{input1}); + f_ref = std::make_shared(ngraph::NodeVector{ matmul }, ngraph::ParameterVector{ input1 }); } auto res = compare_functions(f, f_ref, true);
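/* Reviewer illustration, not part of the patch: with the explicit channelAxis argument,
 * the per-channel branch of isPerTensorOrPerChannelBroadcastable() compares the second
 * input against the first one along the chosen channel axis and requires every other
 * dimension to be 1. A minimal usage sketch with assumed dims and the channel axis set
 * to the last dimension, as MatMul now does:
 *   isPerTensorOrPerChannelBroadcastable({2, 32, 60}, {1, 1, 60}, 2);  // true  - per channel
 *   isPerTensorOrPerChannelBroadcastable({2, 32, 60}, {1, 32, 1}, 2);  // false - broadcast over a non-channel dim
 */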