[CPU] Enable matmul deconv bin postops (#8009)

parent 2b9c4a7f42
commit 3f6a026ae9
@@ -59,7 +59,7 @@ MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {}

void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "ApplyCommonGraphOptimizations", "FuseConvolutionAndBias");
FuseConvolutionAndBias(graph);
FuseConvolutionMatMulAndBias(graph);
graph.RemoveDroppedNodes();

OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMultiplyAndAdd");
@@ -166,37 +166,38 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
graph.RemoveDroppedEdges();
}

void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) {
void MKLDNNGraphOptimizer::FuseConvolutionMatMulAndBias(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();

auto isSuitableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == Convolution &&
auto isSuitableParentNode = [](const MKLDNNNodePtr& node) {
return (node->getType() == Convolution || node->getType() == MatMul) &&
node->getChildEdges().size() == 1 &&
node->getParentEdges().size() == 2 &&
node->getFusedWith().empty();
};

auto isSuitableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
auto isSuitableChildNode = [&](const MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) {
if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2)
return false;

auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent();
const auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent();
if (biasNode->getType() != Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1)
return false;

auto convOutDims = parentNode->getOutputShapeAtPort(0).getDims();
auto biasDims = getNormalizedDimsBySize(biasNode->getOutputShapeAtPort(0).getDims(),
convOutDims.size());
const auto parentOutDims = parentNode->getOutputShapeAtPort(0).getDims();
const auto biasDims = getNormalizedDimsBySize(biasNode->getOutputShapeAtPort(0).getDims(),
parentOutDims.size());
// TODO [NM]: Legacy ConvBias fusion transformation supports both per-tensor (via explicit broadcasting) and per-channel cases.
// Most of the real models contain per-channel bias, so we need to reevaluate the need to support per-tensor variant.
if (convOutDims.size() != biasDims.size() || biasDims.size() < 2)
if (parentOutDims.size() != biasDims.size() || biasDims.size() < 2)
return false;

if (biasDims[0] != 1 || !dimsEqualStrong(biasDims[1], convOutDims[1]))
const auto channelAxis = parentNode->getFusingAxis();
if (!dimsEqualStrong(biasDims[channelAxis], parentOutDims[channelAxis]))
return false;

for (int i = 2; i < biasDims.size(); i++) {
if (biasDims[i] != 1)
for (int i = 0; i < biasDims.size(); i++) {
if (biasDims[i] != 1 && i != channelAxis)
return false;
}

@@ -262,13 +263,13 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) {
graph.RemoveEdge(remEdge);
}

auto parentEltwise = parentNode;
const auto& parentEltwise = parentNode;
MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size()));
auto &graphEdges = graph.GetEdges();
auto& graphEdges = graph.GetEdges();
graphEdges.push_back(newEdge);
parent->addEdge(newEdge);

auto partialShape = { parentEltwise->outputShapes[0].toPartialShape()[1] };
auto partialShape = { parentEltwise->outputShapes[0].toPartialShape()[parentEltwise->getFusingAxis()] };
parent->outputShapes[inNum] = Shape(partialShape);
parentEltwise->inputShapes.push_back(parent->outputShapes[0]);
}
@@ -627,7 +628,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
}
}

static bool BF16QuantizeNodeFusing(MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
/**
 * @todo FQ fusing was disabled for BF16 output since oneDNN primitives lack support
 * for bf16 depthwise postops.
 * This is not the case anymore, because after migration to oneDNN 2.3 FQ will be fused as
 * multiple binary post ops.
 * This check can already be removed for FC fusing, but should be kept for Convolution,
 * which still uses legacy depthwise postops for performance reasons.
 */
static bool BF16QuantizeNodeFusing(const MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) {
return childNode->getType() == FakeQuantize &&
one_of(Precision::BF16,
parentNode->getOriginalOutputPrecisionAtPort(0),
@@ -638,7 +647,7 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra
auto& graphNodes = graph.GetNodes();

auto isSuitableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getInputShapeAtPort(0).getRank() != 3;
return node->getType() == FullyConnected && node->getChildEdges().size() == 1;
};

auto parent = graphNodes.begin();
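Editor's note: the generalized bias check introduced above (Convolution or MatMul producer, channel taken from getFusingAxis()) reduces to a simple shape predicate. The sketch below is illustrative only, written with plain std:: types; it assumes the bias rank has already been normalized to the producer's output rank and ignores dynamic dimensions (dimsEqualStrong):

#include <cstddef>
#include <vector>

// A constant bias is fusable when every dimension is 1 except the fusing (channel) axis,
// which must match the producer's output size at that axis.
bool isFusableBias(const std::vector<std::size_t>& parentOutDims,
                   const std::vector<std::size_t>& biasDims,
                   std::size_t channelAxis) {
    if (parentOutDims.size() != biasDims.size() || biasDims.size() < 2)
        return false;
    if (biasDims[channelAxis] != parentOutDims[channelAxis])
        return false;
    for (std::size_t i = 0; i < biasDims.size(); i++) {
        if (i != channelAxis && biasDims[i] != 1)
            return false;
    }
    return true;
}

For an NCHW Convolution output the fusing axis is 1, so a fusable bias looks like {1, C, 1, 1}; the loop now runs over all axes (instead of starting at 2) precisely so that other fusing axes, such as the channel axis of a MatMul output, are handled by the same code.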
@@ -19,7 +19,7 @@ public:
void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph);

private:
void FuseConvolutionAndBias(MKLDNNGraph &graph);
void FuseConvolutionMatMulAndBias(MKLDNNGraph &graph);
void FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph);
void FuseMultiplyAndAdd(MKLDNNGraph &graph);
void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph);
@@ -4,6 +4,7 @@

#include "mkldnn_node.h"
#include "dnnl_debug.h"
#include "mkldnn_edge.h"
#include "mkldnn_extension_mngr.h"
#include "mkldnn_itt.h"

@@ -1048,6 +1049,16 @@ void MKLDNNNode::setDynamicBatchLim(int lim) {
}
}

void MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr) {
auto post_ops = attr.get_post_ops();
int idx = 0;
for (int i = 0; i < post_ops.len(); i++) {
if (post_ops.kind(i) == mkldnn::primitive::kind::binary) {
primArgs.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, binaryPostOpsArgs[idx++]->GetPrimitive()});
}
}
}

bool MKLDNNNode::isFusedWith(Type fusedNodeType) const {
for (auto fusedNode : fusedWith) {
if (fusedNode->type == fusedNodeType)
@@ -1078,10 +1089,14 @@ Layout MKLDNNNode::getWeightsLayoutByDims(SizeVector dims, bool isGrouped) {
}
}

void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
void MKLDNNNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
IE_THROW() << "Fusing of " << this->getType() << " operation is not implemented";
}

void MKLDNNNode::appendBinPostOps(mkldnn::post_ops& ops, const std::vector<size_t>& binaryShape, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) {
IE_THROW() << "Binary fusing of " << this->getType() << " operation is not implemented";
}

std::vector<InferenceEngine::Precision> MKLDNNNode::getInputPrecisions() const {
std::vector<InferenceEngine::Precision> inputPrecisions;
for (size_t i = 0; i < getParentEdges().size(); i++) {
@@ -1205,6 +1220,9 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr<ngraph::Node>

bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const {
size_t fusingPort = 0;
// @todo graph optimizer can provide parentNode as nullptr. Should be avoided
const size_t channelAxis = parentNode ? parentNode->getFusingAxis() : MKLDNNNode::getFusingAxis();

for (size_t i = (parentNode == nullptr ? 1 : 0); i < getParentEdges().size(); i++) {
MKLDNNNode *node = getParentEdgesAtPort(i)[0]->getParent().get();
if (node == nullptr) {
@@ -1225,7 +1243,8 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
if (i == fusingPort)
continue;
auto& weightShape = getInputShapeAtPort(i).getDims();
if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, true))
if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 ||
!isPerTensorOrPerChannelBroadcastable(dataShape, weightShape, channelAxis, true))
return false;
}
return true;
@@ -1246,6 +1265,9 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const
|| isConvertablePowerStatic();
}

// @todo shifts for Subtract and scales for Divide are replaced with
// Add (with opposite sign) and Multiply (with inverse value) for legacy depthwise post ops
// This can be avoided after depthwise post ops are gone
std::pair<std::vector<float>, std::vector<float>> MKLDNNNode::getScalesAndShifts(const MKLDNNNode *parentNode) const {
std::vector<float> scales, shifts;

@@ -1408,10 +1430,11 @@ bool MKLDNNNode::canFuseSimpleOperation(const MKLDNNNodePtr& node) const {
}
return ret;
} else if (node->getType() == Eltwise) {
return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) ||
node->canBePerformedAsScaleShift(this);
return one_of(node->getAlgorithm(),
EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu) ||
node->canBePerformedAsScaleShift(this);
}
return false;
}
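Editor's note: appendPostOpArgs() added above is the common helper that feeds one extra SRC_1 tensor per binary post op into the primitive's argument map. A standalone sketch of the same wiring with raw oneDNN types (the binaryMemories vector is a hypothetical stand-in for the node's binaryPostOpsArgs):

#include <oneapi/dnnl/dnnl.hpp>
#include <unordered_map>
#include <vector>

void appendBinaryArgs(const dnnl::primitive_attr& attr,
                      const std::vector<dnnl::memory>& binaryMemories,
                      std::unordered_map<int, dnnl::memory>& args) {
    const auto post_ops = attr.get_post_ops();
    int idx = 0;
    for (int i = 0; i < post_ops.len(); i++) {
        // each binary post op is keyed by its position in the post-op chain
        if (post_ops.kind(i) == dnnl::primitive::kind::binary)
            args.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, binaryMemories[idx++]});
    }
}

Convolution, Deconvolution and FullyConnected all call the member version of this helper after filling primArgs, instead of repeating the loop locally.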
@@ -204,6 +204,12 @@ public:

bool isConstant();

virtual size_t getFusingAxis() const {
return 1;
}

void appendPostOpArgs(const mkldnn::primitive_attr& attr);

bool isFusedWith(Type type) const;

void addFusedNode(const MKLDNNNodePtr &fusingNode) {
@@ -594,8 +600,10 @@ protected:
* Seed node should call this routine and pass its post operations list as parameter.
* @param ops List of fused post operations
*/
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false);
virtual AttrPtr initPrimitiveAttr() const { return nullptr; }
virtual void appendPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, int align = -1);
virtual void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem);

virtual std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() { return nullptr; }

typedef std::function<DnnlMemoryDescPtr (mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx)>
GetPrimitiveMemoryFormatFunc;
@@ -636,7 +644,7 @@ protected:
std::vector<MKLDNNMemoryPtr> internalBlobMemory;
std::vector<NodeDesc> supportedPrimitiveDescriptors;
std::unordered_map<int, mkldnn::memory> primArgs;
std::vector<mkldnn::memory> binaryPostOpsArgs;
std::vector<MKLDNNMemoryPtr> binaryPostOpsArgs;
MKLDNNPrimitive prim;
std::vector<MKLDNNDescriptor> descs;
@@ -36,8 +36,9 @@ MKLDNNPlugin::ConvertMatMulToFC::ConvertMatMulToFC() {
auto rank_a = shape_a.rank().get_length();
auto rank_b = shape_b.rank().get_length();

// Transformation to FC is not supported for 1D second input
if (rank_b == 1) {
// Transformation to FC is not supported for 1D inputs
if (rank_a == 1 || rank_b == 1 ||
rank_a > 3 || rank_b > 3) {
return false;
}

@@ -47,7 +48,6 @@ MKLDNNPlugin::ConvertMatMulToFC::ConvertMatMulToFC() {
std::count_if(shape_b.begin(), shape_b.end(), [](ngraph::Dimension x) { return x != 1; }) > 2) {
return false;
}

/*
* get_aligned_shapes function align two input shapes to have the same size and
* the same batch dimensions (last two dimensions are not comparable).
@@ -7,7 +7,6 @@
#include "ngraph/op/fake_quantize.hpp"
#include "ngraph/pass/manager.hpp"
#include "reshape_fc_fusion.hpp"
#include "reshape_fully_connected.hpp"
#include "align_matmul_input_ranks.hpp"
#include "reshape_prelu.hpp"
#include "convert_broadcast_to_tiles.hpp"
@@ -29,7 +28,6 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphF
manager.register_pass<AlignMatMulInputRanks>();
manager.register_pass<ConvertTileToSeqTiles>();
manager.register_pass<FullyConnectedBiasFusion>();
manager.register_pass<ReshapeFullyConnected>();
manager.register_pass<ConvertToPowerStatic>();
manager.register_pass<ConvertToLeakyRelu>();
manager.register_pass<ReshapePRelu>();
@@ -1,114 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "reshape_fully_connected.hpp"
#include "op/fully_connected.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <transformations/utils/utils.hpp>
#include <numeric>

NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::ReshapeFullyConnected, "ReshapeFullyConnected", 0);

MKLDNNPlugin::ReshapeFullyConnected::ReshapeFullyConnected() {
ngraph::OutputVector twoInputs = {
ngraph::pattern::any_input(ngraph::pattern::has_static_rank()), ngraph::pattern::any_input(ngraph::pattern::has_static_shape())};
ngraph::OutputVector threeInputs = {
ngraph::pattern::any_input(ngraph::pattern::has_static_rank()), ngraph::pattern::any_input(ngraph::pattern::has_static_shape()),
ngraph::pattern::any_input()};
auto fcTwoInputs = ngraph::pattern::wrap_type<MKLDNNPlugin::FullyConnectedNode>(twoInputs, ngraph::pattern::has_static_rank());
auto fcThreeInputs = ngraph::pattern::wrap_type<MKLDNNPlugin::FullyConnectedNode>(threeInputs, ngraph::pattern::has_static_rank());
const auto fcTwoOrThreeInputs = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{fcTwoInputs, fcThreeInputs});

ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) {
auto fc = std::dynamic_pointer_cast<MKLDNNPlugin::FullyConnectedNode>(m.get_match_root());
if (!fc || transformation_callback(fc)) {
return false;
}

auto fc_input_shape = fc->get_input_partial_shape(0);
auto input_rank = fc_input_shape.rank().get_length();
auto output_shape = fc->get_output_partial_shape(0);

if (input_rank == 2 || input_rank == 0) {
return false;
}

ngraph::NodeVector new_ops;
int64_t K = *(fc->get_input_shape(1).rbegin()); // requested 2nd input with static shape in the matcher
auto reshape = std::make_shared<ngraph::opset1::Reshape>(
fc->input_value(0), ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{-1, K}), false);
if (reshape->get_output_partial_shape(0).rank().is_dynamic())
return false;
new_ops.push_back(reshape);

reshape->set_friendly_name(fc->get_friendly_name() + "/Reshape");

// Calculate output shape for new FullyConnected layer
// [I, K] * [O, K] = [I, O]
auto I = reshape->get_output_partial_shape(0)[0];
auto O = fc->get_input_partial_shape(1)[0];
ngraph::PartialShape output_shape_new{I, O};

std::shared_ptr<ngraph::Node> fc_new;
if (fc->get_input_size() == 2) {
fc_new = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(reshape,
fc->input_value(1),
output_shape_new.rank(),
fc->get_output_type());
} else if (fc->get_input_size() == 3) {
fc_new = std::make_shared<MKLDNNPlugin::FullyConnectedNode>(reshape,
fc->input_value(1),
fc->input_value(2),
output_shape_new.rank(),
fc->get_output_type());
} else {
return false;
}
new_ops.push_back(fc_new);

if (output_shape != output_shape_new) {
auto I_idxs = std::vector<size_t>(input_rank - 1);
std::iota(I_idxs.begin(), I_idxs.end(), 0);
auto A_input_shape = ngraph::op::util::make_try_fold<ngraph::opset7::ShapeOf>(fc->input_value(0));
auto B_input_shape = ngraph::op::util::make_try_fold<ngraph::opset7::ShapeOf>(fc->input_value(1));
auto I_node = ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_node(A_input_shape, {I_idxs});
auto O_node = ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_node(B_input_shape, {0});
ngraph::OutputVector output_shape_dims{I_node, O_node};

const auto original_rank = fc->get_output_rank();
NGRAPH_CHECK(original_rank.is_static());
if (input_rank < original_rank.get_length()) {
const size_t const_shape_value = original_rank.get_length() - input_rank;
output_shape_dims.insert(
output_shape_dims.begin(), ngraph::opset1::Constant::create(I_node->get_element_type(), { const_shape_value }, { 1 }));
}

auto reshape_output_shape = ngraph::op::util::make_try_fold<ngraph::opset1::Concat>(output_shape_dims, 0);
auto reshape_output = std::make_shared<ngraph::opset1::Reshape>(fc_new, reshape_output_shape, false);
new_ops.push_back(A_input_shape);
new_ops.push_back(B_input_shape);
new_ops.push_back(I_node);
new_ops.push_back(O_node);
new_ops.push_back(reshape_output_shape);
new_ops.push_back(reshape_output);
reshape_output->set_friendly_name(fc->get_friendly_name());
fc_new->set_friendly_name(fc->get_friendly_name() + "/FC");
ngraph::copy_runtime_info(fc, new_ops);
ngraph::replace_node(fc, reshape_output);
} else {
fc_new->set_friendly_name(fc->get_friendly_name());
ngraph::copy_runtime_info(fc, new_ops);
ngraph::replace_node(fc, fc_new);
}

return true;
};

auto m = std::make_shared<ngraph::pattern::Matcher>(fcTwoOrThreeInputs, "ReshapeFullyConnected");
this->register_matcher(m, callback);
}
@@ -1,25 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>

/*
 * Description:
 * ReshapeFullyConnected transformation detects FullyConnected operations
 * and for each operation where input shape is greater than 2 inserts Reshape
 * operations before and after FullyConnected operation. This transformation is
 * required because of IE restrictions.
 */

namespace MKLDNNPlugin {

class ReshapeFullyConnected: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
ReshapeFullyConnected();
};

} // namespace MKLDNNPlugin
@@ -330,48 +330,42 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
}
}

void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false, bool initAsBinary = false) {
bool initBinaryMemory = initWeights;
void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights = false) {
mkldnn::post_ops ops;
bool useLegacyPostOps = true; // @todo remove after issue with performance of binary post ops fixed

auto getBinPostOpShape = [&](){
const auto outShape = getOutputShapeAtPort(0).getStaticDims();
const auto outShapeRank = getOutputShapeAtPort(0).getRank();
const auto chIdx = getFusingAxis();
std::vector<size_t> binaryShape(outShapeRank, 1);
binaryShape[chIdx] = outShape[chIdx];
return binaryShape;
};

for (auto &node : fusedWith) {
if (node->getType() == Split || node->getType() == Concatenation)
continue;

auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
if (eltwiseNode->isSpecialConvolutionAddFusing()) {
ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision));
} else {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, dims, align, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (eltwiseNode->scalesMemory)
binaryPostOpsArgs.push_back(eltwiseNode->scalesMemory->GetPrimitive());
if (eltwiseNode->shiftsMemory)
binaryPostOpsArgs.push_back(eltwiseNode->shiftsMemory->GetPrimitive());
if (useLegacyPostOps || eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
constexpr int align = 16;
eltwiseNode->appendPostOps(ops, dims, align);
} else {
eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
}
}
continue;
}

auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
constexpr int align = -1;
fakeQuantizeNode->appendPostOps(ops, dims, align, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (fakeQuantizeNode->cropHighMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->cropHighMemory->GetPrimitive());
if (fakeQuantizeNode->cropLowMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->cropLowMemory->GetPrimitive());
if (fakeQuantizeNode->inputScaleMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->inputScaleMemory->GetPrimitive());
if (fakeQuantizeNode->inputShiftMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->inputShiftMemory->GetPrimitive());
if (fakeQuantizeNode->outputScaleMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->outputScaleMemory->GetPrimitive());
if (fakeQuantizeNode->outputShiftMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->outputShiftMemory->GetPrimitive());
if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
if (useLegacyPostOps) {
fakeQuantizeNode->appendPostOps(ops, dims);
} else {
fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
}
continue;
}
@@ -416,7 +410,6 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
// attr[1] - binary
mkldnn::primitive_attr attrs[1];
setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims());
// setPostOps(attrs[1], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims(), false, true);

bool containJitImpl = false;

@@ -630,7 +623,6 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) {
// attr[1] - binary
mkldnn::primitive_attr attrs[1];
setPostOps(attrs[0], MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0)).getStaticDims());
// setPostOps(attrs[1], false, true);

auto rightConfig = selectedPD->getConfig();
size_t selected_count = 0;
@@ -926,13 +918,8 @@ void MKLDNNConvolutionNode::prepareParams() {
auto initPrimitiveAttr = [&]() {
mkldnn::primitive_attr attr;
addZeroPoints(attr);
setPostOps(attr, outMemoryDesc->getShape().getStaticDims(), true);

// todo: [AV] delete "false" to use binary mechanism
if (false && getSelectedPrimitiveDescriptor()->getImplementationType() == jit_gemm) {
setPostOps(attr, outMemoryDesc->getShape().getStaticDims(), true, true);
} else {
setPostOps(attr, outMemoryDesc->getShape().getStaticDims(), true);
}
return std::make_shared<mkldnn::primitive_attr>(std::move(attr));
};

@@ -991,14 +978,8 @@ void MKLDNNConvolutionNode::prepareParams() {
if (withBiases) {
primArgs[DNNL_ARG_BIAS] = getBias();
}
// todo: [AV] uncomment to use binary mechanism
// auto post_ops = attr.get_post_ops();
// int idx = 0;
// for (int i = 0; i < post_ops.len(); i++) {
// if (post_ops.kind(i) == mkldnn::primitive::kind::binary) {
// primArgs.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, binaryPostOpsArgs[idx++]});
// }
// }

appendPostOpArgs(*pAttrLocal);
}

void MKLDNNConvolutionNode::executeDynamicImpl(dnnl::stream strm) {
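Editor's note: getBinPostOpShape() above builds the per-channel broadcast shape that the binary post ops consume. A minimal sketch of the idea (plain std:: types, not the node API):

#include <cstddef>
#include <vector>

// All dimensions are 1 except the fusing (channel) axis, which keeps the output size,
// e.g. {1, C, 1, 1} for an NCHW convolution output.
std::vector<std::size_t> makeBinPostOpShape(const std::vector<std::size_t>& outDims,
                                            std::size_t channelAxis) {
    std::vector<std::size_t> binaryShape(outDims.size(), 1);
    binaryShape[channelAxis] = outDims[channelAxis];
    return binaryShape;
}

The same lambda is repeated in the Deconvolution change below; only FullyConnected differs, since its binary shape is built for the 2D oneDNN primitive.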
@@ -69,7 +69,7 @@ private:
void executeDynamicImpl(mkldnn::stream strm) override;

void addZeroPoints(mkldnn::primitive_attr& attr) const;
void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights, bool initAsBinary);
void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims, bool initWeights);
void filterSupportedDescriptors();
bool isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) const;
bool isNspcAvailable() const;
@@ -122,4 +122,3 @@ private:
};

} // namespace MKLDNNPlugin
@@ -157,9 +157,6 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const {
return false;
}

// todo: [antonvor] added these checks to fix performance problems
if (kernel.size() == 3)
return false;
if (!withGroups && stride.back() > 3)
return false;
if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) {
@@ -271,17 +268,25 @@
void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) {
mkldnn::post_ops ops;

auto getBinPostOpShape = [&](){
const auto outShape = getOutputShapeAtPort(0).getStaticDims();
const auto outShapeRank = getOutputShapeAtPort(0).getRank();
const auto chIdx = getFusingAxis();
std::vector<size_t> binaryShape(outShapeRank, 1);
binaryShape[chIdx] = outShape[chIdx];
return binaryShape;
};

for (auto &node : fusedWith) {
auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
// TODO [DS]: change to shape from memory
constexpr int align = 16;
// use legacy depthwise since backprop convolution does not support binary post ops
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
continue;
}
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
fakeQuantizeNode->appendPostOps(ops);
if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
continue;
}
IE_THROW() << "Fusing of " << NameFromType(node->getType()) << " operation to " << NameFromType(this->getType()) << " node is not implemented";
@@ -358,6 +363,8 @@ void MKLDNNDeconvolutionNode::createPrimitive() {
auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
primArgs = {{DNNL_ARG_DIFF_DST, src}, {DNNL_ARG_WEIGHTS, weights}, {DNNL_ARG_DIFF_SRC, dst}};
}

appendPostOpArgs(attr);
}

void MKLDNNDeconvolutionNode::createDescriptor(const std::vector<MemoryDescPtr> &inputDesc,
@ -7,6 +7,7 @@
|
||||
#include <ie_parallel.hpp>
|
||||
|
||||
#include <mkldnn_types.h>
|
||||
#include "cpu_types.h"
|
||||
#include "utils/bfloat16.hpp"
|
||||
#include <cpu/x64/injectors/jit_uni_quantization_injector.hpp>
|
||||
#include <cpu/ref_eltwise.hpp>
|
||||
@ -31,6 +32,7 @@
|
||||
#include "ngraph_transformations/op/leaky_relu.hpp"
|
||||
#include "ngraph_transformations/op/swish_cpu.hpp"
|
||||
|
||||
#include <oneapi/dnnl/dnnl.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
@ -791,18 +793,41 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
MKLDNNEltwiseNode::BroadcastingPolicy MKLDNNEltwiseNode::determineBroadcastingPolicy(const std::shared_ptr<ngraph::Node>& op) {
|
||||
const auto const1 = std::dynamic_pointer_cast<ngraph::opset1::Constant>(op->get_input_node_shared_ptr(0));
|
||||
const auto const2 = std::dynamic_pointer_cast<ngraph::opset1::Constant>(op->get_input_node_shared_ptr(1));
|
||||
int constPort = -1;
|
||||
if (const2) {
|
||||
constPort = 1;
|
||||
} else if (const1) {
|
||||
constPort = 0;
|
||||
} else {
|
||||
return Undefined;
|
||||
}
|
||||
|
||||
auto const_shape = op->get_input_shape(constPort);
|
||||
if (ngraph::shape_size(const_shape) == 1)
|
||||
return PerTensor;
|
||||
else
|
||||
return PerChannel;
|
||||
}
|
||||
|
||||
const std::map<const ngraph::DiscreteTypeInfo, MKLDNNEltwiseNode::Initializer> MKLDNNEltwiseNode::initializers = {
|
||||
{ngraph::op::v1::Add::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseAdd;
|
||||
node.broadcastingPolicy = determineBroadcastingPolicy(op);
|
||||
}},
|
||||
{ngraph::op::v1::Subtract::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseSubtract;
|
||||
node.broadcastingPolicy = determineBroadcastingPolicy(op);
|
||||
}},
|
||||
{ngraph::op::v1::Multiply::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseMultiply;
|
||||
node.broadcastingPolicy = determineBroadcastingPolicy(op);
|
||||
}},
|
||||
{ngraph::op::v1::Divide::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseDivide;
|
||||
node.broadcastingPolicy = determineBroadcastingPolicy(op);
|
||||
}},
|
||||
{ngraph::op::v0::SquaredDifference::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseSquaredDifference;
|
||||
@ -828,6 +853,7 @@ const std::map<const ngraph::DiscreteTypeInfo, MKLDNNEltwiseNode::Initializer> M
|
||||
node.alpha = powerStatic->get_power();
|
||||
node.beta = powerStatic->get_scale();
|
||||
node.gamma = powerStatic->get_shift();
|
||||
node.broadcastingPolicy = PerTensor;
|
||||
}},
|
||||
{ngraph::op::v1::Equal::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseEqual;
|
||||
@ -954,6 +980,7 @@ const std::map<const ngraph::DiscreteTypeInfo, MKLDNNEltwiseNode::Initializer> M
|
||||
}},
|
||||
{ngraph::op::v0::PRelu::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwisePrelu;
|
||||
node.broadcastingPolicy = determineBroadcastingPolicy(op);
|
||||
}},
|
||||
{ngraph::op::v0::Erf::get_type_info_static(), [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
|
||||
node.algorithm = EltwiseErf;
|
||||
@ -984,7 +1011,7 @@ bool MKLDNNEltwiseNode::isSupportedOperation(const std::shared_ptr<const ngraph:
|
||||
}
|
||||
|
||||
MKLDNNEltwiseNode::MKLDNNEltwiseNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
|
||||
MKLDNNNode(op, eng, cache) {
|
||||
MKLDNNNode(op, eng, cache), broadcastingPolicy(Undefined) {
|
||||
std::string errorMessage;
|
||||
if (!isSupportedOperation(op, errorMessage)) {
|
||||
IE_THROW(NotImplemented) << errorMessage;
|
||||
@@ -1713,106 +1740,124 @@ void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) {
getInputShapeAtPort(0) == getInputShapeAtPort(1);
if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) {
std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
if ((parentNode->getType() == FullyConnected || parentNode->getType() == MatMul) && one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract,
EltwiseMultiply, EltwiseDivide, EltwiseMulAdd, EltwisePowerStatic, EltwisePrelu)) {
std::tie(scales, shifts) = getScalesAndShifts(parentNode.get());
}
}
MKLDNNNode::fuseInto(parentNode);
}

void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' ";

if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
switch (getMKLDNNAlgorithm()) {
case mkldnn::algorithm::eltwise_relu:
case mkldnn::algorithm::eltwise_tanh:
case mkldnn::algorithm::eltwise_elu:
case mkldnn::algorithm::eltwise_square:
case mkldnn::algorithm::eltwise_abs:
case mkldnn::algorithm::eltwise_sqrt:
case mkldnn::algorithm::eltwise_linear:
case mkldnn::algorithm::eltwise_bounded_relu:
case mkldnn::algorithm::eltwise_soft_relu:
case mkldnn::algorithm::eltwise_logistic:
case mkldnn::algorithm::eltwise_exp:
case mkldnn::algorithm::eltwise_gelu_erf:
case mkldnn::algorithm::eltwise_gelu_tanh:
case mkldnn::algorithm::eltwise_clip:
case mkldnn::algorithm::eltwise_swish:
case mkldnn::algorithm::eltwise_hardswish:
case mkldnn::algorithm::eltwise_mish:
case mkldnn::algorithm::eltwise_hsigmoid:
case mkldnn::algorithm::eltwise_round_half_to_even:
case mkldnn::algorithm::eltwise_round_half_away_from_zero:
ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta());
break;
default: IE_THROW() << errorPrefix << "as post operation is not supported";
case mkldnn::algorithm::eltwise_relu:
case mkldnn::algorithm::eltwise_tanh:
case mkldnn::algorithm::eltwise_elu:
case mkldnn::algorithm::eltwise_square:
case mkldnn::algorithm::eltwise_abs:
case mkldnn::algorithm::eltwise_sqrt:
case mkldnn::algorithm::eltwise_linear:
case mkldnn::algorithm::eltwise_bounded_relu:
case mkldnn::algorithm::eltwise_soft_relu:
case mkldnn::algorithm::eltwise_logistic:
case mkldnn::algorithm::eltwise_exp:
case mkldnn::algorithm::eltwise_gelu_erf:
case mkldnn::algorithm::eltwise_gelu_tanh:
case mkldnn::algorithm::eltwise_clip:
case mkldnn::algorithm::eltwise_swish:
case mkldnn::algorithm::eltwise_hardswish:
case mkldnn::algorithm::eltwise_mish:
case mkldnn::algorithm::eltwise_hsigmoid:
case mkldnn::algorithm::eltwise_round_half_to_even:
case mkldnn::algorithm::eltwise_round_half_away_from_zero:
ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta());
break;
default: IE_THROW() << errorPrefix << "as post operation is not supported";
}
} else {
const size_t chIdx = postOpDims.size() > 1 ? 1 : 0;
const size_t chIdx = postOpDims.size() > 1 ? getFusingAxis() : 0;
scalesBuffer = makeAlignedBuffer(postOpDims[chIdx], scales, align);
if (getAlgorithm() != EltwisePrelu) {
shiftsBuffer = makeAlignedBuffer(postOpDims[chIdx], shifts, align);
}

if (initAsBinary) {
auto appendBinary = [&](const mkldnn::algorithm alg, MKLDNNMemoryPtr &memPtr, const std::vector<float> &data) {
if (data.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";

std::vector<size_t> binaryDims(postOpDims.size(), 1);
binaryDims[chIdx] = postOpDims[chIdx];

DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryDims));
ops.append_binary(alg, memoryDesc.getDnnlDesc());

if (initBinaryMemory) {
memPtr.reset(new MKLDNNMemory(getEngine()));
memPtr->Create(memoryDesc, &data[0]);
}
};
switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
break;
case EltwiseMultiply:
case EltwiseDivide:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
break;
case EltwiseMulAdd:
case EltwisePowerStatic:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scalesBuffer);
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shiftsBuffer);
break;
case EltwisePrelu:
appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scalesBuffer);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
}
} else {
switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
case EltwiseMultiply:
case EltwiseDivide:
case EltwiseMulAdd:
case EltwisePowerStatic:
if (scalesBuffer.empty() || shiftsBuffer.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scalesBuffer[0], &shiftsBuffer[0]);
break;
case EltwisePrelu:
if (scalesBuffer.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scalesBuffer[0], nullptr);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
}
/* @todo legacy depthwise post ops are kept for now
 * for performance reasons
 */
switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
case EltwiseMultiply:
case EltwiseDivide:
case EltwiseMulAdd:
case EltwisePowerStatic:
if (scales.empty() || shifts.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scalesBuffer[0], &shiftsBuffer[0]);
break;
case EltwisePrelu:
if (scales.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scalesBuffer[0], nullptr);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
}
}
}

void MKLDNNEltwiseNode::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) {
const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' as binary post op ";
VectorDims broadcastBinaryShape(postOpDims.size(), 1);

auto appendBinary = [&](const mkldnn::algorithm alg, MKLDNNMemoryPtr &memPtr, const std::vector<float> &data) {
if (data.empty())
IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
if (broadcastingPolicy == Undefined)
IE_THROW() << errorPrefix << "cannot be performed since policy is Undefined";

DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, broadcastingPolicy == PerTensor ? Shape(broadcastBinaryShape) : Shape(postOpDims));

ops.append_binary(alg, memoryDesc.getDnnlDesc());

if (!memPtr) {
memPtr.reset(new MKLDNNMemory(getEngine()));
memPtr->Create(memoryDesc, &data[0]);

binaryPostOpsMem.push_back(memPtr);
}
};

switch (getAlgorithm()) {
case EltwiseAdd:
case EltwiseSubtract:
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
break;
case EltwiseDivide:
case EltwiseMultiply:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
break;
case EltwiseMulAdd:
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
break;
case EltwisePowerStatic:
if (beta != 1.0f) // Multiply if has scales
appendBinary(mkldnn::algorithm::binary_mul, scalesMemory, scales);
if (gamma != 0.0f) // Add only if has shifts
appendBinary(mkldnn::algorithm::binary_add, shiftsMemory, shifts);
break;
case EltwisePrelu:
appendBinary(mkldnn::algorithm::binary_prelu, scalesMemory, scales);
break;
default:
IE_THROW() << errorPrefix << "as post operation is not supported";
}
}

bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const {
auto isSuitableNode = [this](const MKLDNNEltwiseNode* node) {
// [WA] Since execution precision change from I32 to FP32 for Divide operation may lead to incorrect results
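Editor's note: appendBinPostOps() for Eltwise chooses the binary tensor shape from the broadcasting policy recorded at construction time. A reduced sketch of that choice (illustrative names only, not the plugin API):

#include <cstddef>
#include <vector>

enum class BroadcastingPolicy { PerChannel, PerTensor, Undefined };

// Per-tensor constants are appended as a fully broadcast {1, 1, ...} tensor,
// per-channel constants keep the full post-op dims.
std::vector<std::size_t> binaryTensorShape(const std::vector<std::size_t>& postOpDims,
                                           BroadcastingPolicy policy) {
    if (policy == BroadcastingPolicy::PerTensor)
        return std::vector<std::size_t>(postOpDims.size(), 1);
    return postOpDims;
}

An Undefined policy is rejected earlier in appendBinPostOps with an exception, so it never reaches this shape selection.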
@@ -75,7 +75,8 @@ public:
bool created() const override;
bool canBeInPlace() const override;
bool canFuse(const MKLDNNNodePtr& node) const override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1, bool initAsBinary = false, bool initBinaryMemory = false) override;
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align = -1) override;
void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) override;
void fuseInto(MKLDNNNodePtr& parentNode) override;
InferenceEngine::Precision getRuntimePrecision() const override;

@@ -97,8 +98,17 @@ public:

void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); }

enum BroadcastingPolicy {
PerChannel,
PerTensor,
Undefined,
};

BroadcastingPolicy getBroadcastingPolicy() const { return broadcastingPolicy; }

static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

private:
struct EltwiseExecutor {
EltwiseExecutor(size_t batch) : batchDimIdx(batch) {}
@@ -130,6 +140,8 @@ private:
size_t fullWorkAmount = 0;
};

BroadcastingPolicy broadcastingPolicy;

mkldnn::algorithm mkldnnAlgorithm = mkldnn::algorithm::undef;

static const int optimalTensorRank = 6;
@@ -157,6 +169,8 @@ private:
using Initializer = std::function<void(const std::shared_ptr<ngraph::Node>&, MKLDNNEltwiseNode& node)>;
static const std::map<const ngraph::DiscreteTypeInfo, Initializer> initializers;

static BroadcastingPolicy determineBroadcastingPolicy(const std::shared_ptr<ngraph::Node>& op);

void executeOptimized6D(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
const VectorDims &dims_out) const;
void executeOptimizedGeneric(const std::unique_ptr<jit_uni_eltwise_kernel> &pKernel, const jit_eltwise_call_args_ptrs &args_ptrs,
@@ -860,7 +860,15 @@ bool MKLDNNFakeQuantizeNode::isSupportedOperation(const std::shared_ptr<const ng
count_not_unit_axis++;
}
}
if (count_not_unit_axis > 1 || not_unit_axis > 1) {

/* @todo
 * Channel axis 2 is added for 3D MatMul (most common one).
 * FQ for non-1 channel fallbacks to reference implementation.
 * Expected to be fused for 3D MatMul
 * Long term idea: restore limitation for channel axis 1 and
 * support fusing of unfolded FQ (see FakeQuantizeDecomposition transformation)
 */
if (count_not_unit_axis > 1 || !one_of(not_unit_axis, 1, 2)) {
errorMessage = "Supports only per-tensor and per-channel quantizations";
return false;
}
@@ -1057,6 +1065,13 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr<ngraph::Nod
outputScaleSize = outputScale.size();
outputShiftSize = outputShift.size();

if (everyone_is(1, cropLowSize, cropHighSize, inputScaleSize, inputShiftSize, outputScaleSize, outputShiftSize))
broadcastingPolicy = PerTensor;
else if (one_of(1, cropLowSize, cropHighSize, inputScaleSize, inputShiftSize, outputScaleSize, outputShiftSize))
broadcastingPolicy = Mixed;
else
broadcastingPolicy = PerChannel;

bool quantizationOnly = true;

for (int i = 0; i < cropLow.size(); i++) {
@@ -1649,14 +1664,12 @@ void MKLDNNFakeQuantizeNode::execute(mkldnn::stream strm) {
}
}

void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align, bool initAsBinary, bool initBinaryMemory) {
// MKLDNN quantization_injectors assumes that quantization data memory is always aligned on 16
// by length of AVX512 vector register which is also enough for AVX2 and SSE42 implementations.
// Otherwise it can lead to buffer over-read and performance penalties due to denormals.
const size_t bufferAlignment = 16;
void MKLDNNFakeQuantizeNode::initializePostOpData(const VectorDims &dims, const size_t bufferAlignment) {
if (isPostOpDataInitialized)
return;

if (getAlgorithm() == FQBinarization) {
const auto realAxisSize = postOpDims[postOpDims.size() > 1 ? 1 : 0];
const auto realAxisSize = dims[dims.size() > 1 ? 1 : 0];
const auto axisPaddedSize = rnd_up(realAxisSize, bufferAlignment);
if (!isPostOpDataInitialized) {
binarizationThresholds.resize(axisPaddedSize, 0);
@@ -1671,73 +1684,76 @@ void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDi
std::fill(binarizationThresholds.begin() + realAxisSize, binarizationThresholds.end(), 0);
}
}

ops.append_binarization(mkldnn::algorithm::binarization_depthwise, (const float*)&binarizationThresholds[0], (const float*)&binarizationOutputMask[0]);

if (!isInputLowBroadcasted && !isOutputHighBroadcasted) {
isPostOpDataInitialized = true;
}
} else {
if (!isPostOpDataInitialized) {
if (cropLow.size() > 1)
cropLow.resize(rnd_up(cropLow.size(), bufferAlignment), 0);
if (cropHigh.size() > 1)
cropHigh.resize(rnd_up(cropHigh.size(), bufferAlignment), 0);
if (inputScale.size() > 1)
inputScale.resize(rnd_up(inputScale.size(), bufferAlignment), 0);
if (inputShift.size() > 1)
inputShift.resize(rnd_up(inputShift.size(), bufferAlignment), 0);
if (outputScale.size() > 1)
outputScale.resize(rnd_up(outputScale.size(), bufferAlignment), 0);
if (outputShift.size() > 1)
outputShift.resize(rnd_up(outputShift.size(), bufferAlignment), 0);
if (cropLow.size() > 1)
cropLow.resize(rnd_up(cropLow.size(), bufferAlignment), 0);
if (cropHigh.size() > 1)
cropHigh.resize(rnd_up(cropHigh.size(), bufferAlignment), 0);
if (inputScale.size() > 1)
inputScale.resize(rnd_up(inputScale.size(), bufferAlignment), 0);
if (inputShift.size() > 1)
inputShift.resize(rnd_up(inputShift.size(), bufferAlignment), 0);
if (outputScale.size() > 1)
outputScale.resize(rnd_up(outputScale.size(), bufferAlignment), 0);
if (outputShift.size() > 1)
outputShift.resize(rnd_up(outputShift.size(), bufferAlignment), 0);

cropLowData.set(cropLow.size(), 1 << 1, &cropLow[0]);
cropHighData.set(cropHigh.size(), 1 << 1, &cropHigh[0]);
inputScaleData.set(inputScale.size(), 1 << 1, &inputScale[0]);
inputShiftData.set(inputShift.size(), 1 << 1, &inputShift[0]);
outputScaleData.set(outputScale.size(), 1 << 1, &outputScale[0]);
outputShiftData.set(outputShift.size(), 1 << 1, &outputShift[0]);
}
cropLowData.set(cropLow.size(), 1 << 1, &cropLow[0]);
cropHighData.set(cropHigh.size(), 1 << 1, &cropHigh[0]);
inputScaleData.set(inputScale.size(), 1 << 1, &inputScale[0]);
inputShiftData.set(inputShift.size(), 1 << 1, &inputShift[0]);
outputScaleData.set(outputScale.size(), 1 << 1, &outputScale[0]);
outputShiftData.set(outputShift.size(), 1 << 1, &outputShift[0]);
}

isPostOpDataInitialized = true;
}

void MKLDNNFakeQuantizeNode::appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, int align) {
initializePostOpData(postOpDims, align);

if (getAlgorithm() == FQBinarization) {
ops.append_binarization(mkldnn::algorithm::binarization_depthwise, (const float*)&binarizationThresholds[0], (const float*)&binarizationOutputMask[0]);
} else {
mkldnn::algorithm alg = getAlgorithm() == FQCommon ? mkldnn::algorithm::quantization_quantize_dequantize :
mkldnn::algorithm::quantization_quantize;

if (initAsBinary) {
auto appendBinary = [&](const mkldnn::algorithm alg, const size_t dataSize, MKLDNNMemoryPtr &memPtr, const void *data) {
const auto rank = getOutputShapeAtPort(0).getRank();
auto chIdx = rank > 1 ? 1 : 0;

std::vector<size_t> binaryShape(rank, 1);
binaryShape[chIdx] = dataSize;

DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, Shape(binaryShape));
ops.append_binary(alg, memoryDesc.getDnnlDesc());

if (initBinaryMemory) {
memPtr.reset(new MKLDNNMemory(getEngine()));
memPtr->Create(memoryDesc, data);
}
};

appendBinary(mkldnn::algorithm::binary_min, cropHighSize, cropHighMemory, &cropHighData.shifts_[0]);
appendBinary(mkldnn::algorithm::binary_max, cropLowSize, cropLowMemory, &cropLowData.shifts_[0]);
appendBinary(mkldnn::algorithm::binary_mul, inputScaleSize, inputScaleMemory, &inputScaleData.scales_[0]);
appendBinary(mkldnn::algorithm::binary_add, inputShiftSize, inputShiftMemory, &inputShiftData.shifts_[0]);
if (alg == mkldnn::algorithm::quantization_quantize_dequantize) {
ops.append_eltwise(1.0f, mkldnn::algorithm::eltwise_round_half_to_even, 0, 0);
}
appendBinary(mkldnn::algorithm::binary_mul, outputScaleSize, outputScaleMemory, &outputScaleData.scales_[0]);
appendBinary(mkldnn::algorithm::binary_add, outputShiftSize, outputShiftMemory, &outputShiftData.shifts_[0]);

} else {
ops.append_quantization(alg, &cropLowData, &cropHighData, &inputScaleData, &inputShiftData, &outputScaleData, &outputShiftData);
}

isPostOpDataInitialized = true;
ops.append_quantization(alg, &cropLowData, &cropHighData, &inputScaleData, &inputShiftData, &outputScaleData, &outputShiftData);
}
}

void MKLDNNFakeQuantizeNode::appendBinPostOps(mkldnn::post_ops& ops, const VectorDims& postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) {
static const size_t bufferAlignment = 1;

initializePostOpData(postOpDims, bufferAlignment);

VectorDims broadcastBinaryShape(postOpDims.size(), 1);

auto appendBinary = [&](const mkldnn::algorithm alg, const size_t dataSize, MKLDNNMemoryPtr &memPtr, const void *data) {
DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, dataSize == 1 ? Shape(broadcastBinaryShape) : Shape(postOpDims));
ops.append_binary(alg, memoryDesc.getDnnlDesc());

if (!memPtr) {
memPtr.reset(new MKLDNNMemory(getEngine()));
memPtr->Create(memoryDesc, data);

binaryPostOpsMem.push_back(memPtr);
}
};

mkldnn::algorithm alg = getAlgorithm() == FQCommon ? mkldnn::algorithm::quantization_quantize_dequantize :
mkldnn::algorithm::quantization_quantize;

appendBinary(mkldnn::algorithm::binary_min, cropHighSize, cropHighMemory, &cropHighData.shifts_[0]);
appendBinary(mkldnn::algorithm::binary_max, cropLowSize, cropLowMemory, &cropLowData.shifts_[0]);
appendBinary(mkldnn::algorithm::binary_mul, inputScaleSize, inputScaleMemory, &inputScaleData.scales_[0]);
appendBinary(mkldnn::algorithm::binary_add, inputShiftSize, inputShiftMemory, &inputShiftData.shifts_[0]);
if (alg == mkldnn::algorithm::quantization_quantize_dequantize) {
ops.append_eltwise(1.0f, mkldnn::algorithm::eltwise_round_half_to_even, 0, 0);
}
appendBinary(mkldnn::algorithm::binary_mul, outputScaleSize, outputScaleMemory, &outputScaleData.scales_[0]);
appendBinary(mkldnn::algorithm::binary_add, outputShiftSize, outputShiftMemory, &outputShiftData.shifts_[0]);
}

MKLDNNFakeQuantizeNode::FakeQuantizeJitExecutor::FakeQuantizeJitExecutor(const jit_quantize_params &_jqp) {
bool isBinarization = _jqp.op_type == FQBinarization;
if (mayiuse(cpu::x64::avx512_common)) {
@@ -121,11 +121,22 @@ public:
InferenceEngine::Precision getInputPrecision() const { return inputPrecision; }
InferenceEngine::Precision getOutputPrecision() const { return outputPrecision; }

void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims = {}, int align = -1, bool initAsBinary = false,
bool initBinaryMemory = false) override;
// MKLDNN quantization_injectors assumes that quantization data memory is always aligned on 16
// by length of AVX512 vector register which is also enough for AVX2 and SSE42 implementations.
// Otherwise it can lead to buffer over-read and performance penalties due to denormals.
void appendPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims = {}, int align = 16) override;
void appendBinPostOps(mkldnn::post_ops& ops, const VectorDims &postOpDims, std::vector<MKLDNNMemoryPtr>& binaryPostOpsMem) override;

static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

enum BroadcastingPolicy {
PerChannel, // all FQ operations are per channel
PerTensor, // all FQ operations are per tensor
Mixed, // some per channel, some per tensor
};

BroadcastingPolicy getBroadcastingPolicy() const { return broadcastingPolicy; }

MKLDNNMemoryPtr cropLowMemory;
MKLDNNMemoryPtr cropHighMemory;
MKLDNNMemoryPtr inputScaleMemory;
@@ -149,6 +160,7 @@ private:

void init() override;
std::vector<LayoutType> getDataFormats() const;
void initializePostOpData(const VectorDims &postOpDims, const size_t bufferAlignment);
void executeReference();
void executeBinarization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const;
void executeQuantization(const std::unique_ptr<jit_uni_quantize_kernel> &pKernel) const;
@@ -195,6 +207,8 @@ private:
InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32;

std::string errorPrefix;

BroadcastingPolicy broadcastingPolicy;
};

} // namespace MKLDNNPlugin
|
||||
else
|
||||
primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getParentEdgeAt(WEIGHTS_ID)->getMemory().GetPrimitive()}, {DNNL_ARG_DST, dst}};
|
||||
|
||||
auto post_ops = attr->get_post_ops();
|
||||
int idx = 0;
|
||||
for (int i = 0; i < post_ops.len(); i++) {
|
||||
if (post_ops.kind(i) == mkldnn::primitive::kind::binary) {
|
||||
primArgs.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1, binaryPostOpsArgs[idx++]});
|
||||
}
|
||||
}
|
||||
appendPostOpArgs(*attr);
|
||||
}
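The removed loop above is what the shared appendPostOpArgs helper now does for every node type: binary post-op number i gets its second input bound under DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1. A hedged sketch of that mapping (assumes the oneDNN C++ API and the same primArgs/binaryPostOpsArgs members; not the actual helper):

#include <unordered_map>
#include <vector>
#include <mkldnn.hpp>

// Bind one SRC_1 memory per binary post-op, in the order the memories were collected.
void appendPostOpArgsSketch(const mkldnn::primitive_attr& attr,
                            std::unordered_map<int, mkldnn::memory>& primArgs,
                            const std::vector<mkldnn::memory>& binaryPostOpsArgs) {
    const auto post_ops = attr.get_post_ops();
    int binaryIdx = 0;
    for (int i = 0; i < post_ops.len(); i++) {
        if (post_ops.kind(i) == mkldnn::primitive::kind::binary)
            primArgs[DNNL_ARG_ATTR_MULTIPLE_POST_OP(i) | DNNL_ARG_SRC_1] = binaryPostOpsArgs[binaryIdx++];
    }
}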

void MKLDNNFullyConnectedNode::execute(mkldnn::stream strm) {

@ -183,42 +177,32 @@ bool MKLDNNFullyConnectedNode::canFuse(const MKLDNNNodePtr& node) const {
return canFuseSimpleOperation(node);
}

void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false, bool initAsBinary = false) {
bool initBinaryMemory = initWeights;
void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) {
mkldnn::post_ops ops;

auto getBinPostOpShape = [&](){
const size_t binaryShapeRank = getOutputShapeAtPort(0).getRank() == 3 ? 2 : getOutputShapeAtPort(0).getRank();
VectorDims binaryShape(binaryShapeRank, 1);
const size_t channelAxis = getFusingAxis();
// always use 1 as channelAxis for binary Shape, since oneDNN primitive is actually always 2D
binaryShape[1] = getOutputShapeAtPort(0).getStaticDims()[channelAxis];

return binaryShape;
};
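As the comment in the lambda notes, the FullyConnected primitive is effectively 2D from oneDNN's point of view, so a rank-3 output collapses to a {1, C} binary shape. A small standalone sketch of the same computation (illustrative only):

#include <cstddef>
#include <vector>

// For a FullyConnected output of rank 2 or 3 the per-channel binary post-op shape is
// rank-2 with the channel count (last output dimension) in position 1:
// {N, C} -> {1, C} and {N, T, C} -> {1, C}.
std::vector<std::size_t> fcBinaryPostOpShape(const std::vector<std::size_t>& outDims) {
    const std::size_t rank = outDims.size() == 3 ? 2 : outDims.size();
    std::vector<std::size_t> binaryShape(rank, 1);
    binaryShape[1] = outDims.back();  // assumes rank >= 2, as FullyConnected outputs are
    return binaryShape;
}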

for (auto &node : fusedWith) {
auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get());
if (fakeQuantizeNode) {
// no need to fill post ops dims for fq, make sense only for bin fq
fakeQuantizeNode->appendPostOps(ops, VectorDims{}, -1, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (fakeQuantizeNode->cropHighMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->cropHighMemory->GetPrimitive());
if (fakeQuantizeNode->cropLowMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->cropLowMemory->GetPrimitive());
if (fakeQuantizeNode->inputScaleMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->inputScaleMemory->GetPrimitive());
if (fakeQuantizeNode->inputShiftMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->inputShiftMemory->GetPrimitive());
if (fakeQuantizeNode->outputScaleMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->outputScaleMemory->GetPrimitive());
if (fakeQuantizeNode->outputShiftMemory)
binaryPostOpsArgs.push_back(fakeQuantizeNode->outputShiftMemory->GetPrimitive());
}
if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
continue;
}

auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
if (eltwiseNode) {
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
// TODO [DS]: change to shape from memory
constexpr int align = -1;
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align, initAsBinary, initBinaryMemory);
if (initBinaryMemory) {
if (eltwiseNode->scalesMemory)
binaryPostOpsArgs.push_back(eltwiseNode->scalesMemory->GetPrimitive());
if (eltwiseNode->shiftsMemory)
binaryPostOpsArgs.push_back(eltwiseNode->shiftsMemory->GetPrimitive());
if (eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align);
} else {
eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
}
continue;
}

@ -280,7 +264,7 @@ const std::vector<impl_desc_type>& MKLDNNFullyConnectedNode::getPrimitivesPriori
MKLDNNNode::AttrPtr MKLDNNFullyConnectedNode::initPrimitiveAttr() {
auto attr = std::make_shared<mkldnn::primitive_attr>(mkldnn::primitive_attr());

setPostOps(*attr, true, true);
setPostOps(*attr);

return attr;
}

@ -26,6 +26,10 @@ public:
return false;
}

size_t getFusingAxis() const override {
return getOutputShapeAtPort(0).getRank() == 3 ? 2 : 1;
}

const std::vector<impl_desc_type>& getPrimitivesPriority() override;
void createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
const std::vector<MemoryDescPtr>& outputDesc) override;

@ -43,8 +47,7 @@ public:

static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept;

protected:
AttrPtr initPrimitiveAttr();
std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() override;

private:
void createDescriptorInternal(const mkldnn::memory::desc &inputDesc,

@ -54,7 +57,7 @@ private:
InferenceEngine::SizeVector biasesDims;

std::vector<MKLDNNMemoryPtr> PostOpsIntBlobMemory;
void setPostOps(mkldnn::primitive_attr &attr, bool initWeights, bool initAsBinary);
void setPostOps(mkldnn::primitive_attr &attr, bool initWeights);

bool withBiases = false;

@ -17,6 +17,7 @@
#include "common/cpu_memcpy.h"
#include <ngraph/opsets/opset1.hpp>
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include "nodes/mkldnn_fake_quantize_node.h"
#include "utils/general_utils.h"
#include "memory_desc/cpu_memory_desc_utils.h"
#include "mkldnn_extension_utils.h"

@ -54,31 +55,65 @@ bool MKLDNNMatMulNode::isSupportedOperation(const std::shared_ptr<const ngraph::
}

MKLDNNMatMulNode::MKLDNNMatMulNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
MKLDNNNode(op, eng, cache) {
MKLDNNNode(op, eng, cache), withBiases(false) {
std::string errorMessage;
errorPrefix = "MatMul node with name '" + getName() + "'";

if (!isSupportedOperation(op, errorMessage))
IE_THROW(NotImplemented) << errorMessage;

errorPrefix = "MatMul node with name '" + getName() + "'";

const auto matMul = std::dynamic_pointer_cast<const ngraph::opset1::MatMul>(op);

if (!matMul) {
IE_THROW(NotImplemented) << "Operation with name " << op->get_friendly_name() << ":" << op->get_type_name() <<
" is not an instance of MatMul from opset1";
}

transposeIn[0] = matMul->get_transpose_a();
transposeIn[1] = matMul->get_transpose_b();
}

bool MKLDNNMatMulNode::canFuse(const MKLDNNNodePtr& node) const {
return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu);
// per channel binary post op for rank > 2D is supported only by oneDNN reference implementation because of unusual MatMul channel axis (issue 6669)
if (getOutputShapeAtPort(0).getRank() > 2) {
if (const auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
if (one_of(eltwiseNode->getAlgorithm(),
EltwiseAdd, EltwiseMultiply, EltwiseSubtract, EltwiseDivide, EltwisePrelu, EltwiseMulAdd, EltwisePowerStatic) &&
eltwiseNode->getBroadcastingPolicy() != MKLDNNEltwiseNode::PerTensor) {
return false;
}
} else if (const auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
if (fakeQuantizeNode->getBroadcastingPolicy() != MKLDNNFakeQuantizeNode::PerTensor) {
return false;
}
}
}

return canFuseSimpleOperation(node);
}
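The extra rank > 2 restriction exists because MatMul's channel axis is the last output dimension, unlike the axis-1 convention used for Convolution-style nodes, and oneDNN currently handles such per-channel binary post-ops only in its reference matmul implementation (issue 6669). A sketch of the axis convention this patch assumes (illustrative only):

#include <cstddef>
#include <vector>

// Fusing (channel) axis convention: MatMul fuses along the last axis,
// Convolution-like nodes along axis 1.
std::size_t fusingAxisFor(bool isMatMul, std::size_t outputRank) {
    return isMatMul ? outputRank - 1 : 1;
}

// Hence for a {B, M, N} MatMul output the per-channel post-op data has shape {1, 1, N},
// while a {N, C, H, W} convolution output expects {1, C, 1, 1}.
std::vector<std::size_t> perChannelShapeFor(const std::vector<std::size_t>& outDims, std::size_t channelAxis) {
    std::vector<std::size_t> shape(outDims.size(), 1);
    shape[channelAxis] = outDims[channelAxis];
    return shape;
}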

void MKLDNNMatMulNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims& dims, bool initWeights = false) const {
void MKLDNNMatMulNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims& dims, bool initWeights = false) {
mkldnn::post_ops ops;

for (auto &node : fusedWith) {
auto getBinPostOpShape = [&](){
const auto outShapeRank = dims.size();
const auto chIdx = getFusingAxis();
std::vector<size_t> binaryShape(outShapeRank, 1);
binaryShape[chIdx] = dims[chIdx];
return binaryShape;
};

for (const auto &node : fusedWith) {
if (auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get())) {
eltwiseNode->appendPostOps(ops, dims);
// TODO [DS]: change to shape from memory
if (eltwiseNode->getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
eltwiseNode->appendPostOps(ops, dims);
} else {
eltwiseNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
}
continue;
} else if (auto* fakeQuantizeNode = dynamic_cast<MKLDNNFakeQuantizeNode *>(node.get())) {
fakeQuantizeNode->appendBinPostOps(ops, getBinPostOpShape(), binaryPostOpsArgs);
continue;
}

@ -88,8 +123,7 @@ void MKLDNNMatMulNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims
attr.set_post_ops(ops);
}

MKLDNNNode::AttrPtr MKLDNNMatMulNode::initPrimitiveAttr(const VectorDims &dims) const {
MKLDNNNode::AttrPtr MKLDNNMatMulNode::initPrimitiveAttr(const VectorDims &dims) {
auto attr = std::make_shared<mkldnn::primitive_attr>(mkldnn::primitive_attr());

setPostOps(*attr, dims, true);

@ -97,7 +131,7 @@ MKLDNNNode::AttrPtr MKLDNNMatMulNode::initPrimitiveAttr(const VectorDims &dims)
return attr;
}

MKLDNNNode::AttrPtr MKLDNNMatMulNode::initPrimitiveAttr() const {
MKLDNNNode::AttrPtr MKLDNNMatMulNode::initPrimitiveAttr() {
auto dummyShape = MemoryDescUtils::makeDummyShape(getOutputShapeAtPort(0));
return initPrimitiveAttr(dummyShape.getStaticDims());
}

@ -131,12 +165,29 @@ static VectorDims getStridesAndModifyShape(Shape& shape, const bool transpose) {
return strides;
}

mkldnn::memory::desc MKLDNNMatMulNode::getBiasDescFrom(const DnnlMemoryDescCPtr outMemDesc) {
// oneDNN matmul requires shape for bias desc to be the same rank
VectorDims biasDims(outMemDesc->getShape().getRank(), 1);
const auto outDims = outMemDesc->getShape().getStaticDims();
const auto chIdx = getFusingAxis();
biasDims[chIdx] = outDims[chIdx];
const auto bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(2));

return mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(biasDims), bdt, memory::format_tag::any);
}
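oneDNN's matmul expects the bias descriptor to have the same rank as the destination, broadcast over every non-channel dimension, which is exactly what getBiasDescFrom builds. A standalone sketch of the dims it produces (illustrative only):

#include <cstddef>
#include <vector>

// For a destination of shape {B, M, N} the bias dims become {1, 1, N}:
// same rank as the output, 1 everywhere except the channel (last) axis.
std::vector<std::size_t> biasDimsForMatMul(const std::vector<std::size_t>& dstDims) {
    std::vector<std::size_t> biasDims(dstDims.size(), 1);
    biasDims.back() = dstDims.back();
    return biasDims;
}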

void MKLDNNMatMulNode::getSupportedDescriptors() {
if (getParentEdges().size() != 2)
if (getParentEdges().size() != getOriginalInputsNumber())
IE_THROW() << errorPrefix << " has incorrect number of input edges for layer " << getName();
if (getChildEdges().empty())
IE_THROW() << errorPrefix << " has incorrect number of output edges for layer " << getName();

withBiases = getOriginalInputsNumber() == 3;

auto canBeExecutedInInt8 = [](const Precision firstInput, const Precision secondInput) {
return one_of(firstInput, Precision::U8, Precision::I8) && secondInput == Precision::I8;
};

auto firstInPortPrec = getOriginalInputPrecisionAtPort(0);
auto secondInPortPrec = getOriginalInputPrecisionAtPort(1);
auto outPortPrec = getOriginalOutputPrecisionAtPort(0);

@ -154,6 +205,9 @@ void MKLDNNMatMulNode::getSupportedDescriptors() {
outPortPrec = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0);
}

if (!canBeExecutedInInt8(firstInPortPrec, secondInPortPrec) && one_of(outPortPrec, Precision::U8, Precision::I8))
outPortPrec = Precision::FP32; // INT output is not supported for non-INT inputs

const auto& inputShape0 = getInputShapeAtPort(0);
const auto& inputShape1 = getInputShapeAtPort(1);
const auto& outputShape = getOutputShapeAtPort(0);

@ -206,12 +260,19 @@ void MKLDNNMatMulNode::getSupportedDescriptors() {

void MKLDNNMatMulNode::createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
const std::vector<MemoryDescPtr>& outputDesc) {
MKLDNNDescriptor desc{
std::make_shared<matmul::desc>(inDataDesc[0]->getDnnlDesc(),
inDataDesc[1]->getDnnlDesc(),
outDataDesc->getDnnlDesc())};
std::shared_ptr<mkldnn::matmul::desc> matmul_desc;
if (withBiases) {
matmul_desc.reset(new matmul::desc(inDataDesc[0]->getDnnlDesc(),
inDataDesc[1]->getDnnlDesc(),
getBiasDescFrom(outDataDesc),
outDataDesc->getDnnlDesc()));
} else {
matmul_desc.reset(new matmul::desc(inDataDesc[0]->getDnnlDesc(),
inDataDesc[1]->getDnnlDesc(),
outDataDesc->getDnnlDesc()));
}

descs.push_back(desc);
descs.emplace_back(matmul_desc);
}

void MKLDNNMatMulNode::initSupportedPrimitiveDescriptors() {

@ -262,9 +323,13 @@ void MKLDNNMatMulNode::createPrimitive() {

MemoryDescPtr MKLDNNMatMulNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) {
auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1): primitive_desc_it.src_desc(idx);
return std::make_shared<CpuBlockedMemoryDesc>(
MKLDNNExtensionUtils::DataTypeToIEPrecision(static_cast<mkldnn::memory::data_type>(desc.data.data_type)),
getInputShapeAtPort(idx)); /* provide initial shapes, so hide transpose effect */

if (idx < 2) // inputs
return std::make_shared<CpuBlockedMemoryDesc>(
MKLDNNExtensionUtils::DataTypeToIEPrecision(static_cast<mkldnn::memory::data_type>(desc.data.data_type)),
getInputShapeAtPort(idx)); /* provide initial shapes, so hide transpose effect */
else // bias
return MKLDNNExtensionUtils::makeDescriptor(desc);
}

bool MKLDNNMatMulNode::created() const {

@ -300,10 +365,7 @@ void MKLDNNMatMulNode::prepareParams() {
AttrPtr attr;

if (isDynamicNode()) {
if (!pAttr) {
pAttr = initPrimitiveAttr(src0MemPtr->getStaticDims());
}
attr = pAttr;
attr = initPrimitiveAttr(dstMemPtr->getStaticDims());

const auto& src0Desc = src0MemPtr->getDesc();
const auto& src1Desc = src1MemPtr->getDesc();
@ -323,13 +385,22 @@ void MKLDNNMatMulNode::prepareParams() {

auto dstDnnlDesc = dstMemPtr->GetDescWithType<DnnlMemoryDesc>();

MKLDNNDescriptor desc{
std::make_shared<matmul::desc>(src0TransposedDesc->getDnnlDesc(),
src1TransposedDesc->getDnnlDesc(),
dstDnnlDesc->getDnnlDesc())};
std::shared_ptr<mkldnn::matmul::desc> matmul_desc;

matmul::primitive_desc prim_desc;
if (withBiases) {
matmul_desc.reset(new mkldnn::matmul::desc{src0TransposedDesc->getDnnlDesc(),
src1TransposedDesc->getDnnlDesc(),
getBiasDescFrom(dstDnnlDesc),
dstDnnlDesc->getDnnlDesc()});
} else {
matmul_desc.reset(new mkldnn::matmul::desc(src0TransposedDesc->getDnnlDesc(),
src1TransposedDesc->getDnnlDesc(),
dstDnnlDesc->getDnnlDesc()));
}

MKLDNNDescriptor desc(matmul_desc);
primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), *attr);
matmul::primitive_desc prim_desc;

while (static_cast<bool>(itpd)) {
impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str());

@ -347,6 +418,10 @@ void MKLDNNMatMulNode::prepareParams() {
primArgs[DNNL_ARG_SRC_0] = src0MemPtr->GetPrimitive();
primArgs[DNNL_ARG_WEIGHTS_0] = src1MemPtr->GetPrimitive();
primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive();
if (withBiases)
primArgs[DNNL_ARG_BIAS] = getParentEdgeAt(2)->getMemoryPtr()->GetPrimitive();

appendPostOpArgs(*attr);
}

void MKLDNNMatMulNode::executeDynamicImpl(dnnl::stream strm) {

@ -32,6 +32,10 @@ public:
return getOriginalInputsNumber();
}

size_t getFusingAxis() const override {
return getOutputShapeAtPort(0).getRank() - 1;
}

void prepareParams() override;
void executeDynamicImpl(mkldnn::stream strm) override;

@ -39,11 +43,15 @@ public:
const std::vector<impl_desc_type>& getPrimitivesPriority() override;

protected:
AttrPtr initPrimitiveAttr() const override;
AttrPtr initPrimitiveAttr(const VectorDims& dims) const;
AttrPtr initPrimitiveAttr() override;
AttrPtr initPrimitiveAttr(const VectorDims& dims);

private:
void setPostOps(mkldnn::primitive_attr &attr, const VectorDims& dims, bool initWeights) const;
mkldnn::memory::desc getBiasDescFrom(const DnnlMemoryDescCPtr outMemDesc);

bool withBiases;

void setPostOps(mkldnn::primitive_attr &attr, const VectorDims& dims, bool initWeights);

std::string errorPrefix;

@ -511,7 +511,7 @@ void MKLDNNPoolingNode::initDescriptor(const NodeConfig& config) {
selectedPD->setConfig(rightConfig);
}

MKLDNNNode::AttrPtr MKLDNNPoolingNode::initPrimitiveAttr() const {
MKLDNNNode::AttrPtr MKLDNNPoolingNode::initPrimitiveAttr() {
auto attr = std::make_shared<mkldnn::primitive_attr>(mkldnn::primitive_attr());

setPostOps(*attr, true);

@ -34,7 +34,7 @@ public:
static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;

protected:
AttrPtr initPrimitiveAttr() const override;
AttrPtr initPrimitiveAttr() override;

private:
void setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) const;

@ -4,6 +4,13 @@

#pragma once

#include <cstddef>
#include <numeric>
#include <vector>

#include "ie_common.h"
#include "ie_layouts.h"

namespace MKLDNNPlugin {

/**

@ -36,7 +43,9 @@ inline std::vector<size_t> getNormalizedDimsBySize(const InferenceEngine::SizeVe
* flag which specify how we compare C dims if value is undefined (weak or strong)
* @return true if broadcastable, false otherwise.
*/
inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVector &firstInputDims, const InferenceEngine::SizeVector& secondInputDims,
inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVector &firstInputDims,
const InferenceEngine::SizeVector& secondInputDims,
size_t channelAxis,
bool weakComparison = false) {
bool (*dimsEqual)(size_t, size_t) = weakComparison ? static_cast<bool (*)(size_t, size_t)>(dimsEqualWeak) :
static_cast<bool (*)(size_t, size_t)>(dimsEqualStrong);

@ -47,7 +56,7 @@ inline bool isPerTensorOrPerChannelBroadcastable(const InferenceEngine::SizeVect

std::vector<size_t> normalizedSecondInputDims = getNormalizedDimsBySize(secondInputDims, firstInputDims.size());
for (size_t i = 0; i < normalizedSecondInputDims.size(); i++) {
if ((i == 1 && !dimsEqual(normalizedSecondInputDims[i], firstInputDims[1])) || (i != 1 && normalizedSecondInputDims[i] != 1))
if ((i == channelAxis && !dimsEqual(normalizedSecondInputDims[i], firstInputDims[i])) || (i != channelAxis && normalizedSecondInputDims[i] != 1))
return false;
}
return true;
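The change above generalizes the old hard-coded axis 1 to an arbitrary channelAxis. A hedged standalone version of the per-channel part of the rule (plain C++; assumes both dims vectors are already normalized to the same rank and omits the per-tensor early-out and weak/strong comparison modes of the original):

#include <cstddef>
#include <vector>

// The second input is broadcastable as per-channel data if every dimension is 1
// except possibly channelAxis, which must match the first input there.
bool isPerChannelBroadcastable(const std::vector<std::size_t>& firstDims,
                               const std::vector<std::size_t>& secondDims,
                               std::size_t channelAxis) {
    for (std::size_t i = 0; i < secondDims.size(); i++) {
        const bool ok = (i == channelAxis) ? secondDims[i] == firstDims[i] : secondDims[i] == 1;
        if (!ok)
            return false;
    }
    return true;
}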
@ -51,7 +51,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 2, 2.f), ngraph::element::f32, ngraph::Shape{ 2, 4 } },
{ 256ul, {{1}, {1}, {2, 1}, {2, 1}}, {-128.f}, {127.f}, {-128.f, -12.8f}, {127.f, 12.7f} },
{ {}, {}, {} },
"FullyConnected",
"MatMul",
"U8"
},
// 4D with Dq on weights

@ -61,7 +61,7 @@ std::vector<MatMulWithConstantTransformationTestValues> testValues = {
{ std::vector<float>(4 * 2, 2.f), ngraph::element::i8, ngraph::Shape{ 2, 4 } },
{},
{ ngraph::element::f32, {}, {{0.1f, 0.01}, ngraph::element::f32, ngraph::Shape{ 2, 1 }} },
"FullyConnected",
"MatMul",
"U8"
},
// 3D with the same values

@ -11,7 +11,8 @@ using namespace LayerTestsDefinitions;
namespace {

const std::vector<InferenceEngine::Precision> inputPrecisions = {
InferenceEngine::Precision::FP32
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::I32,
};

const std::vector<ShapeRelatedParams> shapeRelatedParams = {

@ -2,9 +2,12 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "shared_test_classes/single_layer/mat_mul.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "ie_precision.hpp"
#include "test_utils/fusing_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include <string>

using namespace ngraph;
using namespace InferenceEngine;

@ -139,11 +142,10 @@ protected:
const auto& inShapeA = inputDynamicShapes[0];
const auto& inShapeB = inputDynamicShapes[1];

/* @todo
* Currently nodes are not fused thought Reshape
* Check can be deleted after this limitation is gone
*/
if (nodeType == MatMulNodeType::MatMul && inShapeA.size() < 4 && inShapeB.size() < 4)
// see comment in MKLDNNMatMulNode::canFuse
if (!(nodeType == MatMulNodeType::MatMul &&
std::get<0>(fusingParams) && std::get<0>(fusingParams)->getFusedOpsNames().find("(PerChannel)") != std::string::npos &&
std::max(inShapeA.size(), inShapeB.size()) > 2))
std::tie(postOpMgrPtr, fusedOps) = fusingParams;

configuration.insert(additionalConfig.begin(), additionalConfig.end());

@ -179,6 +181,8 @@ TEST_P(MatMulLayerCPUTest, CompareWithRefs) {
namespace {

/* ============= Common params ============= */
std::map<std::string, std::string> emptyAdditionalConfig;

std::vector<std::map<std::string, std::string>> additionalConfig {
std::map<std::string, std::string>{/* empty config */},
{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}

@ -196,15 +200,16 @@ std::vector<CPUSpecificParams> filterSpecificParams() {
return specificParams;
}

const auto fusingBias = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<Node> inpNode, const element::Type& ngPrc, ParameterVector& params) {
size_t last_dim = inpNode->get_output_partial_shape(0).rbegin()->get_length();
auto bias = builder::makeConstant(ngPrc, Shape{last_dim}, std::vector<float>{}, true);
return std::make_shared<opset1::Add>(inpNode, bias);
}, "fusingBias"}}), {"Add"}};

/* ============= FullyConnected ============= */
namespace fullyConnected {

const auto fusingBiasFC = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<Node> inpNode, const element::Type& ngPrc, ParameterVector& params) {
auto bias = builder::makeConstant(ngPrc, Shape({inpNode->get_output_shape(0).back()}), std::vector<float>{}, true);
return std::make_shared<opset1::Add>(inpNode, bias);
}, "fusingBiasFC"}}), {"Add"}};

const std::vector<ShapeRelatedParams> IS2D = {
{static_shapes_to_test_representation({{59, 1}, {1, 120}}), {false, false}},
{static_shapes_to_test_representation({{59, 1}, {1, 120}}), {true, false}},
@ -229,26 +234,46 @@ const std::vector<ShapeRelatedParams> IS2D = {

std::vector<fusingSpecificParams> fusingParamsSet2D {
emptyFusingSpec,
fusingBiasFC,
fusingBias,
fusingRelu,
fusingMultiplyPerChannel,
fusingPReluPerTensor
fusingScaleShift, // EltwiseMulAdd fusing
fusingPReluPerTensor,
fusingFakeQuantizePerChannelRelu,
fusingFakeQuantizePerTensorRelu,
};

const auto fullyConnectedParams2D = ::testing::Combine(::testing::ValuesIn(IS2D),
::testing::ValuesIn(netPRCs),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(helpers::InputLayerType::CONSTANT),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(additionalConfig));
std::vector<fusingSpecificParams> fusingParamsSet2DBF16 {
emptyFusingSpec,
fusingBias,
fusingRelu,
fusingPReluPerTensor,
};

const auto testParams2D = ::testing::Combine(fullyConnectedParams2D,
const auto testParams2D = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D),
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(helpers::InputLayerType::CONSTANT),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(emptyAdditionalConfig)),
::testing::Values(MatMulNodeType::FullyConnected),
::testing::ValuesIn(fusingParamsSet2D),
::testing::ValuesIn(filterSpecificParams()));

const auto testParams2DBF16 = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D),
::testing::ValuesIn(netPRCs),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(helpers::InputLayerType::CONSTANT),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(additionalConfig)),
::testing::Values(MatMulNodeType::FullyConnected),
::testing::ValuesIn(fusingParamsSet2DBF16),
::testing::ValuesIn(filterSpecificParams()));

INSTANTIATE_TEST_SUITE_P(smoke_FC_2D, MatMulLayerCPUTest, testParams2D, MatMulLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_BF16, MatMulLayerCPUTest, testParams2DBF16, MatMulLayerCPUTest::getTestCaseName);

const std::vector<ShapeRelatedParams> IS3D = {
{static_shapes_to_test_representation({{1, 32, 120}, {120, 5}}), {false, false}},

@ -266,23 +291,46 @@ const std::vector<ShapeRelatedParams> IS3D = {

std::vector<fusingSpecificParams> fusingParamsSet3D {
emptyFusingSpec,
fusingBiasFC
fusingBias,
fusingMultiplyPerChannel,
fusingFakeQuantizePerChannel,
fusingFakeQuantizePerTensorRelu,
};

std::vector<fusingSpecificParams> fusingParamsSet3DBF16 {
emptyFusingSpec,
fusingBias,
fusingMultiplyPerChannel,
};

const auto fullyConnectedParams3D = ::testing::Combine(::testing::ValuesIn(IS3D),
::testing::ValuesIn(netPRCs),
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(helpers::InputLayerType::CONSTANT),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(additionalConfig));
::testing::Values(emptyAdditionalConfig));

const auto fullyConnectedParams3DBF16 = ::testing::Combine(::testing::ValuesIn(IS3D),
::testing::ValuesIn(netPRCs),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(helpers::InputLayerType::CONSTANT),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(additionalConfig));

const auto testParams3D = ::testing::Combine(fullyConnectedParams3D,
::testing::Values(MatMulNodeType::FullyConnected),
::testing::ValuesIn(fusingParamsSet3D),
::testing::ValuesIn(filterSpecificParams()));

const auto testParams3DBF16 = ::testing::Combine(fullyConnectedParams3DBF16,
::testing::Values(MatMulNodeType::FullyConnected),
::testing::ValuesIn(fusingParamsSet3DBF16),
::testing::ValuesIn(filterSpecificParams()));

INSTANTIATE_TEST_SUITE_P(smoke_FC_3D, MatMulLayerCPUTest, testParams3D, MatMulLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_BF16, MatMulLayerCPUTest, testParams3DBF16, MatMulLayerCPUTest::getTestCaseName);

std::vector<std::map<std::string, std::string>> filterAdditionalConfig_Brgemm() {
std::vector<std::map<std::string, std::string>> additionalConfig = {
@ -357,7 +405,9 @@ const std::vector<ShapeRelatedParams> IS = {
{static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, false}},
{static_shapes_to_test_representation({{55, 12}, {12, 55}}), {false, true}},
{static_shapes_to_test_representation({{55, 12}, {12, 55}}), {true, true}},
};

const std::vector<ShapeRelatedParams> IS_Dynamic = {
{
{ //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...}
{{-1, -1}, {{55, 12}, {33, 7}}}, // input 0

@ -507,7 +557,16 @@ const std::vector<ShapeRelatedParams> IS = {
std::vector<fusingSpecificParams> matmulFusingParams {
emptyFusingSpec,
fusingElu,
fusingSqrt
fusingSqrt,
fusingPReluPerTensor,
fusingMultiplyPerChannel,
fusingAddPerTensor,
fusingBias,
fusingFakeQuantizePerChannel,
/* @todo FQ unfolds into FQ + Convert + Substract + Multiply after LPT,
* so Relu cannot be fused in this case. Should be analysed */
// fusingFakeQuantizePerChannelRelu,
fusingFakeQuantizePerTensorRelu,
};

const auto matMulParams = ::testing::Combine(::testing::ValuesIn(IS),

@ -523,7 +582,70 @@ const auto testParams = ::testing::Combine(matMulParams,
::testing::ValuesIn(matmulFusingParams),
::testing::ValuesIn(filterSpecificParams()));

INSTANTIATE_TEST_SUITE_P(smoke_MM, MatMulLayerCPUTest, testParams, MatMulLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_MM_Static, MatMulLayerCPUTest, testParams, MatMulLayerCPUTest::getTestCaseName);

const auto matMulParamsDynamic = ::testing::Combine(::testing::ValuesIn(IS_Dynamic),
::testing::ValuesIn(netPRCs),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(helpers::InputLayerType::PARAMETER),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(additionalConfig));

const auto testParamsDynamic = ::testing::Combine(matMulParamsDynamic,
::testing::Values(MatMulNodeType::MatMul),
::testing::Values(emptyFusingSpec),
::testing::ValuesIn(filterSpecificParams()));

INSTANTIATE_TEST_SUITE_P(smoke_MM_Dynamic, MatMulLayerCPUTest, testParamsDynamic, MatMulLayerCPUTest::getTestCaseName);

const std::vector<ShapeRelatedParams> IS_Dynamic_Fusing = {
{
{ //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...}
{{-1, -1}, {{16, 12}, {33, 7}}}, // input 0
{{-1, 33}, {{12, 33}, {7, 33}}} // input 1
},
{false, false}
},
{
{ //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...}
{{-1, -1, -1, -1}, {{1, 2, 32, 60}, {1, 2, 32, 30}}}, // input 0
{{-1, 5}, {{60, 5}, {30, 5}}} // input 1
},
{false, false}
},
{
{ //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...}
{{-1, -1, -1}, {{7, 32, 60}, {7, 32, 30}}}, // input 0
{{-1, -1, -1, 25}, {{3, 7, 60, 25}, {3, 7, 30, 25}}} // input 1
},
{false, false}
},
{
{ //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...}
{{-1, -1, -1}, {{10, 10, 10}, {5, 5, 5}}}, // input 0
{{-1, -1, 5}, {{10, 10, 5}, {5, 5, 5}}} // input 1
},
{false, false}
},
};

const auto matMulParamsDynamicFusing = ::testing::Combine(::testing::ValuesIn(IS_Dynamic_Fusing),
::testing::ValuesIn(netPRCs),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::Values(helpers::InputLayerType::PARAMETER),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(additionalConfig));

const auto testParamsDynamicFusing = ::testing::Combine(matMulParamsDynamicFusing,
::testing::Values(MatMulNodeType::MatMul),
::testing::ValuesIn(matmulFusingParams),
::testing::ValuesIn(filterSpecificParams()));

INSTANTIATE_TEST_SUITE_P(smoke_MM_Dynamic_Fusing, MatMulLayerCPUTest, testParamsDynamicFusing, MatMulLayerCPUTest::getTestCaseName);

} // namespace matmul
@ -1,101 +0,0 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "test_utils/fusing_test_utils.hpp"
#include "ngraph_functions/builders.hpp"

using namespace ngraph;
using namespace InferenceEngine;
using namespace CPUTestUtils;

namespace SubgraphTestsDefinitions {

using ReshapeFCTestParams = std::tuple<std::pair<SizeVector, SizeVector>, // IS fully connected
bool, // transpose B
fusingSpecificParams>;

class ReshapeFCTest : public testing::WithParamInterface<ReshapeFCTestParams>, public CpuTestWithFusing,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<ReshapeFCTestParams> obj) {
std::pair<SizeVector, SizeVector> isFc;
bool transpB;
fusingSpecificParams fusingParams;
std::tie(isFc, transpB, fusingParams) = obj.param;
SizeVector isA = isFc.first; SizeVector isB = isFc.second;

std::ostringstream result;
result << "IS_reshape=" << CommonTestUtils::vec2str(isA) << "_";
result << "IS_fc_B=" << CommonTestUtils::vec2str(isB) << "_";
result << "Transp_B=" << transpB;
result << CpuTestWithFusing::getTestCaseName(fusingParams);

return result.str();
}

protected:
void SetUp() override {
targetDevice = CommonTestUtils::DEVICE_CPU;
std::pair<SizeVector, SizeVector> isFc;
bool transpB;
fusingSpecificParams fusingParams;
std::tie(isFc, transpB, fusingParams) = this->GetParam();
std::tie(postOpMgrPtr, fusedOps) = fusingParams;
SizeVector isReshape = isFc.first; SizeVector isB = isFc.second;
SizeVector isA(2);
isA[0] = isReshape[0];
isA[1] = std::accumulate(isReshape.begin() + 1, isReshape.end(), size_t{1}, std::multiplies<size_t>());
if (transpB) {
std::swap(*(isB.end() - 1), *(isB.end() - 2));
}

auto inputParams = builder::makeParams(element::f32, {isReshape});
auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes<op::Parameter>(inputParams));

auto constNode = builder::makeConstant(element::i64, {isA.size()}, isA);
auto reshape = std::make_shared<opset1::Reshape>(paramOuts[0], constNode, true);

auto matrixB = builder::makeConstant<float>(element::f32, isB, {}, true);
auto matMul = builder::makeMatMul(reshape, matrixB, false, transpB);

const auto netType = element::f32;
selectedType = makeSelectedTypeStr("jit_gemm", netType);

function = makeNgraphFunction(netType, inputParams, matMul, "ReshapeFC");
}
};

TEST_P(ReshapeFCTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()

Run();
CheckNodeOfTypeCount(executableNetwork, "Reshape", 0);
CheckPluginRelatedResults(executableNetwork, "FullyConnected");
}

namespace {

const std::vector<bool> transpose = {
true, false
};

const std::vector<std::pair<SizeVector, SizeVector>> isFC = {
{{71, 128, 1, 1}, {128, 20}},
{{1, 24, 2, 7}, {336, 16}}
};

std::vector<fusingSpecificParams> fusingParamsSet {
emptyFusingSpec,
fusingAddPerChannel
};

const auto reshapeFCParams = ::testing::Combine(::testing::ValuesIn(isFC),
::testing::ValuesIn(transpose),
::testing::ValuesIn(fusingParamsSet));

INSTANTIATE_TEST_SUITE_P(smoke_Check, ReshapeFCTest, reshapeFCParams, ReshapeFCTest::getTestCaseName);

} // namespace

} // namespace SubgraphTestsDefinitions
@ -5,6 +5,7 @@
#pragma once

#include "cpu_test_utils.hpp"
#include <memory>
#include <shared_test_classes/single_layer/activation.hpp>

namespace CPUTestUtils {

@ -75,6 +76,24 @@ protected:
bool checkFusingPosition = true;
};

static size_t getFusingAxis(const std::shared_ptr<ngraph::Node>& node) {
if (std::dynamic_pointer_cast<const ngraph::opset1::MatMul>(node))
return node->get_output_partial_shape(0).size() - 1; // last dimension
else
return 1; // second dimension
}

static ngraph::Shape generatePerChannelShape(const std::shared_ptr<ngraph::Node>& node) {
const auto shape = node->get_output_partial_shape(0);
if (shape.size() == 1)
IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
ngraph::Shape perChannelShape(shape.size(), 1);
const auto channelAxis = getFusingAxis(node);
perChannelShape[channelAxis] = shape[channelAxis].get_length();

return perChannelShape;
}
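The helper above centralizes the per-channel shape logic the fusing builders below now share; the fusing axis follows the same Convolution-vs-MatMul convention as the plugin. A usage sketch (hypothetical helper name, mirroring the builders that follow):

// For a MatMul node with output shape {2, 16, 64} this produces a {1, 1, 64} constant;
// for a convolution-like node with output shape {1, 32, 14, 14} it produces {1, 32, 1, 1}.
static std::shared_ptr<ngraph::Node> makePerChannelConstant(const std::shared_ptr<ngraph::Node>& inpNode,
                                                            const ngraph::element::Type& ngPrc) {
    const ngraph::Shape newShape = generatePerChannelShape(inpNode);
    return ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);  // random per-channel data
}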

/* FUSING PATTERNS */
const auto emptyFusingSpec = fusingSpecificParams{nullptr, {}};

@ -120,11 +139,7 @@ const auto fusingSqrt = fusingSpecificParams{std::make_shared<postNodesMgr>(std:

const auto fusingPReluPerChannel = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
auto shape = inpNode->get_output_partial_shape(0);
if (shape.size() == 1)
IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
ngraph::Shape newShape(shape.size(), 1);
newShape[1] = shape[1].get_length();
ngraph::Shape newShape = generatePerChannelShape(inpNode);
auto data = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(ngraph::shape_size(newShape));
return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::LeakyRelu, newShape, data);
}, "PRelu(PerChannel)"}}), {"PRelu"}};

@ -166,11 +181,7 @@ const auto fusingReluAdd = fusingSpecificParams{std::make_shared<postNodesMgr>(s
return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu);
}, "Relu"},
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
auto shape = inpNode->get_output_partial_shape(0);
if (shape.size() == 1)
IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
ngraph::Shape newShape(shape.size(), 1);
newShape[1] = shape[1].get_length();
ngraph::Shape newShape = generatePerChannelShape(inpNode);
auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Add>(inpNode, constNode);
}, "Add(PerChannel)"}}), {"Relu", "Add"}};

@ -180,40 +191,24 @@ const auto fusingReluScaleShift = fusingSpecificParams{std::make_shared<postNode
return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu);
}, "Relu"},
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
auto shape = inpNode->get_output_partial_shape(0);
if (shape.size() == 1)
IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
ngraph::Shape newShape(shape.size(), 1);
newShape[1] = shape[1].get_length();
auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Multiply>(inpNode, constNode);
ngraph::Shape newShape = generatePerChannelShape(inpNode);
auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Multiply>(inpNode, constNode);
}, "Multiply(PerChannel)"},
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
auto shape = inpNode->get_output_partial_shape(0);
if (shape.size() == 1)
IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
ngraph::Shape newShape(shape.size(), 1);
newShape[1] = shape[1].get_length();
ngraph::Shape newShape = generatePerChannelShape(inpNode);
auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Add>(inpNode, constNode);
}, "Add(PerChannel)"}}), {"Relu", "Add"}};

const auto fusingScaleShift = fusingSpecificParams{ std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) {
auto shape = inpNode->get_output_partial_shape(0);
if (shape.size() == 1)
IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
ngraph::Shape newShape(shape.size(), 1);
newShape[1] = shape[1].get_length();
auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Multiply>(inpNode, constNode);
ngraph::Shape newShape = generatePerChannelShape(inpNode);
auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Multiply>(inpNode, constNode);
}, "Multiply(PerChannel)"},
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) {
auto shape = inpNode->get_output_partial_shape(0);
if (shape.size() == 1)
IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
ngraph::Shape newShape(shape.size(), 1);
newShape[1] = shape[1].get_length();
ngraph::Shape newShape = generatePerChannelShape(inpNode);
auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Add>(inpNode, constNode);
}, "Add(PerChannel)"}}), {"Add"} };
@ -228,22 +223,14 @@ const auto fusingFakeQuantizePerTensor = fusingSpecificParams{ std::make_shared<
const auto fusingFakeQuantizePerChannel = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
auto localPrc = inpNode->get_element_type();
auto shape = inpNode->get_output_partial_shape(0);
if (shape.size() == 1)
IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
ngraph::Shape newShape(shape.size(), 1);
newShape[1] = shape[1].get_length();
ngraph::Shape newShape = generatePerChannelShape(inpNode);
return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape);
}, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"}};

const auto fusingFakeQuantizePerChannelRelu = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
auto localPrc = inpNode->get_element_type();
auto shape = inpNode->get_output_partial_shape(0);
if (shape.size() == 1)
IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
ngraph::Shape newShape(shape.size(), 1);
newShape[1] = shape[1].get_length();
ngraph::Shape newShape = generatePerChannelShape(inpNode);
return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape);
}, "FakeQuantize(PerChannel)"},
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){

@ -291,60 +278,56 @@ const auto fusingSumEluFQ = fusingSpecificParams{std::make_shared<postNodesMgr>(
const auto fusingMultiplyPerTensor = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
ngraph::Shape secondMultInShape(1, 1);
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector<float>{}, true);
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector<float>{}, true);
return std::make_shared<ngraph::op::v1::Multiply>(inpNode, secondMultInput);
}, "Multiply(PerTensor)"}}), {"Multiply"}};

const auto fusingMultiplyPerChannel = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
ngraph::Shape secondMultInShape(inpNode->get_output_partial_shape(0).size(), 1);
secondMultInShape[1] = inpNode->get_output_partial_shape(0)[1].get_length();
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector<float>{}, true);
ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode);
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Multiply>(inpNode, secondMultInput);
}, "Multiply(PerChannel)"}}), {"Multiply"}};

const auto fusingAddPerTensor = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
ngraph::Shape secondMultInShape(1, 1);
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector<float>{}, true);
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Add>(inpNode, secondMultInput);
}, "Add(PerTensor)"}}), {"Add"}};

const auto fusingAddPerChannel = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
ngraph::Shape secondMultInShape(inpNode->get_output_partial_shape(0).size(), 1);
secondMultInShape[1] = inpNode->get_output_partial_shape(0)[1].get_length();
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector<float>{}, true);
ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode);
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Add>(inpNode, secondMultInput);
}, "Add(PerChannel)"}}), {"Add"}};

const auto fusingSubtractPerTensor = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
ngraph::Shape secondMultInShape(1, 1);
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector<float>{}, true);
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Subtract>(inpNode, secondMultInput);
}, "Subtract(PerTensor)"}}), {"Subtract"}};

const auto fusingSubtractPerChannel = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
ngraph::Shape secondMultInShape(inpNode->get_output_partial_shape(0).size(), 1);
secondMultInShape[1] = inpNode->get_output_partial_shape(0)[1].get_length();
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector<float>{}, true);
ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode);
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Subtract>(inpNode, secondMultInput);
}, "Subtract(PerChannel)"}}), {"Subtract"}};

const auto fusingDividePerTensor = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
ngraph::Shape secondMultInShape(1, 1);
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector<float>{}, true);
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Divide>(inpNode, secondMultInput);
}, "Divide(PerTensor)"}}), {"Divide"}};

const auto fusingDividePerChannel = fusingSpecificParams{std::make_shared<postNodesMgr>(std::vector<postNodeBuilder>{
{[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
ngraph::Shape secondMultInShape(inpNode->get_output_partial_shape(0).size(), 1);
secondMultInShape[1] = inpNode->get_output_partial_shape(0)[1].get_length();
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, ngraph::Shape(secondMultInShape), std::vector<float>{}, true);
ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode);
auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector<float>{}, true);
return std::make_shared<ngraph::opset1::Divide>(inpNode, secondMultInput);
}, "Divide(PerChannel)"}}), {"Divide"}};
@ -44,7 +44,7 @@ std::string MatMulTest::getTestCaseName(const testing::TestParamInfo<MatMulLayer
result << "trgDev=" << targetDevice;
result << "config=(";
for (const auto configEntry : additionalConfig) {
result << configEntry.first << ", " << configEntry.second << ":";
result << configEntry.first << ", " << configEntry.second << ";";
}
result << ")";
return result.str();

@ -13,7 +13,6 @@
#include <ngraph_transformations/op/fully_connected.hpp>
#include <ngraph_transformations/convert_matmul_to_fc.hpp>
#include <ngraph_transformations/fc_bias_fusion.hpp>
#include <ngraph_transformations/reshape_fully_connected.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp>
#include <ngraph/pass/manager.hpp>

@ -171,7 +170,6 @@ TEST(TransformationTests, ConvertMatMulToFCTest7) {
ngraph::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();
m.register_pass<ConvertMatMulToFC>();
m.register_pass<ReshapeFullyConnected>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}

@ -179,12 +177,9 @@ TEST(TransformationTests, ConvertMatMulToFCTest7) {
{
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{3, 2, 2});
auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{3, 2}, {1});
auto reshape_begin = std::make_shared<ngraph::opset1::Reshape>(
input1, ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{2}, std::vector<int64_t>{-1, 2}), false);
auto fc = std::make_shared<FullyConnectedNode>(reshape_begin, input2, ngraph::Rank(2));
auto reshape_end = ngraph::op::util::reshapeTo(fc, ngraph::Shape{3, 2, 3});
auto fc = std::make_shared<FullyConnectedNode>(input1, input2, ngraph::Rank(2));

f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{reshape_end}, ngraph::ParameterVector{input1});
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{fc}, ngraph::ParameterVector{input1});
}

auto res = compare_functions(f, f_ref, true);

@ -202,7 +197,6 @@ TEST(TransformationTests, ConvertMatMulToFCTest8) {
ngraph::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();
m.register_pass<ConvertMatMulToFC>();
m.register_pass<ReshapeFullyConnected>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}

@ -211,18 +205,14 @@ TEST(TransformationTests, ConvertMatMulToFCTest8) {
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::PartialShape{-1, -1, 2});
auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{3, 2}, {1});

auto reshape_begin = std::make_shared<ngraph::opset1::Reshape>(
input1, ngraph::opset1::Constant::create(ngraph::element::i64, {2}, {-1, 2}), false);

auto fc = std::make_shared<FullyConnectedNode>(reshape_begin, input2, ngraph::Rank(2));
auto fc = std::make_shared<FullyConnectedNode>(input1, input2, ngraph::Rank(2));
auto a_shape = std::make_shared<ngraph::opset3::ShapeOf>(input1);

auto I = ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_node(a_shape, {0, 1});
auto O = ngraph::opset1::Constant::create(ngraph::element::i64, { 1 }, { 3 });
auto output_shape = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{I, O}, 0);
auto reshape_end = std::make_shared<ngraph::opset1::Reshape>(fc, output_shape, false);

f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{reshape_end}, ngraph::ParameterVector{input1});
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{fc}, ngraph::ParameterVector{input1});
}

auto res = compare_functions(f, f_ref, true);

@ -268,7 +258,6 @@ TEST(TransformationTests, ConvertMatMulToFCTest10) {
ngraph::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();
m.register_pass<ConvertMatMulToFC>();
m.register_pass<ReshapeFullyConnected>();
ASSERT_NO_THROW(m.run_passes(f));
}

@ -439,25 +428,22 @@ TEST(TransformationTests, ConvertMatMulToFCTest_second_input_rank_adj_1) {
std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
{
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 2, 3});
auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 2, 3}, {1});
auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1, 2, 3}, {1});
auto matmul = std::make_shared<ngraph::opset1::MatMul>(input1, input2, false, true);

f = std::make_shared<ngraph::Function>(ngraph::NodeVector{matmul}, ngraph::ParameterVector{input1});
ngraph::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();
m.register_pass<ConvertMatMulToFC>();
m.register_pass<ReshapeFullyConnected>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}

{
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{5, 2, 3});
auto reshape_1 = std::make_shared<ngraph::opset1::Reshape>(input1, ngraph::opset1::Constant::create(ngraph::element::i64, {2}, {-1, 3}), false);
auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{2, 3}, {1});
auto matmul = std::make_shared<FullyConnectedNode>(reshape_1, input2, ngraph::Rank(2));
auto reshape_out = std::make_shared<ngraph::opset1::Reshape>(matmul, ngraph::opset1::Constant::create(ngraph::element::i64, {4}, {1, 5, 2, 2}), false);
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{reshape_out}, ngraph::ParameterVector{input1});
auto matmul = std::make_shared<FullyConnectedNode>(input1, input2, ngraph::Rank(2));
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{matmul}, ngraph::ParameterVector{input1});
}

auto res = compare_functions(f, f_ref, true);

@ -475,7 +461,6 @@ TEST(TransformationTests, ConvertMatMulToFCTest_second_input_rank_adj_2) {
ngraph::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();
m.register_pass<ConvertMatMulToFC>();
m.register_pass<ReshapeFullyConnected>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}

@ -495,9 +480,9 @@ TEST(TransformationTests, ConvertMatMulToFCTest_second_input_rank_adj_3) {
std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
{
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{ 5, 2, 3 });
auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 1, 1, 2, 3 }, { 1 });
auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 1, 2, 3 }, { 1 });
auto matmul = std::make_shared<ngraph::opset1::MatMul>(input1, weights, false, true);
auto biases = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 1, 1, 1, 2 }, { 1 });
auto biases = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 1, 1, 2 }, { 1 });
auto add = std::make_shared<ngraph::opset1::Add>(matmul, biases);

f = std::make_shared<ngraph::Function>(ngraph::NodeVector{ add }, ngraph::ParameterVector{ input1 });

@ -505,7 +490,6 @@ TEST(TransformationTests, ConvertMatMulToFCTest_second_input_rank_adj_3) {
m.register_pass<ngraph::pass::InitNodeInfo>();
m.register_pass<ConvertMatMulToFC>();
m.register_pass<FullyConnectedBiasFusion>();
m.register_pass<ReshapeFullyConnected>();
m.run_passes(f);
ASSERT_NO_THROW(check_rt_info(f));
}

@ -513,53 +497,13
|
||||
{
|
||||
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{ 5, 2, 3 });
|
||||
auto reshape_before_const = ngraph::opset1::Constant::create(ngraph::element::i64, { 2 }, { -1, 3 });
|
||||
auto reshape_1 = std::make_shared<ngraph::opset1::Reshape>(input1, reshape_before_const, false);
|
||||
|
||||
auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 2, 3 }, { 1 });
|
||||
auto biases = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 2 }, { 1 });
|
||||
auto matmul = std::make_shared<FullyConnectedNode>(reshape_1, weights, biases, ngraph::Rank(2));
|
||||
auto matmul = std::make_shared<FullyConnectedNode>(input1, weights, biases, ngraph::Rank(2));
|
||||
|
||||
auto reshape_after_const = ngraph::opset1::Constant::create(ngraph::element::i64, { 4 }, { 1, 5, 2, 2 });
|
||||
auto reshape_out = std::make_shared<ngraph::opset1::Reshape>(matmul, reshape_after_const, false);
|
||||
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ reshape_out }, ngraph::ParameterVector{ input1 });
|
||||
}
|
||||
|
||||
auto res = compare_functions(f, f_ref, true);
|
||||
ASSERT_TRUE(res.first) << res.second;
|
||||
}
|
||||
|
||||
TEST(TransformationTests, ConvertMatMulToFCTest_second_input_rank_adj_dynamic) {
|
||||
std::shared_ptr<ngraph::Function> f(nullptr), f_ref(nullptr);
|
||||
{
|
||||
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::PartialShape{-1, 2, 3});
|
||||
auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 2, 3}, {1});
|
||||
auto matmul = std::make_shared<ngraph::opset1::MatMul>(input1, input2, false, true);
|
||||
|
||||
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{matmul}, ngraph::ParameterVector{input1});
|
||||
ngraph::pass::Manager m;
|
||||
m.register_pass<ngraph::pass::InitNodeInfo>();
|
||||
m.register_pass<ConvertMatMulToFC>();
|
||||
m.register_pass<ReshapeFullyConnected>();
|
||||
m.run_passes(f);
|
||||
ASSERT_NO_THROW(check_rt_info(f));
|
||||
}
|
||||
|
||||
{
|
||||
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::PartialShape{-1, 2, 3});
|
||||
auto reshape_1 = std::make_shared<ngraph::opset1::Reshape>(input1, ngraph::opset1::Constant::create(ngraph::element::i64, {2}, {-1, 3}), false);
|
||||
auto input2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{2, 3}, {1});
|
||||
auto matmul = std::make_shared<FullyConnectedNode>(reshape_1, input2, ngraph::Rank(2));
|
||||
|
||||
auto shape_of = std::make_shared<ngraph::opset7::ShapeOf>(input1);
|
||||
auto gather = std::make_shared<ngraph::opset7::Gather>(
|
||||
shape_of, ngraph::opset1::Constant::create(ngraph::element::i64, {2}, {0, 1}), ngraph::opset1::Constant::create(ngraph::element::i64, {}, {0}));
|
||||
auto concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{
|
||||
ngraph::opset1::Constant::create(ngraph::element::i64, {1}, {1}),
|
||||
gather,
|
||||
ngraph::opset1::Constant::create(ngraph::element::i64, {1}, {2}),
|
||||
}, 0);
|
||||
auto reshape_out = std::make_shared<ngraph::opset1::Reshape>(matmul, concat, false);
|
||||
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{reshape_out}, ngraph::ParameterVector{input1});
|
||||
f_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{ matmul }, ngraph::ParameterVector{ input1 });
|
||||
}
|
||||
|
||||
auto res = compare_functions(f, f_ref, true);
|
||||