[LPT] Introduced BiasAttribute (#16781)
* Extended check on ConvSum fusing
* [LPT] Introduced 'bias' rt attribute
* [CPU][TESTS] Added FQLayerDQBias tests
parent 906ec7ee1b
commit 5f416dc4d2
@@ -25,6 +25,7 @@
 <tab type="user" title="CreateAttribute" url="@ref openvino_docs_OV_UG_lpt_CreateAttribute"/>
 <tab type="user" title="CreatePrecisionsDependentAttribute" url="@ref openvino_docs_OV_UG_lpt_CreatePrecisionsDependentAttribute"/>
 <tab type="user" title="MarkupAvgPoolPrecisionPreserved" url="@ref openvino_docs_OV_UG_lpt_MarkupAvgPoolPrecisionPreserved"/>
+<tab type="user" title="MarkupBias" url="@ref openvino_docs_OV_UG_lpt_MarkupBias"/>
 <tab type="user" title="MarkupCanBeQuantized" url="@ref openvino_docs_OV_UG_lpt_MarkupCanBeQuantized"/>
 <tab type="user" title="MarkupPerTensorQuantization" url="@ref openvino_docs_OV_UG_lpt_MarkupPerTensorQuantization"/>
 <tab type="user" title="MarkupPrecisions" url="@ref openvino_docs_OV_UG_lpt_MarkupPrecisions"/>
@@ -128,6 +128,7 @@ The model on this step is changed. There are more details in developer guide [Pr
 
 ### Step 2. Markup
 This step creates runtime attributes for operations. These attributes will be used in next step. Transformations:
+* [MarkupBias](@ref openvino_docs_OV_UG_lpt_MarkupBias)
 * [MarkupCanBeQuantized](@ref openvino_docs_OV_UG_lpt_MarkupCanBeQuantized)
 * [MarkupPrecisions](@ref openvino_docs_OV_UG_lpt_MarkupPrecisions)
 * [MarkupPerTensorQuantization](@ref openvino_docs_OV_UG_lpt_MarkupPerTensorQuantization)
@@ -2,18 +2,20 @@
 
 This step defines the optimal `FakeQuantize` decomposition precisions for the best inference performance via operations markup with runtime attribute instances. Attributes are created for input and output ports and operations. Transformations do not change the operation output port precisions. A model markup low precision logic is decomposed and implemented into the following common markup transformations. The order of transformations is important:
 
-1. [MarkupCanBeQuantized](@ref openvino_docs_OV_UG_lpt_MarkupCanBeQuantized)
-2. [MarkupPrecisions](@ref openvino_docs_OV_UG_lpt_MarkupPrecisions)
-3. [MarkupPerTensorQuantization](@ref openvino_docs_OV_UG_lpt_MarkupPerTensorQuantization)
-4. [MarkupAvgPoolPrecisionPreserved](@ref openvino_docs_OV_UG_lpt_MarkupAvgPoolPrecisionPreserved)
-5. [PropagatePrecisions](@ref openvino_docs_OV_UG_lpt_PropagatePrecisions)
-6. [AlignQuantizationIntervals](@ref openvino_docs_OV_UG_lpt_AlignQuantizationIntervals)
-7. [AlignQuantizationParameters](@ref openvino_docs_OV_UG_lpt_AlignQuantizationParameters)
+1. [MarkupBias](@ref openvino_docs_OV_UG_lpt_MarkupBias)
+2. [MarkupCanBeQuantized](@ref openvino_docs_OV_UG_lpt_MarkupCanBeQuantized)
+3. [MarkupPrecisions](@ref openvino_docs_OV_UG_lpt_MarkupPrecisions)
+4. [MarkupPerTensorQuantization](@ref openvino_docs_OV_UG_lpt_MarkupPerTensorQuantization)
+5. [MarkupAvgPoolPrecisionPreserved](@ref openvino_docs_OV_UG_lpt_MarkupAvgPoolPrecisionPreserved)
+6. [PropagatePrecisions](@ref openvino_docs_OV_UG_lpt_PropagatePrecisions)
+7. [AlignQuantizationIntervals](@ref openvino_docs_OV_UG_lpt_AlignQuantizationIntervals)
+8. [AlignQuantizationParameters](@ref openvino_docs_OV_UG_lpt_AlignQuantizationParameters)
 
 The table of transformations and used attributes:
 
 | Transformation name             | Create attributes             | Use attributes                             |
 |---------------------------------|-------------------------------|--------------------------------------------|
+| MarkupBias                      | Bias                          |                                            |
 | MarkupCanBeQuantized            | Precisions                    |                                            |
 | MarkupPrecisions                | Precisions,PrecisionPreserved |                                            |
 | MarkupPerTensorQuantization     | PerTensorQuantization         |                                            |
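For orientation, a minimal sketch (not part of this diff) of how the markup pipeline above can be wired through a pass manager. The pass name comes from this PR; the bare manager, the `run_markup` helper, and `model` are illustrative assumptions:

```cpp
// Sketch only: registering the markup passes in the documented order.
// Assumes the LPT headers introduced by this PR are available;
// "model" is a hypothetical std::shared_ptr<ov::Model> built elsewhere.
#include <low_precision/markup_bias.hpp>
#include <ngraph/pass/manager.hpp>
#include <openvino/core/model.hpp>

void run_markup(const std::shared_ptr<ov::Model>& model) {
    ngraph::pass::Manager markup;
    // 1. MarkupBias: tags Add nodes that act as biases (see the MarkupBias page)
    markup.register_pass<ngraph::pass::low_precision::MarkupBias>();
    // 2..8: the remaining markup passes from the list above would follow here,
    // each taking its own parameters (e.g. defaultPrecisions).
    markup.run_passes(model);
}
```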
@@ -0,0 +1,3 @@
+# MarkupBias transformation {#openvino_docs_OV_UG_lpt_MarkupBias}
+
+ngraph::pass::low_precision::MarkupBias class represents the `MarkupBias` transformation.
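As a hedged illustration of what the transformation marks (assuming the opset1 API and the headers added in this PR): a scalar or per-channel constant `Add` after a target layer such as `Convolution` gets the Bias runtime attribute.

```cpp
// Illustrative sketch, not from the diff: build Conv + per-channel Add,
// run MarkupBias, and check that the Add was marked.
#include <low_precision/markup_bias.hpp>
#include <low_precision/rt_info/bias_attribute.hpp>
#include <ngraph/pass/manager.hpp>
#include <openvino/core/model.hpp>
#include <openvino/opsets/opset1.hpp>

int main() {
    auto data = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{1, 3, 16, 16});
    auto weights = ov::opset1::Constant::create(ov::element::f32, {8, 3, 1, 1}, {0.1f});
    auto conv = std::make_shared<ov::opset1::Convolution>(
        data, weights, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0}, ov::Strides{1, 1});
    auto bias = ov::opset1::Constant::create(ov::element::f32, {1, 8, 1, 1}, {0.5f});  // per-channel constant
    auto add = std::make_shared<ov::opset1::Add>(conv, bias);
    auto model = std::make_shared<ov::Model>(ov::OutputVector{add}, ov::ParameterVector{data});

    ngraph::pass::Manager m;
    m.register_pass<ngraph::pass::low_precision::MarkupBias>();
    m.run_passes(model);

    return ov::marked_as_bias(add) ? 0 : 1;  // expected: marked
}
```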
@@ -27,6 +27,7 @@ openvino_docs_OV_UG_lpt_gathertransformation.rst
 openvino_docs_OV_UG_lpt_linopsequencefusion.rst
 openvino_docs_OV_UG_lpt_mvntransformation.rst
 openvino_docs_OV_UG_lpt_markupavgpoolprecisionpreserved.rst
+openvino_docs_OV_UG_lpt_markupbias.rst
 openvino_docs_OV_UG_lpt_markupcanbequantized.rst
 openvino_docs_OV_UG_lpt_markuppertensorquantization.rst
 openvino_docs_OV_UG_lpt_markupprecisions.rst
@@ -0,0 +1,32 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <low_precision/lpt_visibility.hpp>
+#include <memory>
+#include <openvino/pass/graph_rewrite.hpp>
+#include <openvino/pass/pattern/matcher.hpp>
+
+namespace ngraph {
+namespace pass {
+namespace low_precision {
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief MarkupBias transformation marks biases after target layers.
+ *
+ * For more details about the transformation, refer to
+ * [MarkupBias](@ref openvino_docs_OV_UG_lpt_MarkupBias) page
+ * in the Inference Engine Developer Guide.
+ */
+class LP_TRANSFORMATIONS_API MarkupBias : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("MarkupBias", "0");
+    MarkupBias();
+};
+
+}  // namespace low_precision
+}  // namespace pass
+}  // namespace ngraph
@@ -0,0 +1,21 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <low_precision/lpt_visibility.hpp>
+#include <ngraph/node.hpp>
+#include <openvino/core/runtime_attribute.hpp>
+
+namespace ov {
+LP_TRANSFORMATIONS_API void mark_as_bias(const std::shared_ptr<Node>& node);
+
+LP_TRANSFORMATIONS_API bool marked_as_bias(const std::shared_ptr<const Node>& node);
+
+class LP_TRANSFORMATIONS_API BiasAttribute : public ov::RuntimeAttribute {
+public:
+    OPENVINO_RTTI("LowPrecision::Bias", "", ov::RuntimeAttribute);
+    bool is_copyable(const std::shared_ptr<Node>& to) const override;
+};
+}  // namespace ov
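A small usage sketch of this attribute API (illustrative, not from the diff); both helpers simply read or write the node's rt_info map, as the implementation later in this commit shows:

```cpp
#include <low_precision/rt_info/bias_attribute.hpp>
#include <openvino/core/node.hpp>

// "add" is assumed to be some Add node obtained from a matched pattern.
void tag_and_check(const std::shared_ptr<ov::Node>& add) {
    ov::mark_as_bias(add);                       // stores a BiasAttribute in add->get_rt_info()
    const bool tagged = ov::marked_as_bias(add); // true once the rt_info entry exists
    (void)tagged;
}
```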
@@ -15,6 +15,7 @@
 
 #include "low_precision/common/ie_lpt_exception.hpp"
 #include "low_precision/network_helper.hpp"
+#include "low_precision/rt_info/bias_attribute.hpp"
 #include "itt.hpp"
 
 namespace ngraph {
@@ -29,7 +30,7 @@ std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>&
     // - single responsibility
     // - keep AddTransformation and AddToSubtractTransformation transformations independent and optional
     const auto add = ov::as_type_ptr<opset1::Add>(op);
-    if (add == nullptr) {
+    if (add == nullptr || ov::marked_as_bias(add)) {
         return nullptr;
     }
 
@@ -40,17 +41,8 @@ std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>&
     if (constBranchIndex == -1) {
         return nullptr;
     }
 
     const size_t dataBranchIndex = constBranchIndex == 0 ? 1ul : 0;
 
-    const auto parent = add->get_input_node_shared_ptr(dataBranchIndex);
-    if (ov::is_type<opset1::Convolution>(parent) ||
-        ov::is_type<opset1::GroupConvolution>(parent) ||
-        ov::is_type<opset1::ConvolutionBackpropData>(parent) ||
-        (ov::is_type<opset1::MatMul>(parent) &&
-        (ov::is_type<opset1::Constant>(parent->get_input_node_ptr(0)) || ov::is_type<opset1::Constant>(parent->get_input_node_ptr(1))))) {
-        return nullptr;
-    }
-
     auto constant = fold<opset1::Negative>(add->input_value(constBranchIndex));
     auto constOutput = constant->output(0);
 
@@ -10,6 +10,7 @@
 #include <vector>
 
 #include "low_precision/network_helper.hpp"
+#include "low_precision/rt_info/bias_attribute.hpp"
 
 using namespace ngraph;
 using namespace ngraph::pass;
@@ -70,8 +71,18 @@ static std::shared_ptr<Node> getDataParent(const std::shared_ptr<Node> branchDat
         parent = parent->get_input_node_shared_ptr(0);
     }
 
-    if (ov::is_type<opset1::Add>(parent) && isTargetType(parent->get_input_node_shared_ptr(0))) {
-        return parent->get_input_node_shared_ptr(0);
+    if (ov::marked_as_bias(parent)) {
+        const auto bias_parent = parent->get_input_node_shared_ptr(0);
+        // the target node is placed just before the bias
+        if (isTargetType(bias_parent)) {
+            return bias_parent;
+        }
+        // otherwise some DQ operations may be placed between the target node and the bias
+        const auto dq = NetworkHelper::getDequantization(parent->get_input_node_shared_ptr(0));
+        const auto data_node = dq.data.get_node_shared_ptr();
+        if (isTargetType(data_node)) {
+            return data_node;
+        }
     }
     return parent;
 }
|
@ -10,6 +10,7 @@
|
|||||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||||
|
|
||||||
#include "low_precision/network_helper.hpp"
|
#include "low_precision/network_helper.hpp"
|
||||||
|
#include "low_precision/rt_info/bias_attribute.hpp"
|
||||||
#include "itt.hpp"
|
#include "itt.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
@@ -191,17 +192,8 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
 
         inputLowConst_f32 = fq::updateShape(fold<opset1::Add>(inputLowConst_f32, value), fakeQuantize->get_output_partial_shape(0));
         inputHighConst_f32 = fq::updateShape(fold<opset1::Add>(inputHighConst_f32, value), fakeQuantize->get_output_partial_shape(0));
-    } else if (ov::is_type<opset1::Add>(eltwise) && checkElementwise(eltwise)) {
-        if (ov::is_type<opset1::Convolution>(fq::getDataNode(eltwise)) ||
-            ov::is_type<opset1::GroupConvolution>(fq::getDataNode(eltwise)) ||
-            ov::is_type<opset1::ConvolutionBackpropData>(fq::getDataNode(eltwise)) ||
-            ov::is_type<opset1::MatMul>(fq::getDataNode(eltwise)) ||
-            ov::is_type<opset1::GroupConvolutionBackpropData>(fq::getDataNode(eltwise))) {
-            return nullptr;
-        }
-
+    } else if (ov::is_type<opset1::Add>(eltwise) && checkElementwise(eltwise) && !ov::marked_as_bias(eltwise)) {
         const auto value = foldConvert(constant, element::f32);
 
         inputLowConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputLowConst_f32, value), fakeQuantize->get_output_partial_shape(0));
         inputHighConst_f32 = fq::updateShape(fold<opset1::Subtract>(inputHighConst_f32, value), fakeQuantize->get_output_partial_shape(0));
     } else if (ov::is_type<opset1::Convert>(eltwise)) {
@@ -19,6 +19,7 @@
 
 #include "low_precision/align_quantization_intervals.hpp"
 #include "low_precision/fake_quantize_decomposition.hpp"
+#include "low_precision/markup_bias.hpp"
 #include "low_precision/markup_precisions.hpp"
 #include "low_precision/markup_can_be_quantized.hpp"
 #include "low_precision/markup_avg_pool_precision_preserved.hpp"
@@ -201,6 +202,7 @@ bool ngraph::pass::low_precision::MarkupOptimizations::run_on_model(const std::s
         markup.register_pass<low_precision::AlignQuantizationIntervals>(params.defaultPrecisions);
         markup.register_pass<low_precision::AlignQuantizationParameters>(params.defaultPrecisions);
     }
+    markup.register_pass<low_precision::MarkupBias>();
    markup.run_passes(f);
    return false;
 }
src/common/low_precision_transformations/src/markup_bias.cpp (new file, 41 lines)
@@ -0,0 +1,41 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/markup_bias.hpp"
+
+#include <memory>
+#include <openvino/opsets/opset1.hpp>
+#include <openvino/pass/pattern/op/wrap_type.hpp>
+
+#include "itt.hpp"
+#include "low_precision/rt_info/bias_attribute.hpp"
+
+using namespace ngraph::pass::low_precision;
+
+MarkupBias::MarkupBias() {
+    MATCHER_SCOPE(MarkupBias);
+    auto layer_m = ov::pass::pattern::wrap_type<ov::opset1::Convolution,
+                                                ov::opset1::GroupConvolution,
+                                                ov::opset1::ConvolutionBackpropData,
+                                                ov::opset1::GroupConvolutionBackpropData,
+                                                ov::opset1::MatMul>(ov::pass::pattern::has_static_rank());
+    auto bias_const_m = ov::pass::pattern::wrap_type<ov::opset1::Constant>();
+    auto bias_m = ov::pass::pattern::wrap_type<ov::opset1::Add>({layer_m, bias_const_m});
+
+    ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
+        const auto& pattern_map = m.get_pattern_value_map();
+        const auto& const_shape = pattern_map.at(bias_const_m).get_shape();
+
+        // the constant is treated as a bias when it is a scalar or has exactly one non-trivial dimension (per-channel)
+        const bool per_channel = std::count_if(const_shape.begin(), const_shape.end(), [](size_t x) { return x > 1; }) == 1;
+        if (ov::shape_size(const_shape) == 1 || per_channel) {
+            const auto bias = pattern_map.at(bias_m).get_node_shared_ptr();
+            ov::mark_as_bias(bias);
+        }
+
+        return false;
+    };
+
+    auto m = std::make_shared<ov::pass::pattern::Matcher>(bias_m, matcher_name);
+    register_matcher(m, callback);
+}
@@ -1286,7 +1286,14 @@ FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_pt
         return 1ul;
     };
 
-    Output<Node> dataNode = inPlace ? std::const_pointer_cast<Node>(node)->output(0) : node->input_value(parentIndex);
+    Output<Node> dataNode;
+    if (inPlace) {
+        dataNode = std::const_pointer_cast<Node>(node);
+    } else {
+        if (parentIndex >= node->get_input_size())
+            return FakeQuantizeDequantization();
+        dataNode = node->input_value(parentIndex);
+    }
 
     const std::shared_ptr<ngraph::opset1::Multiply> multiply = ov::as_type_ptr<ngraph::opset1::Multiply>(dataNode.get_node_shared_ptr());
     std::shared_ptr<opset1::Constant> multiplyConstant;
@@ -0,0 +1,27 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/rt_info/bias_attribute.hpp"
+#include "low_precision/network_helper.hpp"
+
+#include <iterator>
+#include <memory>
+#include <openvino/opsets/opset1.hpp>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+void ov::mark_as_bias(const std::shared_ptr<ov::Node>& node) {
+    auto& rt = node->get_rt_info();
+    rt[ov::BiasAttribute::get_type_info_static()] = ov::BiasAttribute();
+}
+
+bool ov::marked_as_bias(const std::shared_ptr<const ov::Node>& node) {
+    const auto& rt_info = node->get_rt_info();
+    return rt_info.find(ov::BiasAttribute::get_type_info_static()) != rt_info.end();
+}
+
+// the attribute is copied only onto an Add node that still has a constant input
+bool ov::BiasAttribute::is_copyable(const std::shared_ptr<ov::Node>& to) const {
+    return ov::is_type<ov::opset1::Add>(to) && ngraph::pass::low_precision::NetworkHelper::getConstantInput(to) != nullptr;
+}
@@ -0,0 +1,107 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <low_precision/markup_bias.hpp>
+#include <low_precision/rt_info/bias_attribute.hpp>
+#include <memory>
+#include <string>
+
+#include "common_test_utils/ngraph_test_utils.hpp"
+#include "layer_transformation.hpp"
+#include "lpt_ngraph_functions/markup_bias_function.hpp"
+#include "simple_low_precision_transformer.hpp"
+
+using namespace testing;
+
+class MarkupBiasTestParams {
+public:
+    ov::PartialShape input_shape;
+    ov::PartialShape bias_shape;
+    bool is_bias;
+};
+
+using MarkupBiasTestValues = std::tuple<ov::element::Type, MarkupBiasTestParams, std::string>;
+
+class MarkupBiasTests : public testing::WithParamInterface<MarkupBiasTestValues>, public LayerTransformation {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<MarkupBiasTestValues>& obj) {
+        ov::element::Type precision;
+        MarkupBiasTestParams test_values;
+        std::string layer_type;
+        std::tie(precision, test_values, layer_type) = obj.param;
+
+        std::ostringstream result;
+        result << precision << "IS=" << test_values.input_shape << "_bias_shape=" << test_values.bias_shape << "_"
+               << layer_type << "_is_bias=" << test_values.is_bias;
+        return result.str();
+    }
+
+protected:
+    void SetUp() override {
+        ov::element::Type precision;
+        MarkupBiasTestParams test_values;
+        std::string layer_type;
+        std::tie(precision, test_values, layer_type) = GetParam();
+
+        actualFunction = ngraph::builder::subgraph::MarkupBiasFunction::get(precision,
+                                                                            test_values.input_shape,
+                                                                            test_values.bias_shape,
+                                                                            layer_type);
+        SimpleLowPrecisionTransformer transformer;
+        transformer.transform(actualFunction);
+    }
+};
+
+TEST_P(MarkupBiasTests, CompareFunctions) {
+    actualFunction->validate_nodes_and_infer_types();
+
+    const auto addOps = LayerTransformation::get<opset1::Add>(actualFunction);
+    EXPECT_EQ(1ul, addOps.size()) << "unexpected addOps size";
+
+    const bool is_bias = std::get<1>(GetParam()).is_bias;
+    auto biasAttr = ngraph::pass::low_precision::getAttribute<ov::BiasAttribute>(addOps[0]);
+    EXPECT_EQ(!biasAttr.empty(), is_bias) << "Bias markup failed";
+}
+
+namespace MarkupBiasTestsInstantiation {
+std::vector<ov::element::Type> precisions = {
+    ov::element::f32,
+};
+
+std::vector<MarkupBiasTestParams> test_params_4d = {
+    {{1, 10, 16, 16}, {1, 10, 1, 1}, true},
+    {{1, 10, 16, 16}, {1, 1, 1, 1}, true},
+    {{1, 10, 16, 16}, {1, 10, 16, 16}, false},
+    {{1, 10, 16, 16}, ov::PartialShape::dynamic(), false},
+};
+
+std::vector<std::string> layer_types_4d = {
+    "Convolution",
+    "GroupConvolution",
+    "ConvolutionBackpropData",
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_LPT_4D_Positive,
+                         MarkupBiasTests,
+                         ::testing::Combine(::testing::ValuesIn(precisions),
+                                            ::testing::ValuesIn(test_params_4d),
+                                            ::testing::ValuesIn(layer_types_4d)),
+                         MarkupBiasTests::getTestCaseName);
+
+std::vector<MarkupBiasTestParams> test_params_2d = {
+    {{1, 10}, {1, 10}, true},
+    {{1, 10}, {1, 1}, true},
+    {{1, 10}, ov::PartialShape::dynamic(), false},
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_LPT_2D_Positive,
+                         MarkupBiasTests,
+                         ::testing::Combine(::testing::ValuesIn(precisions),
+                                            ::testing::ValuesIn(test_params_2d),
+                                            ::testing::Values("MatMulWithConstant")),
+                         MarkupBiasTests::getTestCaseName);
+
+}  // namespace MarkupBiasTestsInstantiation
@@ -7,6 +7,7 @@
 #include <low_precision/align_quantization_parameters.hpp>
 #include <low_precision/layer_transformation.hpp>
 #include <low_precision/low_precision.hpp>
+#include <low_precision/markup_bias.hpp>
 #include <low_precision/markup_can_be_quantized.hpp>
 #include <low_precision/markup_quantization_granularity.hpp>
 #include <low_precision/transformation_context.hpp>
@@ -26,6 +27,7 @@ SimpleLowPrecisionTransformer::SimpleLowPrecisionTransformer(
 
     // TODO: use one pass manager
     markup = std::make_shared<ngraph::pass::Manager>(passConfig);
+    markup->register_pass<ngraph::pass::low_precision::MarkupBias>();
    markup->register_pass<ngraph::pass::low_precision::MarkupCanBeQuantized>(params.defaultPrecisions);
    markup->register_pass<ngraph::pass::low_precision::MarkupPrecisions>(precisionRestrictions,
                                                                         params.defaultPrecisions);
@@ -2365,10 +2365,11 @@ bool Eltwise::canBeInPlace() const {
 
 void Eltwise::fuseInto(NodePtr& parentNode) {
     // Handling Convolution custom Add node fusing case which is processed via dnnl append_sum() API.
-    specialConvolutionAddFusing = (parentNode->getType() == Type::Convolution
-        || parentNode->getType() == Type::BinaryConvolution)
-        && getAlgorithm() == Algorithm::EltwiseAdd &&
-        dimsEqualWeak(getInputShapeAtPort(0).getDims(), getInputShapeAtPort(1).getDims());
+    specialConvolutionAddFusing =
+        (parentNode->getType() == Type::Convolution || parentNode->getType() == Type::BinaryConvolution) &&
+        getAlgorithm() == Algorithm::EltwiseAdd &&
+        dimsEqualWeak(getInputShapeAtPort(0).getDims(), getInputShapeAtPort(1).getDims()) &&
+        // extended check: an Add with a constant input (e.g. a bias) is not a ConvSum candidate
+        !getParentEdgeAt(0)->getParent()->isConstant() && !getParentEdgeAt(1)->getParent()->isConstant();
     if ((scales.empty() && shifts.empty()) &&
         !specialConvolutionAddFusing &&
        canBePerformedAsScaleShift(parentNode.get())) {
@@ -0,0 +1,118 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "lpt_ngraph_functions/markup_bias_function.hpp"
+#include "ngraph_functions/builders.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+#include "test_utils/cpu_test_utils.hpp"
+#include "test_utils/fusing_test_utils.hpp"
+
+using namespace ngraph;
+using namespace ov::test;
+using namespace CPUTestUtils;
+using namespace InferenceEngine;
+
+namespace SubgraphTestsDefinitions {
+using FQLayerDQBiasParams = std::tuple<InputShape, std::string>;
+
+class FQLayerDQBias : virtual public SubgraphBaseTest,
+                      public CpuTestWithFusing,
+                      public testing::WithParamInterface<FQLayerDQBiasParams> {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<FQLayerDQBiasParams> obj) {
+        InputShape input_shape;
+        std::string layer_type;
+        std::tie(input_shape, layer_type) = obj.param;
+
+        std::ostringstream result;
+        result << "IS=(" << CommonTestUtils::partialShape2str({input_shape.first}) << ")_TS=(";
+        for (const auto& item : input_shape.second) {
+            result << CommonTestUtils::vec2str(item) << "_";
+        }
+        result << ")_layer_type=" << layer_type;
+        return result.str();
+    }
+
+protected:
+    void SetUp() override {
+        InputShape input_shape;
+        std::string layer_type;
+        std::tie(input_shape, layer_type) = GetParam();
+
+        targetDevice = CommonTestUtils::DEVICE_CPU;
+        fusedOps = std::vector<std::string>{"Add"};
+        std::tie(inFmts, outFmts, priority, selectedType) = CPUSpecificParams{{}, {}, {}, CPUTestsBase::any_type};
+        std::unordered_map<std::string, std::string> ngraph_type_to_plugin_type{
+            {"Convolution", "Convolution"},
+            {"GroupConvolution", "Convolution"},
+            {"ConvolutionBackpropData", "Deconvolution"},
+            {"MatMul", "MatMul"},
+            {"MatMulWithConstant", "FullyConnected"},
+        };
+        node_type = ngraph_type_to_plugin_type[layer_type];
+
+        const auto shapes = layer_type == "MatMul" ? std::vector<InputShape>{input_shape, input_shape}
+                                                   : std::vector<InputShape>{input_shape};
+        init_input_shapes(shapes);
+        function = ngraph::builder::subgraph::MarkupBiasFunction::get(ov::element::f32, inputDynamicShapes[0], {}, layer_type);
+    }
+
+    std::string node_type;
+};
+
+TEST_P(FQLayerDQBias, smoke_CompareWithRefs) {
+    run();
+    CheckPluginRelatedResults(compiledModel, node_type);
+}
+
+namespace {
+const std::vector<InputShape> input_shapes_4D_static = {
+    {{}, {{1, 3, 1, 1}}},
+    {{}, {{1, 3, 64, 64}}}
+};
+
+const std::vector<std::string> layer_types_4D_static = {
+    "Convolution",
+    "GroupConvolution",
+    "ConvolutionBackpropData",
+    "MatMul",
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_FQLayerDQBias_4D_static, FQLayerDQBias,
+                         ::testing::Combine(::testing::ValuesIn(input_shapes_4D_static),
+                                            ::testing::ValuesIn(layer_types_4D_static)),
+                         FQLayerDQBias::getTestCaseName);
+
+const std::vector<InputShape> input_shapes_4D_dynamic = {
+    {{-1, 3, -1, -1}, {{1, 3, 64, 64}}}
+};
+
+const std::vector<std::string> layer_types_4D_dynamic = {
+    "Convolution",
+    "GroupConvolution",
+    "MatMul",
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_FQLayerDQBias_4D_dynamic, FQLayerDQBias,
+                         ::testing::Combine(::testing::ValuesIn(input_shapes_4D_dynamic),
+                                            ::testing::ValuesIn(layer_types_4D_dynamic)),
+                         FQLayerDQBias::getTestCaseName);
+
+const std::vector<InputShape> input_shapes_2D = {
+    {{-1, 768}, {{1, 768}}}
+};
+
+const std::vector<std::string> layer_types_2D = {
+    "MatMulWithConstant",
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_FQLayerDQBias_2D, FQLayerDQBias,
+                         ::testing::Combine(::testing::ValuesIn(input_shapes_2D),
+                                            ::testing::ValuesIn(layer_types_2D)),
+                         FQLayerDQBias::getTestCaseName);
+
+}  // namespace
+}  // namespace SubgraphTestsDefinitions
@@ -0,0 +1,25 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <openvino/core/model.hpp>
+
+#include "common/builders.hpp"
+
+namespace ngraph {
+namespace builder {
+namespace subgraph {
+
+class MarkupBiasFunction {
+public:
+    static std::shared_ptr<ov::Model> get(const ov::element::Type& precision,
+                                          const ov::PartialShape& input_shape,
+                                          const ov::PartialShape& add_shape,
+                                          const std::string& operation_type);
+};
+}  // namespace subgraph
+}  // namespace builder
+}  // namespace ngraph
@@ -0,0 +1,112 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <openvino/opsets/opset1.hpp>
+#include "lpt_ngraph_functions/markup_bias_function.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ngraph_functions/builders.hpp"
+
+namespace ngraph {
+namespace builder {
+namespace subgraph {
+std::shared_ptr<ov::Model> MarkupBiasFunction::get(const ov::element::Type& precision,
+                                                   const ov::PartialShape& input_shape,
+                                                   const ov::PartialShape& add_shape,
+                                                   const std::string& layer_type) {
+    auto input_params = builder::makeDynamicParams(precision, {input_shape});
+    auto il = opset1::Constant::create(precision, {}, {0.f});
+    auto ih = opset1::Constant::create(precision, {}, {12.5f});
+    auto ol = opset1::Constant::create(precision, {}, {0.f});
+    auto oh = opset1::Constant::create(precision, {}, {12.5f});
+    auto fq = std::make_shared<opset1::FakeQuantize>(input_params[0], il, ih, ol, oh, 256);
+
+    std::shared_ptr<ov::Node> layer;
+    const size_t out_channels = 10;
+    if (layer_type == "Convolution") {
+        const size_t in_channels = input_params[0]->get_partial_shape()[1].get_length();
+        auto weights = builder::makeConstant<int8_t>(ov::element::i8, Shape{out_channels, in_channels, 1, 1}, {}, true);
+        auto convert = std::make_shared<ov::opset1::Convert>(weights, precision);
+        auto mul_const = builder::makeConstant<float>(precision, Shape{1, 1, 1, 1}, {}, true);
+        auto mul = std::make_shared<ov::opset1::Multiply>(convert, mul_const);
+
+        const ov::Strides strides = {1, 1};
+        const ov::CoordinateDiff pads_begin = {0, 0};
+        const ov::CoordinateDiff pads_end = {0, 0};
+        const ov::Strides dilations = {1, 1};
+        layer = std::make_shared<ov::opset1::Convolution>(fq, mul, strides, pads_begin, pads_end, dilations);
+    } else if (layer_type == "GroupConvolution") {
+        const size_t in_channels = input_params[0]->get_partial_shape()[1].get_length();
+        auto weights = builder::makeConstant<int8_t>(ov::element::i8, Shape{in_channels, 1, 1, 1}, {}, true);
+        auto convert = std::make_shared<ov::opset1::Convert>(weights, precision);
+        auto mul_const = builder::makeConstant<float>(precision, Shape{1, 1, 1, 1}, {}, true);
+        auto mul = std::make_shared<ov::opset1::Multiply>(convert, mul_const);
+
+        std::vector<int32_t> target_shape{static_cast<int32_t>(in_channels), 1, 1, 1, 1};
+        auto reshape_const = ov::opset1::Constant::create(ov::element::i32, {5}, target_shape);
+        auto reshape = std::make_shared<ov::opset1::Reshape>(mul, reshape_const, true);
+
+        const ov::Strides strides = {1, 1};
+        const ov::CoordinateDiff pads_begin = {0, 0};
+        const ov::CoordinateDiff pads_end = {0, 0};
+        const ov::Strides dilations = {1, 1};
+        layer = std::make_shared<ov::opset1::GroupConvolution>(fq, reshape, strides, pads_begin, pads_end, dilations);
+    } else if (layer_type == "ConvolutionBackpropData") {
+        const size_t in_channels = input_params[0]->get_partial_shape()[1].get_length();
+        auto weights = builder::makeConstant<int8_t>(ov::element::i8, Shape{in_channels, out_channels, 1, 1}, {}, true);
+        auto convert = std::make_shared<ov::opset1::Convert>(weights, precision);
+        auto mul_const = builder::makeConstant<float>(precision, Shape{1, 1, 1, 1}, {}, true);
+        auto mul = std::make_shared<ov::opset1::Multiply>(convert, mul_const);
+
+        const ov::Strides strides = {1, 1};
+        const ov::CoordinateDiff pads_begin = {0, 0};
+        const ov::CoordinateDiff pads_end = {0, 0};
+        const ov::Strides dilations = {1, 1};
+        layer = std::make_shared<ov::opset1::ConvolutionBackpropData>(fq, mul, strides, pads_begin, pads_end, dilations);
+    } else if (layer_type == "MatMul") {
+        auto new_param = std::make_shared<ov::opset1::Parameter>(precision, input_shape);
+        input_params.push_back(new_param);
+        auto il_2 = opset1::Constant::create(precision, {}, {-128.f});
+        auto ih_2 = opset1::Constant::create(precision, {}, {127.f});
+        auto ol_2 = opset1::Constant::create(precision, {}, {-128.f});
+        auto oh_2 = opset1::Constant::create(precision, {}, {127.f});
+        auto fq_2 = std::make_shared<opset1::FakeQuantize>(new_param, il_2, ih_2, ol_2, oh_2, 256);
+        layer = std::make_shared<ov::opset1::MatMul>(fq, fq_2, false, true);
+    } else if (layer_type == "MatMulWithConstant") {
+        const size_t in_channels = input_params[0]->get_partial_shape()[1].get_length();
+        auto weights = builder::makeConstant<int8_t>(ov::element::i8, Shape{out_channels, in_channels}, {}, true);
+        auto convert = std::make_shared<ov::opset1::Convert>(weights, precision);
+        auto mul_const = builder::makeConstant<float>(precision, Shape{out_channels, 1}, {}, true);
+        auto mul = std::make_shared<ov::opset1::Multiply>(convert, mul_const);
+        layer = std::make_shared<ov::opset1::MatMul>(fq, mul, false, true);
+    } else {
+        throw std::runtime_error("Unsupported layer type");
+    }
+
+    layer->set_friendly_name(layer_type);
+
+    std::shared_ptr<ov::Node> add_input;
+    // empty add_shape means that add_input must be generated automatically
+    if (add_shape.is_static() && add_shape.size() == 0) {
+        const auto& out_shape = layer->get_output_partial_shape(0);
+        Shape bias_shape(out_shape.size(), 1);
+        if (layer_type != "MatMul") {
+            bias_shape[1] = out_shape[1].get_length();
+        }
+        add_input = builder::makeConstant<float>(precision, bias_shape, {}, true);
+    } else {
+        if (add_shape.is_static()) {
+            add_input = builder::makeConstant<float>(precision, add_shape.to_shape(), {}, true);
+        } else {
+            auto new_param = std::make_shared<ov::opset1::Parameter>(precision, input_shape);
+            input_params.push_back(new_param);
+            add_input = new_param;
+        }
+    }
+
+    auto add = std::make_shared<ov::opset1::Add>(layer, add_input);
+    return std::make_shared<ov::Model>(add, input_params);
+}
+}  // namespace subgraph
+}  // namespace builder
+}  // namespace ngraph