Mark as deprecated nGraph API (#17647)
* Mark as deprecated nGraph API
* Fixed code style
* Added IN_OV_LIBRARY define
* Suppress warnings for log
* Suppress warning
* Updated nGraph headers
* Fixed build for macOS
* Fixed lpt and snippets
* Fixed build all on macOS
* Suppress some warnings
* Fixed some new warnings
* Fixed new warnings
* Try to fix some warnings
* More warnings
* Soome change
* Suppress more warnings
* Suppress warnings for transformations
* Suppress warnings for LPT
* One more fix
* Suppress more warnings
* Try to fix opset error
* Remove opset constructor
* Cannot fix opset warning
* Suppress warnings for offline transfromations
* Fixed some warnings for Windows
* Fixed code style
* Suppress some warnings for onnx FE
* Revert "Suppress some warnings for onnx FE" This reverts commit 75d23b64fc.
* Revert "Fixed code style" This reverts commit c6eba63116.
* Revert "Fixed some warnings for Windows" This reverts commit 23d7ed88b6.
* Revert "Suppress warnings for offline transfromations" This reverts commit 0b9f6317bf.
* Revert "Cannot fix opset warning" This reverts commit 19ea658639.
* Revert "Remove opset constructor" This reverts commit 06afb1bc20.
* Revert "Suppress warnings for LPT" This reverts commit 58b1c0f5a0.
* Revert "Suppress warnings for transformations" This reverts commit f8bb9814a1.
* Revert "Suppress more warnings" This reverts commit f9f0da9acb.
* Revert "Soome change" This reverts commit e545d4984e.
* Remove deprecation for ngraph::OpSet and FactoryRegistry
This commit is contained in:
parent
6b3a252f92
commit
ea04f8217d
@ -10,6 +10,7 @@
|
|||||||
#include <low_precision/network_helper.hpp>
|
#include <low_precision/network_helper.hpp>
|
||||||
#include <transformations/common_optimizations/convert_quantize_dequantize.hpp>
|
#include <transformations/common_optimizations/convert_quantize_dequantize.hpp>
|
||||||
#include <transformations/op_conversions/convert_subtract.hpp>
|
#include <transformations/op_conversions/convert_subtract.hpp>
|
||||||
|
#include "openvino/pass/manager.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
@ -28,7 +29,7 @@ public:
|
|||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
std::shared_ptr<ov::Model> nGraphFunc;
|
std::shared_ptr<ov::Model> nGraphFunc;
|
||||||
ngraph::pass::Manager manager;
|
ov::pass::Manager manager;
|
||||||
auto pass_config = manager.get_pass_config();
|
auto pass_config = manager.get_pass_config();
|
||||||
//! [lpt_common]
|
//! [lpt_common]
|
||||||
// check if the function is quantized to ignore LPT transformations for not quantized function to speed up model loading
|
// check if the function is quantized to ignore LPT transformations for not quantized function to speed up model loading
|
||||||
@ -69,19 +70,19 @@ using namespace ngraph::pass::low_precision;
|
|||||||
if (useLpt) {
|
if (useLpt) {
|
||||||
// Low precision transformations plugin specific configuration: restrictions definition
|
// Low precision transformations plugin specific configuration: restrictions definition
|
||||||
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
|
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
|
||||||
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
PrecisionsRestriction::create<ov::opset1::Convolution>({
|
||||||
{{0}, {ngraph::element::u8}},
|
{{0}, {ngraph::element::u8}},
|
||||||
{{1}, {ngraph::element::i8}},
|
{{1}, {ngraph::element::i8}},
|
||||||
}),
|
}),
|
||||||
PrecisionsRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
|
PrecisionsRestriction::create<ov::opset1::ConvolutionBackpropData>({
|
||||||
{{0}, {ngraph::element::u8, ngraph::element::i8}},
|
{{0}, {ngraph::element::u8, ngraph::element::i8}},
|
||||||
{{1}, {ngraph::element::i8}}
|
{{1}, {ngraph::element::i8}}
|
||||||
}),
|
}),
|
||||||
PrecisionsRestriction::create<ngraph::opset1::GroupConvolution>({
|
PrecisionsRestriction::create<ov::opset1::GroupConvolution>({
|
||||||
{{0}, {ngraph::element::u8}},
|
{{0}, {ngraph::element::u8}},
|
||||||
{{1}, {ngraph::element::i8}}
|
{{1}, {ngraph::element::i8}}
|
||||||
}),
|
}),
|
||||||
PrecisionsRestriction::create<ngraph::opset1::Multiply>({
|
PrecisionsRestriction::create<ov::opset1::Multiply>({
|
||||||
{{0}, {ngraph::element::u8}},
|
{{0}, {ngraph::element::u8}},
|
||||||
{{1}, {ngraph::element::i8}},
|
{{1}, {ngraph::element::i8}},
|
||||||
}),
|
}),
|
||||||
@ -89,17 +90,17 @@ if (useLpt) {
|
|||||||
|
|
||||||
// Low precision transformations plugin specific configuration: per-tensor quantization operations definition
|
// Low precision transformations plugin specific configuration: per-tensor quantization operations definition
|
||||||
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
|
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
|
||||||
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0}),
|
QuantizationGranularityRestriction::create<ov::opset1::Convolution>({0}),
|
||||||
QuantizationGranularityRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
|
QuantizationGranularityRestriction::create<ov::opset1::ConvolutionBackpropData>({0})
|
||||||
});
|
});
|
||||||
|
|
||||||
// Low precision transformations instantiation and registration in pass manager
|
// Low precision transformations instantiation and registration in pass manager
|
||||||
ngraph::pass::Manager lptManager;
|
ov::pass::Manager lptManager;
|
||||||
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(supportedPrecisions, perTensorQuantization);
|
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(supportedPrecisions, perTensorQuantization);
|
||||||
|
|
||||||
// Low precision transformations plugin specific configuration: transformation callbacks definition
|
// Low precision transformations plugin specific configuration: transformation callbacks definition
|
||||||
lptManager.get_pass_config()->set_callback<MarkupPrecisions>([](const std::shared_ptr<const ngraph::Node>& node) -> bool {
|
lptManager.get_pass_config()->set_callback<MarkupPrecisions>([](const std::shared_ptr<const ngraph::Node>& node) -> bool {
|
||||||
if (const auto multiply = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
|
if (const auto multiply = std::dynamic_pointer_cast<const ov::opset1::Multiply>(node)) {
|
||||||
return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(multiply);
|
return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(multiply);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@ -117,7 +118,7 @@ if (useLpt) {
|
|||||||
//! [lpt_execution]
|
//! [lpt_execution]
|
||||||
|
|
||||||
//! [lpt_device]
|
//! [lpt_device]
|
||||||
ngraph::pass::Manager deviceSpecificManager;
|
ov::pass::Manager deviceSpecificManager;
|
||||||
deviceSpecificManager.register_pass<ngraph::pass::device::ConvertOpSet1ToDeviceSpecific>();
|
deviceSpecificManager.register_pass<ngraph::pass::device::ConvertOpSet1ToDeviceSpecific>();
|
||||||
deviceSpecificManager.run_passes(nGraphFunc);
|
deviceSpecificManager.run_passes(nGraphFunc);
|
||||||
//! [lpt_device]
|
//! [lpt_device]
|
||||||
@ -127,23 +128,23 @@ return 0;
|
|||||||
|
|
||||||
int lpt_supported_precisions() {
|
int lpt_supported_precisions() {
|
||||||
std::shared_ptr<ov::Model> nGraphFunc;
|
std::shared_ptr<ov::Model> nGraphFunc;
|
||||||
ngraph::pass::Manager manager;
|
ov::pass::Manager manager;
|
||||||
|
|
||||||
using namespace ngraph::pass::low_precision;
|
using namespace ngraph::pass::low_precision;
|
||||||
//! [lpt_supported_precisions]
|
//! [lpt_supported_precisions]
|
||||||
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
|
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
|
||||||
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
PrecisionsRestriction::create<ov::opset1::Convolution>({
|
||||||
{{0}, {ngraph::element::u8}},
|
{{0}, {ngraph::element::u8}},
|
||||||
{{1}, {ngraph::element::i8}},
|
{{1}, {ngraph::element::i8}},
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
ngraph::pass::Manager lptManager;
|
ov::pass::Manager lptManager;
|
||||||
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(supportedPrecisions);
|
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(supportedPrecisions);
|
||||||
lptManager.run_passes(nGraphFunc);
|
lptManager.run_passes(nGraphFunc);
|
||||||
//! [lpt_supported_precisions]
|
//! [lpt_supported_precisions]
|
||||||
|
|
||||||
ngraph::pass::Manager deviceSpecificManager;
|
ov::pass::Manager deviceSpecificManager;
|
||||||
deviceSpecificManager.register_pass<ngraph::pass::device::ConvertOpSet1ToDeviceSpecific>();
|
deviceSpecificManager.register_pass<ngraph::pass::device::ConvertOpSet1ToDeviceSpecific>();
|
||||||
deviceSpecificManager.run_passes(nGraphFunc);
|
deviceSpecificManager.run_passes(nGraphFunc);
|
||||||
|
|
||||||
@ -158,10 +159,10 @@ using namespace ngraph::pass::low_precision;
|
|||||||
const std::vector<PrecisionsRestriction> emptyRestrictions;
|
const std::vector<PrecisionsRestriction> emptyRestrictions;
|
||||||
|
|
||||||
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
|
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
|
||||||
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0})
|
QuantizationGranularityRestriction::create<ov::opset1::Convolution>({0})
|
||||||
});
|
});
|
||||||
|
|
||||||
ngraph::pass::Manager lptManager;
|
ov::pass::Manager lptManager;
|
||||||
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(emptyRestrictions, perTensorQuantization);
|
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(emptyRestrictions, perTensorQuantization);
|
||||||
lptManager.run_passes(nGraphFunc);
|
lptManager.run_passes(nGraphFunc);
|
||||||
//! [per_tensor_quantization]
|
//! [per_tensor_quantization]
|
||||||
@ -171,13 +172,13 @@ return 0;
|
|||||||
|
|
||||||
int asymmetric_quantization(const std::vector<ngraph::element::Type>& defaultPrecisions) {
|
int asymmetric_quantization(const std::vector<ngraph::element::Type>& defaultPrecisions) {
|
||||||
std::shared_ptr<ov::Model> nGraphFunc;
|
std::shared_ptr<ov::Model> nGraphFunc;
|
||||||
ngraph::pass::Manager manager;
|
ov::pass::Manager manager;
|
||||||
auto pass_config = manager.get_pass_config();
|
auto pass_config = manager.get_pass_config();
|
||||||
|
|
||||||
|
|
||||||
//! [asymmetric_quantization]
|
//! [asymmetric_quantization]
|
||||||
using namespace ngraph::pass::low_precision;
|
using namespace ngraph::pass::low_precision;
|
||||||
ngraph::pass::Manager lptManager;
|
ov::pass::Manager lptManager;
|
||||||
|
|
||||||
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>();
|
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>();
|
||||||
lptManager.get_pass_config()->set_callback<ConvolutionBackpropDataTransformation>([&defaultPrecisions](const std::shared_ptr<const ngraph::Node>& node) -> bool {
|
lptManager.get_pass_config()->set_callback<ConvolutionBackpropDataTransformation>([&defaultPrecisions](const std::shared_ptr<const ngraph::Node>& node) -> bool {
|
||||||
@ -191,27 +192,27 @@ return 0;
|
|||||||
|
|
||||||
int lpt_markup_pipeline() {
|
int lpt_markup_pipeline() {
|
||||||
std::shared_ptr<ov::Model> nGraphFunc;
|
std::shared_ptr<ov::Model> nGraphFunc;
|
||||||
ngraph::pass::Manager manager;
|
ov::pass::Manager manager;
|
||||||
|
|
||||||
using namespace ngraph::pass::low_precision;
|
using namespace ngraph::pass::low_precision;
|
||||||
//! [lpt_markup_pipeline]
|
//! [lpt_markup_pipeline]
|
||||||
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
|
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
|
||||||
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
PrecisionsRestriction::create<ov::opset1::Convolution>({
|
||||||
{{0}, {ngraph::element::u8}},
|
{{0}, {ngraph::element::u8}},
|
||||||
{{1}, {ngraph::element::i8}},
|
{{1}, {ngraph::element::i8}},
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
|
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
|
||||||
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0})
|
QuantizationGranularityRestriction::create<ov::opset1::Convolution>({0})
|
||||||
});
|
});
|
||||||
|
|
||||||
ngraph::pass::Manager lptManager;
|
ov::pass::Manager lptManager;
|
||||||
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(supportedPrecisions, perTensorQuantization);
|
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(supportedPrecisions, perTensorQuantization);
|
||||||
lptManager.run_passes(nGraphFunc);
|
lptManager.run_passes(nGraphFunc);
|
||||||
//! [lpt_markup_pipeline]
|
//! [lpt_markup_pipeline]
|
||||||
|
|
||||||
ngraph::pass::Manager deviceSpecificManager;
|
ov::pass::Manager deviceSpecificManager;
|
||||||
deviceSpecificManager.register_pass<ngraph::pass::device::ConvertOpSet1ToDeviceSpecific>();
|
deviceSpecificManager.register_pass<ngraph::pass::device::ConvertOpSet1ToDeviceSpecific>();
|
||||||
deviceSpecificManager.run_passes(nGraphFunc);
|
deviceSpecificManager.run_passes(nGraphFunc);
|
||||||
|
|
||||||
|
@ -1,8 +1,7 @@
|
|||||||
// Copyright (C) 2018-2021 Intel Corporation
|
// Copyright (C) 2018-2021 Intel Corporation
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
//
|
//
|
||||||
#include <ngraph/ngraph.hpp>
|
#include <openvino/opsets/opset8.hpp>
|
||||||
#include <ngraph/opsets/opset8.hpp>
|
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
//! [ngraph:graph]
|
//! [ngraph:graph]
|
||||||
@ -26,23 +25,21 @@ int main() {
|
|||||||
// | Result |
|
// | Result |
|
||||||
// | result |
|
// | result |
|
||||||
// |_____________|
|
// |_____________|
|
||||||
auto data1 = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::i64, ngraph::Shape{1, 3, 2, 2});
|
auto data1 = std::make_shared<ov::opset8::Parameter>(ov::element::i64, ov::Shape{1, 3, 2, 2});
|
||||||
data1->set_friendly_name("data1"); // operation name
|
data1->set_friendly_name("data1"); // operation name
|
||||||
data1->output(0).set_names({"data1_t"}); // tensor names
|
data1->output(0).set_names({"data1_t"}); // tensor names
|
||||||
auto data2 = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::i64, ngraph::Shape{1, 2, 2, 2});
|
auto data2 = std::make_shared<ov::opset8::Parameter>(ov::element::i64, ov::Shape{1, 2, 2, 2});
|
||||||
data2->set_friendly_name("data2"); // operation name
|
data2->set_friendly_name("data2"); // operation name
|
||||||
data2->output(0).set_names({"data2_t"}); // tensor names
|
data2->output(0).set_names({"data2_t"}); // tensor names
|
||||||
|
|
||||||
auto concat = std::make_shared<ngraph::opset8::Concat>(ngraph::OutputVector{data1, data2}, 1);
|
auto concat = std::make_shared<ov::opset8::Concat>(ov::OutputVector{data1, data2}, 1);
|
||||||
concat->set_friendly_name("concat"); // operation name
|
concat->set_friendly_name("concat"); // operation name
|
||||||
concat->output(0).set_names({"concat_t"}); // tensor name
|
concat->output(0).set_names({"concat_t"}); // tensor name
|
||||||
|
|
||||||
auto result = std::make_shared<ngraph::opset8::Result>(concat);
|
auto result = std::make_shared<ov::opset8::Result>(concat);
|
||||||
result->set_friendly_name("result"); // operation name
|
result->set_friendly_name("result"); // operation name
|
||||||
|
|
||||||
auto f = std::make_shared<ngraph::Function>(ngraph::ResultVector{result},
|
auto f = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{data1, data2}, "function_name");
|
||||||
ngraph::ParameterVector{data1, data2},
|
|
||||||
"function_name");
|
|
||||||
//! [ngraph:graph]
|
//! [ngraph:graph]
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -18,12 +18,12 @@
|
|||||||
|
|
||||||
#include "dict_attribute_visitor.hpp"
|
#include "dict_attribute_visitor.hpp"
|
||||||
#include "ngraph/check.hpp"
|
#include "ngraph/check.hpp"
|
||||||
#include "ngraph/log.hpp"
|
|
||||||
#include "openvino/core/except.hpp"
|
#include "openvino/core/except.hpp"
|
||||||
#include "openvino/core/node.hpp"
|
#include "openvino/core/node.hpp"
|
||||||
#include "openvino/op/util/op_types.hpp"
|
#include "openvino/op/util/op_types.hpp"
|
||||||
#include "openvino/op/util/variable.hpp"
|
#include "openvino/op/util/variable.hpp"
|
||||||
#include "openvino/opsets/opset.hpp"
|
#include "openvino/opsets/opset.hpp"
|
||||||
|
#include "openvino/util/log.hpp"
|
||||||
#include "pyopenvino/core/common.hpp"
|
#include "pyopenvino/core/common.hpp"
|
||||||
|
|
||||||
namespace py = pybind11;
|
namespace py = pybind11;
|
||||||
@ -61,7 +61,7 @@ public:
|
|||||||
"Currently NodeFactory doesn't support Constant node: ",
|
"Currently NodeFactory doesn't support Constant node: ",
|
||||||
op_type_name);
|
op_type_name);
|
||||||
|
|
||||||
NGRAPH_WARN << "Empty op created! Please assign inputs and attributes and run validate() before op is used.";
|
OPENVINO_WARN << "Empty op created! Please assign inputs and attributes and run validate() before op is used.";
|
||||||
|
|
||||||
return op_node;
|
return op_node;
|
||||||
}
|
}
|
||||||
|
@ -6,9 +6,9 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "ngraph/log.hpp"
|
|
||||||
#include "openvino/core/node.hpp"
|
#include "openvino/core/node.hpp"
|
||||||
#include "openvino/op/util/multi_subgraph_base.hpp"
|
#include "openvino/op/util/multi_subgraph_base.hpp"
|
||||||
|
#include "openvino/util/log.hpp"
|
||||||
#include "pyopenvino/core/common.hpp"
|
#include "pyopenvino/core/common.hpp"
|
||||||
#include "pyopenvino/graph/ops/if.hpp"
|
#include "pyopenvino/graph/ops/if.hpp"
|
||||||
#include "pyopenvino/graph/ops/util/multisubgraph.hpp"
|
#include "pyopenvino/graph/ops/util/multisubgraph.hpp"
|
||||||
@ -24,8 +24,8 @@ void regclass_graph_op_If(py::module m) {
|
|||||||
if (MultiSubgraphHelpers::is_constant_or_parameter(execution_condition)) {
|
if (MultiSubgraphHelpers::is_constant_or_parameter(execution_condition)) {
|
||||||
return std::make_shared<ov::op::v8::If>(execution_condition->output(0));
|
return std::make_shared<ov::op::v8::If>(execution_condition->output(0));
|
||||||
} else {
|
} else {
|
||||||
NGRAPH_WARN << "Please specify execution_condition as Constant or Parameter. Default If() "
|
OPENVINO_WARN << "Please specify execution_condition as Constant or Parameter. Default If() "
|
||||||
"constructor was applied.";
|
"constructor was applied.";
|
||||||
return std::make_shared<ov::op::v8::If>();
|
return std::make_shared<ov::op::v8::If>();
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
@ -6,9 +6,9 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "ngraph/log.hpp"
|
|
||||||
#include "openvino/core/node.hpp"
|
#include "openvino/core/node.hpp"
|
||||||
#include "openvino/op/loop.hpp"
|
#include "openvino/op/loop.hpp"
|
||||||
|
#include "openvino/util/log.hpp"
|
||||||
#include "pyopenvino/core/common.hpp"
|
#include "pyopenvino/core/common.hpp"
|
||||||
#include "pyopenvino/graph/ops/util/multisubgraph.hpp"
|
#include "pyopenvino/graph/ops/util/multisubgraph.hpp"
|
||||||
|
|
||||||
@ -28,7 +28,7 @@ void regclass_graph_op_Loop(py::module m) {
|
|||||||
MultiSubgraphHelpers::is_constant_or_parameter(execution_condition)) {
|
MultiSubgraphHelpers::is_constant_or_parameter(execution_condition)) {
|
||||||
return std::make_shared<ov::op::v5::Loop>(trip_count->output(0), execution_condition->output(0));
|
return std::make_shared<ov::op::v5::Loop>(trip_count->output(0), execution_condition->output(0));
|
||||||
} else {
|
} else {
|
||||||
NGRAPH_WARN
|
OPENVINO_WARN
|
||||||
<< "Please specify execution_condition and trip_count as Constant or Parameter. Default Loop() "
|
<< "Please specify execution_condition and trip_count as Constant or Parameter. Default Loop() "
|
||||||
"constructor was applied.";
|
"constructor was applied.";
|
||||||
return std::make_shared<ov::op::v5::Loop>();
|
return std::make_shared<ov::op::v5::Loop>();
|
||||||
|
@ -5,9 +5,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <ngraph/pass/pass.hpp>
|
|
||||||
#include "low_precision/lpt_visibility.hpp"
|
#include "low_precision/lpt_visibility.hpp"
|
||||||
#include "low_precision/layer_transformation.hpp"
|
#include "low_precision/layer_transformation.hpp"
|
||||||
|
#include "openvino/pass/pass.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
@ -28,11 +28,11 @@ class LP_TRANSFORMATIONS_API AlignQuantizationIntervals;
|
|||||||
* [AlignQuantizationIntervals](@ref openvino_docs_OV_UG_lpt_AlignQuantizationIntervals) page
|
* [AlignQuantizationIntervals](@ref openvino_docs_OV_UG_lpt_AlignQuantizationIntervals) page
|
||||||
* in the Inference Engine Developer Guide.
|
* in the Inference Engine Developer Guide.
|
||||||
*/
|
*/
|
||||||
class ngraph::pass::low_precision::AlignQuantizationIntervals : public ngraph::pass::FunctionPass {
|
class ngraph::pass::low_precision::AlignQuantizationIntervals : public ov::pass::ModelPass {
|
||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("AlignQuantizationIntervals", "0");
|
OPENVINO_RTTI("AlignQuantizationIntervals", "0");
|
||||||
AlignQuantizationIntervals(const std::vector<ngraph::element::Type>& defaultPrecisions = ngraph::pass::low_precision::precision_set::int8_support);
|
AlignQuantizationIntervals(const std::vector<ov::element::Type>& defaultPrecisions = ngraph::pass::low_precision::precision_set::int8_support);
|
||||||
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
|
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
|
||||||
private:
|
private:
|
||||||
const std::vector<ngraph::element::Type> defaultPrecisions;
|
const std::vector<ngraph::element::Type> defaultPrecisions;
|
||||||
};
|
};
|
||||||
|
@ -23,7 +23,7 @@ class LP_TRANSFORMATIONS_API AvgPoolTransformation : public LayerTransformation
|
|||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("AvgPoolTransformation", "0");
|
OPENVINO_RTTI("AvgPoolTransformation", "0");
|
||||||
AvgPoolTransformation(const Params& params = Params());
|
AvgPoolTransformation(const Params& params = Params());
|
||||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
bool transform(TransformationContext& context, ov::pass::pattern::Matcher &m) override;
|
||||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const override;
|
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const override;
|
||||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||||
};
|
};
|
||||||
|
@ -6,15 +6,15 @@
|
|||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <ngraph/ngraph.hpp>
|
|
||||||
#include <ngraph/opsets/opset1.hpp>
|
|
||||||
#include <low_precision/lpt_visibility.hpp>
|
#include <low_precision/lpt_visibility.hpp>
|
||||||
|
#include "openvino/core/node.hpp"
|
||||||
|
#include "openvino/opsets/opset1.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
namespace low_precision {
|
namespace low_precision {
|
||||||
|
|
||||||
typedef std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> FakeQuantizeDequantizationValues;
|
typedef std::tuple<std::shared_ptr<ov::Node>, std::shared_ptr<ov::Node>> FakeQuantizeDequantizationValues;
|
||||||
|
|
||||||
class LP_TRANSFORMATIONS_API FakeQuantizeDequantization {
|
class LP_TRANSFORMATIONS_API FakeQuantizeDequantization {
|
||||||
public:
|
public:
|
||||||
@ -22,12 +22,12 @@ public:
|
|||||||
|
|
||||||
FakeQuantizeDequantization(
|
FakeQuantizeDequantization(
|
||||||
const Output<Node>& data,
|
const Output<Node>& data,
|
||||||
const std::shared_ptr<ngraph::opset1::Convert>& convert,
|
const std::shared_ptr<ov::opset1::Convert>& convert,
|
||||||
const std::shared_ptr<ngraph::opset1::Subtract>& subtract,
|
const std::shared_ptr<ov::opset1::Subtract>& subtract,
|
||||||
const std::shared_ptr<ngraph::opset1::Convert>& subtractConvert,
|
const std::shared_ptr<ov::opset1::Convert>& subtractConvert,
|
||||||
const std::shared_ptr<ngraph::opset1::Constant>& subtractConstant,
|
const std::shared_ptr<ov::opset1::Constant>& subtractConstant,
|
||||||
const std::shared_ptr<ngraph::opset1::Multiply>& multiply,
|
const std::shared_ptr<ov::opset1::Multiply>& multiply,
|
||||||
const std::shared_ptr<ngraph::opset1::Constant>& multiplyConstant);
|
const std::shared_ptr<ov::opset1::Constant>& multiplyConstant);
|
||||||
|
|
||||||
bool empty() const noexcept;
|
bool empty() const noexcept;
|
||||||
bool multiplyHasZeroOrDenormal() const;
|
bool multiplyHasZeroOrDenormal() const;
|
||||||
@ -41,21 +41,21 @@ public:
|
|||||||
|
|
||||||
static int fillDequantizationParams(
|
static int fillDequantizationParams(
|
||||||
const std::shared_ptr<ngraph::Node>& elementwise,
|
const std::shared_ptr<ngraph::Node>& elementwise,
|
||||||
std::shared_ptr<ngraph::opset1::Convert>& convert,
|
std::shared_ptr<ov::opset1::Convert>& convert,
|
||||||
std::shared_ptr<ngraph::opset1::Constant>& constant);
|
std::shared_ptr<ov::opset1::Constant>& constant);
|
||||||
|
|
||||||
static int fillDequantizationParams(
|
static int fillDequantizationParams(
|
||||||
const std::shared_ptr<ngraph::Node>& elementwise,
|
const std::shared_ptr<ngraph::Node>& elementwise,
|
||||||
std::shared_ptr<ngraph::opset1::Constant>& constant);
|
std::shared_ptr<ov::opset1::Constant>& constant);
|
||||||
|
|
||||||
size_t channelDimIndex;
|
size_t channelDimIndex;
|
||||||
Output<Node> data;
|
Output<Node> data;
|
||||||
std::shared_ptr<opset1::Convert> convert;
|
std::shared_ptr<ov::opset1::Convert> convert;
|
||||||
std::shared_ptr<opset1::Subtract> subtract;
|
std::shared_ptr<ov::opset1::Subtract> subtract;
|
||||||
std::shared_ptr<ngraph::opset1::Convert> subtractConvert;
|
std::shared_ptr<ov::opset1::Convert> subtractConvert;
|
||||||
std::shared_ptr<ngraph::opset1::Constant> subtractConstant;
|
std::shared_ptr<ov::opset1::Constant> subtractConstant;
|
||||||
std::shared_ptr<opset1::Multiply> multiply;
|
std::shared_ptr<ov::opset1::Multiply> multiply;
|
||||||
std::shared_ptr<ngraph::opset1::Constant> multiplyConstant;
|
std::shared_ptr<ov::opset1::Constant> multiplyConstant;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace low_precision
|
} // namespace low_precision
|
||||||
|
@ -8,7 +8,6 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <ngraph/pass/pass.hpp>
|
|
||||||
#include "low_precision/lpt_visibility.hpp"
|
#include "low_precision/lpt_visibility.hpp"
|
||||||
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
|
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
|
||||||
|
|
||||||
|
@ -9,10 +9,8 @@
|
|||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <ngraph/node.hpp>
|
|
||||||
|
|
||||||
#include <low_precision/lpt_visibility.hpp>
|
#include <low_precision/lpt_visibility.hpp>
|
||||||
#include <ngraph/pass/graph_rewrite.hpp>
|
#include "openvino/core/node.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
|
@ -9,7 +9,6 @@
|
|||||||
#include <ngraph/node.hpp>
|
#include <ngraph/node.hpp>
|
||||||
|
|
||||||
#include <low_precision/lpt_visibility.hpp>
|
#include <low_precision/lpt_visibility.hpp>
|
||||||
#include <ngraph/pass/graph_rewrite.hpp>
|
|
||||||
#include <low_precision/rt_info/quantization_granularity_attribute.hpp>
|
#include <low_precision/rt_info/quantization_granularity_attribute.hpp>
|
||||||
#include <low_precision/common/port_quantization_granularity_restriction.hpp>
|
#include <low_precision/common/port_quantization_granularity_restriction.hpp>
|
||||||
|
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include <low_precision/lpt_visibility.hpp>
|
#include <low_precision/lpt_visibility.hpp>
|
||||||
#include <ngraph/pass/graph_rewrite.hpp>
|
#include "openvino/pass/graph_rewrite.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
@ -29,7 +29,7 @@ class LP_TRANSFORMATIONS_API ConvertSubtractConstant;
|
|||||||
* [ConvertSubtractConstant](@ref openvino_docs_OV_UG_lpt_ConvertSubtractConstant) page
|
* [ConvertSubtractConstant](@ref openvino_docs_OV_UG_lpt_ConvertSubtractConstant) page
|
||||||
* in the Inference Engine Developer Guide.
|
* in the Inference Engine Developer Guide.
|
||||||
*/
|
*/
|
||||||
class ngraph::pass::low_precision::ConvertSubtractConstant : public ngraph::pass::MatcherPass {
|
class ngraph::pass::low_precision::ConvertSubtractConstant : public ov::pass::MatcherPass {
|
||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("ConvertSubtractConstant", "0");
|
OPENVINO_RTTI("ConvertSubtractConstant", "0");
|
||||||
ConvertSubtractConstant(const std::vector<ngraph::element::Type>& constantPrecisions = {});
|
ConvertSubtractConstant(const std::vector<ngraph::element::Type>& constantPrecisions = {});
|
||||||
|
@ -4,7 +4,6 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <ngraph/ngraph.hpp>
|
|
||||||
#include "weightable_layer_transformation.hpp"
|
#include "weightable_layer_transformation.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
@ -23,7 +22,7 @@ class LP_TRANSFORMATIONS_API ConvolutionTransformation : public WeightableLayerT
|
|||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("ConvolutionTransformation", "0");
|
OPENVINO_RTTI("ConvolutionTransformation", "0");
|
||||||
ConvolutionTransformation(const Params& params = Params());
|
ConvolutionTransformation(const Params& params = Params());
|
||||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
bool transform(TransformationContext& context, ov::pass::pattern::Matcher &m) override;
|
||||||
bool isQuantized(const std::shared_ptr<const Node>& layer,
|
bool isQuantized(const std::shared_ptr<const Node>& layer,
|
||||||
const std::vector<ngraph::element::Type>&defaultPrecisions) const override;
|
const std::vector<ngraph::element::Type>&defaultPrecisions) const override;
|
||||||
static bool isQuantizedStatic(const std::shared_ptr<const Node>& layer,
|
static bool isQuantizedStatic(const std::shared_ptr<const Node>& layer,
|
||||||
|
@ -4,7 +4,6 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <ngraph/ngraph.hpp>
|
|
||||||
#include "weightable_layer_transformation.hpp"
|
#include "weightable_layer_transformation.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
@ -22,7 +21,7 @@ namespace low_precision {
|
|||||||
class LP_TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
|
class LP_TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
|
||||||
public:
|
public:
|
||||||
ConvolutionBackpropDataTransformation(const Params& params = Params());
|
ConvolutionBackpropDataTransformation(const Params& params = Params());
|
||||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
bool transform(TransformationContext& context, ov::pass::pattern::Matcher &m) override;
|
||||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
||||||
bool isQuantized(const std::shared_ptr<const Node>& layer,
|
bool isQuantized(const std::shared_ptr<const Node>& layer,
|
||||||
const std::vector<ngraph::element::Type>&defaultPrecisions) const override;
|
const std::vector<ngraph::element::Type>&defaultPrecisions) const override;
|
||||||
|
@ -22,7 +22,7 @@ class LP_TRANSFORMATIONS_API DepthToSpaceTransformation : public TransparentBase
|
|||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("DepthToSpaceTransformation", "0");
|
OPENVINO_RTTI("DepthToSpaceTransformation", "0");
|
||||||
DepthToSpaceTransformation(const Params& params = Params());
|
DepthToSpaceTransformation(const Params& params = Params());
|
||||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<ov::Node> layer) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace low_precision
|
} // namespace low_precision
|
||||||
|
@ -5,7 +5,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <ngraph/ngraph.hpp>
|
|
||||||
#include "layer_transformation.hpp"
|
#include "layer_transformation.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
@ -24,15 +23,15 @@ class LP_TRANSFORMATIONS_API FakeQuantizeTransformation : public LayerTransforma
|
|||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("FakeQuantizeTransformation", "0");
|
OPENVINO_RTTI("FakeQuantizeTransformation", "0");
|
||||||
FakeQuantizeTransformation(const Params& params = Params());
|
FakeQuantizeTransformation(const Params& params = Params());
|
||||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
bool transform(TransformationContext& context, ov::pass::pattern::Matcher &m) override;
|
||||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||||
|
|
||||||
static bool checkElementwise(const std::shared_ptr<Node>& eltwise);
|
static bool checkElementwise(const std::shared_ptr<Node>& eltwise);
|
||||||
|
|
||||||
static std::shared_ptr<opset1::FakeQuantize> fuseElementwise(
|
static std::shared_ptr<ov::opset1::FakeQuantize> fuseElementwise(
|
||||||
TransformationContext& context,
|
TransformationContext& context,
|
||||||
MatcherPass* matcherPass,
|
MatcherPass* matcherPass,
|
||||||
const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize,
|
const std::shared_ptr<ov::opset1::FakeQuantize>& fakeQuantize,
|
||||||
const bool updatePrecisions);
|
const bool updatePrecisions);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ class LP_TRANSFORMATIONS_API GatherTransformation : public LayerTransformation {
|
|||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("GatherTransformation", "0");
|
OPENVINO_RTTI("GatherTransformation", "0");
|
||||||
GatherTransformation(const Params& params = Params());
|
GatherTransformation(const Params& params = Params());
|
||||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
bool transform(TransformationContext& context, ov::pass::pattern::Matcher &m) override;
|
||||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const override;
|
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const override;
|
||||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||||
};
|
};
|
||||||
|
@ -11,9 +11,7 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <ngraph/ngraph.hpp>
|
#include "openvino/pass/graph_rewrite.hpp"
|
||||||
#include <ngraph/pass/graph_rewrite.hpp>
|
|
||||||
|
|
||||||
#include "transformation_context.hpp"
|
#include "transformation_context.hpp"
|
||||||
#include "quantization_details.hpp"
|
#include "quantization_details.hpp"
|
||||||
#include "low_precision/common/ie_lpt_exception.hpp"
|
#include "low_precision/common/ie_lpt_exception.hpp"
|
||||||
@ -234,7 +232,7 @@ inline std::ostream &operator << (std::ostream &os, const DataPrecision& value)
|
|||||||
* @ingroup ie_transformation_common_api
|
* @ingroup ie_transformation_common_api
|
||||||
* @brief Base class for low precision transformation.
|
* @brief Base class for low precision transformation.
|
||||||
*/
|
*/
|
||||||
class LP_TRANSFORMATIONS_API LayerTransformation : public ngraph::pass::MatcherPass {
|
class LP_TRANSFORMATIONS_API LayerTransformation : public ov::pass::MatcherPass {
|
||||||
public:
|
public:
|
||||||
class Params {
|
class Params {
|
||||||
public:
|
public:
|
||||||
@ -285,7 +283,7 @@ public:
|
|||||||
|
|
||||||
LayerTransformation(const Params& params);
|
LayerTransformation(const Params& params);
|
||||||
virtual ~LayerTransformation() = default;
|
virtual ~LayerTransformation() = default;
|
||||||
virtual bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) = 0;
|
virtual bool transform(TransformationContext& context, ov::pass::pattern::Matcher &m) = 0;
|
||||||
|
|
||||||
void setContext(TransformationContext* context) noexcept;
|
void setContext(TransformationContext* context) noexcept;
|
||||||
|
|
||||||
@ -369,19 +367,19 @@ protected:
|
|||||||
std::shared_ptr<ngraph::Node> lastNode,
|
std::shared_ptr<ngraph::Node> lastNode,
|
||||||
std::string originalName) const;
|
std::string originalName) const;
|
||||||
|
|
||||||
void addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr<Node> patternRoot);
|
void addPattern(ov::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr<Node> patternRoot);
|
||||||
|
|
||||||
//TODO: replace with canBeTransformed when quantization by special dimension is supported for all transformations
|
//TODO: replace with canBeTransformed when quantization by special dimension is supported for all transformations
|
||||||
bool canBeTransformedSpatialDimension(const TransformationContext& context, std::shared_ptr<Node> layer) const;
|
bool canBeTransformedSpatialDimension(const TransformationContext& context, std::shared_ptr<Node> layer) const;
|
||||||
|
|
||||||
template <typename Operation>
|
template <typename Operation>
|
||||||
void addSingleNodePattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context) const {
|
void addSingleNodePattern(ov::pass::GraphRewrite& pass, TransformationContext& context) const {
|
||||||
using namespace ngraph;
|
using namespace ngraph;
|
||||||
|
|
||||||
auto is_op_type = [](std::shared_ptr<Node> n) {
|
auto is_op_type = [](std::shared_ptr<Node> n) {
|
||||||
return !!as_type_ptr<Operation>(n);
|
return !!as_type_ptr<Operation>(n);
|
||||||
};
|
};
|
||||||
auto p_node = std::make_shared<pattern::op::Label>(element::f32, Shape{}, is_op_type);
|
auto p_node = std::make_shared<ov::pass::pattern::op::Label>(element::f32, Shape{}, is_op_type);
|
||||||
|
|
||||||
addPattern(pass, context, p_node);
|
addPattern(pass, context, p_node);
|
||||||
}
|
}
|
||||||
|
@ -20,11 +20,12 @@
|
|||||||
|
|
||||||
|
|
||||||
#include <low_precision/lpt_visibility.hpp>
|
#include <low_precision/lpt_visibility.hpp>
|
||||||
#include <ngraph/pass/graph_rewrite.hpp>
|
|
||||||
#include <low_precision/common/quantization_granularity_restriction.hpp>
|
#include <low_precision/common/quantization_granularity_restriction.hpp>
|
||||||
#include <low_precision/common/precisions_restriction.hpp>
|
#include <low_precision/common/precisions_restriction.hpp>
|
||||||
#include "low_precision/layer_transformation.hpp"
|
#include "low_precision/layer_transformation.hpp"
|
||||||
#include "low_precision/markup_precisions.hpp"
|
#include "low_precision/markup_precisions.hpp"
|
||||||
|
#include "openvino/pass/graph_rewrite.hpp"
|
||||||
|
#include "openvino/pass/pass.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
@ -38,27 +39,27 @@ class LP_TRANSFORMATIONS_API LowPrecision;
|
|||||||
} // namespace pass
|
} // namespace pass
|
||||||
} // namespace ngraph
|
} // namespace ngraph
|
||||||
|
|
||||||
class ngraph::pass::low_precision::MarkupOptimizations : public ngraph::pass::FunctionPass {
|
class ngraph::pass::low_precision::MarkupOptimizations : public ov::pass::ModelPass {
|
||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("MarkupOptimizations", "0");
|
OPENVINO_RTTI("MarkupOptimizations", "0");
|
||||||
MarkupOptimizations(
|
MarkupOptimizations(
|
||||||
const std::vector<PrecisionsRestriction>& precisionRestrictions,
|
const std::vector<PrecisionsRestriction>& precisionRestrictions,
|
||||||
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions,
|
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions,
|
||||||
const AttributeParameters& params);
|
const AttributeParameters& params);
|
||||||
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
|
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
|
||||||
private:
|
private:
|
||||||
const std::vector<PrecisionsRestriction>& precisionRestrictions;
|
const std::vector<PrecisionsRestriction>& precisionRestrictions;
|
||||||
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions;
|
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions;
|
||||||
const AttributeParameters& params;
|
const AttributeParameters& params;
|
||||||
};
|
};
|
||||||
|
|
||||||
class ngraph::pass::low_precision::TypeRelaxedReplacer : public ngraph::pass::GraphRewrite {
|
class ngraph::pass::low_precision::TypeRelaxedReplacer : public ov::pass::GraphRewrite {
|
||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("TypeRelaxedReplacer", "0");
|
OPENVINO_RTTI("TypeRelaxedReplacer", "0");
|
||||||
TypeRelaxedReplacer();
|
TypeRelaxedReplacer();
|
||||||
};
|
};
|
||||||
|
|
||||||
class ngraph::pass::low_precision::LowPrecision : public ngraph::pass::FunctionPass {
|
class ngraph::pass::low_precision::LowPrecision : public ov::pass::ModelPass {
|
||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("LowPrecision", "0");
|
OPENVINO_RTTI("LowPrecision", "0");
|
||||||
LowPrecision(
|
LowPrecision(
|
||||||
|
@ -5,9 +5,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <ngraph/pass/pass.hpp>
|
|
||||||
#include <low_precision/lpt_visibility.hpp>
|
#include <low_precision/lpt_visibility.hpp>
|
||||||
#include "low_precision/layer_transformation.hpp"
|
#include "low_precision/layer_transformation.hpp"
|
||||||
|
#include "openvino/pass/pass.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
@ -27,11 +27,11 @@ class LP_TRANSFORMATIONS_API MarkupAvgPoolPrecisionPreserved;
|
|||||||
* [MarkupAvgPoolPrecisionPreserved](@ref openvino_docs_OV_UG_lpt_MarkupAvgPoolPrecisionPreserved) page
|
* [MarkupAvgPoolPrecisionPreserved](@ref openvino_docs_OV_UG_lpt_MarkupAvgPoolPrecisionPreserved) page
|
||||||
* in the Inference Engine Developer Guide.
|
* in the Inference Engine Developer Guide.
|
||||||
*/
|
*/
|
||||||
class ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved : public ngraph::pass::FunctionPass {
|
class ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved : public ov::pass::ModelPass {
|
||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("MarkupAvgPoolPrecisionPreserved", "0");
|
OPENVINO_RTTI("MarkupAvgPoolPrecisionPreserved", "0");
|
||||||
MarkupAvgPoolPrecisionPreserved(const std::vector<ngraph::element::Type> defaultPrecisions = ngraph::pass::low_precision::precision_set::int8_support);
|
MarkupAvgPoolPrecisionPreserved(const std::vector<ov::element::Type> defaultPrecisions = ngraph::pass::low_precision::precision_set::int8_support);
|
||||||
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
|
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
|
||||||
private:
|
private:
|
||||||
const std::vector<ngraph::element::Type> defaultPrecisions;
|
const std::vector<ngraph::element::Type> defaultPrecisions;
|
||||||
};
|
};
|
||||||
|
@ -8,9 +8,9 @@
|
|||||||
#include <set>
|
#include <set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <ngraph/pass/pass.hpp>
|
|
||||||
#include "low_precision/lpt_visibility.hpp"
|
#include "low_precision/lpt_visibility.hpp"
|
||||||
#include "low_precision/common/precisions_restriction.hpp"
|
#include "low_precision/common/precisions_restriction.hpp"
|
||||||
|
#include "openvino/pass/pass.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
@ -34,7 +34,7 @@ class LP_TRANSFORMATIONS_API MarkupPrecisions;
|
|||||||
* [MarkupPrecisions](@ref openvino_docs_OV_UG_lpt_MarkupPrecisions) page
|
* [MarkupPrecisions](@ref openvino_docs_OV_UG_lpt_MarkupPrecisions) page
|
||||||
* in the Inference Engine Developer Guide.
|
* in the Inference Engine Developer Guide.
|
||||||
*/
|
*/
|
||||||
class ngraph::pass::low_precision::MarkupPrecisions : public ngraph::pass::FunctionPass {
|
class ngraph::pass::low_precision::MarkupPrecisions : public ov::pass::ModelPass {
|
||||||
public:
|
public:
|
||||||
class Restriction {
|
class Restriction {
|
||||||
public:
|
public:
|
||||||
@ -68,7 +68,7 @@ public:
|
|||||||
OPENVINO_RTTI("MarkupPrecisions", "0");
|
OPENVINO_RTTI("MarkupPrecisions", "0");
|
||||||
explicit MarkupPrecisions(const std::vector<PrecisionsRestriction>& restrictions = {},
|
explicit MarkupPrecisions(const std::vector<PrecisionsRestriction>& restrictions = {},
|
||||||
const std::vector<ngraph::element::Type>& defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 });
|
const std::vector<ngraph::element::Type>& defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 });
|
||||||
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
|
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static bool isPrecisionPreserved(const std::shared_ptr<Node>& node);
|
static bool isPrecisionPreserved(const std::shared_ptr<Node>& node);
|
||||||
|
@ -23,7 +23,7 @@ class LP_TRANSFORMATIONS_API MatMulTransformation : public LayerTransformation {
|
|||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("MatMulTransformation", "0");
|
OPENVINO_RTTI("MatMulTransformation", "0");
|
||||||
MatMulTransformation(const Params& params = Params());
|
MatMulTransformation(const Params& params = Params());
|
||||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
|
bool transform(TransformationContext &context, ov::pass::pattern::Matcher &m) override;
|
||||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||||
};
|
};
|
||||||
|
@ -5,7 +5,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <ngraph/ngraph.hpp>
|
|
||||||
#include "low_precision/layer_transformation.hpp"
|
#include "low_precision/layer_transformation.hpp"
|
||||||
#include "common/precisions_restriction.hpp"
|
#include "common/precisions_restriction.hpp"
|
||||||
|
|
||||||
@ -28,7 +27,7 @@ public:
|
|||||||
const Params& params = Params(),
|
const Params& params = Params(),
|
||||||
const PrecisionsRestriction::PrecisionsByPorts& restrictions = {});
|
const PrecisionsRestriction::PrecisionsByPorts& restrictions = {});
|
||||||
~MultiplyToGroupConvolutionTransformation() override {}
|
~MultiplyToGroupConvolutionTransformation() override {}
|
||||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
bool transform(TransformationContext& context, ov::pass::pattern::Matcher &m) override;
|
||||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||||
bool isQuantized(const std::shared_ptr<const Node>& layer,
|
bool isQuantized(const std::shared_ptr<const Node>& layer,
|
||||||
|
@ -22,7 +22,7 @@ class LP_TRANSFORMATIONS_API MVNTransformation : public LayerTransformation {
|
|||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("MVNTransformation", "0");
|
OPENVINO_RTTI("MVNTransformation", "0");
|
||||||
MVNTransformation(const Params& params = Params());
|
MVNTransformation(const Params& params = Params());
|
||||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
|
bool transform(TransformationContext &context, ov::pass::pattern::Matcher &m) override;
|
||||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||||
};
|
};
|
||||||
|
@ -10,11 +10,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
|
||||||
#include <ngraph/ngraph.hpp>
|
|
||||||
#include <ngraph/pattern/matcher.hpp>
|
|
||||||
#include <ngraph/opsets/opset1.hpp>
|
|
||||||
#include "ov_ops/type_relaxed.hpp"
|
#include "ov_ops/type_relaxed.hpp"
|
||||||
#include <ngraph/rt_info.hpp>
|
|
||||||
|
|
||||||
#include "rt_info/shared_value_attribute.hpp"
|
#include "rt_info/shared_value_attribute.hpp"
|
||||||
#include "rt_info/precisions_attribute.hpp"
|
#include "rt_info/precisions_attribute.hpp"
|
||||||
@ -26,6 +22,7 @@
|
|||||||
#include "common/fake_quantize_dequantization.hpp"
|
#include "common/fake_quantize_dequantization.hpp"
|
||||||
#include "common/ie_lpt_exception.hpp"
|
#include "common/ie_lpt_exception.hpp"
|
||||||
#include "layer_transformation.hpp"
|
#include "layer_transformation.hpp"
|
||||||
|
#include "openvino/opsets/opset1.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
@ -54,8 +51,8 @@ public:
|
|||||||
static std::shared_ptr<Node> setOutDataPrecision(std::shared_ptr<OperationType> operation, const element::Type& precision);
|
static std::shared_ptr<Node> setOutDataPrecision(std::shared_ptr<OperationType> operation, const element::Type& precision);
|
||||||
|
|
||||||
// applies constant folding of operation to constant and returns the specified output
|
// applies constant folding of operation to constant and returns the specified output
|
||||||
static std::shared_ptr<opset1::Constant> foldDequantizationConstant(
|
static std::shared_ptr<ov::opset1::Constant> foldDequantizationConstant(
|
||||||
const std::shared_ptr<opset1::Constant>& foldingConstant,
|
const std::shared_ptr<ov::opset1::Constant>& foldingConstant,
|
||||||
const std::shared_ptr<Node>& operation,
|
const std::shared_ptr<Node>& operation,
|
||||||
const size_t outIdx = 0);
|
const size_t outIdx = 0);
|
||||||
|
|
||||||
@ -73,7 +70,7 @@ public:
|
|||||||
// Remove node by connecting its 0th input with 0th output
|
// Remove node by connecting its 0th input with 0th output
|
||||||
static void removeLayer(std::shared_ptr<Node> node);
|
static void removeLayer(std::shared_ptr<Node> node);
|
||||||
|
|
||||||
static std::shared_ptr<Node> swapMultiplyAndAdd(std::shared_ptr<opset1::Add> addAfterMultiply, const int multiplyBranch);
|
static std::shared_ptr<Node> swapMultiplyAndAdd(std::shared_ptr<ov::opset1::Add> addAfterMultiply, const int multiplyBranch);
|
||||||
|
|
||||||
static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::vector<std::shared_ptr<Node>>& targets, bool overrideName = true);
|
static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::vector<std::shared_ptr<Node>>& targets, bool overrideName = true);
|
||||||
|
|
||||||
@ -81,11 +78,11 @@ public:
|
|||||||
|
|
||||||
static void copyInfo(const std::shared_ptr<Node>& source, const std::shared_ptr<Node>& target, bool overrideName = true);
|
static void copyInfo(const std::shared_ptr<Node>& source, const std::shared_ptr<Node>& target, bool overrideName = true);
|
||||||
|
|
||||||
static bool isScalarLike(std::shared_ptr<opset1::Constant> constant);
|
static bool isScalarLike(std::shared_ptr<ov::opset1::Constant> constant);
|
||||||
|
|
||||||
static bool isZero(std::shared_ptr<opset1::Constant> constant);
|
static bool isZero(std::shared_ptr<ov::opset1::Constant> constant);
|
||||||
|
|
||||||
static std::shared_ptr<opset1::Constant> toScalar(std::shared_ptr<opset1::Constant> constant);
|
static std::shared_ptr<ov::opset1::Constant> toScalar(std::shared_ptr<ov::opset1::Constant> constant);
|
||||||
|
|
||||||
static std::shared_ptr<Node> getConstantInput(const std::shared_ptr<const Node>& node, const bool convertIsExpected = false);
|
static std::shared_ptr<Node> getConstantInput(const std::shared_ptr<const Node>& node, const bool convertIsExpected = false);
|
||||||
|
|
||||||
@ -95,15 +92,15 @@ public:
|
|||||||
const std::vector<size_t>& reshapeValues);
|
const std::vector<size_t>& reshapeValues);
|
||||||
|
|
||||||
// Optimizes the series of multiplies after a given output port
|
// Optimizes the series of multiplies after a given output port
|
||||||
static std::shared_ptr<ngraph::opset1::Multiply> optimizeMultipliesAfter(std::shared_ptr<Node> multiply);
|
static std::shared_ptr<ov::opset1::Multiply> optimizeMultipliesAfter(std::shared_ptr<Node> multiply);
|
||||||
|
|
||||||
static std::shared_ptr<opset1::Constant> round(std::shared_ptr<Node> node, element::Type target_type);
|
static std::shared_ptr<ov::opset1::Constant> round(std::shared_ptr<Node> node, element::Type target_type);
|
||||||
|
|
||||||
static std::shared_ptr<opset1::FakeQuantize> composeFakeQuantize(const std::shared_ptr<opset1::FakeQuantize>& fq,
|
static std::shared_ptr<ov::opset1::FakeQuantize> composeFakeQuantize(const std::shared_ptr<ov::opset1::FakeQuantize>& fq,
|
||||||
const std::vector<ngraph::element::Type>& defaultPrecisions = precision_set::int8_support);
|
const std::vector<ngraph::element::Type>& defaultPrecisions = precision_set::int8_support);
|
||||||
|
|
||||||
static std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> decomposeFakeQuantize(
|
static std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> decomposeFakeQuantize(
|
||||||
std::shared_ptr<opset1::FakeQuantize> fq,
|
std::shared_ptr<ov::opset1::FakeQuantize> fq,
|
||||||
const element::Type precision,
|
const element::Type precision,
|
||||||
const float min,
|
const float min,
|
||||||
const float max,
|
const float max,
|
||||||
@ -112,8 +109,8 @@ public:
|
|||||||
const element::Type deqPrecision = element::f32,
|
const element::Type deqPrecision = element::f32,
|
||||||
const size_t outChannelsShapeIndex = 0);
|
const size_t outChannelsShapeIndex = 0);
|
||||||
|
|
||||||
static std::shared_ptr<opset1::FakeQuantize> updateFakeQuantize(
|
static std::shared_ptr<ov::opset1::FakeQuantize> updateFakeQuantize(
|
||||||
std::shared_ptr<opset1::FakeQuantize> fq,
|
std::shared_ptr<ov::opset1::FakeQuantize> fq,
|
||||||
element::Type precision,
|
element::Type precision,
|
||||||
float min,
|
float min,
|
||||||
float max,
|
float max,
|
||||||
@ -138,7 +135,7 @@ public:
|
|||||||
static bool areQuantizeAndDequantizeSupportedForMultiply(const std::shared_ptr<const ngraph::Node>& node,
|
static bool areQuantizeAndDequantizeSupportedForMultiply(const std::shared_ptr<const ngraph::Node>& node,
|
||||||
const std::vector<ngraph::element::Type>& _defaultPrecisions = precision_set::int8_support);
|
const std::vector<ngraph::element::Type>& _defaultPrecisions = precision_set::int8_support);
|
||||||
|
|
||||||
static bool isQuantizeSupported(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize);
|
static bool isQuantizeSupported(const std::shared_ptr<ov::opset1::FakeQuantize>& fakeQuantize);
|
||||||
|
|
||||||
static FakeQuantizeDequantization getDequantization(const std::shared_ptr<const Node>& node,
|
static FakeQuantizeDequantization getDequantization(const std::shared_ptr<const Node>& node,
|
||||||
const std::vector<ngraph::element::Type> _defaultPrecisions = precision_set::int8_support,
|
const std::vector<ngraph::element::Type> _defaultPrecisions = precision_set::int8_support,
|
||||||
@ -149,14 +146,14 @@ public:
|
|||||||
|
|
||||||
static FakeQuantizeDequantization normalizeDequantization(FakeQuantizeDequantization dequantization);
|
static FakeQuantizeDequantization normalizeDequantization(FakeQuantizeDequantization dequantization);
|
||||||
|
|
||||||
static std::shared_ptr<opset1::Constant> normalizeDequantizationShape(
|
static std::shared_ptr<ov::opset1::Constant> normalizeDequantizationShape(
|
||||||
const std::shared_ptr<Node>& eltwise,
|
const std::shared_ptr<Node>& eltwise,
|
||||||
const bool convertIsExpected = true);
|
const bool convertIsExpected = true);
|
||||||
|
|
||||||
// 1. remove Convert if possible
|
// 1. remove Convert if possible
|
||||||
// 2. optimize Constant if possible
|
// 2. optimize Constant if possible
|
||||||
// 3. remove Subtract if Constant on the second branch is zero
|
// 3. remove Subtract if Constant on the second branch is zero
|
||||||
static std::shared_ptr<Node> optimizeSubtract(std::shared_ptr<opset1::Subtract> add);
|
static std::shared_ptr<Node> optimizeSubtract(std::shared_ptr<ov::opset1::Subtract> add);
|
||||||
|
|
||||||
class InsertDequantizationResult {
|
class InsertDequantizationResult {
|
||||||
public:
|
public:
|
||||||
@ -181,9 +178,9 @@ public:
|
|||||||
const bool updatePrecision,
|
const bool updatePrecision,
|
||||||
const bool moveSubtract);
|
const bool moveSubtract);
|
||||||
|
|
||||||
static std::vector<std::vector<std::shared_ptr<ngraph::opset1::Constant>>> splitConstantsBeforeConcat(
|
static std::vector<std::vector<std::shared_ptr<ov::opset1::Constant>>> splitConstantsBeforeConcat(
|
||||||
const std::shared_ptr<ov::Node> concat,
|
const std::shared_ptr<ov::Node> concat,
|
||||||
const std::vector<std::shared_ptr<opset1::Constant>> currConstants);
|
const std::vector<std::shared_ptr<ov::opset1::Constant>> currConstants);
|
||||||
|
|
||||||
static bool checkConstantValuePrecision(const element::Type expectedPrecision, const std::shared_ptr<Node>& constant);
|
static bool checkConstantValuePrecision(const element::Type expectedPrecision, const std::shared_ptr<Node>& constant);
|
||||||
|
|
||||||
@ -198,8 +195,8 @@ public:
|
|||||||
|
|
||||||
static std::shared_ptr<Node> toScalarIfPossible(std::shared_ptr<Node> node);
|
static std::shared_ptr<Node> toScalarIfPossible(std::shared_ptr<Node> node);
|
||||||
|
|
||||||
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq);
|
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<ov::opset1::FakeQuantize>& fq);
|
||||||
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<opset1::FakeQuantize>& fq, const bool roundValues);
|
static std::shared_ptr<Node> fold_fake_quantize(const std::shared_ptr<ov::opset1::FakeQuantize>& fq, const bool roundValues);
|
||||||
|
|
||||||
static FakeQuantizeDequantization foldDequantization(const std::shared_ptr<Node>& node,
|
static FakeQuantizeDequantization foldDequantization(const std::shared_ptr<Node>& node,
|
||||||
const size_t branchIndex,
|
const size_t branchIndex,
|
||||||
@ -209,7 +206,7 @@ public:
|
|||||||
static std::shared_ptr<ngraph::Node> separateInStandaloneBranch(std::shared_ptr<ngraph::Node> node,
|
static std::shared_ptr<ngraph::Node> separateInStandaloneBranch(std::shared_ptr<ngraph::Node> node,
|
||||||
const std::vector<ngraph::element::Type>& defaultPrecisions = precision_set::int8_support);
|
const std::vector<ngraph::element::Type>& defaultPrecisions = precision_set::int8_support);
|
||||||
|
|
||||||
static std::shared_ptr<opset1::FakeQuantize> fuseConvert(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize);
|
static std::shared_ptr<ov::opset1::FakeQuantize> fuseConvert(const std::shared_ptr<ov::opset1::FakeQuantize>& fakeQuantize);
|
||||||
|
|
||||||
static std::vector<element::Type> precisionIntersection(
|
static std::vector<element::Type> precisionIntersection(
|
||||||
const std::vector<element::Type>& v1,
|
const std::vector<element::Type>& v1,
|
||||||
@ -254,7 +251,7 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
static std::shared_ptr<Node> foldFakeQuantize(
|
static std::shared_ptr<Node> foldFakeQuantize(
|
||||||
const std::shared_ptr<opset1::FakeQuantize>& fq,
|
const std::shared_ptr<ov::opset1::FakeQuantize>& fq,
|
||||||
const bool roundValues,
|
const bool roundValues,
|
||||||
const bool roundValuesWasSet);
|
const bool roundValuesWasSet);
|
||||||
|
|
||||||
@ -296,7 +293,8 @@ std::shared_ptr<Node> NetworkHelper::setOutDataPrecision(std::shared_ptr<Operati
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
std::shared_ptr<Node> make_op_pattern(const ngraph::NodeVector& args) {
|
std::shared_ptr<Node> make_op_pattern(const ngraph::NodeVector& args) {
|
||||||
return std::make_shared<ngraph::pattern::op::Any>(element::undefined, PartialShape{}, [](std::shared_ptr<Node> n) {return !!ov::as_type_ptr<T>(n); }, args);
|
return std::make_shared<ov::pass::pattern::op::Any>(element::undefined, PartialShape{},
|
||||||
|
[](std::shared_ptr<Node> n) {return !!ov::as_type_ptr<T>(n); }, args);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename... Args>
|
template <typename T, typename... Args>
|
||||||
@ -317,14 +315,14 @@ template <typename T, typename... Args>
|
|||||||
std::shared_ptr<Node> fold_reshape(Args&&... args) {
|
std::shared_ptr<Node> fold_reshape(Args&&... args) {
|
||||||
std::shared_ptr<Node> node = std::make_shared<T>(args...);
|
std::shared_ptr<Node> node = std::make_shared<T>(args...);
|
||||||
if (node->get_output_size() == 1) {
|
if (node->get_output_size() == 1) {
|
||||||
const auto data_const = ov::as_type_ptr<opset1::Constant>(node->get_input_node_shared_ptr(0));
|
const auto data_const = ov::as_type_ptr<ov::opset1::Constant>(node->get_input_node_shared_ptr(0));
|
||||||
const auto target_shape = ov::as_type_ptr<opset1::Constant>(node->get_input_node_shared_ptr(1));
|
const auto target_shape = ov::as_type_ptr<ov::opset1::Constant>(node->get_input_node_shared_ptr(1));
|
||||||
if (data_const && target_shape) {
|
if (data_const && target_shape) {
|
||||||
return std::make_shared<opset1::Constant>(node->get_input_element_type(0),
|
return std::make_shared<ov::opset1::Constant>(node->get_input_element_type(0),
|
||||||
node->get_output_shape(0),
|
node->get_output_shape(0),
|
||||||
data_const->get_data_ptr());
|
data_const->get_data_ptr());
|
||||||
}
|
}
|
||||||
return fold<opset1::Reshape>(std::forward<Args>(args)...);
|
return fold<ov::opset1::Reshape>(std::forward<Args>(args)...);
|
||||||
}
|
}
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
@ -22,7 +22,7 @@ class LP_TRANSFORMATIONS_API NormalizeL2Transformation : public LayerTransformat
|
|||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("NormalizeL2Transformation", "0");
|
OPENVINO_RTTI("NormalizeL2Transformation", "0");
|
||||||
NormalizeL2Transformation(const Params& params = Params());
|
NormalizeL2Transformation(const Params& params = Params());
|
||||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
|
bool transform(TransformationContext &context, ov::pass::pattern::Matcher &m) override;
|
||||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||||
};
|
};
|
||||||
|
@ -7,10 +7,9 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <ngraph/node.hpp>
|
|
||||||
#include <ngraph/pass/graph_rewrite.hpp>
|
|
||||||
#include <low_precision/lpt_visibility.hpp>
|
#include <low_precision/lpt_visibility.hpp>
|
||||||
#include "low_precision/rt_info/attribute_parameters.hpp"
|
#include "low_precision/rt_info/attribute_parameters.hpp"
|
||||||
|
#include "openvino/pass/pass.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
@ -30,11 +29,11 @@ class LP_TRANSFORMATIONS_API PropagatePrecisions;
|
|||||||
* [PropagatePrecisions](@ref openvino_docs_OV_UG_lpt_PropagatePrecisions) page
|
* [PropagatePrecisions](@ref openvino_docs_OV_UG_lpt_PropagatePrecisions) page
|
||||||
* in the Inference Engine Developer Guide.
|
* in the Inference Engine Developer Guide.
|
||||||
*/
|
*/
|
||||||
class ngraph::pass::low_precision::PropagatePrecisions : public ngraph::pass::FunctionPass {
|
class ngraph::pass::low_precision::PropagatePrecisions : public ov::pass::ModelPass {
|
||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("PropagatePrecisions", "0");
|
OPENVINO_RTTI("PropagatePrecisions", "0");
|
||||||
PropagatePrecisions(const AttributeParameters& params = AttributeParameters());
|
PropagatePrecisions(const AttributeParameters& params = AttributeParameters());
|
||||||
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
|
bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const AttributeParameters params;
|
const AttributeParameters params;
|
||||||
|
@ -104,9 +104,9 @@ private:
|
|||||||
auto getInput = [&defaultPrecisions](const std::shared_ptr<ngraph::Node>& node, const size_t index) -> Input<Node> {
|
auto getInput = [&defaultPrecisions](const std::shared_ptr<ngraph::Node>& node, const size_t index) -> Input<Node> {
|
||||||
const auto dequantization = NetworkHelper::getDequantization(node, defaultPrecisions, index);
|
const auto dequantization = NetworkHelper::getDequantization(node, defaultPrecisions, index);
|
||||||
if (!dequantization.empty() &&
|
if (!dequantization.empty() &&
|
||||||
ov::is_type<opset1::Convert>(dequantization.data.get_node()) &&
|
ov::is_type<ov::opset1::Convert>(dequantization.data.get_node()) &&
|
||||||
(dequantization.data.get_node()->get_input_size() == 1ul) &&
|
(dequantization.data.get_node()->get_input_size() == 1ul) &&
|
||||||
ov::is_type<opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
|
ov::is_type<ov::opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
|
||||||
return dequantization.data.get_node()->input(0);
|
return dequantization.data.get_node()->input(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,10 +9,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <low_precision/lpt_visibility.hpp>
|
#include <low_precision/lpt_visibility.hpp>
|
||||||
|
#include "openvino/opsets/opset1.hpp"
|
||||||
#include <ngraph/node.hpp>
|
|
||||||
#include <ngraph/opsets/opset1.hpp>
|
|
||||||
#include <ngraph/type.hpp>
|
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
@ -29,19 +26,19 @@ public:
|
|||||||
const std::vector<float>& outputLowValues,
|
const std::vector<float>& outputLowValues,
|
||||||
const std::vector<float>& outputHighValues);
|
const std::vector<float>& outputHighValues);
|
||||||
|
|
||||||
static bool outputLayoutIsSupported(std::shared_ptr<opset1::FakeQuantize> quantize, bool isConvertExpected = false);
|
static bool outputLayoutIsSupported(std::shared_ptr<ov::opset1::FakeQuantize> quantize, bool isConvertExpected = false);
|
||||||
|
|
||||||
static void getInputIntervals(
|
static void getInputIntervals(
|
||||||
std::shared_ptr<opset1::FakeQuantize> quantize,
|
std::shared_ptr<ov::opset1::FakeQuantize> quantize,
|
||||||
std::vector<float>& inputLowValues,
|
std::vector<float>& inputLowValues,
|
||||||
std::vector<float>& inputHighValues);
|
std::vector<float>& inputHighValues);
|
||||||
|
|
||||||
static void getOutputIntervals(
|
static void getOutputIntervals(
|
||||||
std::shared_ptr<opset1::FakeQuantize> quantize,
|
std::shared_ptr<ov::opset1::FakeQuantize> quantize,
|
||||||
std::vector<float>& outputLowValues,
|
std::vector<float>& outputLowValues,
|
||||||
std::vector<float>& outputHighValues);
|
std::vector<float>& outputHighValues);
|
||||||
|
|
||||||
static QuantizationDetails getDetails(std::shared_ptr<opset1::FakeQuantize>);
|
static QuantizationDetails getDetails(std::shared_ptr<ov::opset1::FakeQuantize>);
|
||||||
bool hasNegativeOutput() const;
|
bool hasNegativeOutput() const;
|
||||||
float maxOutput(const size_t channel) const;
|
float maxOutput(const size_t channel) const;
|
||||||
float maxInput(const size_t channel) const;
|
float maxInput(const size_t channel) const;
|
||||||
|
@ -23,7 +23,7 @@ class LP_TRANSFORMATIONS_API ReshapeTransformation : public LayerTransformation
|
|||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("ReshapeTransformation", "0");
|
OPENVINO_RTTI("ReshapeTransformation", "0");
|
||||||
ReshapeTransformation(const Params& params = Params());
|
ReshapeTransformation(const Params& params = Params());
|
||||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
bool transform(TransformationContext& context, ov::pass::pattern::Matcher &m) override;
|
||||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
||||||
|
|
||||||
|
@ -9,9 +9,6 @@
|
|||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <ngraph/node.hpp>
|
|
||||||
#include <ngraph/pass/graph_rewrite.hpp>
|
|
||||||
|
|
||||||
#include "low_precision/lpt_visibility.hpp"
|
#include "low_precision/lpt_visibility.hpp"
|
||||||
#include "low_precision/rt_info/attribute_parameters.hpp"
|
#include "low_precision/rt_info/attribute_parameters.hpp"
|
||||||
#include "low_precision/rt_info/shared_value_attribute.hpp"
|
#include "low_precision/rt_info/shared_value_attribute.hpp"
|
||||||
|
@ -9,10 +9,7 @@
|
|||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <ngraph/node.hpp>
|
|
||||||
|
|
||||||
#include <low_precision/lpt_visibility.hpp>
|
#include <low_precision/lpt_visibility.hpp>
|
||||||
#include <ngraph/pass/graph_rewrite.hpp>
|
|
||||||
#include "shared_value_attribute.hpp"
|
#include "shared_value_attribute.hpp"
|
||||||
#include "attribute_parameters.hpp"
|
#include "attribute_parameters.hpp"
|
||||||
|
|
||||||
|
@ -4,10 +4,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <ngraph/node.hpp>
|
|
||||||
|
|
||||||
#include <low_precision/lpt_visibility.hpp>
|
#include <low_precision/lpt_visibility.hpp>
|
||||||
#include <ngraph/pass/graph_rewrite.hpp>
|
|
||||||
#include "low_precision/rt_info/shared_value_attribute.hpp"
|
#include "low_precision/rt_info/shared_value_attribute.hpp"
|
||||||
#include "low_precision/layer_transformation.hpp"
|
#include "low_precision/layer_transformation.hpp"
|
||||||
#include "attribute_parameters.hpp"
|
#include "attribute_parameters.hpp"
|
||||||
|
@ -11,7 +11,6 @@
|
|||||||
#include <ngraph/node.hpp>
|
#include <ngraph/node.hpp>
|
||||||
|
|
||||||
#include <low_precision/lpt_visibility.hpp>
|
#include <low_precision/lpt_visibility.hpp>
|
||||||
#include <ngraph/pass/graph_rewrite.hpp>
|
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
class LP_TRANSFORMATIONS_API SharedAttribute : public ov::RuntimeAttribute {
|
class LP_TRANSFORMATIONS_API SharedAttribute : public ov::RuntimeAttribute {
|
||||||
|
@ -25,7 +25,7 @@ class LP_TRANSFORMATIONS_API SplitTransformation : public LayerTransformation {
|
|||||||
public:
|
public:
|
||||||
OPENVINO_RTTI("SplitTransformation", "0");
|
OPENVINO_RTTI("SplitTransformation", "0");
|
||||||
SplitTransformation(const Params& params = Params());
|
SplitTransformation(const Params& params = Params());
|
||||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override;
|
bool transform(TransformationContext& context, ov::pass::pattern::Matcher& m) override;
|
||||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||||
void updateOutputs(
|
void updateOutputs(
|
||||||
|
@ -6,7 +6,6 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <ngraph/ngraph.hpp>
|
|
||||||
#include "low_precision/quantization_details.hpp"
|
#include "low_precision/quantization_details.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
|
@ -34,23 +34,23 @@ class UpdateSharedPrecisionPreserved;
|
|||||||
* in the Inference Engine Developer Guide.
|
* in the Inference Engine Developer Guide.
|
||||||
*/
|
*/
|
||||||
template <typename AttributeType, typename ExpectedAttributeType = AttributeType>
|
template <typename AttributeType, typename ExpectedAttributeType = AttributeType>
|
||||||
class ngraph::pass::low_precision::UpdateSharedPrecisionPreserved : public ngraph::pass::MatcherPass {
|
class ngraph::pass::low_precision::UpdateSharedPrecisionPreserved : public ov::pass::MatcherPass {
|
||||||
public:
|
public:
|
||||||
UpdateSharedPrecisionPreserved(const std::vector<ngraph::element::Type>& defaultPrecisions = precision_set::int8_support) {
|
UpdateSharedPrecisionPreserved(const std::vector<ngraph::element::Type>& defaultPrecisions = precision_set::int8_support) {
|
||||||
ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) {
|
ov::graph_rewrite_callback callback = [&](ov::pass::pattern::Matcher& m) {
|
||||||
auto node = m.get_match_root();
|
auto node = m.get_match_root();
|
||||||
|
|
||||||
const bool needToCheckExpectedAttributeType = !std::is_same<ExpectedAttributeType, AttributeType>::value;
|
const bool needToCheckExpectedAttributeType = !std::is_same<ExpectedAttributeType, AttributeType>::value;
|
||||||
if (!needToCheckExpectedAttributeType) {
|
if (!needToCheckExpectedAttributeType) {
|
||||||
// expected attribute is ignored, set attributes for node inputs except Result & FakeQuantize operations
|
// expected attribute is ignored, set attributes for node inputs except Result & FakeQuantize operations
|
||||||
if (ov::is_type<ngraph::opset1::Result>(node) ||
|
if (ov::is_type<ov::opset1::Result>(node) ||
|
||||||
ov::is_type<ngraph::opset1::FakeQuantize>(node) ||
|
ov::is_type<ov::opset1::FakeQuantize>(node) ||
|
||||||
transformation_callback(node)) {
|
transformation_callback(node)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ngraph::pass::low_precision::NetworkHelper::isPrecisionPreserved(node) || ov::is_type<opset1::FakeQuantize>(node)) {
|
if (ngraph::pass::low_precision::NetworkHelper::isPrecisionPreserved(node) || ov::is_type<ov::opset1::FakeQuantize>(node)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,7 +90,7 @@ public:
|
|||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
auto matcher = std::make_shared<ngraph::pattern::Matcher>(pattern::any_input(), "UpdateSharedPrecisionPreserved");
|
auto matcher = std::make_shared<ov::pass::pattern::Matcher>(ov::pass::pattern::any_input(), "UpdateSharedPrecisionPreserved");
|
||||||
this->register_matcher(matcher, callback);
|
this->register_matcher(matcher, callback);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -98,8 +98,8 @@ private:
|
|||||||
Input<Node> getDequantizationInput(const Input<Node>& input, const std::vector<ngraph::element::Type>& defaultPrecisions) {
|
Input<Node> getDequantizationInput(const Input<Node>& input, const std::vector<ngraph::element::Type>& defaultPrecisions) {
|
||||||
const auto dequantization = NetworkHelper::getDequantization(input.get_node()->shared_from_this(), defaultPrecisions, input.get_index());
|
const auto dequantization = NetworkHelper::getDequantization(input.get_node()->shared_from_this(), defaultPrecisions, input.get_index());
|
||||||
if (!dequantization.empty() &&
|
if (!dequantization.empty() &&
|
||||||
(ov::is_type<opset1::Convert>(dequantization.data.get_node())) &&
|
(ov::is_type<ov::opset1::Convert>(dequantization.data.get_node())) &&
|
||||||
ov::is_type<opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
|
ov::is_type<ov::opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
|
||||||
assert(dequantization.data.get_target_inputs().size() == 1ul);
|
assert(dequantization.data.get_target_inputs().size() == 1ul);
|
||||||
return *dequantization.data.get_target_inputs().begin();
|
return *dequantization.data.get_target_inputs().begin();
|
||||||
}
|
}
|
||||||
|
@ -5,9 +5,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <ngraph/ngraph.hpp>
|
|
||||||
#include "transformation_context.hpp"
|
#include "transformation_context.hpp"
|
||||||
#include "layer_transformation.hpp"
|
#include "layer_transformation.hpp"
|
||||||
|
#include "openvino/opsets/opset1.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
@ -42,7 +42,7 @@ protected:
|
|||||||
virtual size_t getInputChannels(const std::shared_ptr<ngraph::Node> conv) const = 0;
|
virtual size_t getInputChannels(const std::shared_ptr<ngraph::Node> conv) const = 0;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static std::shared_ptr<opset1::FakeQuantize> getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node);
|
static std::shared_ptr<ov::opset1::FakeQuantize> getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node);
|
||||||
static DataPrecision getDataPrecisionOnWeights(const std::shared_ptr<Node>& node, const std::vector<ngraph::element::Type>& defaultPrecisions);
|
static DataPrecision getDataPrecisionOnWeights(const std::shared_ptr<Node>& node, const std::vector<ngraph::element::Type>& defaultPrecisions);
|
||||||
static bool isAsymmetricOnWeights(const std::shared_ptr<const Node>& node,
|
static bool isAsymmetricOnWeights(const std::shared_ptr<const Node>& node,
|
||||||
const std::vector<ngraph::element::Type>& defaultPrecisions = precision_set::int8_support);
|
const std::vector<ngraph::element::Type>& defaultPrecisions = precision_set::int8_support);
|
||||||
|
@ -24,29 +24,29 @@ namespace low_precision {
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>& op) {
|
std::shared_ptr<ov::opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>& op) {
|
||||||
// TODO: separate this part to standalone transformation: AddToSubtractTransformation
|
// TODO: separate this part to standalone transformation: AddToSubtractTransformation
|
||||||
// motivation:
|
// motivation:
|
||||||
// - single responsibility
|
// - single responsibility
|
||||||
// - keep AddTransformation and AddToSubtractTransformation transformations independent and optional
|
// - keep AddTransformation and AddToSubtractTransformation transformations independent and optional
|
||||||
const auto add = ov::as_type_ptr<opset1::Add>(op);
|
const auto add = ov::as_type_ptr<ov::opset1::Add>(op);
|
||||||
if (add == nullptr || ov::marked_as_bias(add)) {
|
if (add == nullptr || ov::marked_as_bias(add)) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: use general way from getDequantization: is eltwise with Constant
|
// TODO: use general way from getDequantization: is eltwise with Constant
|
||||||
const int constBranchIndex = ov::is_type<opset1::Constant>(add->get_input_node_ptr(0)) ?
|
const int constBranchIndex = ov::is_type<ov::opset1::Constant>(add->get_input_node_ptr(0)) ?
|
||||||
0 :
|
0 :
|
||||||
(ov::is_type<opset1::Constant>(add->get_input_node_ptr(1)) ? 1 : -1);
|
(ov::is_type<ov::opset1::Constant>(add->get_input_node_ptr(1)) ? 1 : -1);
|
||||||
if (constBranchIndex == -1) {
|
if (constBranchIndex == -1) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
const size_t dataBranchIndex = constBranchIndex == 0 ? 1ul : 0;
|
const size_t dataBranchIndex = constBranchIndex == 0 ? 1ul : 0;
|
||||||
auto constant = fold<opset1::Negative>(add->input_value(constBranchIndex));
|
auto constant = fold<ov::opset1::Negative>(add->input_value(constBranchIndex));
|
||||||
auto constOutput = constant->output(0);
|
auto constOutput = constant->output(0);
|
||||||
|
|
||||||
const auto subtract = std::make_shared<ov::op::TypeRelaxed<opset1::Subtract>>(
|
const auto subtract = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Subtract>>(
|
||||||
std::vector<element::Type>{element::f32, element::f32},
|
std::vector<element::Type>{element::f32, element::f32},
|
||||||
std::vector<element::Type>{ op->get_output_element_type(0) },
|
std::vector<element::Type>{ op->get_output_element_type(0) },
|
||||||
ov::op::TemporaryReplaceOutputType(add->input_value(dataBranchIndex), element::f32).get(),
|
ov::op::TemporaryReplaceOutputType(add->input_value(dataBranchIndex), element::f32).get(),
|
||||||
@ -59,20 +59,20 @@ std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>&
|
|||||||
return subtract;
|
return subtract;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<opset1::Subtract> fuseWithSubtract(const std::shared_ptr<Node>& op) {
|
std::shared_ptr<ov::opset1::Subtract> fuseWithSubtract(const std::shared_ptr<Node>& op) {
|
||||||
const auto add = ov::as_type_ptr<opset1::Add>(op);
|
const auto add = ov::as_type_ptr<ov::opset1::Add>(op);
|
||||||
if ((add == nullptr) ||
|
if ((add == nullptr) ||
|
||||||
!ov::is_type<opset1::Subtract>(add->get_input_node_shared_ptr(0)) ||
|
!ov::is_type<ov::opset1::Subtract>(add->get_input_node_shared_ptr(0)) ||
|
||||||
// TODO: use general way from getDequantization: is eltwise with Constant
|
// TODO: use general way from getDequantization: is eltwise with Constant
|
||||||
!ov::is_type<opset1::Constant>(add->get_input_node_shared_ptr(0)->get_input_node_shared_ptr(1))) {
|
!ov::is_type<ov::opset1::Constant>(add->get_input_node_shared_ptr(0)->get_input_node_shared_ptr(1))) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto newSubConst = fold<opset1::Subtract>(
|
const auto newSubConst = fold<ov::opset1::Subtract>(
|
||||||
add->get_input_node_shared_ptr(0)->input_value(1),
|
add->get_input_node_shared_ptr(0)->input_value(1),
|
||||||
add->input_value(1));
|
add->input_value(1));
|
||||||
|
|
||||||
const auto newSubtract = std::make_shared<ov::op::TypeRelaxed<opset1::Subtract>>(
|
const auto newSubtract = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Subtract>>(
|
||||||
std::vector<element::Type>{element::f32, element::f32},
|
std::vector<element::Type>{element::f32, element::f32},
|
||||||
std::vector<element::Type>{ op->get_output_element_type(0) },
|
std::vector<element::Type>{ op->get_output_element_type(0) },
|
||||||
ov::op::TemporaryReplaceOutputType(add->get_input_node_shared_ptr(0)->input_value(0), element::f32).get(),
|
ov::op::TemporaryReplaceOutputType(add->get_input_node_shared_ptr(0)->input_value(0), element::f32).get(),
|
||||||
@ -87,7 +87,7 @@ std::shared_ptr<opset1::Subtract> fuseWithSubtract(const std::shared_ptr<Node>&
|
|||||||
|
|
||||||
AddTransformation::AddTransformation(const Params& params) : EltwiseBaseTransformation(params) {
|
AddTransformation::AddTransformation(const Params& params) : EltwiseBaseTransformation(params) {
|
||||||
MATCHER_SCOPE(AddTransformation);
|
MATCHER_SCOPE(AddTransformation);
|
||||||
auto matcher = ngraph::pattern::wrap_type<opset1::Add>();
|
auto matcher = ngraph::pattern::wrap_type<ov::opset1::Add>();
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -102,7 +102,7 @@ AddTransformation::AddTransformation(const Params& params) : EltwiseBaseTransfor
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool AddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
bool AddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
||||||
std::shared_ptr<opset1::Add> op = ov::as_type_ptr<opset1::Add>(m.get_match_root());
|
std::shared_ptr<ov::opset1::Add> op = ov::as_type_ptr<ov::opset1::Add>(m.get_match_root());
|
||||||
if ((op == nullptr) || (!canBeTransformed(context, op))) {
|
if ((op == nullptr) || (!canBeTransformed(context, op))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -111,7 +111,7 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter
|
|||||||
NetworkHelper::normalizeDequantization(NetworkHelper::getDequantization(op, defaultPrecisions, 1));
|
NetworkHelper::normalizeDequantization(NetworkHelper::getDequantization(op, defaultPrecisions, 1));
|
||||||
|
|
||||||
std::shared_ptr<Node> addNode = NetworkHelper::separateInStandaloneBranch(op, defaultPrecisions);
|
std::shared_ptr<Node> addNode = NetworkHelper::separateInStandaloneBranch(op, defaultPrecisions);
|
||||||
std::shared_ptr<opset1::Add> add = ov::as_type_ptr<opset1::Add>(addNode);
|
std::shared_ptr<ov::opset1::Add> add = ov::as_type_ptr<ov::opset1::Add>(addNode);
|
||||||
|
|
||||||
const int fullPathIndex = getNotEmpty(add);
|
const int fullPathIndex = getNotEmpty(add);
|
||||||
std::shared_ptr<Node> newMultiply;
|
std::shared_ptr<Node> newMultiply;
|
||||||
@ -131,7 +131,7 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter
|
|||||||
|
|
||||||
newMultiply = NetworkHelper::swapMultiplyAndAdd(add, multiplyBranch.first);
|
newMultiply = NetworkHelper::swapMultiplyAndAdd(add, multiplyBranch.first);
|
||||||
ngraph::copy_runtime_info({ add, newMultiply }, newMultiply);
|
ngraph::copy_runtime_info({ add, newMultiply }, newMultiply);
|
||||||
if (ov::is_type<opset1::Add>(newMultiply->get_input_node_shared_ptr(0))) {
|
if (ov::is_type<ov::opset1::Add>(newMultiply->get_input_node_shared_ptr(0))) {
|
||||||
newAddOrSubtract = newMultiply->get_input_node_shared_ptr(0);
|
newAddOrSubtract = newMultiply->get_input_node_shared_ptr(0);
|
||||||
|
|
||||||
auto subtract = fuseWithSubtract(newAddOrSubtract);
|
auto subtract = fuseWithSubtract(newAddOrSubtract);
|
||||||
@ -172,13 +172,13 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter
|
|||||||
// after : Y = SC2 * ( SC1' * (X1 - SH1') + X2 ) , where :
|
// after : Y = SC2 * ( SC1' * (X1 - SH1') + X2 ) , where :
|
||||||
// SC1' = SC1 / SC2
|
// SC1' = SC1 / SC2
|
||||||
// SH1' = SH1 + SC2 * SH2 / SC1
|
// SH1' = SH1 + SC2 * SH2 / SC1
|
||||||
auto newSubtractFullPathValues = fold<opset1::Add>(
|
auto newSubtractFullPathValues = fold<ov::opset1::Add>(
|
||||||
subtractFullPathValues,
|
subtractFullPathValues,
|
||||||
fold<opset1::Divide>(
|
fold<ov::opset1::Divide>(
|
||||||
fold<opset1::Multiply>(subtractEmptyPathValues, multiplyEmptyPathValues),
|
fold<ov::opset1::Multiply>(subtractEmptyPathValues, multiplyEmptyPathValues),
|
||||||
multiplyFullPathValues));
|
multiplyFullPathValues));
|
||||||
|
|
||||||
auto newMultiplyFullPathValues = fold<opset1::Divide>(multiplyFullPathValues, multiplyEmptyPathValues);
|
auto newMultiplyFullPathValues = fold<ov::opset1::Divide>(multiplyFullPathValues, multiplyEmptyPathValues);
|
||||||
|
|
||||||
// Transformation can't be applied if new full path values brake accuracy because of Inf values
|
// Transformation can't be applied if new full path values brake accuracy because of Inf values
|
||||||
if (!NetworkHelper::checkConstantNotInf(newSubtractFullPathValues) ||
|
if (!NetworkHelper::checkConstantNotInf(newSubtractFullPathValues) ||
|
||||||
@ -202,25 +202,25 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter
|
|||||||
// newMultiply
|
// newMultiply
|
||||||
|
|
||||||
inputs[emptyPathIndex] = dequantizationEmptyPath.data;
|
inputs[emptyPathIndex] = dequantizationEmptyPath.data;
|
||||||
inputs[fullPathIndex] = std::make_shared<opset1::Multiply>(
|
inputs[fullPathIndex] = std::make_shared<ov::opset1::Multiply>(
|
||||||
newSubtractFullPathValues == nullptr ?
|
newSubtractFullPathValues == nullptr ?
|
||||||
(fullPathInput.get_element_type() != newMultiplyFullPathValues->get_element_type() ?
|
(fullPathInput.get_element_type() != newMultiplyFullPathValues->get_element_type() ?
|
||||||
std::make_shared<opset1::Convert>(fullPathInput, newMultiplyFullPathValues->get_element_type()) :
|
std::make_shared<ov::opset1::Convert>(fullPathInput, newMultiplyFullPathValues->get_element_type()) :
|
||||||
fullPathInput) :
|
fullPathInput) :
|
||||||
std::make_shared<opset1::Subtract>(
|
std::make_shared<ov::opset1::Subtract>(
|
||||||
// precision on branch with dequantization operations can be different with dequantization precision,
|
// precision on branch with dequantization operations can be different with dequantization precision,
|
||||||
// for example: FP16 model with FP32 dequantization
|
// for example: FP16 model with FP32 dequantization
|
||||||
fullPathInput.get_element_type() != newSubtractFullPathValues->get_element_type() ?
|
fullPathInput.get_element_type() != newSubtractFullPathValues->get_element_type() ?
|
||||||
std::make_shared<opset1::Convert>(fullPathInput, newSubtractFullPathValues->get_element_type()) :
|
std::make_shared<ov::opset1::Convert>(fullPathInput, newSubtractFullPathValues->get_element_type()) :
|
||||||
fullPathInput,
|
fullPathInput,
|
||||||
newSubtractFullPathValues),
|
newSubtractFullPathValues),
|
||||||
newMultiplyFullPathValues);
|
newMultiplyFullPathValues);
|
||||||
|
|
||||||
newAddOrSubtract = std::make_shared<ov::op::TypeRelaxed<opset1::Add>>(
|
newAddOrSubtract = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Add>>(
|
||||||
std::vector<element::Type>{element::f32, element::f32}, std::vector<element::Type>{ element::f32 },
|
std::vector<element::Type>{element::f32, element::f32}, std::vector<element::Type>{ element::f32 },
|
||||||
ov::op::TemporaryReplaceOutputType(inputs[0], element::f32).get(),
|
ov::op::TemporaryReplaceOutputType(inputs[0], element::f32).get(),
|
||||||
ov::op::TemporaryReplaceOutputType(inputs[1], element::f32).get());
|
ov::op::TemporaryReplaceOutputType(inputs[1], element::f32).get());
|
||||||
newMultiply = std::make_shared<ov::op::TypeRelaxed<opset1::Multiply>>(
|
newMultiply = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Multiply>>(
|
||||||
std::vector<element::Type>{element::f32, element::f32}, std::vector<element::Type>{ add->get_output_element_type(0) },
|
std::vector<element::Type>{element::f32, element::f32}, std::vector<element::Type>{ add->get_output_element_type(0) },
|
||||||
ov::op::TemporaryReplaceOutputType(newAddOrSubtract, element::f32).get(),
|
ov::op::TemporaryReplaceOutputType(newAddOrSubtract, element::f32).get(),
|
||||||
ov::op::TemporaryReplaceOutputType(multiplyEmptyPathValues, element::f32).get());
|
ov::op::TemporaryReplaceOutputType(multiplyEmptyPathValues, element::f32).get());
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include "low_precision/rt_info/intervals_alignment_attribute.hpp"
|
#include "low_precision/rt_info/intervals_alignment_attribute.hpp"
|
||||||
#include "low_precision/rt_info/attribute_parameters.hpp"
|
#include "low_precision/rt_info/attribute_parameters.hpp"
|
||||||
#include "itt.hpp"
|
#include "itt.hpp"
|
||||||
|
#include "openvino/pass/manager.hpp"
|
||||||
|
|
||||||
using namespace ngraph;
|
using namespace ngraph;
|
||||||
using namespace ngraph::pass::low_precision;
|
using namespace ngraph::pass::low_precision;
|
||||||
@ -19,7 +20,7 @@ ngraph::pass::low_precision::AlignQuantizationIntervals::AlignQuantizationInterv
|
|||||||
|
|
||||||
bool ngraph::pass::low_precision::AlignQuantizationIntervals::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
|
bool ngraph::pass::low_precision::AlignQuantizationIntervals::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
|
||||||
RUN_ON_FUNCTION_SCOPE(AlignQuantizationIntervals);
|
RUN_ON_FUNCTION_SCOPE(AlignQuantizationIntervals);
|
||||||
ngraph::pass::Manager manager;
|
ov::pass::Manager manager;
|
||||||
manager.set_per_pass_validation(false);
|
manager.set_per_pass_validation(false);
|
||||||
std::shared_ptr<ngraph::pass::GraphRewrite> intervalsAlignment = manager.register_pass<ngraph::pass::GraphRewrite>();
|
std::shared_ptr<ngraph::pass::GraphRewrite> intervalsAlignment = manager.register_pass<ngraph::pass::GraphRewrite>();
|
||||||
intervalsAlignment->add_matcher<low_precision::CreateAttribute<IntervalsAlignmentAttribute, opset1::FakeQuantize>>(
|
intervalsAlignment->add_matcher<low_precision::CreateAttribute<IntervalsAlignmentAttribute, opset1::FakeQuantize>>(
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
|
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
|
||||||
#include "low_precision/update_shared_precision_preserved.hpp"
|
#include "low_precision/update_shared_precision_preserved.hpp"
|
||||||
#include "itt.hpp"
|
#include "itt.hpp"
|
||||||
|
#include "openvino/pass/manager.hpp"
|
||||||
|
|
||||||
using namespace ngraph;
|
using namespace ngraph;
|
||||||
using namespace ngraph::pass::low_precision;
|
using namespace ngraph::pass::low_precision;
|
||||||
@ -20,7 +21,7 @@ ngraph::pass::low_precision::AlignQuantizationParameters::AlignQuantizationParam
|
|||||||
|
|
||||||
bool ngraph::pass::low_precision::AlignQuantizationParameters::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
|
bool ngraph::pass::low_precision::AlignQuantizationParameters::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
|
||||||
RUN_ON_FUNCTION_SCOPE(AlignQuantizationParameters);
|
RUN_ON_FUNCTION_SCOPE(AlignQuantizationParameters);
|
||||||
ngraph::pass::Manager manager;
|
ov::pass::Manager manager;
|
||||||
manager.set_per_pass_validation(false);
|
manager.set_per_pass_validation(false);
|
||||||
std::shared_ptr<ngraph::pass::GraphRewrite> propagation = manager.register_pass<ngraph::pass::GraphRewrite>();
|
std::shared_ptr<ngraph::pass::GraphRewrite> propagation = manager.register_pass<ngraph::pass::GraphRewrite>();
|
||||||
propagation->add_matcher<low_precision::CreateAttribute<QuantizationAlignmentAttribute>>();
|
propagation->add_matcher<low_precision::CreateAttribute<QuantizationAlignmentAttribute>>();
|
||||||
|
@ -21,8 +21,8 @@ namespace low_precision {
|
|||||||
AssignAndReadValueTransformation::AssignAndReadValueTransformation(const std::shared_ptr<ngraph::Function> function, const Params& params) :
|
AssignAndReadValueTransformation::AssignAndReadValueTransformation(const std::shared_ptr<ngraph::Function> function, const Params& params) :
|
||||||
LayerTransformation(params), function(function) {
|
LayerTransformation(params), function(function) {
|
||||||
MATCHER_SCOPE(AssignAndReadValueTransformation);
|
MATCHER_SCOPE(AssignAndReadValueTransformation);
|
||||||
auto assign3 = pattern::wrap_type<opset3::Assign>({ pattern::wrap_type<opset1::Multiply>() });
|
auto assign3 = pattern::wrap_type<opset3::Assign>({ pattern::wrap_type<ov::opset1::Multiply>() });
|
||||||
auto assign6 = pattern::wrap_type<opset6::Assign>({ pattern::wrap_type<opset1::Multiply>() });
|
auto assign6 = pattern::wrap_type<opset6::Assign>({ pattern::wrap_type<ov::opset1::Multiply>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [=](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [=](pattern::Matcher& m) {
|
||||||
const auto& opsMap = m.get_pattern_value_map();
|
const auto& opsMap = m.get_pattern_value_map();
|
||||||
@ -86,15 +86,15 @@ bool AssignAndReadValueTransformation::transform(TransformationContext& context,
|
|||||||
if (nextLayers.size() > 1) {
|
if (nextLayers.size() > 1) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
const auto fakeQuantize = as_type_ptr<opset1::FakeQuantize>(nextLayers.begin()->get_node()->shared_from_this());
|
const auto fakeQuantize = as_type_ptr<ov::opset1::FakeQuantize>(nextLayers.begin()->get_node()->shared_from_this());
|
||||||
|
|
||||||
if (fakeQuantize == nullptr) {
|
if (fakeQuantize == nullptr) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
auto fakeQuantizeInputs = fakeQuantize->input_values();
|
auto fakeQuantizeInputs = fakeQuantize->input_values();
|
||||||
|
|
||||||
const auto inputLow = as_type_ptr<opset1::Constant>(fakeQuantizeInputs[1].get_node_shared_ptr());
|
const auto inputLow = as_type_ptr<ov::opset1::Constant>(fakeQuantizeInputs[1].get_node_shared_ptr());
|
||||||
const auto inputHigh = as_type_ptr<opset1::Constant>(fakeQuantizeInputs[2].get_node_shared_ptr());
|
const auto inputHigh = as_type_ptr<ov::opset1::Constant>(fakeQuantizeInputs[2].get_node_shared_ptr());
|
||||||
|
|
||||||
if (inputLow == nullptr || inputHigh == nullptr) {
|
if (inputLow == nullptr || inputHigh == nullptr) {
|
||||||
return true;
|
return true;
|
||||||
|
@ -17,7 +17,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
ClampTransformation::ClampTransformation(const Params& params) : LayerTransformation(params) {
|
ClampTransformation::ClampTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(ClampTransformation);
|
MATCHER_SCOPE(ClampTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::Clamp>({ pattern::wrap_type<opset1::Multiply>() });
|
auto matcher = pattern::wrap_type<ov::opset1::Clamp>({ pattern::wrap_type<ov::opset1::Multiply>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -45,9 +45,9 @@ bool ClampTransformation::transform(TransformationContext& context, ngraph::patt
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto newClamp = ov::as_type_ptr<opset1::Clamp>(moveDequantizationAfter(context, clamp, dequantization, false, moveSubtract));
|
const auto newClamp = ov::as_type_ptr<ov::opset1::Clamp>(moveDequantizationAfter(context, clamp, dequantization, false, moveSubtract));
|
||||||
|
|
||||||
std::shared_ptr<ngraph::opset1::Clamp> replacement;
|
std::shared_ptr<ov::opset1::Clamp> replacement;
|
||||||
{
|
{
|
||||||
double min = newClamp->get_min();
|
double min = newClamp->get_min();
|
||||||
double max = newClamp->get_max();
|
double max = newClamp->get_max();
|
||||||
@ -67,7 +67,7 @@ bool ClampTransformation::transform(TransformationContext& context, ngraph::patt
|
|||||||
max += shift;
|
max += shift;
|
||||||
}
|
}
|
||||||
|
|
||||||
replacement = std::make_shared<ngraph::opset1::Clamp>(newClamp->input_value(0), min, max);
|
replacement = std::make_shared<ov::opset1::Clamp>(newClamp->input_value(0), min, max);
|
||||||
}
|
}
|
||||||
|
|
||||||
replace_node_update_name(newClamp, replacement);
|
replace_node_update_name(newClamp, replacement);
|
||||||
|
@ -22,7 +22,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
ConvertTransformation::ConvertTransformation(const Params& params) : LayerTransformation(params) {
|
ConvertTransformation::ConvertTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(ConvertTransformation);
|
MATCHER_SCOPE(ConvertTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::Convert>();
|
auto matcher = pattern::wrap_type<ov::opset1::Convert>();
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -37,7 +37,7 @@ ConvertTransformation::ConvertTransformation(const Params& params) : LayerTransf
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool ConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
bool ConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
||||||
std::shared_ptr<opset1::Convert> convert = ov::as_type_ptr<opset1::Convert>(m.get_match_root());
|
std::shared_ptr<ov::opset1::Convert> convert = ov::as_type_ptr<ov::opset1::Convert>(m.get_match_root());
|
||||||
if (!convert) {
|
if (!convert) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -48,9 +48,9 @@ bool ConvertTransformation::transform(TransformationContext& context, ngraph::pa
|
|||||||
|
|
||||||
const ngraph::element::Type precisionBefore = convert->get_input_element_type(0);
|
const ngraph::element::Type precisionBefore = convert->get_input_element_type(0);
|
||||||
|
|
||||||
std::shared_ptr<opset1::Subtract> subtract = std::make_shared<ov::op::TypeRelaxed<opset1::Subtract>>(
|
std::shared_ptr<ov::opset1::Subtract> subtract = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Subtract>>(
|
||||||
convert->input_value(0),
|
convert->input_value(0),
|
||||||
std::make_shared<opset1::Constant>(precisionBefore, Shape{}, std::vector<size_t>({ 0 })));
|
std::make_shared<ov::opset1::Constant>(precisionBefore, Shape{}, std::vector<size_t>({ 0 })));
|
||||||
NetworkHelper::setOutDataPrecision(subtract, convert->get_output_element_type(0));
|
NetworkHelper::setOutDataPrecision(subtract, convert->get_output_element_type(0));
|
||||||
|
|
||||||
replace_node(convert, subtract);
|
replace_node(convert, subtract);
|
||||||
|
@ -44,7 +44,7 @@ ngraph::pass::low_precision::ConvertSubtractConstant::ConvertSubtractConstant(co
|
|||||||
auto multiplyConstantWrapper = ngraph::pattern::wrap_type<opset1::Constant>(pattern::consumers_count(1));
|
auto multiplyConstantWrapper = ngraph::pattern::wrap_type<opset1::Constant>(pattern::consumers_count(1));
|
||||||
auto multiplyWrapper = ngraph::pattern::wrap_type<opset1::Multiply>({ subtractWrapper, multiplyConstantWrapper }, pattern::consumers_count(1));
|
auto multiplyWrapper = ngraph::pattern::wrap_type<opset1::Multiply>({ subtractWrapper, multiplyConstantWrapper }, pattern::consumers_count(1));
|
||||||
|
|
||||||
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher & m) -> bool {
|
ov::matcher_pass_callback callback = [=](ngraph::pattern::Matcher & m) -> bool {
|
||||||
const auto& opsMap = m.get_pattern_value_map();
|
const auto& opsMap = m.get_pattern_value_map();
|
||||||
const auto weightsConvert = opsMap.at(weightsConvertWrapper).get_node_shared_ptr();
|
const auto weightsConvert = opsMap.at(weightsConvertWrapper).get_node_shared_ptr();
|
||||||
const auto quantizePrecision = weightsConvert->get_input_element_type(0);
|
const auto quantizePrecision = weightsConvert->get_input_element_type(0);
|
||||||
|
@ -22,16 +22,16 @@ namespace low_precision {
|
|||||||
|
|
||||||
ConvolutionTransformation::ConvolutionTransformation(const Params& params) : WeightableLayerTransformation(params) {
|
ConvolutionTransformation::ConvolutionTransformation(const Params& params) : WeightableLayerTransformation(params) {
|
||||||
MATCHER_SCOPE(ConvolutionTransformation);
|
MATCHER_SCOPE(ConvolutionTransformation);
|
||||||
auto matcher = ngraph::pattern::wrap_type<opset1::Convolution>({
|
auto matcher = ngraph::pattern::wrap_type<ov::opset1::Convolution>({
|
||||||
ngraph::pattern::wrap_type<opset1::Multiply>(),
|
ngraph::pattern::wrap_type<ov::opset1::Multiply>(),
|
||||||
std::make_shared<pattern::op::Or>(OutputVector {
|
std::make_shared<pattern::op::Or>(OutputVector {
|
||||||
pattern::wrap_type<opset1::Multiply>(),
|
pattern::wrap_type<ov::opset1::Multiply>(),
|
||||||
pattern::wrap_type<opset1::FakeQuantize>()
|
pattern::wrap_type<ov::opset1::FakeQuantize>()
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ov::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
if (transformation_callback(op)) {
|
if (transformation_callback(op)) {
|
||||||
return false;
|
return false;
|
||||||
@ -64,7 +64,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
|
|
||||||
if (!canConvolutionBeTransformed(context, convolution, defaultPrecisions)) {
|
if (!canConvolutionBeTransformed(context, convolution, defaultPrecisions)) {
|
||||||
const auto weightInput = convolution->get_input_node_shared_ptr(1);
|
const auto weightInput = convolution->get_input_node_shared_ptr(1);
|
||||||
const auto reshapeFromWeights = ov::as_type_ptr<opset1::Reshape>(weightInput);
|
const auto reshapeFromWeights = ov::as_type_ptr<ov::opset1::Reshape>(weightInput);
|
||||||
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
|
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
|
||||||
NetworkHelper::getDequantization(convolution, defaultPrecisions, 1ul) :
|
NetworkHelper::getDequantization(convolution, defaultPrecisions, 1ul) :
|
||||||
NetworkHelper::getDequantization(reshapeFromWeights, defaultPrecisions);
|
NetworkHelper::getDequantization(reshapeFromWeights, defaultPrecisions);
|
||||||
@ -72,12 +72,12 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
const auto fqOnWeights = getFakeQuantizeOnWeights(convolution);
|
const auto fqOnWeights = getFakeQuantizeOnWeights(convolution);
|
||||||
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
|
std::shared_ptr<ngraph::Node> resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights);
|
||||||
if (reshapeFromWeights != nullptr) {
|
if (reshapeFromWeights != nullptr) {
|
||||||
resultConstant = fold_reshape<opset1::Reshape>(
|
resultConstant = fold_reshape<ov::opset1::Reshape>(
|
||||||
resultConstant,
|
resultConstant,
|
||||||
reshapeFromWeights->input_value(1),
|
reshapeFromWeights->input_value(1),
|
||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
if (ov::is_type<opset1::Constant>(resultConstant)) {
|
if (ov::is_type<ov::opset1::Constant>(resultConstant)) {
|
||||||
replace_node(weightInput, resultConstant);
|
replace_node(weightInput, resultConstant);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -97,13 +97,13 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
|
|
||||||
std::shared_ptr<Node> newMultiplyAfter;
|
std::shared_ptr<Node> newMultiplyAfter;
|
||||||
{
|
{
|
||||||
std::shared_ptr<opset1::Subtract> subtract;
|
std::shared_ptr<ov::opset1::Subtract> subtract;
|
||||||
if (dequantization.subtract != nullptr) {
|
if (dequantization.subtract != nullptr) {
|
||||||
auto optimizedSubtract = NetworkHelper::optimizeSubtract(dequantization.subtract);
|
auto optimizedSubtract = NetworkHelper::optimizeSubtract(dequantization.subtract);
|
||||||
if (optimizedSubtract == nullptr) {
|
if (optimizedSubtract == nullptr) {
|
||||||
optimizedSubtract = dequantization.subtract;
|
optimizedSubtract = dequantization.subtract;
|
||||||
}
|
}
|
||||||
subtract = ov::as_type_ptr<opset1::Subtract>(optimizedSubtract);
|
subtract = ov::as_type_ptr<ov::opset1::Subtract>(optimizedSubtract);
|
||||||
}
|
}
|
||||||
|
|
||||||
// workaround normalizes shape of Subtract to match CPU plugin expectations
|
// workaround normalizes shape of Subtract to match CPU plugin expectations
|
||||||
@ -114,14 +114,14 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
Shape broadcastShape(length, 1);
|
Shape broadcastShape(length, 1);
|
||||||
broadcastShape[1] = getInputChannels(convolution);
|
broadcastShape[1] = getInputChannels(convolution);
|
||||||
|
|
||||||
std::shared_ptr<Node> newShift = fold<opset1::Broadcast>(
|
std::shared_ptr<Node> newShift = fold<ov::opset1::Broadcast>(
|
||||||
subtract->input_value(1),
|
subtract->input_value(1),
|
||||||
std::make_shared<opset1::Constant>(
|
std::make_shared<ov::opset1::Constant>(
|
||||||
element::i64,
|
element::i64,
|
||||||
Shape{ length },
|
Shape{ length },
|
||||||
broadcastShape));
|
broadcastShape));
|
||||||
|
|
||||||
const auto newSubtract = ov::as_type_ptr<opset1::Subtract>(subtract->clone_with_new_inputs({
|
const auto newSubtract = ov::as_type_ptr<ov::opset1::Subtract>(subtract->clone_with_new_inputs({
|
||||||
subtract->input_value(0),
|
subtract->input_value(0),
|
||||||
newShift }));
|
newShift }));
|
||||||
NetworkHelper::copyInfo(subtract, newSubtract);
|
NetworkHelper::copyInfo(subtract, newSubtract);
|
||||||
@ -155,35 +155,35 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
|
|
||||||
Shape newMulShape(outputPShape.size(), 1);
|
Shape newMulShape(outputPShape.size(), 1);
|
||||||
newMulShape[1] = outputScales.size();
|
newMulShape[1] = outputScales.size();
|
||||||
newMultiplyAfterConst = std::make_shared<opset1::Constant>(
|
newMultiplyAfterConst = std::make_shared<ov::opset1::Constant>(
|
||||||
dequantization.multiplyConstant->get_element_type(),
|
dequantization.multiplyConstant->get_element_type(),
|
||||||
newMulShape,
|
newMulShape,
|
||||||
outputScales);
|
outputScales);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
newMultiplyAfterConst = std::make_shared<opset1::Constant>(
|
newMultiplyAfterConst = std::make_shared<ov::opset1::Constant>(
|
||||||
dequantization.multiplyConstant->get_element_type(),
|
dequantization.multiplyConstant->get_element_type(),
|
||||||
Shape{ 1 },
|
Shape{ 1 },
|
||||||
dequantization.multiplyConstant->cast_vector<float>()[0]);
|
dequantization.multiplyConstant->cast_vector<float>()[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto copyNode = convolution->clone_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) });
|
const auto copyNode = convolution->clone_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) });
|
||||||
auto conv = ov::as_type_ptr<opset1::Convolution>(copyNode);
|
auto conv = ov::as_type_ptr<ov::opset1::Convolution>(copyNode);
|
||||||
std::shared_ptr<Node> relaxedNewConvolution;
|
std::shared_ptr<Node> relaxedNewConvolution;
|
||||||
if (conv) {
|
if (conv) {
|
||||||
relaxedNewConvolution = std::make_shared<ov::op::TypeRelaxed<opset1::Convolution>>(
|
relaxedNewConvolution = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Convolution>>(
|
||||||
*conv,
|
*conv,
|
||||||
std::vector<element::Type>{deqPrecision, deqPrecision},
|
std::vector<element::Type>{deqPrecision, deqPrecision},
|
||||||
std::vector<element::Type>{deqPrecision});
|
std::vector<element::Type>{deqPrecision});
|
||||||
} else {
|
} else {
|
||||||
relaxedNewConvolution = std::make_shared<ov::op::TypeRelaxed<opset1::GroupConvolution>>(
|
relaxedNewConvolution = std::make_shared<ov::op::TypeRelaxed<ov::opset1::GroupConvolution>>(
|
||||||
*ov::as_type_ptr<opset1::GroupConvolution>(copyNode),
|
*ov::as_type_ptr<ov::opset1::GroupConvolution>(copyNode),
|
||||||
std::vector<element::Type>{deqPrecision, deqPrecision},
|
std::vector<element::Type>{deqPrecision, deqPrecision},
|
||||||
std::vector<element::Type>{deqPrecision});
|
std::vector<element::Type>{deqPrecision});
|
||||||
}
|
}
|
||||||
NetworkHelper::copyInfo(convolution, relaxedNewConvolution);
|
NetworkHelper::copyInfo(convolution, relaxedNewConvolution);
|
||||||
|
|
||||||
newMultiplyAfter = std::make_shared<ov::op::TypeRelaxed<opset1::Multiply>>(
|
newMultiplyAfter = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Multiply>>(
|
||||||
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
||||||
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
|
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
|
||||||
ov::op::TemporaryReplaceOutputType(relaxedNewConvolution, deqPrecision).get(),
|
ov::op::TemporaryReplaceOutputType(relaxedNewConvolution, deqPrecision).get(),
|
||||||
@ -192,7 +192,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
NetworkHelper::insertDequantizationAfter(convolution, newMultiplyAfter, relaxedNewConvolution);
|
NetworkHelper::insertDequantizationAfter(convolution, newMultiplyAfter, relaxedNewConvolution);
|
||||||
convolution = newMultiplyAfter->input_value(0).get_node_shared_ptr();
|
convolution = newMultiplyAfter->input_value(0).get_node_shared_ptr();
|
||||||
|
|
||||||
if (ov::is_type<opset1::Convert>(convolution->get_input_node_ptr(0))) {
|
if (ov::is_type<ov::opset1::Convert>(convolution->get_input_node_ptr(0))) {
|
||||||
auto newConvolution = convolution->clone_with_new_inputs({
|
auto newConvolution = convolution->clone_with_new_inputs({
|
||||||
convolution->get_input_node_ptr(0)->input_value(0),
|
convolution->get_input_node_ptr(0)->input_value(0),
|
||||||
convolution->input_value(1)});
|
convolution->input_value(1)});
|
||||||
@ -208,27 +208,27 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<opset1::Reshape> reshapeFromWeights = ov::as_type_ptr<opset1::Reshape>(convolution->get_input_node_shared_ptr(1));
|
std::shared_ptr<ov::opset1::Reshape> reshapeFromWeights = ov::as_type_ptr<ov::opset1::Reshape>(convolution->get_input_node_shared_ptr(1));
|
||||||
|
|
||||||
dequantization = reshapeFromWeights == nullptr ?
|
dequantization = reshapeFromWeights == nullptr ?
|
||||||
NetworkHelper::getDequantization(convolution, defaultPrecisions, 1ul) :
|
NetworkHelper::getDequantization(convolution, defaultPrecisions, 1ul) :
|
||||||
NetworkHelper::getDequantization(reshapeFromWeights, defaultPrecisions);
|
NetworkHelper::getDequantization(reshapeFromWeights, defaultPrecisions);
|
||||||
assert(!dequantization.empty());
|
assert(!dequantization.empty());
|
||||||
if (const auto fq = ov::as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr())) {
|
if (const auto fq = ov::as_type_ptr<ov::opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr())) {
|
||||||
const auto newFQ = NetworkHelper::fold_fake_quantize(fq, true);
|
const auto newFQ = NetworkHelper::fold_fake_quantize(fq, true);
|
||||||
NetworkHelper::copyInfo(fq, newFQ);
|
NetworkHelper::copyInfo(fq, newFQ);
|
||||||
replace_node(fq, newFQ);
|
replace_node(fq, newFQ);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<opset1::Multiply> multiplyFromWeights = ov::as_type_ptr<opset1::Multiply>(
|
std::shared_ptr<ov::opset1::Multiply> multiplyFromWeights = ov::as_type_ptr<ov::opset1::Multiply>(
|
||||||
reshapeFromWeights == nullptr ?
|
reshapeFromWeights == nullptr ?
|
||||||
convolution->get_input_node_shared_ptr(1) :
|
convolution->get_input_node_shared_ptr(1) :
|
||||||
convolution->get_input_node_ptr(1)->get_input_node_shared_ptr(0));
|
convolution->get_input_node_ptr(1)->get_input_node_shared_ptr(0));
|
||||||
std::shared_ptr<opset1::Subtract> subtractFromWeights = ov::as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));
|
std::shared_ptr<ov::opset1::Subtract> subtractFromWeights = ov::as_type_ptr<ov::opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));
|
||||||
|
|
||||||
{
|
{
|
||||||
if (reshapeFromWeights != nullptr) {
|
if (reshapeFromWeights != nullptr) {
|
||||||
reshapeFromWeights = ov::as_type_ptr<opset1::Reshape>(reshapeFromWeights->clone_with_new_inputs({
|
reshapeFromWeights = ov::as_type_ptr<ov::opset1::Reshape>(reshapeFromWeights->clone_with_new_inputs({
|
||||||
multiplyFromWeights->input_value(0),
|
multiplyFromWeights->input_value(0),
|
||||||
reshapeFromWeights->input_value(1) }));
|
reshapeFromWeights->input_value(1) }));
|
||||||
}
|
}
|
||||||
@ -251,12 +251,12 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
return new_shape;
|
return new_shape;
|
||||||
}();
|
}();
|
||||||
|
|
||||||
newMultiplyAfter = std::make_shared<opset1::Multiply>(
|
newMultiplyAfter = std::make_shared<ov::opset1::Multiply>(
|
||||||
newConvolution,
|
newConvolution,
|
||||||
foldConvert(
|
foldConvert(
|
||||||
fold_reshape<opset1::Reshape>(
|
fold_reshape<ov::opset1::Reshape>(
|
||||||
multiplyFromWeights->input_value(1),
|
multiplyFromWeights->input_value(1),
|
||||||
std::make_shared<opset1::Constant>(element::i32, Shape{ newScaleShape.size() }, newScaleShape),
|
std::make_shared<ov::opset1::Constant>(element::i32, Shape{ newScaleShape.size() }, newScaleShape),
|
||||||
false),
|
false),
|
||||||
convolution->get_output_element_type(0)));
|
convolution->get_output_element_type(0)));
|
||||||
NetworkHelper::insertDequantizationAfter(convolution, newMultiplyAfter, newConvolution);
|
NetworkHelper::insertDequantizationAfter(convolution, newMultiplyAfter, newConvolution);
|
||||||
@ -271,7 +271,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
if (optimizedSubtract == nullptr) {
|
if (optimizedSubtract == nullptr) {
|
||||||
subtractFromWeights = nullptr;
|
subtractFromWeights = nullptr;
|
||||||
} else {
|
} else {
|
||||||
subtractFromWeights = ov::as_type_ptr<opset1::Subtract>(optimizedSubtract);
|
subtractFromWeights = ov::as_type_ptr<ov::opset1::Subtract>(optimizedSubtract);
|
||||||
|
|
||||||
const auto weightsPShape = subtractFromWeights->get_input_partial_shape(0);
|
const auto weightsPShape = subtractFromWeights->get_input_partial_shape(0);
|
||||||
assert(weightsPShape.is_static());
|
assert(weightsPShape.is_static());
|
||||||
@ -285,15 +285,15 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
zeroPointShape[1] = static_cast<size_t>(weightsPShape[1].get_length());
|
zeroPointShape[1] = static_cast<size_t>(weightsPShape[1].get_length());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto zeroPointConstant = fold<opset1::Broadcast>(
|
auto zeroPointConstant = fold<ov::opset1::Broadcast>(
|
||||||
subtractFromWeights->input_value(1),
|
subtractFromWeights->input_value(1),
|
||||||
std::make_shared<opset1::Constant>(element::i32, Shape{ zeroPointShape.size() }, zeroPointShape));
|
std::make_shared<ov::opset1::Constant>(element::i32, Shape{ zeroPointShape.size() }, zeroPointShape));
|
||||||
NetworkHelper::copyInfo(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
|
NetworkHelper::copyInfo(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
|
||||||
replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
|
replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<opset1::Convert> convertFromWeights = ov::as_type_ptr<opset1::Convert>(subtractFromWeights == nullptr ?
|
std::shared_ptr<ov::opset1::Convert> convertFromWeights = ov::as_type_ptr<ov::opset1::Convert>(subtractFromWeights == nullptr ?
|
||||||
multiplyFromWeights->get_input_node_shared_ptr(0) :
|
multiplyFromWeights->get_input_node_shared_ptr(0) :
|
||||||
subtractFromWeights->get_input_node_shared_ptr(0));
|
subtractFromWeights->get_input_node_shared_ptr(0));
|
||||||
if (convertFromWeights != nullptr) {
|
if (convertFromWeights != nullptr) {
|
||||||
@ -310,10 +310,10 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
convolution = newConvolution;
|
convolution = newConvolution;
|
||||||
}
|
}
|
||||||
|
|
||||||
reshapeFromWeights = ov::as_type_ptr<opset1::Reshape>(convolution->get_input_node_shared_ptr(1));
|
reshapeFromWeights = ov::as_type_ptr<ov::opset1::Reshape>(convolution->get_input_node_shared_ptr(1));
|
||||||
if (reshapeFromWeights != nullptr) {
|
if (reshapeFromWeights != nullptr) {
|
||||||
// remove Reshape on weights
|
// remove Reshape on weights
|
||||||
const std::shared_ptr<Node> newWeights = fold_reshape<opset1::Reshape>(
|
const std::shared_ptr<Node> newWeights = fold_reshape<ov::opset1::Reshape>(
|
||||||
reshapeFromWeights->input_value(0),
|
reshapeFromWeights->input_value(0),
|
||||||
reshapeFromWeights->input_value(1),
|
reshapeFromWeights->input_value(1),
|
||||||
false);
|
false);
|
||||||
@ -323,15 +323,15 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
|||||||
}
|
}
|
||||||
|
|
||||||
const auto finalDequantization = NetworkHelper::optimizeMultipliesAfter(newMultiplyAfter);
|
const auto finalDequantization = NetworkHelper::optimizeMultipliesAfter(newMultiplyAfter);
|
||||||
ngraph::copy_runtime_info({ convolution, finalDequantization }, finalDequantization);
|
ov::copy_runtime_info({ convolution, finalDequantization }, finalDequantization);
|
||||||
updateOutput(context, finalDequantization, convolution);
|
updateOutput(context, finalDequantization, convolution);
|
||||||
|
|
||||||
auto onWeights = convolution->get_input_node_shared_ptr(1);
|
auto onWeights = convolution->get_input_node_shared_ptr(1);
|
||||||
if (ov::is_type<opset1::Reshape>(onWeights)) {
|
if (ov::is_type<ov::opset1::Reshape>(onWeights)) {
|
||||||
onWeights = onWeights->get_input_node_shared_ptr(0);
|
onWeights = onWeights->get_input_node_shared_ptr(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ov::is_type<opset1::Subtract>(onWeights)) {
|
if (ov::is_type<ov::opset1::Subtract>(onWeights)) {
|
||||||
ov::disable_constant_folding(onWeights);
|
ov::disable_constant_folding(onWeights);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -23,27 +23,27 @@ namespace low_precision {
|
|||||||
ConvolutionBackpropDataTransformation::ConvolutionBackpropDataTransformation(const Params& params) : WeightableLayerTransformation(params) {
|
ConvolutionBackpropDataTransformation::ConvolutionBackpropDataTransformation(const Params& params) : WeightableLayerTransformation(params) {
|
||||||
MATCHER_SCOPE(ConvolutionBackpropDataTransformation);
|
MATCHER_SCOPE(ConvolutionBackpropDataTransformation);
|
||||||
auto matcher = std::make_shared<pattern::op::Or>(OutputVector{
|
auto matcher = std::make_shared<pattern::op::Or>(OutputVector{
|
||||||
pattern::wrap_type<opset1::ConvolutionBackpropData>({
|
pattern::wrap_type<ov::opset1::ConvolutionBackpropData>({
|
||||||
pattern::wrap_type<opset1::Multiply>(),
|
pattern::wrap_type<ov::opset1::Multiply>(),
|
||||||
pattern::wrap_type<opset1::Multiply>()
|
pattern::wrap_type<ov::opset1::Multiply>()
|
||||||
}),
|
}),
|
||||||
ngraph::pattern::wrap_type<opset1::ConvolutionBackpropData>({
|
ngraph::pattern::wrap_type<ov::opset1::ConvolutionBackpropData>({
|
||||||
pattern::wrap_type<opset1::Multiply>(),
|
pattern::wrap_type<ov::opset1::Multiply>(),
|
||||||
pattern::wrap_type<opset1::FakeQuantize>()
|
pattern::wrap_type<ov::opset1::FakeQuantize>()
|
||||||
}),
|
}),
|
||||||
ngraph::pattern::wrap_type<opset1::ConvolutionBackpropData>({
|
ngraph::pattern::wrap_type<ov::opset1::ConvolutionBackpropData>({
|
||||||
pattern::wrap_type<opset1::Multiply>(),
|
pattern::wrap_type<ov::opset1::Multiply>(),
|
||||||
pattern::wrap_type<opset1::Multiply>(),
|
pattern::wrap_type<ov::opset1::Multiply>(),
|
||||||
pattern::wrap_type<opset1::Constant>()
|
pattern::wrap_type<ov::opset1::Constant>()
|
||||||
}),
|
}),
|
||||||
ngraph::pattern::wrap_type<opset1::ConvolutionBackpropData>({
|
ngraph::pattern::wrap_type<ov::opset1::ConvolutionBackpropData>({
|
||||||
pattern::wrap_type<opset1::Multiply>(),
|
pattern::wrap_type<ov::opset1::Multiply>(),
|
||||||
pattern::wrap_type<opset1::FakeQuantize>(),
|
pattern::wrap_type<ov::opset1::FakeQuantize>(),
|
||||||
pattern::wrap_type<opset1::Constant>()
|
pattern::wrap_type<ov::opset1::Constant>()
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ov::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
if (transformation_callback(op)) {
|
if (transformation_callback(op)) {
|
||||||
return false;
|
return false;
|
||||||
@ -76,7 +76,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
|
|
||||||
if (!canBeTransformed(context, convolutionBackpropData)) {
|
if (!canBeTransformed(context, convolutionBackpropData)) {
|
||||||
auto weightsInput = convolutionBackpropData->get_input_node_shared_ptr(1);
|
auto weightsInput = convolutionBackpropData->get_input_node_shared_ptr(1);
|
||||||
std::shared_ptr<opset1::Reshape> reshapeFromWeights = ov::as_type_ptr<opset1::Reshape>(weightsInput);
|
std::shared_ptr<ov::opset1::Reshape> reshapeFromWeights = ov::as_type_ptr<ov::opset1::Reshape>(weightsInput);
|
||||||
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
|
FakeQuantizeDequantization dequantization = reshapeFromWeights == nullptr ?
|
||||||
NetworkHelper::getDequantization(convolutionBackpropData, defaultPrecisions, 1ul) :
|
NetworkHelper::getDequantization(convolutionBackpropData, defaultPrecisions, 1ul) :
|
||||||
NetworkHelper::getDequantization(reshapeFromWeights, defaultPrecisions);
|
NetworkHelper::getDequantization(reshapeFromWeights, defaultPrecisions);
|
||||||
@ -89,12 +89,12 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
|
|
||||||
auto resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights, false);
|
auto resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights, false);
|
||||||
if (reshapeFromWeights != nullptr) {
|
if (reshapeFromWeights != nullptr) {
|
||||||
resultConstant = fold_reshape<opset1::Reshape>(
|
resultConstant = fold_reshape<ov::opset1::Reshape>(
|
||||||
resultConstant,
|
resultConstant,
|
||||||
reshapeFromWeights->input_value(1),
|
reshapeFromWeights->input_value(1),
|
||||||
false);
|
false);
|
||||||
}
|
}
|
||||||
if (ov::is_type<opset1::Constant>(resultConstant)) {
|
if (ov::is_type<ov::opset1::Constant>(resultConstant)) {
|
||||||
replace_node(weightsInput, resultConstant);
|
replace_node(weightsInput, resultConstant);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -111,7 +111,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
NetworkHelper::optimizeSubtract(dequantization.subtract);
|
NetworkHelper::optimizeSubtract(dequantization.subtract);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<Node> newMultiplyAfterConst = std::make_shared<opset1::Constant>(
|
std::shared_ptr<Node> newMultiplyAfterConst = std::make_shared<ov::opset1::Constant>(
|
||||||
dequantization.multiplyConstant->get_element_type(),
|
dequantization.multiplyConstant->get_element_type(),
|
||||||
Shape{ 1 },
|
Shape{ 1 },
|
||||||
dequantization.multiplyConstant->cast_vector<float>()[0]);
|
dequantization.multiplyConstant->cast_vector<float>()[0]);
|
||||||
@ -119,12 +119,12 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
inputs[0] = dequantization.multiply->input_value(0);
|
inputs[0] = dequantization.multiply->input_value(0);
|
||||||
const auto copyNode = convolutionBackpropData->clone_with_new_inputs(inputs);
|
const auto copyNode = convolutionBackpropData->clone_with_new_inputs(inputs);
|
||||||
|
|
||||||
const auto relaxedConvolutionBackpropData = std::make_shared<ov::op::TypeRelaxed<opset1::ConvolutionBackpropData>>(
|
const auto relaxedConvolutionBackpropData = std::make_shared<ov::op::TypeRelaxed<ov::opset1::ConvolutionBackpropData>>(
|
||||||
*ov::as_type_ptr<opset1::ConvolutionBackpropData>(copyNode),
|
*ov::as_type_ptr<ov::opset1::ConvolutionBackpropData>(copyNode),
|
||||||
std::vector<element::Type>{deqPrecision, deqPrecision},
|
std::vector<element::Type>{deqPrecision, deqPrecision},
|
||||||
std::vector<element::Type>{deqPrecision});
|
std::vector<element::Type>{deqPrecision});
|
||||||
|
|
||||||
newMultiplyAfter = std::make_shared<ov::op::TypeRelaxed<opset1::Multiply>>(
|
newMultiplyAfter = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Multiply>>(
|
||||||
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
||||||
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
|
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
|
||||||
ov::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(),
|
ov::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(),
|
||||||
@ -133,7 +133,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
|
|
||||||
convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
|
convolutionBackpropData = newMultiplyAfter->get_input_node_shared_ptr(0);
|
||||||
inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
|
inputs[0] = convolutionBackpropData->get_input_node_ptr(0)->input_value(0);
|
||||||
if (ov::is_type<opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
|
if (ov::is_type<ov::opset1::Convert>(convolutionBackpropData->get_input_node_ptr(0))) {
|
||||||
auto newConvolution = convolutionBackpropData->clone_with_new_inputs(inputs);
|
auto newConvolution = convolutionBackpropData->clone_with_new_inputs(inputs);
|
||||||
replace_node(convolutionBackpropData, newConvolution);
|
replace_node(convolutionBackpropData, newConvolution);
|
||||||
convolutionBackpropData = newConvolution;
|
convolutionBackpropData = newConvolution;
|
||||||
@ -144,14 +144,14 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);
|
decomposeFakeQuantizeForWeightsPath(convolutionBackpropData, 1ul);
|
||||||
dequantization = NetworkHelper::getDequantization(convolutionBackpropData, defaultPrecisions, 1ul);
|
dequantization = NetworkHelper::getDequantization(convolutionBackpropData, defaultPrecisions, 1ul);
|
||||||
|
|
||||||
if (const auto fq = ov::as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr())) {
|
if (const auto fq = ov::as_type_ptr<ov::opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr())) {
|
||||||
const auto newFQ = NetworkHelper::fold_fake_quantize(fq, true);
|
const auto newFQ = NetworkHelper::fold_fake_quantize(fq, true);
|
||||||
NetworkHelper::copyInfo(fq, newFQ);
|
NetworkHelper::copyInfo(fq, newFQ);
|
||||||
replace_node(fq, newFQ);
|
replace_node(fq, newFQ);
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto multiplyFromWeights = convolutionBackpropData->get_input_node_shared_ptr(1);
|
const auto multiplyFromWeights = convolutionBackpropData->get_input_node_shared_ptr(1);
|
||||||
auto subtractFromWeights = ov::as_type_ptr<opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));
|
auto subtractFromWeights = ov::as_type_ptr<ov::opset1::Subtract>(multiplyFromWeights->get_input_node_shared_ptr(0));
|
||||||
|
|
||||||
{
|
{
|
||||||
const auto newScalePShape = multiplyFromWeights->get_input_partial_shape(1);
|
const auto newScalePShape = multiplyFromWeights->get_input_partial_shape(1);
|
||||||
@ -162,12 +162,12 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
inputs[1] = multiplyFromWeights->input_value(0);
|
inputs[1] = multiplyFromWeights->input_value(0);
|
||||||
|
|
||||||
const auto newconvolutionBackpropData = convolutionBackpropData->copy_with_new_inputs(inputs);
|
const auto newconvolutionBackpropData = convolutionBackpropData->copy_with_new_inputs(inputs);
|
||||||
newMultiplyAfter = std::make_shared<opset1::Multiply>(
|
newMultiplyAfter = std::make_shared<ov::opset1::Multiply>(
|
||||||
newconvolutionBackpropData,
|
newconvolutionBackpropData,
|
||||||
foldConvert(
|
foldConvert(
|
||||||
fold_reshape<opset1::Reshape>(
|
fold_reshape<ov::opset1::Reshape>(
|
||||||
multiplyFromWeights->input_value(1),
|
multiplyFromWeights->input_value(1),
|
||||||
std::make_shared<opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
|
std::make_shared<ov::opset1::Constant>(element::u64, Shape{ newScaleShape.size() }, newScaleShape),
|
||||||
false),
|
false),
|
||||||
convolutionBackpropData->get_output_element_type(0)));
|
convolutionBackpropData->get_output_element_type(0)));
|
||||||
NetworkHelper::insertDequantizationAfter(convolutionBackpropData, newMultiplyAfter, newconvolutionBackpropData);
|
NetworkHelper::insertDequantizationAfter(convolutionBackpropData, newMultiplyAfter, newconvolutionBackpropData);
|
||||||
@ -180,7 +180,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
if (optimizedSubtract == nullptr) {
|
if (optimizedSubtract == nullptr) {
|
||||||
subtractFromWeights = nullptr;
|
subtractFromWeights = nullptr;
|
||||||
} else {
|
} else {
|
||||||
subtractFromWeights = ov::as_type_ptr<opset1::Subtract>(optimizedSubtract);
|
subtractFromWeights = ov::as_type_ptr<ov::opset1::Subtract>(optimizedSubtract);
|
||||||
|
|
||||||
const auto weightsPShape = subtractFromWeights->get_input_partial_shape(0);
|
const auto weightsPShape = subtractFromWeights->get_input_partial_shape(0);
|
||||||
assert(weightsPShape.is_static());
|
assert(weightsPShape.is_static());
|
||||||
@ -189,15 +189,15 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
Shape zeroPointShape(weightsRankValue, 1ul);
|
Shape zeroPointShape(weightsRankValue, 1ul);
|
||||||
zeroPointShape[1] = static_cast<size_t>(weightsPShape[1].get_length());
|
zeroPointShape[1] = static_cast<size_t>(weightsPShape[1].get_length());
|
||||||
|
|
||||||
auto zeroPointConstant = fold<opset1::Broadcast>(
|
auto zeroPointConstant = fold<ov::opset1::Broadcast>(
|
||||||
subtractFromWeights->input_value(1),
|
subtractFromWeights->input_value(1),
|
||||||
std::make_shared<opset1::Constant>(element::i32, Shape{zeroPointShape.size()}, zeroPointShape));
|
std::make_shared<ov::opset1::Constant>(element::i32, Shape{zeroPointShape.size()}, zeroPointShape));
|
||||||
replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
|
replace_node(subtractFromWeights->get_input_node_shared_ptr(1), zeroPointConstant);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<opset1::Convert> convertFromWeights =
|
std::shared_ptr<ov::opset1::Convert> convertFromWeights =
|
||||||
ov::as_type_ptr<opset1::Convert>(
|
ov::as_type_ptr<ov::opset1::Convert>(
|
||||||
subtractFromWeights == nullptr ?
|
subtractFromWeights == nullptr ?
|
||||||
multiplyFromWeights->get_input_node_shared_ptr(0) :
|
multiplyFromWeights->get_input_node_shared_ptr(0) :
|
||||||
subtractFromWeights->get_input_node_shared_ptr(0));
|
subtractFromWeights->get_input_node_shared_ptr(0));
|
||||||
@ -212,15 +212,15 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
|||||||
}
|
}
|
||||||
|
|
||||||
const auto finalDequantization = NetworkHelper::optimizeMultipliesAfter(newMultiplyAfter);
|
const auto finalDequantization = NetworkHelper::optimizeMultipliesAfter(newMultiplyAfter);
|
||||||
ngraph::copy_runtime_info({ convolutionBackpropData, finalDequantization }, finalDequantization);
|
ov::copy_runtime_info({ convolutionBackpropData, finalDequantization }, finalDequantization);
|
||||||
updateOutput(context, finalDequantization, convolutionBackpropData);
|
updateOutput(context, finalDequantization, convolutionBackpropData);
|
||||||
|
|
||||||
auto onWeights = convolutionBackpropData->get_input_node_shared_ptr(1);
|
auto onWeights = convolutionBackpropData->get_input_node_shared_ptr(1);
|
||||||
if (ov::is_type<opset1::Reshape>(onWeights)) {
|
if (ov::is_type<ov::opset1::Reshape>(onWeights)) {
|
||||||
onWeights = onWeights->get_input_node_shared_ptr(0);
|
onWeights = onWeights->get_input_node_shared_ptr(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ov::is_type<opset1::Subtract>(onWeights)) {
|
if (ov::is_type<ov::opset1::Subtract>(onWeights)) {
|
||||||
ov::disable_constant_folding(onWeights);
|
ov::disable_constant_folding(onWeights);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,13 +9,11 @@
|
|||||||
#include "low_precision/network_helper.hpp"
|
#include "low_precision/network_helper.hpp"
|
||||||
#include "itt.hpp"
|
#include "itt.hpp"
|
||||||
|
|
||||||
using namespace ngraph;
|
|
||||||
using namespace ngraph::pass;
|
|
||||||
using namespace ngraph::pass::low_precision;
|
using namespace ngraph::pass::low_precision;
|
||||||
|
|
||||||
DepthToSpaceTransformation::DepthToSpaceTransformation(const Params& params) : TransparentBaseTransformation(params) {
|
DepthToSpaceTransformation::DepthToSpaceTransformation(const Params& params) : TransparentBaseTransformation(params) {
|
||||||
MATCHER_SCOPE(DepthToSpaceTransformation);
|
MATCHER_SCOPE(DepthToSpaceTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::DepthToSpace>({ pattern::wrap_type<opset1::Multiply>() });
|
auto matcher = pattern::wrap_type<ov::opset1::DepthToSpace>({ pattern::wrap_type<ov::opset1::Multiply>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -29,7 +27,7 @@ DepthToSpaceTransformation::DepthToSpaceTransformation(const Params& params) : T
|
|||||||
this->register_matcher(m, callback);
|
this->register_matcher(m, callback);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DepthToSpaceTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const {
|
bool DepthToSpaceTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<ov::Node> layer) const {
|
||||||
if (!LayerTransformation::canBeTransformed(context, layer)) {
|
if (!LayerTransformation::canBeTransformed(context, layer)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -17,12 +17,12 @@ namespace low_precision {
|
|||||||
|
|
||||||
EliminateFakeQuantizeTransformation::EliminateFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {
|
EliminateFakeQuantizeTransformation::EliminateFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(FuseMultiplyToFakeQuantizeTransformation);
|
MATCHER_SCOPE(FuseMultiplyToFakeQuantizeTransformation);
|
||||||
const auto matcher = pattern::wrap_type<opset1::FakeQuantize>({
|
const auto matcher = pattern::wrap_type<ov::opset1::FakeQuantize>({
|
||||||
pattern::any_input(),
|
pattern::any_input(),
|
||||||
pattern::wrap_type<opset1::Constant>(),
|
pattern::wrap_type<ov::opset1::Constant>(),
|
||||||
pattern::wrap_type<opset1::Constant>(),
|
pattern::wrap_type<ov::opset1::Constant>(),
|
||||||
pattern::wrap_type<opset1::Constant>(),
|
pattern::wrap_type<ov::opset1::Constant>(),
|
||||||
pattern::wrap_type<opset1::Constant>()
|
pattern::wrap_type<ov::opset1::Constant>()
|
||||||
});
|
});
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
@ -47,8 +47,8 @@ bool EliminateFakeQuantizeTransformation::transform(TransformationContext& conte
|
|||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
bool check_interval(const std::shared_ptr<opset1::FakeQuantize>& fq,
|
bool check_interval(const std::shared_ptr<ov::opset1::FakeQuantize>& fq,
|
||||||
const std::shared_ptr<opset1::Constant>& constant,
|
const std::shared_ptr<ov::opset1::Constant>& constant,
|
||||||
const float value,
|
const float value,
|
||||||
const float max_diff,
|
const float max_diff,
|
||||||
const bool exact_comparison) noexcept {
|
const bool exact_comparison) noexcept {
|
||||||
@ -67,14 +67,14 @@ bool check_interval(const std::shared_ptr<opset1::FakeQuantize>& fq,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (need_to_check_intervals) {
|
if (need_to_check_intervals) {
|
||||||
auto tmp_fq = as_type_ptr<opset1::FakeQuantize>(fq->clone_with_new_inputs({
|
auto tmp_fq = as_type_ptr<ov::opset1::FakeQuantize>(fq->clone_with_new_inputs({
|
||||||
constant,
|
constant,
|
||||||
fq->get_input_node_shared_ptr(1),
|
fq->get_input_node_shared_ptr(1),
|
||||||
fq->get_input_node_shared_ptr(2),
|
fq->get_input_node_shared_ptr(2),
|
||||||
fq->get_input_node_shared_ptr(3),
|
fq->get_input_node_shared_ptr(3),
|
||||||
fq->get_input_node_shared_ptr(4)}));
|
fq->get_input_node_shared_ptr(4)}));
|
||||||
auto result = NetworkHelper::fold_fake_quantize(tmp_fq, false);
|
auto result = NetworkHelper::fold_fake_quantize(tmp_fq, false);
|
||||||
const auto result_constant = as_type_ptr<opset1::Constant>(result);
|
const auto result_constant = as_type_ptr<ov::opset1::Constant>(result);
|
||||||
if (result_constant == nullptr) {
|
if (result_constant == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -90,7 +90,7 @@ bool check_interval(const std::shared_ptr<opset1::FakeQuantize>& fq,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool check_intervals(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize) {
|
bool check_intervals(const std::shared_ptr<ov::opset1::FakeQuantize>& fakeQuantize) {
|
||||||
const auto& element_type = fakeQuantize->get_output_element_type(0);
|
const auto& element_type = fakeQuantize->get_output_element_type(0);
|
||||||
const auto levels = fakeQuantize->get_levels();
|
const auto levels = fakeQuantize->get_levels();
|
||||||
const auto min_value = DataPrecision::getMinValue(element_type, levels);
|
const auto min_value = DataPrecision::getMinValue(element_type, levels);
|
||||||
@ -100,15 +100,19 @@ bool check_intervals(const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize)
|
|||||||
const auto exact_comparison = !element_type.is_integral();
|
const auto exact_comparison = !element_type.is_integral();
|
||||||
|
|
||||||
return
|
return
|
||||||
check_interval(fakeQuantize, ov::as_type_ptr<opset1::Constant>(fakeQuantize->get_input_node_shared_ptr(1)), min_value, max_diff, exact_comparison) &&
|
check_interval(fakeQuantize, ov::as_type_ptr<ov::opset1::Constant>(fakeQuantize->get_input_node_shared_ptr(1)),
|
||||||
check_interval(fakeQuantize, ov::as_type_ptr<opset1::Constant>(fakeQuantize->get_input_node_shared_ptr(2)), max_value, max_diff, exact_comparison) &&
|
min_value, max_diff, exact_comparison) &&
|
||||||
check_interval(fakeQuantize, ov::as_type_ptr<opset1::Constant>(fakeQuantize->get_input_node_shared_ptr(3)), min_value, max_diff, true) &&
|
check_interval(fakeQuantize, ov::as_type_ptr<ov::opset1::Constant>(fakeQuantize->get_input_node_shared_ptr(2)),
|
||||||
check_interval(fakeQuantize, ov::as_type_ptr<opset1::Constant>(fakeQuantize->get_input_node_shared_ptr(4)), max_value, max_diff, true);
|
max_value, max_diff, exact_comparison) &&
|
||||||
|
check_interval(fakeQuantize, ov::as_type_ptr<ov::opset1::Constant>(fakeQuantize->get_input_node_shared_ptr(3)),
|
||||||
|
min_value, max_diff, true) &&
|
||||||
|
check_interval(fakeQuantize, ov::as_type_ptr<ov::opset1::Constant>(fakeQuantize->get_input_node_shared_ptr(4)),
|
||||||
|
max_value, max_diff, true);
|
||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
bool EliminateFakeQuantizeTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const {
|
bool EliminateFakeQuantizeTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const {
|
||||||
const auto fakeQuantize = ov::as_type_ptr<opset1::FakeQuantize>(operation);
|
const auto fakeQuantize = ov::as_type_ptr<ov::opset1::FakeQuantize>(operation);
|
||||||
OPENVINO_ASSERT(fakeQuantize != nullptr, "unexpected operation type");
|
OPENVINO_ASSERT(fakeQuantize != nullptr, "unexpected operation type");
|
||||||
|
|
||||||
const auto& input_type = fakeQuantize->get_input_element_type(0);
|
const auto& input_type = fakeQuantize->get_input_element_type(0);
|
||||||
|
@ -60,14 +60,14 @@ bool EltwiseBaseTransformation::canBeTransformed(const TransformationContext& co
|
|||||||
}
|
}
|
||||||
|
|
||||||
static bool isTargetType(const std::shared_ptr<Node> node) {
|
static bool isTargetType(const std::shared_ptr<Node> node) {
|
||||||
return node != nullptr && (ov::is_type<opset1::Convolution>(node) ||
|
return node != nullptr && (ov::is_type<ov::opset1::Convolution>(node) ||
|
||||||
ov::is_type<opset1::GroupConvolution>(node) ||
|
ov::is_type<ov::opset1::GroupConvolution>(node) ||
|
||||||
ov::is_type<opset1::MatMul>(node));
|
ov::is_type<ov::opset1::MatMul>(node));
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::shared_ptr<Node> getDataParent(const std::shared_ptr<Node> branchData) {
|
static std::shared_ptr<Node> getDataParent(const std::shared_ptr<Node> branchData) {
|
||||||
std::shared_ptr<Node> parent = branchData;
|
std::shared_ptr<Node> parent = branchData;
|
||||||
while (ov::is_type<opset1::FakeQuantize>(parent)) {
|
while (ov::is_type<ov::opset1::FakeQuantize>(parent)) {
|
||||||
parent = parent->get_input_node_shared_ptr(0);
|
parent = parent->get_input_node_shared_ptr(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -101,7 +101,7 @@ static bool isBranchHaveMultipleConsumers(const std::shared_ptr<Node> branchData
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
const auto new_parent = parent->get_input_node_shared_ptr(0);
|
const auto new_parent = parent->get_input_node_shared_ptr(0);
|
||||||
parent = !ov::is_type<opset1::Constant>(new_parent) ? new_parent : parent->get_input_node_shared_ptr(1);
|
parent = !ov::is_type<ov::opset1::Constant>(new_parent) ? new_parent : parent->get_input_node_shared_ptr(1);
|
||||||
}
|
}
|
||||||
return several_consumers(parent);
|
return several_consumers(parent);
|
||||||
}
|
}
|
||||||
@ -109,12 +109,12 @@ static bool isBranchHaveMultipleConsumers(const std::shared_ptr<Node> branchData
|
|||||||
// return branch index with FP32 precision after eltwise transformation
|
// return branch index with FP32 precision after eltwise transformation
|
||||||
int EltwiseBaseTransformation::getNotEmpty(const std::shared_ptr<Node>& eltwise) const {
|
int EltwiseBaseTransformation::getNotEmpty(const std::shared_ptr<Node>& eltwise) const {
|
||||||
const FakeQuantizeDequantization dequantization1 = pass::low_precision::NetworkHelper::getDequantization(eltwise, defaultPrecisions, 0ul);
|
const FakeQuantizeDequantization dequantization1 = pass::low_precision::NetworkHelper::getDequantization(eltwise, defaultPrecisions, 0ul);
|
||||||
if (ov::as_type<opset1::Constant>(dequantization1.data.get_node())) {
|
if (ov::as_type<ov::opset1::Constant>(dequantization1.data.get_node())) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
const FakeQuantizeDequantization dequantization2 = pass::low_precision::NetworkHelper::getDequantization(eltwise, defaultPrecisions, 1ul);
|
const FakeQuantizeDequantization dequantization2 = pass::low_precision::NetworkHelper::getDequantization(eltwise, defaultPrecisions, 1ul);
|
||||||
if (ov::as_type<opset1::Constant>(dequantization2.data.get_node())) {
|
if (ov::as_type<ov::opset1::Constant>(dequantization2.data.get_node())) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -142,10 +142,10 @@ int EltwiseBaseTransformation::getNotEmpty(const std::shared_ptr<Node>& eltwise)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::shared_ptr<opset1::FakeQuantize> fakeQuantize1 =
|
const std::shared_ptr<ov::opset1::FakeQuantize> fakeQuantize1 =
|
||||||
ov::as_type_ptr<opset1::FakeQuantize>(dequantization1.data.get_node_shared_ptr());
|
ov::as_type_ptr<ov::opset1::FakeQuantize>(dequantization1.data.get_node_shared_ptr());
|
||||||
const std::shared_ptr<opset1::FakeQuantize> fakeQuantize2 =
|
const std::shared_ptr<ov::opset1::FakeQuantize> fakeQuantize2 =
|
||||||
ov::as_type_ptr<opset1::FakeQuantize>(dequantization2.data.get_node_shared_ptr());
|
ov::as_type_ptr<ov::opset1::FakeQuantize>(dequantization2.data.get_node_shared_ptr());
|
||||||
|
|
||||||
if (fakeQuantize1 && !fakeQuantize2) {
|
if (fakeQuantize1 && !fakeQuantize2) {
|
||||||
return 0;
|
return 0;
|
||||||
@ -164,11 +164,11 @@ int EltwiseBaseTransformation::getNotEmpty(const std::shared_ptr<Node>& eltwise)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ov::is_type<opset1::Constant>(dequantization1.data.get_node())) {
|
if (ov::is_type<ov::opset1::Constant>(dequantization1.data.get_node())) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ov::is_type<opset1::Constant>(dequantization2.data.get_node())) {
|
if (ov::is_type<ov::opset1::Constant>(dequantization2.data.get_node())) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -211,18 +211,18 @@ std::pair<int, int> EltwiseBaseTransformation::getMultiplyConstBranch(const std:
|
|||||||
const std::shared_ptr<Node> parent2 = eltwise->get_input_node_shared_ptr(1);
|
const std::shared_ptr<Node> parent2 = eltwise->get_input_node_shared_ptr(1);
|
||||||
const auto dequantization2 = NetworkHelper::getDequantization(eltwise, defaultPrecisions, 1);
|
const auto dequantization2 = NetworkHelper::getDequantization(eltwise, defaultPrecisions, 1);
|
||||||
|
|
||||||
std::shared_ptr<opset1::Constant> constParent = dequantization1.empty() ?
|
std::shared_ptr<ov::opset1::Constant> constParent = dequantization1.empty() ?
|
||||||
ov::as_type_ptr<opset1::Constant>(parent1) :
|
ov::as_type_ptr<ov::opset1::Constant>(parent1) :
|
||||||
ov::as_type_ptr<opset1::Constant>(dequantization1.data.get_node_shared_ptr());
|
ov::as_type_ptr<ov::opset1::Constant>(dequantization1.data.get_node_shared_ptr());
|
||||||
std::shared_ptr<opset1::Multiply> multiplyParent = ov::as_type_ptr<opset1::Multiply>(parent2);
|
std::shared_ptr<ov::opset1::Multiply> multiplyParent = ov::as_type_ptr<ov::opset1::Multiply>(parent2);
|
||||||
int multiplyBranch = 1;
|
int multiplyBranch = 1;
|
||||||
|
|
||||||
|
|
||||||
if (constParent == nullptr || multiplyParent == nullptr) {
|
if (constParent == nullptr || multiplyParent == nullptr) {
|
||||||
constParent = dequantization2.empty() ?
|
constParent = dequantization2.empty() ?
|
||||||
ov::as_type_ptr<opset1::Constant>(parent2) :
|
ov::as_type_ptr<ov::opset1::Constant>(parent2) :
|
||||||
ov::as_type_ptr<opset1::Constant>(dequantization2.data.get_node_shared_ptr());
|
ov::as_type_ptr<ov::opset1::Constant>(dequantization2.data.get_node_shared_ptr());
|
||||||
multiplyParent = ov::as_type_ptr<opset1::Multiply>(parent1);
|
multiplyParent = ov::as_type_ptr<ov::opset1::Multiply>(parent1);
|
||||||
multiplyBranch = 0;
|
multiplyBranch = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -233,14 +233,14 @@ std::pair<int, int> EltwiseBaseTransformation::getMultiplyConstBranch(const std:
|
|||||||
auto multiplyParentParent1 = multiplyParent->get_input_node_shared_ptr(0);
|
auto multiplyParentParent1 = multiplyParent->get_input_node_shared_ptr(0);
|
||||||
auto multiplyParentParent2 = multiplyParent->get_input_node_shared_ptr(1);
|
auto multiplyParentParent2 = multiplyParent->get_input_node_shared_ptr(1);
|
||||||
|
|
||||||
auto multiplyParentParent = ov::as_type_ptr<opset1::Multiply>(multiplyParentParent1);
|
auto multiplyParentParent = ov::as_type_ptr<ov::opset1::Multiply>(multiplyParentParent1);
|
||||||
auto multiplyParentConst = ov::as_type_ptr<opset1::Constant>(multiplyParentParent2);
|
auto multiplyParentConst = ov::as_type_ptr<ov::opset1::Constant>(multiplyParentParent2);
|
||||||
int multiplyActBranch = 0;
|
int multiplyActBranch = 0;
|
||||||
|
|
||||||
|
|
||||||
if (multiplyParentConst == nullptr) {
|
if (multiplyParentConst == nullptr) {
|
||||||
multiplyParentParent = ov::as_type_ptr<opset1::Multiply>(multiplyParentParent2);
|
multiplyParentParent = ov::as_type_ptr<ov::opset1::Multiply>(multiplyParentParent2);
|
||||||
multiplyParentConst = ov::as_type_ptr<opset1::Constant>(multiplyParentParent1);
|
multiplyParentConst = ov::as_type_ptr<ov::opset1::Constant>(multiplyParentParent1);
|
||||||
multiplyActBranch = 1;
|
multiplyActBranch = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ FakeQuantizeTransformation::FakeQuantizeTransformation(const Params& params) : L
|
|||||||
MATCHER_SCOPE(FakeQuantizeTransformation);
|
MATCHER_SCOPE(FakeQuantizeTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::FakeQuantize>();
|
auto matcher = pattern::wrap_type<opset1::FakeQuantize>();
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ov::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
if (transformation_callback(op)) {
|
if (transformation_callback(op)) {
|
||||||
return false;
|
return false;
|
||||||
@ -220,7 +220,7 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
|
|||||||
matcherPass->register_new_node(newFakeQuantize);
|
matcherPass->register_new_node(newFakeQuantize);
|
||||||
|
|
||||||
replace_node(fakeQuantize, newFakeQuantize);
|
replace_node(fakeQuantize, newFakeQuantize);
|
||||||
ngraph::copy_runtime_info({ fakeQuantize, eltwise }, newFakeQuantize);
|
ov::copy_runtime_info({ fakeQuantize, eltwise }, newFakeQuantize);
|
||||||
newFakeQuantize->set_friendly_name(fakeQuantize->get_friendly_name());
|
newFakeQuantize->set_friendly_name(fakeQuantize->get_friendly_name());
|
||||||
return newFakeQuantize;
|
return newFakeQuantize;
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
FoldConvertTransformation::FoldConvertTransformation(const Params& params) : LayerTransformation(params) {
|
FoldConvertTransformation::FoldConvertTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(FoldConvertTransformation);
|
MATCHER_SCOPE(FoldConvertTransformation);
|
||||||
auto subtract = pattern::wrap_type<opset1::Subtract>();
|
auto subtract = pattern::wrap_type<ov::opset1::Subtract>();
|
||||||
auto matcher = std::make_shared<ngraph::pattern::Matcher>(subtract, matcher_name);
|
auto matcher = std::make_shared<ngraph::pattern::Matcher>(subtract, matcher_name);
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
@ -38,12 +38,12 @@ bool FoldConvertTransformation::transform(TransformationContext& context, ngraph
|
|||||||
|
|
||||||
auto foldConvert = [&](const size_t branch) {
|
auto foldConvert = [&](const size_t branch) {
|
||||||
const auto convert = subtract->get_input_node_shared_ptr(branch);
|
const auto convert = subtract->get_input_node_shared_ptr(branch);
|
||||||
if (!ov::is_type<opset1::Convert>(convert) || !ov::is_type<opset1::Constant>(convert->get_input_node_shared_ptr(0))) {
|
if (!ov::is_type<ov::opset1::Convert>(convert) || !ov::is_type<ov::opset1::Constant>(convert->get_input_node_shared_ptr(0))) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto resultConstant = ngraph::pass::low_precision::foldConvert(convert->input_value(0), convert->get_output_element_type(0));
|
const auto resultConstant = ngraph::pass::low_precision::foldConvert(convert->input_value(0), convert->get_output_element_type(0));
|
||||||
assert(ov::is_type<opset1::Constant>(resultConstant));
|
assert(ov::is_type<ov::opset1::Constant>(resultConstant));
|
||||||
|
|
||||||
replace_node(convert, resultConstant);
|
replace_node(convert, resultConstant);
|
||||||
updateOutput(context, resultConstant, convert);
|
updateOutput(context, resultConstant, convert);
|
||||||
@ -57,10 +57,10 @@ bool FoldConvertTransformation::transform(TransformationContext& context, ngraph
|
|||||||
|
|
||||||
bool FoldConvertTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const {
|
bool FoldConvertTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const {
|
||||||
return
|
return
|
||||||
(ov::is_type<opset1::Convert>(operation->get_input_node_ptr(1)) &&
|
(ov::is_type<ov::opset1::Convert>(operation->get_input_node_ptr(1)) &&
|
||||||
ov::is_type<opset1::Constant>(operation->get_input_node_ptr(1)->get_input_node_ptr(0))) ||
|
ov::is_type<ov::opset1::Constant>(operation->get_input_node_ptr(1)->get_input_node_ptr(0))) ||
|
||||||
(ov::is_type<opset1::Convert>(operation->get_input_node_ptr(0)) &&
|
(ov::is_type<ov::opset1::Convert>(operation->get_input_node_ptr(0)) &&
|
||||||
ov::is_type<opset1::Constant>(operation->get_input_node_ptr(0)->get_input_node_ptr(0)));
|
ov::is_type<ov::opset1::Constant>(operation->get_input_node_ptr(0)->get_input_node_ptr(0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool FoldConvertTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
|
bool FoldConvertTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
|
||||||
|
@ -19,7 +19,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
FoldFakeQuantizeTransformation::FoldFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {
|
FoldFakeQuantizeTransformation::FoldFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(FoldFakeQuantizeTransformation);
|
MATCHER_SCOPE(FoldFakeQuantizeTransformation);
|
||||||
auto fakeQuantize = pattern::wrap_type<opset1::FakeQuantize>();
|
auto fakeQuantize = pattern::wrap_type<ov::opset1::FakeQuantize>();
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -34,7 +34,7 @@ FoldFakeQuantizeTransformation::FoldFakeQuantizeTransformation(const Params& par
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool FoldFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
bool FoldFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
||||||
const auto fakeQuantize = ov::as_type_ptr<opset1::FakeQuantize>(m.get_match_root());
|
const auto fakeQuantize = ov::as_type_ptr<ov::opset1::FakeQuantize>(m.get_match_root());
|
||||||
if (fakeQuantize == nullptr) {
|
if (fakeQuantize == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -49,7 +49,7 @@ bool FoldFakeQuantizeTransformation::transform(TransformationContext& context, n
|
|||||||
}
|
}
|
||||||
|
|
||||||
const auto resultConstant = NetworkHelper::fold_fake_quantize(fakeQuantize, false);
|
const auto resultConstant = NetworkHelper::fold_fake_quantize(fakeQuantize, false);
|
||||||
if (ov::is_type<opset1::Constant>(resultConstant)) {
|
if (ov::is_type<ov::opset1::Constant>(resultConstant)) {
|
||||||
replace_node(fakeQuantize, resultConstant);
|
replace_node(fakeQuantize, resultConstant);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -58,13 +58,13 @@ bool FoldFakeQuantizeTransformation::transform(TransformationContext& context, n
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool FoldFakeQuantizeTransformation::isConstantOutput(std::shared_ptr<ngraph::Node> node) const {
|
bool FoldFakeQuantizeTransformation::isConstantOutput(std::shared_ptr<ngraph::Node> node) const {
|
||||||
const auto fakeQuantize = ov::as_type_ptr<opset1::FakeQuantize>(node);
|
const auto fakeQuantize = ov::as_type_ptr<ov::opset1::FakeQuantize>(node);
|
||||||
if (!fakeQuantize) {
|
if (!fakeQuantize) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto outputLow = as_type_ptr<opset1::Constant>(fakeQuantize->get_input_node_shared_ptr(3));
|
const auto outputLow = as_type_ptr<ov::opset1::Constant>(fakeQuantize->get_input_node_shared_ptr(3));
|
||||||
const auto outputHigh = as_type_ptr<opset1::Constant>(fakeQuantize->get_input_node_shared_ptr(4));
|
const auto outputHigh = as_type_ptr<ov::opset1::Constant>(fakeQuantize->get_input_node_shared_ptr(4));
|
||||||
|
|
||||||
if (outputLow == nullptr || outputHigh == nullptr) {
|
if (outputLow == nullptr || outputHigh == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
@ -81,7 +81,7 @@ bool FoldFakeQuantizeTransformation::canBeTransformed(const TransformationContex
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto fq = ov::as_type_ptr<opset1::FakeQuantize>(op);
|
const auto fq = ov::as_type_ptr<ov::opset1::FakeQuantize>(op);
|
||||||
if (!fq) {
|
if (!fq) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -21,11 +21,11 @@ namespace low_precision {
|
|||||||
|
|
||||||
FuseConvertTransformation::FuseConvertTransformation(const Params& params) : LayerTransformation(params) {
|
FuseConvertTransformation::FuseConvertTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(FuseConvertTransformation);
|
MATCHER_SCOPE(FuseConvertTransformation);
|
||||||
auto multiply = pattern::wrap_type<opset1::Multiply>({ pattern::wrap_type<opset1::Convert>(), pattern::wrap_type<opset1::Constant>() });
|
auto multiply = pattern::wrap_type<ov::opset1::Multiply>({ pattern::wrap_type<ov::opset1::Convert>(), pattern::wrap_type<ov::opset1::Constant>() });
|
||||||
auto subtract = pattern::wrap_type<opset1::Subtract>({ pattern::wrap_type<opset1::Convert>(), pattern::wrap_type<opset1::Constant>() });
|
auto subtract = pattern::wrap_type<ov::opset1::Subtract>({ pattern::wrap_type<ov::opset1::Convert>(), pattern::wrap_type<ov::opset1::Constant>() });
|
||||||
auto add = pattern::wrap_type<opset1::Add>({ pattern::wrap_type<opset1::Convert>(), pattern::wrap_type<opset1::Constant>() });
|
auto add = pattern::wrap_type<ov::opset1::Add>({ pattern::wrap_type<ov::opset1::Convert>(), pattern::wrap_type<ov::opset1::Constant>() });
|
||||||
auto fakeQuantize = pattern::wrap_type<opset1::FakeQuantize>({
|
auto fakeQuantize = pattern::wrap_type<ov::opset1::FakeQuantize>({
|
||||||
pattern::wrap_type<opset1::Convert>({pattern::wrap_type<opset1::Constant>()}),
|
pattern::wrap_type<ov::opset1::Convert>({pattern::wrap_type<ov::opset1::Constant>()}),
|
||||||
pattern::any_input(),
|
pattern::any_input(),
|
||||||
pattern::any_input(),
|
pattern::any_input(),
|
||||||
pattern::any_input(),
|
pattern::any_input(),
|
||||||
@ -48,13 +48,13 @@ FuseConvertTransformation::FuseConvertTransformation(const Params& params) : Lay
|
|||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
std::shared_ptr<Node> removeConvertIfPossibleForSubtract(
|
std::shared_ptr<Node> removeConvertIfPossibleForSubtract(
|
||||||
const std::shared_ptr<opset1::Convert>& convert,
|
const std::shared_ptr<ov::opset1::Convert>& convert,
|
||||||
const std::shared_ptr<opset1::Subtract>& subtract) {
|
const std::shared_ptr<ov::opset1::Subtract>& subtract) {
|
||||||
std::shared_ptr<Node> newSubtract;
|
std::shared_ptr<Node> newSubtract;
|
||||||
|
|
||||||
const element::Type precisionBeforeConvert = convert->input(0).get_element_type();
|
const element::Type precisionBeforeConvert = convert->input(0).get_element_type();
|
||||||
if (NetworkHelper::checkConstantValuePrecision(precisionBeforeConvert, subtract->get_input_node_shared_ptr(1))) {
|
if (NetworkHelper::checkConstantValuePrecision(precisionBeforeConvert, subtract->get_input_node_shared_ptr(1))) {
|
||||||
newSubtract = std::make_shared<ov::op::TypeRelaxed<opset1::Subtract>>(
|
newSubtract = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Subtract>>(
|
||||||
std::vector<ngraph::element::Type>{ element::f32, element::f32 }, std::vector<ngraph::element::Type>{},
|
std::vector<ngraph::element::Type>{ element::f32, element::f32 }, std::vector<ngraph::element::Type>{},
|
||||||
ov::op::TemporaryReplaceOutputType(convert->input_value(0), element::f32).get(),
|
ov::op::TemporaryReplaceOutputType(convert->input_value(0), element::f32).get(),
|
||||||
ov::op::TemporaryReplaceOutputType(subtract->input_value(1), element::f32).get());
|
ov::op::TemporaryReplaceOutputType(subtract->input_value(1), element::f32).get());
|
||||||
@ -73,27 +73,27 @@ bool FuseConvertTransformation::transform(TransformationContext& context, ngraph
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto convert = ov::as_type_ptr<opset1::Convert>(op->get_input_node_shared_ptr(0));
|
const auto convert = ov::as_type_ptr<ov::opset1::Convert>(op->get_input_node_shared_ptr(0));
|
||||||
auto parent = convert->input_value(0);
|
auto parent = convert->input_value(0);
|
||||||
|
|
||||||
if (ov::is_type<opset1::Constant>(parent.get_node_shared_ptr())) {
|
if (ov::is_type<ov::opset1::Constant>(parent.get_node_shared_ptr())) {
|
||||||
auto convertedConstant = foldConvert(parent, convert->get_convert_element_type());
|
auto convertedConstant = foldConvert(parent, convert->get_convert_element_type());
|
||||||
NetworkHelper::copyInfo(parent.get_node_shared_ptr(), convertedConstant);
|
NetworkHelper::copyInfo(parent.get_node_shared_ptr(), convertedConstant);
|
||||||
replace_node(convert, convertedConstant);
|
replace_node(convert, convertedConstant);
|
||||||
} else {
|
} else {
|
||||||
std::shared_ptr<Node> newOp;
|
std::shared_ptr<Node> newOp;
|
||||||
if (ov::is_type<opset1::Subtract>(op)) {
|
if (ov::is_type<ov::opset1::Subtract>(op)) {
|
||||||
auto subtract = ov::as_type_ptr<opset1::Subtract>(op);
|
auto subtract = ov::as_type_ptr<ov::opset1::Subtract>(op);
|
||||||
newOp = removeConvertIfPossibleForSubtract(convert, subtract);
|
newOp = removeConvertIfPossibleForSubtract(convert, subtract);
|
||||||
} else if (ov::is_type<opset1::Multiply>(op)) {
|
} else if (ov::is_type<ov::opset1::Multiply>(op)) {
|
||||||
newOp = std::make_shared<ov::op::TypeRelaxed<opset1::Multiply>>(
|
newOp = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Multiply>>(
|
||||||
std::vector<ngraph::element::Type>{ element::f32, element::f32 }, std::vector<ngraph::element::Type>{},
|
std::vector<ngraph::element::Type>{ element::f32, element::f32 }, std::vector<ngraph::element::Type>{},
|
||||||
ov::op::TemporaryReplaceOutputType(convert->input_value(0), element::f32).get(),
|
ov::op::TemporaryReplaceOutputType(convert->input_value(0), element::f32).get(),
|
||||||
ov::op::TemporaryReplaceOutputType(op->input_value(1), element::f32).get());
|
ov::op::TemporaryReplaceOutputType(op->input_value(1), element::f32).get());
|
||||||
NetworkHelper::setOutDataPrecisionForTypeRelaxed(newOp, op->get_output_element_type(0));
|
NetworkHelper::setOutDataPrecisionForTypeRelaxed(newOp, op->get_output_element_type(0));
|
||||||
replace_node(op, newOp);
|
replace_node(op, newOp);
|
||||||
} else if (ov::is_type<opset1::Add>(op)) {
|
} else if (ov::is_type<ov::opset1::Add>(op)) {
|
||||||
newOp = std::make_shared<ov::op::TypeRelaxed<opset1::Add>>(
|
newOp = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Add>>(
|
||||||
std::vector<ngraph::element::Type>{ element::f32, element::f32 }, std::vector<ngraph::element::Type>{},
|
std::vector<ngraph::element::Type>{ element::f32, element::f32 }, std::vector<ngraph::element::Type>{},
|
||||||
ov::op::TemporaryReplaceOutputType(convert->input_value(0), element::f32).get(),
|
ov::op::TemporaryReplaceOutputType(convert->input_value(0), element::f32).get(),
|
||||||
ov::op::TemporaryReplaceOutputType(op->input_value(1), element::f32).get());
|
ov::op::TemporaryReplaceOutputType(op->input_value(1), element::f32).get());
|
||||||
@ -118,7 +118,7 @@ bool FuseConvertTransformation::canBeTransformed(const TransformationContext& co
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto convert = ov::as_type_ptr<opset1::Convert>(op->get_input_node_shared_ptr(0));
|
const auto convert = ov::as_type_ptr<ov::opset1::Convert>(op->get_input_node_shared_ptr(0));
|
||||||
// issue #40395
|
// issue #40395
|
||||||
if (convert == nullptr) {
|
if (convert == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -18,7 +18,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
FuseMultiplyToFakeQuantizeTransformation::FuseMultiplyToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {
|
FuseMultiplyToFakeQuantizeTransformation::FuseMultiplyToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(FuseMultiplyToFakeQuantizeTransformation);
|
MATCHER_SCOPE(FuseMultiplyToFakeQuantizeTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::Multiply>();
|
auto matcher = pattern::wrap_type<ov::opset1::Multiply>();
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -39,15 +39,15 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
|
|||||||
}
|
}
|
||||||
|
|
||||||
const auto parent = multiply->get_input_node_shared_ptr(0);
|
const auto parent = multiply->get_input_node_shared_ptr(0);
|
||||||
auto fakeQuantize = ov::as_type_ptr<opset1::FakeQuantize>(parent);
|
auto fakeQuantize = ov::as_type_ptr<ov::opset1::FakeQuantize>(parent);
|
||||||
const auto convert = ov::as_type_ptr<opset1::Convert>(parent);
|
const auto convert = ov::as_type_ptr<ov::opset1::Convert>(parent);
|
||||||
|
|
||||||
if (convert) {
|
if (convert) {
|
||||||
fakeQuantize = ov::as_type_ptr<opset1::FakeQuantize>(convert->get_input_node_shared_ptr(0));
|
fakeQuantize = ov::as_type_ptr<ov::opset1::FakeQuantize>(convert->get_input_node_shared_ptr(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto multiplyConstant = multiply->get_input_node_shared_ptr(1);
|
const auto multiplyConstant = multiply->get_input_node_shared_ptr(1);
|
||||||
if (!ov::is_type<opset1::Constant>(multiplyConstant)) {
|
if (!ov::is_type<ov::opset1::Constant>(multiplyConstant)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -58,8 +58,8 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
|
|||||||
multiplyConstant :
|
multiplyConstant :
|
||||||
foldConvert(multiplyConstant, deqPrecision);
|
foldConvert(multiplyConstant, deqPrecision);
|
||||||
|
|
||||||
outputLowConst_f32 = fold<opset1::Multiply>(outputLowConst_f32, value);
|
outputLowConst_f32 = fold<ov::opset1::Multiply>(outputLowConst_f32, value);
|
||||||
outputHighConst_f32 = fold<opset1::Multiply>(outputHighConst_f32, value);
|
outputHighConst_f32 = fold<ov::opset1::Multiply>(outputHighConst_f32, value);
|
||||||
|
|
||||||
const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
|
const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
|
||||||
const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
|
const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
|
||||||
@ -68,8 +68,8 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
|
|||||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
|
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
|
||||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
|
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
|
||||||
|
|
||||||
auto newFakeQuantize = std::make_shared<ov::op::TypeRelaxed<opset1::FakeQuantize>>(
|
auto newFakeQuantize = std::make_shared<ov::op::TypeRelaxed<ov::opset1::FakeQuantize>>(
|
||||||
opset1::FakeQuantize(
|
ov::opset1::FakeQuantize(
|
||||||
fakeQuantize->input_value(0),
|
fakeQuantize->input_value(0),
|
||||||
inputLow,
|
inputLow,
|
||||||
inputHigh,
|
inputHigh,
|
||||||
@ -91,7 +91,7 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext&
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool FuseMultiplyToFakeQuantizeTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const {
|
bool FuseMultiplyToFakeQuantizeTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const {
|
||||||
if (!ov::is_type<opset1::Constant>(operation->get_input_node_shared_ptr(1))) {
|
if (!ov::is_type<ov::opset1::Constant>(operation->get_input_node_shared_ptr(1))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -104,11 +104,11 @@ bool FuseMultiplyToFakeQuantizeTransformation::canBeTransformed(const Transforma
|
|||||||
}
|
}
|
||||||
|
|
||||||
const auto parent = operation->get_input_node_shared_ptr(0);
|
const auto parent = operation->get_input_node_shared_ptr(0);
|
||||||
auto fq = ov::as_type_ptr<opset1::FakeQuantize>(parent);
|
auto fq = ov::as_type_ptr<ov::opset1::FakeQuantize>(parent);
|
||||||
const auto convert = ov::as_type_ptr<opset1::Convert>(parent);
|
const auto convert = ov::as_type_ptr<ov::opset1::Convert>(parent);
|
||||||
|
|
||||||
if (convert) {
|
if (convert) {
|
||||||
fq = ov::as_type_ptr<opset1::FakeQuantize>(convert->get_input_node_shared_ptr(0));
|
fq = ov::as_type_ptr<ov::opset1::FakeQuantize>(convert->get_input_node_shared_ptr(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!fq) {
|
if (!fq) {
|
||||||
|
@ -17,7 +17,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
FuseSubtractToFakeQuantizeTransformation::FuseSubtractToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {
|
FuseSubtractToFakeQuantizeTransformation::FuseSubtractToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(FuseSubtractToFakeQuantizeTransformation);
|
MATCHER_SCOPE(FuseSubtractToFakeQuantizeTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::Subtract>();
|
auto matcher = pattern::wrap_type<ov::opset1::Subtract>();
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -38,15 +38,15 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
|
|||||||
}
|
}
|
||||||
|
|
||||||
const auto parent = subtract->get_input_node_shared_ptr(0);
|
const auto parent = subtract->get_input_node_shared_ptr(0);
|
||||||
auto fakeQuantize = ov::as_type_ptr<opset1::FakeQuantize>(parent);
|
auto fakeQuantize = ov::as_type_ptr<ov::opset1::FakeQuantize>(parent);
|
||||||
const auto convert = ov::as_type_ptr<opset1::Convert>(parent);
|
const auto convert = ov::as_type_ptr<ov::opset1::Convert>(parent);
|
||||||
|
|
||||||
if (convert) {
|
if (convert) {
|
||||||
fakeQuantize = ov::as_type_ptr<opset1::FakeQuantize>(convert->get_input_node_shared_ptr(0));
|
fakeQuantize = ov::as_type_ptr<ov::opset1::FakeQuantize>(convert->get_input_node_shared_ptr(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto subtractConstant = subtract->get_input_node_shared_ptr(1);
|
const auto subtractConstant = subtract->get_input_node_shared_ptr(1);
|
||||||
if (!ov::is_type<opset1::Constant>(subtractConstant)) {
|
if (!ov::is_type<ov::opset1::Constant>(subtractConstant)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -57,8 +57,8 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
|
|||||||
subtractConstant :
|
subtractConstant :
|
||||||
foldConvert(subtractConstant, deqPrecision);
|
foldConvert(subtractConstant, deqPrecision);
|
||||||
|
|
||||||
outputLowConst_f32 = fold<opset1::Subtract>(outputLowConst_f32, value);
|
outputLowConst_f32 = fold<ov::opset1::Subtract>(outputLowConst_f32, value);
|
||||||
outputHighConst_f32 = fold<opset1::Subtract>(outputHighConst_f32, value);
|
outputHighConst_f32 = fold<ov::opset1::Subtract>(outputHighConst_f32, value);
|
||||||
|
|
||||||
const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
|
const auto inputLow = foldConvert(fakeQuantize->input_value(1), deqPrecision);
|
||||||
const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
|
const auto inputHigh = foldConvert(fakeQuantize->input_value(2), deqPrecision);
|
||||||
@ -67,8 +67,8 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
|
|||||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
|
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(3), outputLowConst_f32);
|
||||||
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
|
NetworkHelper::copyInfo(fakeQuantize->get_input_node_shared_ptr(4), outputHighConst_f32);
|
||||||
|
|
||||||
auto newFakeQuantize = std::make_shared<ov::op::TypeRelaxed<opset1::FakeQuantize>>(
|
auto newFakeQuantize = std::make_shared<ov::op::TypeRelaxed<ov::opset1::FakeQuantize>>(
|
||||||
opset1::FakeQuantize(
|
ov::opset1::FakeQuantize(
|
||||||
fakeQuantize->input_value(0),
|
fakeQuantize->input_value(0),
|
||||||
inputLow,
|
inputLow,
|
||||||
inputHigh,
|
inputHigh,
|
||||||
@ -85,7 +85,7 @@ bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext&
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool FuseSubtractToFakeQuantizeTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const {
|
bool FuseSubtractToFakeQuantizeTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const {
|
||||||
if (!ov::is_type<opset1::Constant>(operation->get_input_node_shared_ptr(1))) {
|
if (!ov::is_type<ov::opset1::Constant>(operation->get_input_node_shared_ptr(1))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -100,20 +100,20 @@ bool FuseSubtractToFakeQuantizeTransformation::canBeTransformed(const Transforma
|
|||||||
const auto children = operation->get_output_target_inputs(0);
|
const auto children = operation->get_output_target_inputs(0);
|
||||||
|
|
||||||
for (const auto& target : children) {
|
for (const auto& target : children) {
|
||||||
const auto convolution = ov::is_type<opset1::Convolution>(target.get_node());
|
const auto convolution = ov::is_type<ov::opset1::Convolution>(target.get_node());
|
||||||
const auto groupConvolution = ov::is_type<opset1::GroupConvolution>(target.get_node());
|
const auto groupConvolution = ov::is_type<ov::opset1::GroupConvolution>(target.get_node());
|
||||||
const auto convolutionBackpropData = ov::is_type<opset1::ConvolutionBackpropData>(target.get_node());
|
const auto convolutionBackpropData = ov::is_type<ov::opset1::ConvolutionBackpropData>(target.get_node());
|
||||||
if (convolution || groupConvolution || convolutionBackpropData) {
|
if (convolution || groupConvolution || convolutionBackpropData) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto parent = operation->get_input_node_shared_ptr(0);
|
const auto parent = operation->get_input_node_shared_ptr(0);
|
||||||
auto fq = ov::as_type_ptr<opset1::FakeQuantize>(parent);
|
auto fq = ov::as_type_ptr<ov::opset1::FakeQuantize>(parent);
|
||||||
const auto convert = ov::as_type_ptr<opset1::Convert>(parent);
|
const auto convert = ov::as_type_ptr<ov::opset1::Convert>(parent);
|
||||||
|
|
||||||
if (convert) {
|
if (convert) {
|
||||||
fq = ov::as_type_ptr<opset1::FakeQuantize>(convert->get_input_node_shared_ptr(0));
|
fq = ov::as_type_ptr<ov::opset1::FakeQuantize>(convert->get_input_node_shared_ptr(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!fq) {
|
if (!fq) {
|
||||||
|
@ -18,7 +18,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
GroupConvolutionTransformation::GroupConvolutionTransformation(const Params& params) : ConvolutionTransformation(params) {
|
GroupConvolutionTransformation::GroupConvolutionTransformation(const Params& params) : ConvolutionTransformation(params) {
|
||||||
MATCHER_SCOPE(GroupConvolutionTransformation);
|
MATCHER_SCOPE(GroupConvolutionTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::GroupConvolution>();
|
auto matcher = pattern::wrap_type<ov::opset1::GroupConvolution>();
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
#include "itt.hpp"
|
#include "itt.hpp"
|
||||||
|
#include "openvino/util/log.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
@ -139,9 +140,9 @@ bool LayerTransformation::canSubtractBeHandled(const std::shared_ptr<Node>& op,
|
|||||||
|
|
||||||
const auto parent = dequantization.subtract->input_value(1).get_node_shared_ptr();
|
const auto parent = dequantization.subtract->input_value(1).get_node_shared_ptr();
|
||||||
|
|
||||||
if (ov::is_type<opset1::Constant>(parent)) {
|
if (ov::is_type<ov::opset1::Constant>(parent)) {
|
||||||
return true;
|
return true;
|
||||||
} else if (ov::is_type<opset1::Convert>(parent) && ov::is_type<opset1::Constant>(parent->get_input_node_shared_ptr(0))) {
|
} else if (ov::is_type<ov::opset1::Convert>(parent) && ov::is_type<ov::opset1::Constant>(parent->get_input_node_shared_ptr(0))) {
|
||||||
const auto constant = parent->get_input_node_shared_ptr(0);
|
const auto constant = parent->get_input_node_shared_ptr(0);
|
||||||
const auto constantType = constant->output(0).get_element_type();
|
const auto constantType = constant->output(0).get_element_type();
|
||||||
return operationType == constantType;
|
return operationType == constantType;
|
||||||
@ -403,7 +404,7 @@ void LayerTransformation::updateOutput(
|
|||||||
// TODO: not tested!!!
|
// TODO: not tested!!!
|
||||||
for (auto output : lastNode->outputs()) {
|
for (auto output : lastNode->outputs()) {
|
||||||
for (auto input : output.get_target_inputs()) {
|
for (auto input : output.get_target_inputs()) {
|
||||||
if (ov::is_type<ngraph::opset1::Result>(input.get_node())) {
|
if (ov::is_type<ov::opset1::Result>(input.get_node())) {
|
||||||
const std::string originalName = originalNode->get_friendly_name();
|
const std::string originalName = originalNode->get_friendly_name();
|
||||||
originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix);
|
originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix);
|
||||||
lastNode->set_friendly_name(originalName);
|
lastNode->set_friendly_name(originalName);
|
||||||
@ -428,9 +429,9 @@ void LayerTransformation::updateOutput(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void LayerTransformation::addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr<Node> patternRoot) {
|
void LayerTransformation::addPattern(ov::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr<Node> patternRoot) {
|
||||||
MATCHER_SCOPE(SingleNodeMatcher);
|
MATCHER_SCOPE(SingleNodeMatcher);
|
||||||
ngraph::graph_rewrite_callback internal_callback = [this, &context](ngraph::pattern::Matcher &m) {
|
ov::graph_rewrite_callback internal_callback = [this, &context](ov::pass::pattern::Matcher &m) {
|
||||||
const bool result = transform(context, m);
|
const bool result = transform(context, m);
|
||||||
(void)result;
|
(void)result;
|
||||||
#ifdef LPT_DISPLAY_PRECISION
|
#ifdef LPT_DISPLAY_PRECISION
|
||||||
@ -446,15 +447,15 @@ void LayerTransformation::addPattern(ngraph::pass::GraphRewrite& pass, Transform
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
// TODO: better name for matcher? required?
|
// TODO: better name for matcher? required?
|
||||||
auto m = std::make_shared<ngraph::pattern::Matcher>(patternRoot, matcher_name);
|
auto m = std::make_shared<ov::pass::pattern::Matcher>(patternRoot, matcher_name);
|
||||||
auto match_pass = std::make_shared<ov::pass::MatcherPass>(
|
auto match_pass = std::make_shared<ov::pass::MatcherPass>(
|
||||||
m->get_name(),
|
m->get_name(),
|
||||||
m,
|
m,
|
||||||
[m, internal_callback](const std::shared_ptr<Node>& node) -> bool {
|
[m, internal_callback](const std::shared_ptr<Node>& node) -> bool {
|
||||||
NGRAPH_DEBUG << "Running matcher " << m->get_name() << " on " << node;
|
OPENVINO_DEBUG << "Running matcher " << m->get_name() << " on " << node;
|
||||||
OV_PASS_CALLBACK(m);
|
OV_PASS_CALLBACK(m);
|
||||||
if (std::dynamic_pointer_cast<ov::pass::pattern::Matcher>(m)->match(node->output(0))) {
|
if (std::dynamic_pointer_cast<ov::pass::pattern::Matcher>(m)->match(node->output(0))) {
|
||||||
NGRAPH_DEBUG << "Matcher " << m->get_name() << " matched " << node;
|
OPENVINO_DEBUG << "Matcher " << m->get_name() << " matched " << node;
|
||||||
bool status = internal_callback(*m.get());
|
bool status = internal_callback(*m.get());
|
||||||
// explicitly clear Matcher state because it holds pointers to matched nodes
|
// explicitly clear Matcher state because it holds pointers to matched nodes
|
||||||
m->clear_state();
|
m->clear_state();
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include "low_precision/propagate_precisions.hpp"
|
#include "low_precision/propagate_precisions.hpp"
|
||||||
#include "low_precision/align_quantization_parameters.hpp"
|
#include "low_precision/align_quantization_parameters.hpp"
|
||||||
|
|
||||||
|
#include "openvino/util/log.hpp"
|
||||||
#include "transformations/common_optimizations/lin_op_sequence_fusion.hpp"
|
#include "transformations/common_optimizations/lin_op_sequence_fusion.hpp"
|
||||||
#include "low_precision/fold_convert.hpp"
|
#include "low_precision/fold_convert.hpp"
|
||||||
#include "low_precision/pull_reshape_through_dequantization.hpp"
|
#include "low_precision/pull_reshape_through_dequantization.hpp"
|
||||||
@ -139,9 +140,9 @@ void make_matcher_type_relaxed(ngraph::pass::GraphRewrite* transformation) {
|
|||||||
m->get_name(),
|
m->get_name(),
|
||||||
m,
|
m,
|
||||||
[m, callback](const std::shared_ptr<Node>& node) -> bool {
|
[m, callback](const std::shared_ptr<Node>& node) -> bool {
|
||||||
NGRAPH_DEBUG << "Running matcher " << m->get_name() << " on " << node;
|
OPENVINO_DEBUG << "Running matcher " << m->get_name() << " on " << node;
|
||||||
if (std::dynamic_pointer_cast<ov::pass::pattern::Matcher>(m)->match(node->output(0))) {
|
if (std::dynamic_pointer_cast<ov::pass::pattern::Matcher>(m)->match(node->output(0))) {
|
||||||
NGRAPH_DEBUG << "Matcher " << m->get_name() << " matched " << node;
|
OPENVINO_DEBUG << "Matcher " << m->get_name() << " matched " << node;
|
||||||
OV_PASS_CALLBACK(m);
|
OV_PASS_CALLBACK(m);
|
||||||
bool status = callback(*m.get());
|
bool status = callback(*m.get());
|
||||||
// explicitly clear Matcher state because it holds pointers to matched nodes
|
// explicitly clear Matcher state because it holds pointers to matched nodes
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include "low_precision/propagate_through_precision_preserved.hpp"
|
#include "low_precision/propagate_through_precision_preserved.hpp"
|
||||||
#include "low_precision/update_shared_precision_preserved.hpp"
|
#include "low_precision/update_shared_precision_preserved.hpp"
|
||||||
#include "itt.hpp"
|
#include "itt.hpp"
|
||||||
|
#include "openvino/pass/manager.hpp"
|
||||||
|
|
||||||
using namespace ngraph;
|
using namespace ngraph;
|
||||||
|
|
||||||
@ -18,7 +19,7 @@ ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved::MarkupAvgPoolPreci
|
|||||||
|
|
||||||
bool ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
|
bool ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
|
||||||
RUN_ON_FUNCTION_SCOPE(MarkupAvgPoolPrecisionPreserved);
|
RUN_ON_FUNCTION_SCOPE(MarkupAvgPoolPrecisionPreserved);
|
||||||
ngraph::pass::Manager manager;
|
ov::pass::Manager manager;
|
||||||
manager.set_per_pass_validation(false);
|
manager.set_per_pass_validation(false);
|
||||||
std::shared_ptr<ngraph::pass::GraphRewrite> markupAvgPoolPrecision = manager.register_pass<ngraph::pass::GraphRewrite>();
|
std::shared_ptr<ngraph::pass::GraphRewrite> markupAvgPoolPrecision = manager.register_pass<ngraph::pass::GraphRewrite>();
|
||||||
markupAvgPoolPrecision->add_matcher<low_precision::CreatePrecisionsDependentAttribute<AvgPoolPrecisionPreservedAttribute, opset1::AvgPool>>();
|
markupAvgPoolPrecision->add_matcher<low_precision::CreatePrecisionsDependentAttribute<AvgPoolPrecisionPreservedAttribute, opset1::AvgPool>>();
|
||||||
|
@ -57,7 +57,7 @@ bool ngraph::pass::low_precision::MarkupQuantizationGranularity::run_on_model(co
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (const auto multiSubGraph = ov::as_type_ptr<ngraph::op::util::MultiSubGraphOp>(node)) {
|
if (const auto multiSubGraph = ov::as_type_ptr<ov::op::util::MultiSubGraphOp>(node)) {
|
||||||
for (size_t i = 0; i < multiSubGraph->get_internal_subgraphs_size(); i++)
|
for (size_t i = 0; i < multiSubGraph->get_internal_subgraphs_size(); i++)
|
||||||
run_on_model(multiSubGraph->get_function(i));
|
run_on_model(multiSubGraph->get_function(i));
|
||||||
continue;
|
continue;
|
||||||
|
@ -21,12 +21,12 @@ using namespace ngraph::pass::low_precision;
|
|||||||
|
|
||||||
MatMulTransformation::MatMulTransformation(const Params& params) : LayerTransformation(params) {
|
MatMulTransformation::MatMulTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(MatMulTransformation);
|
MATCHER_SCOPE(MatMulTransformation);
|
||||||
auto mul1 = pattern::wrap_type<opset1::Multiply>();
|
auto mul1 = pattern::wrap_type<ov::opset1::Multiply>();
|
||||||
auto mul2 = pattern::wrap_type<opset1::Multiply>();
|
auto mul2 = pattern::wrap_type<ov::opset1::Multiply>();
|
||||||
auto fq2 = pattern::wrap_type<opset1::FakeQuantize>();
|
auto fq2 = pattern::wrap_type<ov::opset1::FakeQuantize>();
|
||||||
auto matcher = pattern::wrap_type<opset1::MatMul>({ mul1, std::make_shared<pattern::op::Or>(OutputVector{ mul2, fq2 })});
|
auto matcher = pattern::wrap_type<ov::opset1::MatMul>({ mul1, std::make_shared<pattern::op::Or>(OutputVector{ mul2, fq2 })});
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ov::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
if (transformation_callback(op)) {
|
if (transformation_callback(op)) {
|
||||||
return false;
|
return false;
|
||||||
@ -39,18 +39,18 @@ MatMulTransformation::MatMulTransformation(const Params& params) : LayerTransfor
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool MatMulTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) {
|
bool MatMulTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) {
|
||||||
std::shared_ptr<opset1::MatMul> matMul = ov::as_type_ptr<opset1::MatMul>(m.get_match_root());
|
std::shared_ptr<ov::opset1::MatMul> matMul = ov::as_type_ptr<ov::opset1::MatMul>(m.get_match_root());
|
||||||
if ((matMul == nullptr) || !canBeTransformed(context, matMul)) {
|
if ((matMul == nullptr) || !canBeTransformed(context, matMul)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
matMul = ov::as_type_ptr<opset1::MatMul>(NetworkHelper::separateInStandaloneBranch(matMul, defaultPrecisions));
|
matMul = ov::as_type_ptr<ov::opset1::MatMul>(NetworkHelper::separateInStandaloneBranch(matMul, defaultPrecisions));
|
||||||
const auto dequantization1 = NetworkHelper::getDequantization(matMul, defaultPrecisions, 0);
|
const auto dequantization1 = NetworkHelper::getDequantization(matMul, defaultPrecisions, 0);
|
||||||
auto dequantization2 = NetworkHelper::getDequantization(matMul, defaultPrecisions, 1);
|
auto dequantization2 = NetworkHelper::getDequantization(matMul, defaultPrecisions, 1);
|
||||||
|
|
||||||
if (dequantization2.empty()) {
|
if (dequantization2.empty()) {
|
||||||
const std::shared_ptr<opset1::FakeQuantize> fakeQuantize =
|
const std::shared_ptr<ov::opset1::FakeQuantize> fakeQuantize =
|
||||||
ov::as_type_ptr<opset1::FakeQuantize>(dequantization2.data.get_node_shared_ptr());
|
ov::as_type_ptr<ov::opset1::FakeQuantize>(dequantization2.data.get_node_shared_ptr());
|
||||||
if (fakeQuantize != nullptr) {
|
if (fakeQuantize != nullptr) {
|
||||||
const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fakeQuantize);
|
const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fakeQuantize);
|
||||||
|
|
||||||
@ -80,7 +80,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
|||||||
dequantization2 = NetworkHelper::getDequantization(matMul, defaultPrecisions, 1);
|
dequantization2 = NetworkHelper::getDequantization(matMul, defaultPrecisions, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::shared_ptr<opset1::MatMul> newMatMul = std::make_shared<ov::op::TypeRelaxed<opset1::MatMul>>(
|
const std::shared_ptr<ov::opset1::MatMul> newMatMul = std::make_shared<ov::op::TypeRelaxed<ov::opset1::MatMul>>(
|
||||||
std::vector<element::Type>({ deqPrecision, deqPrecision }), std::vector<element::Type>({ deqPrecision }),
|
std::vector<element::Type>({ deqPrecision, deqPrecision }), std::vector<element::Type>({ deqPrecision }),
|
||||||
ov::op::TemporaryReplaceOutputType(dequantization1.data, deqPrecision).get(),
|
ov::op::TemporaryReplaceOutputType(dequantization1.data, deqPrecision).get(),
|
||||||
ov::op::TemporaryReplaceOutputType(dequantization2.data, deqPrecision).get(),
|
ov::op::TemporaryReplaceOutputType(dequantization2.data, deqPrecision).get(),
|
||||||
@ -92,7 +92,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
|||||||
|
|
||||||
// dequantization with subtract on activations & constant weights
|
// dequantization with subtract on activations & constant weights
|
||||||
if (dequantization1.subtract) {
|
if (dequantization1.subtract) {
|
||||||
auto broadcastShape = NetworkHelper::isScalarLike(ov::as_type_ptr<opset1::Constant>(dequantization1.subtractConstant)) ?
|
auto broadcastShape = NetworkHelper::isScalarLike(ov::as_type_ptr<ov::opset1::Constant>(dequantization1.subtractConstant)) ?
|
||||||
Shape(dequantization1.subtract->get_output_partial_shape(0).rank().get_length(), 1) :
|
Shape(dequantization1.subtract->get_output_partial_shape(0).rank().get_length(), 1) :
|
||||||
dequantization1.subtractConstant->get_shape();
|
dequantization1.subtractConstant->get_shape();
|
||||||
|
|
||||||
@ -105,25 +105,25 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
|||||||
broadcastShape[lastDataIdx] = weightsShape[firstWeightsIdx];
|
broadcastShape[lastDataIdx] = weightsShape[firstWeightsIdx];
|
||||||
|
|
||||||
// broadcasted sub const to form [1, ..., 1, Y]
|
// broadcasted sub const to form [1, ..., 1, Y]
|
||||||
const auto broadcastedConst = fold<opset1::Broadcast>(
|
const auto broadcastedConst = fold<ov::opset1::Broadcast>(
|
||||||
dequantization1.subtractConstant,
|
dequantization1.subtractConstant,
|
||||||
opset1::Constant::create(ngraph::element::i32, { broadcastShape.size() }, broadcastShape));
|
ov::opset1::Constant::create(ngraph::element::i32, { broadcastShape.size() }, broadcastShape));
|
||||||
|
|
||||||
// multiply by weights: [1, ..., 1, Y] x [Y, Z] => [1, ..., 1, Z]
|
// multiply by weights: [1, ..., 1, Y] x [Y, Z] => [1, ..., 1, Z]
|
||||||
const auto newSubConst = NetworkHelper::toScalarIfPossible(fold<opset1::MatMul>(
|
const auto newSubConst = NetworkHelper::toScalarIfPossible(fold<ov::opset1::MatMul>(
|
||||||
foldConvert(broadcastedConst, newMatMul->get_element_type()),
|
foldConvert(broadcastedConst, newMatMul->get_element_type()),
|
||||||
foldConvert(newMatMul->input_value(1), newMatMul->get_element_type()),
|
foldConvert(newMatMul->input_value(1), newMatMul->get_element_type()),
|
||||||
newMatMul->get_transpose_a(),
|
newMatMul->get_transpose_a(),
|
||||||
newMatMul->get_transpose_b()));
|
newMatMul->get_transpose_b()));
|
||||||
|
|
||||||
const auto newSubtract = std::make_shared<opset1::Subtract>(newMatMul, newSubConst);
|
const auto newSubtract = std::make_shared<ov::opset1::Subtract>(newMatMul, newSubConst);
|
||||||
newSubtract->set_friendly_name(newMatMul->get_friendly_name() + "/DequantizationSubtract");
|
newSubtract->set_friendly_name(newMatMul->get_friendly_name() + "/DequantizationSubtract");
|
||||||
copy_runtime_info({ newSubtract, matMul }, newSubtract);
|
copy_runtime_info({ newSubtract, matMul }, newSubtract);
|
||||||
|
|
||||||
parent = newSubtract;
|
parent = newSubtract;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto transpose = [](const std::shared_ptr<opset1::Constant>& node) -> std::shared_ptr<Node> {
|
auto transpose = [](const std::shared_ptr<ov::opset1::Constant>& node) -> std::shared_ptr<Node> {
|
||||||
const Shape outputShape = node->get_shape();
|
const Shape outputShape = node->get_shape();
|
||||||
if (outputShape.size() < 2ul) {
|
if (outputShape.size() < 2ul) {
|
||||||
return node;
|
return node;
|
||||||
@ -133,16 +133,16 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
|||||||
std::iota(transposeConstant.begin(), transposeConstant.end(), 0);
|
std::iota(transposeConstant.begin(), transposeConstant.end(), 0);
|
||||||
std::swap(*(transposeConstant.end() - 1), *(transposeConstant.end() - 2));
|
std::swap(*(transposeConstant.end() - 1), *(transposeConstant.end() - 2));
|
||||||
|
|
||||||
auto order = opset1::Constant::create(element::u32, Shape{ transposeConstant.size() }, transposeConstant);
|
auto order = ov::opset1::Constant::create(element::u32, Shape{ transposeConstant.size() }, transposeConstant);
|
||||||
std::shared_ptr<Node> transposedConstant = fold<opset1::Transpose>(node, order);
|
std::shared_ptr<Node> transposedConstant = fold<ov::opset1::Transpose>(node, order);
|
||||||
return transposedConstant;
|
return transposedConstant;
|
||||||
};
|
};
|
||||||
|
|
||||||
const auto mulConst1 = matMul->get_transpose_a() ? transpose(dequantization1.multiplyConstant) : dequantization1.multiplyConstant;
|
const auto mulConst1 = matMul->get_transpose_a() ? transpose(dequantization1.multiplyConstant) : dequantization1.multiplyConstant;
|
||||||
auto mulConst2 = matMul->get_transpose_b() ? transpose(dequantization2.multiplyConstant) : dequantization2.multiplyConstant;
|
auto mulConst2 = matMul->get_transpose_b() ? transpose(dequantization2.multiplyConstant) : dequantization2.multiplyConstant;
|
||||||
|
|
||||||
if (NetworkHelper::isScalarLike(ov::as_type_ptr<opset1::Constant>(mulConst2))) {
|
if (NetworkHelper::isScalarLike(ov::as_type_ptr<ov::opset1::Constant>(mulConst2))) {
|
||||||
mulConst2 = NetworkHelper::toScalar(ov::as_type_ptr<opset1::Constant>(mulConst2));
|
mulConst2 = NetworkHelper::toScalar(ov::as_type_ptr<ov::opset1::Constant>(mulConst2));
|
||||||
} else {
|
} else {
|
||||||
const auto constShape = mulConst2->get_shape();
|
const auto constShape = mulConst2->get_shape();
|
||||||
const size_t inputRank = matMul->get_input_partial_shape(0).rank().get_length();
|
const size_t inputRank = matMul->get_input_partial_shape(0).rank().get_length();
|
||||||
@ -152,17 +152,17 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
|||||||
Shape unsqueezeConstantShape(inputRank - constShape.size());
|
Shape unsqueezeConstantShape(inputRank - constShape.size());
|
||||||
std::iota(unsqueezeConstantShape.begin(), unsqueezeConstantShape.end(), 0ul);
|
std::iota(unsqueezeConstantShape.begin(), unsqueezeConstantShape.end(), 0ul);
|
||||||
|
|
||||||
mulConst2 = fold<opset1::Unsqueeze>(
|
mulConst2 = fold<ov::opset1::Unsqueeze>(
|
||||||
mulConst2,
|
mulConst2,
|
||||||
op::Constant::create(element::i32, Shape{ unsqueezeConstantShape.size() }, unsqueezeConstantShape));
|
ov::op::v0::Constant::create(element::i32, Shape{ unsqueezeConstantShape.size() }, unsqueezeConstantShape));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto newMulConst = NetworkHelper::toScalarIfPossible(fold<opset1::Multiply>(
|
const auto newMulConst = NetworkHelper::toScalarIfPossible(fold<ov::opset1::Multiply>(
|
||||||
mulConst1,
|
mulConst1,
|
||||||
foldConvert(mulConst2, element::f32)));
|
foldConvert(mulConst2, element::f32)));
|
||||||
|
|
||||||
const auto newMultiply = std::make_shared<ov::op::TypeRelaxed<opset1::Multiply>>(
|
const auto newMultiply = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Multiply>>(
|
||||||
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
||||||
std::vector<element::Type>{ dequantization1.multiply->get_output_element_type(0) },
|
std::vector<element::Type>{ dequantization1.multiply->get_output_element_type(0) },
|
||||||
ov::op::TemporaryReplaceOutputType(parent, deqPrecision).get(),
|
ov::op::TemporaryReplaceOutputType(parent, deqPrecision).get(),
|
||||||
@ -187,7 +187,7 @@ bool MatMulTransformation::canBeTransformed(const TransformationContext& context
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<opset1::MatMul> matMul = ov::as_type_ptr<opset1::MatMul>(layer);
|
std::shared_ptr<ov::opset1::MatMul> matMul = ov::as_type_ptr<ov::opset1::MatMul>(layer);
|
||||||
if (matMul == nullptr) {
|
if (matMul == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -245,7 +245,7 @@ bool MatMulTransformation::canBeTransformed(const TransformationContext& context
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto fakeQuantize = ov::as_type_ptr<opset1::FakeQuantize>(layer->get_input_node_shared_ptr(1));
|
const auto fakeQuantize = ov::as_type_ptr<ov::opset1::FakeQuantize>(layer->get_input_node_shared_ptr(1));
|
||||||
if (fakeQuantize) {
|
if (fakeQuantize) {
|
||||||
if (!QuantizationDetails::outputLayoutIsSupported(fakeQuantize)) {
|
if (!QuantizationDetails::outputLayoutIsSupported(fakeQuantize)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -24,7 +24,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
MultiplyTransformation::MultiplyTransformation(const Params& params) : EltwiseBaseTransformation(params) {
|
MultiplyTransformation::MultiplyTransformation(const Params& params) : EltwiseBaseTransformation(params) {
|
||||||
MATCHER_SCOPE(MultiplyTransformation);
|
MATCHER_SCOPE(MultiplyTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::Multiply>();
|
auto matcher = pattern::wrap_type<ov::opset1::Multiply>();
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -51,10 +51,10 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p
|
|||||||
auto newMultiply = multiply;
|
auto newMultiply = multiply;
|
||||||
|
|
||||||
auto fold_fake_quantizes = [](std::shared_ptr<Node>& multiply, const size_t index) {
|
auto fold_fake_quantizes = [](std::shared_ptr<Node>& multiply, const size_t index) {
|
||||||
auto fakeQuantizeOnWeights = ov::as_type_ptr<opset1::FakeQuantize>(multiply->get_input_node_shared_ptr(index));
|
auto fakeQuantizeOnWeights = ov::as_type_ptr<ov::opset1::FakeQuantize>(multiply->get_input_node_shared_ptr(index));
|
||||||
if (fakeQuantizeOnWeights != nullptr) {
|
if (fakeQuantizeOnWeights != nullptr) {
|
||||||
auto result = NetworkHelper::fold_fake_quantize(fakeQuantizeOnWeights);
|
auto result = NetworkHelper::fold_fake_quantize(fakeQuantizeOnWeights);
|
||||||
if (ov::is_type<opset1::Constant>(result)) {
|
if (ov::is_type<ov::opset1::Constant>(result)) {
|
||||||
replace_node(fakeQuantizeOnWeights, result);
|
replace_node(fakeQuantizeOnWeights, result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -82,12 +82,12 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p
|
|||||||
auto multiplyParentParent = multiplyParent.get_node_shared_ptr()->input_value(multiplyBranch.second);
|
auto multiplyParentParent = multiplyParent.get_node_shared_ptr()->input_value(multiplyBranch.second);
|
||||||
auto multiplyParentConst = multiplyParent.get_node_shared_ptr()->input_value(multiplyBranch.second == 0 ? 1 : 0);
|
auto multiplyParentConst = multiplyParent.get_node_shared_ptr()->input_value(multiplyBranch.second == 0 ? 1 : 0);
|
||||||
|
|
||||||
newMultiply = std::make_shared<ov::op::TypeRelaxed<opset1::Multiply>>(
|
newMultiply = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Multiply>>(
|
||||||
std::vector<ngraph::element::Type>{ element::f32, element::f32 },
|
std::vector<ngraph::element::Type>{ element::f32, element::f32 },
|
||||||
std::vector<ngraph::element::Type>{ multiply->get_output_element_type(0) },
|
std::vector<ngraph::element::Type>{ multiply->get_output_element_type(0) },
|
||||||
ov::op::TemporaryReplaceOutputType(multiplyParentParent, element::f32).get(),
|
ov::op::TemporaryReplaceOutputType(multiplyParentParent, element::f32).get(),
|
||||||
ov::op::TemporaryReplaceOutputType(
|
ov::op::TemporaryReplaceOutputType(
|
||||||
fold<opset1::Multiply>(
|
fold<ov::opset1::Multiply>(
|
||||||
foldConvert(multiplyParentConst, element::f32),
|
foldConvert(multiplyParentConst, element::f32),
|
||||||
foldConvert(constParent, element::f32)),
|
foldConvert(constParent, element::f32)),
|
||||||
element::f32).get());
|
element::f32).get());
|
||||||
@ -123,17 +123,17 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p
|
|||||||
// before: Y = (SC1 * (X1 - SH1)) * (SC2 * X2)
|
// before: Y = (SC1 * (X1 - SH1)) * (SC2 * X2)
|
||||||
// after : Y = (SC1' * (X1 - SH1)) * (X2) , where :
|
// after : Y = (SC1' * (X1 - SH1)) * (X2) , where :
|
||||||
// SC1' = SC1 * SC2
|
// SC1' = SC1 * SC2
|
||||||
auto newMultiplyValuesFullPath = fold<opset1::Multiply>(multiplyValuesEmptyPath, multiplyValuesFullPath);
|
auto newMultiplyValuesFullPath = fold<ov::opset1::Multiply>(multiplyValuesEmptyPath, multiplyValuesFullPath);
|
||||||
OutputVector inputs{ {}, {} };
|
OutputVector inputs{ {}, {} };
|
||||||
inputs[emptyPathIndex] = dequantizationEmptyPath.data;
|
inputs[emptyPathIndex] = dequantizationEmptyPath.data;
|
||||||
inputs[fullPathIndex] = std::make_shared<opset1::Multiply>(
|
inputs[fullPathIndex] = std::make_shared<ov::opset1::Multiply>(
|
||||||
dequantizationFullPath.subtract == nullptr ?
|
dequantizationFullPath.subtract == nullptr ?
|
||||||
(dequantizationFullPath.convert == nullptr ?
|
(dequantizationFullPath.convert == nullptr ?
|
||||||
dequantizationFullPath.data : dequantizationFullPath.convert) :
|
dequantizationFullPath.data : dequantizationFullPath.convert) :
|
||||||
dequantizationFullPath.subtract,
|
dequantizationFullPath.subtract,
|
||||||
newMultiplyValuesFullPath);
|
newMultiplyValuesFullPath);
|
||||||
|
|
||||||
newMultiply = std::make_shared<ov::op::TypeRelaxed<opset1::Multiply>>(
|
newMultiply = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Multiply>>(
|
||||||
std::vector<element::Type>{element::f32, element::f32},
|
std::vector<element::Type>{element::f32, element::f32},
|
||||||
std::vector<element::Type>{ multiply->get_output_element_type(0) },
|
std::vector<element::Type>{ multiply->get_output_element_type(0) },
|
||||||
ov::op::TemporaryReplaceOutputType(inputs[0], element::f32).get(),
|
ov::op::TemporaryReplaceOutputType(inputs[0], element::f32).get(),
|
||||||
@ -159,8 +159,8 @@ bool MultiplyTransformation::canBeTransformed(const TransformationContext& conte
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool nonConstantData = !ov::is_type<opset1::Constant>(dequantization1.data.get_node_shared_ptr()) &&
|
const bool nonConstantData = !ov::is_type<ov::opset1::Constant>(dequantization1.data.get_node_shared_ptr()) &&
|
||||||
!ov::is_type<opset1::Constant>(dequantization2.data.get_node_shared_ptr());
|
!ov::is_type<ov::opset1::Constant>(dequantization2.data.get_node_shared_ptr());
|
||||||
|
|
||||||
if (((dequantization1.empty() || dequantization2.empty()) && nonConstantData)) {
|
if (((dequantization1.empty() || dequantization2.empty()) && nonConstantData)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -17,7 +17,7 @@ MultiplyToGroupConvolutionTransformation::MultiplyToGroupConvolutionTransformati
|
|||||||
const Params& params,
|
const Params& params,
|
||||||
const PrecisionsRestriction::PrecisionsByPorts& restrictions) : LayerTransformation(params), restrictions(restrictions), groupSize(1ul) {
|
const PrecisionsRestriction::PrecisionsByPorts& restrictions) : LayerTransformation(params), restrictions(restrictions), groupSize(1ul) {
|
||||||
MATCHER_SCOPE(MultiplyToGroupConvolutionTransformation);
|
MATCHER_SCOPE(MultiplyToGroupConvolutionTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::Multiply>();
|
auto matcher = pattern::wrap_type<ov::opset1::Multiply>();
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -40,7 +40,7 @@ bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext&
|
|||||||
auto input = multiply->get_input_node_shared_ptr(0);
|
auto input = multiply->get_input_node_shared_ptr(0);
|
||||||
auto constant = multiply->get_input_node_shared_ptr(1);
|
auto constant = multiply->get_input_node_shared_ptr(1);
|
||||||
auto inputIndex = 0;
|
auto inputIndex = 0;
|
||||||
if (!ov::is_type<opset1::Constant>(constant)) {
|
if (!ov::is_type<ov::opset1::Constant>(constant)) {
|
||||||
input = multiply->get_input_node_shared_ptr(1);
|
input = multiply->get_input_node_shared_ptr(1);
|
||||||
constant = multiply->get_input_node_shared_ptr(0);
|
constant = multiply->get_input_node_shared_ptr(0);
|
||||||
inputIndex = 1;
|
inputIndex = 1;
|
||||||
@ -108,14 +108,14 @@ bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext&
|
|||||||
weightsShape[0] = group;
|
weightsShape[0] = group;
|
||||||
weightsShape[1] = outputChannelsCount / group;
|
weightsShape[1] = outputChannelsCount / group;
|
||||||
weightsShape[2] = inputChannelsCount / group;
|
weightsShape[2] = inputChannelsCount / group;
|
||||||
const auto weightsNode = std::make_shared<opset1::Constant>(weightsPrecision, weightsShape, weightsBuffer);
|
const auto weightsNode = std::make_shared<ov::opset1::Constant>(weightsPrecision, weightsShape, weightsBuffer);
|
||||||
|
|
||||||
const size_t spatialDimsSize = pShape.rank().get_length() - 2;
|
const size_t spatialDimsSize = pShape.rank().get_length() - 2;
|
||||||
ngraph::Strides strides(spatialDimsSize, 1ul);
|
ngraph::Strides strides(spatialDimsSize, 1ul);
|
||||||
ngraph::CoordinateDiff pads(spatialDimsSize, 0ul);
|
ngraph::CoordinateDiff pads(spatialDimsSize, 0ul);
|
||||||
ngraph::Strides dilations(spatialDimsSize, 1ul);
|
ngraph::Strides dilations(spatialDimsSize, 1ul);
|
||||||
|
|
||||||
const auto convolution = std::make_shared<ov::op::TypeRelaxed<opset1::GroupConvolution>>(
|
const auto convolution = std::make_shared<ov::op::TypeRelaxed<ov::opset1::GroupConvolution>>(
|
||||||
std::vector<element::Type>{ element::f32, element::f32 },
|
std::vector<element::Type>{ element::f32, element::f32 },
|
||||||
std::vector<element::Type>{ element::f32 },
|
std::vector<element::Type>{ element::f32 },
|
||||||
ov::op::TemporaryReplaceOutputType(dequantization.data, element::f32).get(),
|
ov::op::TemporaryReplaceOutputType(dequantization.data, element::f32).get(),
|
||||||
@ -128,9 +128,9 @@ bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext&
|
|||||||
|
|
||||||
std::shared_ptr<Node> lastNode = convolution;
|
std::shared_ptr<Node> lastNode = convolution;
|
||||||
if (dequantization.subtract != nullptr) {
|
if (dequantization.subtract != nullptr) {
|
||||||
lastNode = std::make_shared<opset1::Add>(
|
lastNode = std::make_shared<ov::opset1::Add>(
|
||||||
convolution,
|
convolution,
|
||||||
fold<opset1::Negative>(foldConvert(dequantization.subtractConstant, element::f32)));
|
fold<ov::opset1::Negative>(foldConvert(dequantization.subtractConstant, element::f32)));
|
||||||
lastNode->set_friendly_name(convolution->get_friendly_name() + "/Add");
|
lastNode->set_friendly_name(convolution->get_friendly_name() + "/Add");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -164,15 +164,15 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformed(const Transforma
|
|||||||
|
|
||||||
Shape constShape;
|
Shape constShape;
|
||||||
int inputIndex;
|
int inputIndex;
|
||||||
if (const auto constant = ov::as_type_ptr<opset1::Constant>(operation->get_input_node_shared_ptr(1))) {
|
if (const auto constant = ov::as_type_ptr<ov::opset1::Constant>(operation->get_input_node_shared_ptr(1))) {
|
||||||
inputIndex = 0;
|
inputIndex = 0;
|
||||||
constShape = constant->get_shape();
|
constShape = constant->get_shape();
|
||||||
if (ov::is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)) ||
|
if (ov::is_type<ov::opset1::Constant>(operation->get_input_node_shared_ptr(0)) ||
|
||||||
(ov::is_type<opset1::Subtract>(operation->get_input_node_shared_ptr(0)) &&
|
(ov::is_type<ov::opset1::Subtract>(operation->get_input_node_shared_ptr(0)) &&
|
||||||
ov::is_type<opset1::Constant>(operation->get_input_node_shared_ptr(0)->get_input_node_shared_ptr(0)))) {
|
ov::is_type<ov::opset1::Constant>(operation->get_input_node_shared_ptr(0)->get_input_node_shared_ptr(0)))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else if (const auto constant = ov::as_type_ptr<opset1::Constant>(operation->get_input_node_shared_ptr(0))) {
|
} else if (const auto constant = ov::as_type_ptr<ov::opset1::Constant>(operation->get_input_node_shared_ptr(0))) {
|
||||||
inputIndex = 1;
|
inputIndex = 1;
|
||||||
constShape = constant->get_shape();
|
constShape = constant->get_shape();
|
||||||
} else {
|
} else {
|
||||||
@ -210,7 +210,7 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolutio
|
|||||||
const auto parent0 = layer->get_input_node_shared_ptr(0);
|
const auto parent0 = layer->get_input_node_shared_ptr(0);
|
||||||
const auto parent1 = layer->get_input_node_shared_ptr(1);
|
const auto parent1 = layer->get_input_node_shared_ptr(1);
|
||||||
|
|
||||||
if (!ov::is_type<opset1::Constant>(parent0) && !ov::is_type<opset1::Constant>(parent1)) {
|
if (!ov::is_type<ov::opset1::Constant>(parent0) && !ov::is_type<ov::opset1::Constant>(parent1)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -225,10 +225,10 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolutio
|
|||||||
|
|
||||||
bool MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(const std::shared_ptr<const Node>& node) {
|
bool MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(const std::shared_ptr<const Node>& node) {
|
||||||
auto getConstantIndex = [](const std::shared_ptr<const Node>& node) -> int {
|
auto getConstantIndex = [](const std::shared_ptr<const Node>& node) -> int {
|
||||||
if (ov::is_type<opset1::Constant>(node->get_input_node_shared_ptr(1))) {
|
if (ov::is_type<ov::opset1::Constant>(node->get_input_node_shared_ptr(1))) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (ov::is_type<opset1::Constant>(node->get_input_node_shared_ptr(0))) {
|
if (ov::is_type<ov::opset1::Constant>(node->get_input_node_shared_ptr(0))) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -44,11 +44,11 @@ std::shared_ptr<ngraph::op::Constant> createNewScalesConst(const ngraph::op::Con
|
|||||||
MVNTransformation::MVNTransformation(const Params& params) : LayerTransformation(params) {
|
MVNTransformation::MVNTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(MVNTransformation);
|
MATCHER_SCOPE(MVNTransformation);
|
||||||
auto matcher = std::make_shared<pattern::op::Or>(OutputVector{
|
auto matcher = std::make_shared<pattern::op::Or>(OutputVector{
|
||||||
pattern::wrap_type<ngraph::op::MVN>({ pattern::wrap_type<ngraph::opset1::Multiply>() }),
|
pattern::wrap_type<ngraph::op::MVN>({ pattern::wrap_type<ov::opset1::Multiply>() }),
|
||||||
pattern::wrap_type<ngraph::opset6::MVN>({ pattern::wrap_type<ngraph::opset1::Multiply>(), pattern::wrap_type<ngraph::opset1::Constant>() })
|
pattern::wrap_type<ngraph::opset6::MVN>({ pattern::wrap_type<ov::opset1::Multiply>(), pattern::wrap_type<ov::opset1::Constant>() })
|
||||||
});
|
});
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ov::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
if (transformation_callback(op)) {
|
if (transformation_callback(op)) {
|
||||||
return false;
|
return false;
|
||||||
@ -84,7 +84,7 @@ bool MVNTransformation::canBeTransformed(const TransformationContext& context, s
|
|||||||
} else {
|
} else {
|
||||||
// MVN-6 allows negative values in reduction axes: [-r, r-1]
|
// MVN-6 allows negative values in reduction axes: [-r, r-1]
|
||||||
// given static rank of input data of MVN node, we can recover the exact axis number
|
// given static rank of input data of MVN node, we can recover the exact axis number
|
||||||
auto axis_set = ov::as_type_ptr<opset1::Constant>(mvn->get_input_node_shared_ptr(1))->cast_vector<int64_t>();
|
auto axis_set = ov::as_type_ptr<ov::opset1::Constant>(mvn->get_input_node_shared_ptr(1))->cast_vector<int64_t>();
|
||||||
|
|
||||||
Dimension::value_type ndims = 0;
|
Dimension::value_type ndims = 0;
|
||||||
if (std::any_of(axis_set.begin(), axis_set.end(), [](int64_t v) { return v < 0; })) {
|
if (std::any_of(axis_set.begin(), axis_set.end(), [](int64_t v) { return v < 0; })) {
|
||||||
@ -166,10 +166,10 @@ bool MVNTransformation::transform(TransformationContext &context, ngraph::patter
|
|||||||
NetworkHelper::setOutDataPrecisionForTypeRelaxed(newMVN, deqPrecision);
|
NetworkHelper::setOutDataPrecisionForTypeRelaxed(newMVN, deqPrecision);
|
||||||
NetworkHelper::copyInfo(mvn, newMVN);
|
NetworkHelper::copyInfo(mvn, newMVN);
|
||||||
|
|
||||||
auto newMultiply = std::make_shared<ov::op::TypeRelaxed<opset1::Multiply>>(
|
auto newMultiply = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Multiply>>(
|
||||||
opset1::Multiply(newMVN, newScalesConst),
|
ov::opset1::Multiply(newMVN, newScalesConst),
|
||||||
mvn->get_output_element_type(0));
|
mvn->get_output_element_type(0));
|
||||||
ngraph::copy_runtime_info({ mvn, newMultiply }, newMultiply);
|
ov::copy_runtime_info({ mvn, newMultiply }, newMultiply);
|
||||||
|
|
||||||
NetworkHelper::insertDequantizationAfter(mvn, newMultiply, newMVN);
|
NetworkHelper::insertDequantizationAfter(mvn, newMultiply, newMVN);
|
||||||
|
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -23,7 +23,7 @@ using namespace ngraph::pass::low_precision;
|
|||||||
namespace normalize_l2 {
|
namespace normalize_l2 {
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
std::shared_ptr<ngraph::op::Constant> createNewScalesConst(const ngraph::op::Constant& originalConst) {
|
std::shared_ptr<ov::opset1::Constant> createNewScalesConst(const ov::opset1::Constant& originalConst) {
|
||||||
std::vector<T> source = originalConst.cast_vector<T>();
|
std::vector<T> source = originalConst.cast_vector<T>();
|
||||||
|
|
||||||
std::vector<T> newData(source.size());
|
std::vector<T> newData(source.size());
|
||||||
@ -32,16 +32,16 @@ std::shared_ptr<ngraph::op::Constant> createNewScalesConst(const ngraph::op::Con
|
|||||||
}
|
}
|
||||||
|
|
||||||
const ngraph::element::Type type = originalConst.get_output_element_type(0);
|
const ngraph::element::Type type = originalConst.get_output_element_type(0);
|
||||||
return ngraph::op::Constant::create(type, originalConst.get_shape(), newData);
|
return ov::opset1::Constant::create(type, originalConst.get_shape(), newData);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace normalize_l2
|
} // namespace normalize_l2
|
||||||
|
|
||||||
NormalizeL2Transformation::NormalizeL2Transformation(const Params& params) : LayerTransformation(params) {
|
NormalizeL2Transformation::NormalizeL2Transformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(NormalizeL2Transformation);
|
MATCHER_SCOPE(NormalizeL2Transformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::NormalizeL2>({ pattern::wrap_type<opset1::Multiply>(), pattern::wrap_type<opset1::Constant>() });
|
auto matcher = pattern::wrap_type<ov::opset1::NormalizeL2>({ pattern::wrap_type<ov::opset1::Multiply>(), pattern::wrap_type<ov::opset1::Constant>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ov::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
if (transformation_callback(op)) {
|
if (transformation_callback(op)) {
|
||||||
return false;
|
return false;
|
||||||
@ -69,7 +69,7 @@ bool NormalizeL2Transformation::canBeTransformed(const TransformationContext& co
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Expand transformation for all cases of axes values
|
// TODO: Expand transformation for all cases of axes values
|
||||||
const auto axes = ov::as_type_ptr<opset1::Constant>(operation->get_input_node_shared_ptr(1));
|
const auto axes = ov::as_type_ptr<ov::opset1::Constant>(operation->get_input_node_shared_ptr(1));
|
||||||
const std::vector<int64_t> axesAcrossSpatial = { 1 };
|
const std::vector<int64_t> axesAcrossSpatial = { 1 };
|
||||||
const std::vector<int64_t> axesByChannels = { 1, 2, 3 };
|
const std::vector<int64_t> axesByChannels = { 1, 2, 3 };
|
||||||
|
|
||||||
@ -102,16 +102,16 @@ bool NormalizeL2Transformation::transform(TransformationContext &context, ngraph
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto normalize = ov::as_type_ptr<opset1::NormalizeL2>(NetworkHelper::separateInStandaloneBranch(operation, defaultPrecisions));
|
auto normalize = ov::as_type_ptr<ov::opset1::NormalizeL2>(NetworkHelper::separateInStandaloneBranch(operation, defaultPrecisions));
|
||||||
|
|
||||||
const auto axes = ov::as_type_ptr<opset1::Constant>(normalize->get_input_node_shared_ptr(1));
|
const auto axes = ov::as_type_ptr<ov::opset1::Constant>(normalize->get_input_node_shared_ptr(1));
|
||||||
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(normalize, defaultPrecisions);
|
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(normalize, defaultPrecisions);
|
||||||
auto scalesConst = ov::as_type_ptr<opset1::Constant>(dequantization.multiply->get_input_node_shared_ptr(1));
|
auto scalesConst = ov::as_type_ptr<ov::opset1::Constant>(dequantization.multiply->get_input_node_shared_ptr(1));
|
||||||
if (scalesConst == nullptr) {
|
if (scalesConst == nullptr) {
|
||||||
scalesConst = ov::as_type_ptr<opset1::Constant>(dequantization.multiply->get_input_node_shared_ptr(0));
|
scalesConst = ov::as_type_ptr<ov::opset1::Constant>(dequantization.multiply->get_input_node_shared_ptr(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<opset1::Constant> newScalesConst;
|
std::shared_ptr<ov::opset1::Constant> newScalesConst;
|
||||||
const auto type = scalesConst->get_output_element_type(0);
|
const auto type = scalesConst->get_output_element_type(0);
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case ngraph::element::Type_t::f16: {
|
case ngraph::element::Type_t::f16: {
|
||||||
@ -127,7 +127,7 @@ bool NormalizeL2Transformation::transform(TransformationContext &context, ngraph
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto newNormalize = std::make_shared<ov::op::TypeRelaxed<opset1::NormalizeL2>>(
|
auto newNormalize = std::make_shared<ov::op::TypeRelaxed<ov::opset1::NormalizeL2>>(
|
||||||
std::vector<ngraph::element::Type>{ element::f32, axes->output(0).get_element_type() },
|
std::vector<ngraph::element::Type>{ element::f32, axes->output(0).get_element_type() },
|
||||||
std::vector<ngraph::element::Type>{deqPrecision},
|
std::vector<ngraph::element::Type>{deqPrecision},
|
||||||
ov::op::TemporaryReplaceOutputType(dequantization.subtract == nullptr ? dequantization.data : dequantization.subtract, element::f32).get(),
|
ov::op::TemporaryReplaceOutputType(dequantization.subtract == nullptr ? dequantization.data : dequantization.subtract, element::f32).get(),
|
||||||
@ -136,14 +136,14 @@ bool NormalizeL2Transformation::transform(TransformationContext &context, ngraph
|
|||||||
normalize->get_eps_mode());
|
normalize->get_eps_mode());
|
||||||
NetworkHelper::copyInfo(normalize, newNormalize);
|
NetworkHelper::copyInfo(normalize, newNormalize);
|
||||||
|
|
||||||
auto newMultiply = std::make_shared<ov::op::TypeRelaxed<opset1::Multiply>>(
|
auto newMultiply = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Multiply>>(
|
||||||
std::vector<ngraph::element::Type>{ element::f32, element::f32 },
|
std::vector<ngraph::element::Type>{ element::f32, element::f32 },
|
||||||
std::vector<ngraph::element::Type>{normalize->get_output_element_type(0)},
|
std::vector<ngraph::element::Type>{normalize->get_output_element_type(0)},
|
||||||
ov::op::TemporaryReplaceOutputType(newNormalize, element::f32).get(),
|
ov::op::TemporaryReplaceOutputType(newNormalize, element::f32).get(),
|
||||||
ov::op::TemporaryReplaceOutputType(newScalesConst, element::f32).get());
|
ov::op::TemporaryReplaceOutputType(newScalesConst, element::f32).get());
|
||||||
|
|
||||||
NetworkHelper::insertDequantizationAfter(normalize, newMultiply, newNormalize);
|
NetworkHelper::insertDequantizationAfter(normalize, newMultiply, newNormalize);
|
||||||
ngraph::copy_runtime_info({ normalize, newMultiply }, newMultiply);
|
ov::copy_runtime_info({ normalize, newMultiply }, newMultiply);
|
||||||
|
|
||||||
updateOutput(context, newMultiply, newNormalize);
|
updateOutput(context, newMultiply, newNormalize);
|
||||||
return true;
|
return true;
|
||||||
|
@ -17,11 +17,11 @@ namespace low_precision {
|
|||||||
|
|
||||||
PadTransformation::PadTransformation(const Params& params) : LayerTransformation(params) {
|
PadTransformation::PadTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(PadTransformation);
|
MATCHER_SCOPE(PadTransformation);
|
||||||
auto mul = pattern::wrap_type<opset1::Multiply>();
|
auto mul = pattern::wrap_type<ov::opset1::Multiply>();
|
||||||
auto padsBegin = pattern::wrap_type<opset1::Constant>();
|
auto padsBegin = pattern::wrap_type<ov::opset1::Constant>();
|
||||||
auto padsEnd = pattern::wrap_type<opset1::Constant>();
|
auto padsEnd = pattern::wrap_type<ov::opset1::Constant>();
|
||||||
auto padsValue = pattern::wrap_type<opset1::Constant>();
|
auto padsValue = pattern::wrap_type<ov::opset1::Constant>();
|
||||||
auto matcher = pattern::wrap_type<opset1::Pad>({ mul, padsBegin, padsEnd, padsValue });
|
auto matcher = pattern::wrap_type<ov::opset1::Pad>({ mul, padsBegin, padsEnd, padsValue });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -40,8 +40,8 @@ bool PadTransformation::transform(TransformationContext& context, ngraph::patter
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto pad = ov::as_type_ptr<opset1::Pad>(NetworkHelper::separateInStandaloneBranch(m.get_match_root(), defaultPrecisions));
|
const auto pad = ov::as_type_ptr<ov::opset1::Pad>(NetworkHelper::separateInStandaloneBranch(m.get_match_root(), defaultPrecisions));
|
||||||
const auto padConstant = ov::as_type_ptr<opset1::Constant>(pad->get_input_node_shared_ptr(3));
|
const auto padConstant = ov::as_type_ptr<ov::opset1::Constant>(pad->get_input_node_shared_ptr(3));
|
||||||
const auto padConstantValue = padConstant->cast_vector<float>()[0];
|
const auto padConstantValue = padConstant->cast_vector<float>()[0];
|
||||||
|
|
||||||
const auto padsBegin = pad->get_pads_begin();
|
const auto padsBegin = pad->get_pads_begin();
|
||||||
@ -51,7 +51,7 @@ bool PadTransformation::transform(TransformationContext& context, ngraph::patter
|
|||||||
auto dequantization = NetworkHelper::getDequantization(pad, defaultPrecisions);
|
auto dequantization = NetworkHelper::getDequantization(pad, defaultPrecisions);
|
||||||
|
|
||||||
if (padMode == op::PadMode::CONSTANT) {
|
if (padMode == op::PadMode::CONSTANT) {
|
||||||
auto bcastConstant = [&](const std::shared_ptr<opset1::Constant> &constant) {
|
auto bcastConstant = [&](const std::shared_ptr<ov::opset1::Constant> &constant) {
|
||||||
size_t padIdx = 0;
|
size_t padIdx = 0;
|
||||||
for (size_t i = 0; i < padsBegin.size(); ++i) {
|
for (size_t i = 0; i < padsBegin.size(); ++i) {
|
||||||
if (padsBegin[i] != 0 || padsEnd[i] != 0) {
|
if (padsBegin[i] != 0 || padsEnd[i] != 0) {
|
||||||
@ -66,8 +66,8 @@ bool PadTransformation::transform(TransformationContext& context, ngraph::patter
|
|||||||
auto bcastedShape = Shape(inputPShape.rank().get_length(), 1ul);
|
auto bcastedShape = Shape(inputPShape.rank().get_length(), 1ul);
|
||||||
bcastedShape[padIdx] = inputPShape[padIdx].get_length();
|
bcastedShape[padIdx] = inputPShape[padIdx].get_length();
|
||||||
|
|
||||||
const auto bCastConst = opset1::Constant::create(element::i32, Shape{bcastedShape.size()}, bcastedShape);
|
const auto bCastConst = ov::opset1::Constant::create(element::i32, Shape{bcastedShape.size()}, bcastedShape);
|
||||||
return ov::as_type_ptr<opset1::Constant>(fold<opset1::Broadcast>(constant, bCastConst));
|
return ov::as_type_ptr<ov::opset1::Constant>(fold<ov::opset1::Broadcast>(constant, bCastConst));
|
||||||
};
|
};
|
||||||
|
|
||||||
if (dequantization.subtract && shape_size(dequantization.subtractConstant->get_shape()) == 1ul) {
|
if (dequantization.subtract && shape_size(dequantization.subtractConstant->get_shape()) == 1ul) {
|
||||||
@ -84,8 +84,8 @@ bool PadTransformation::transform(TransformationContext& context, ngraph::patter
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto foldConstantIfNecessary = [&padMode, &padsBegin, &padsEnd](
|
auto foldConstantIfNecessary = [&padMode, &padsBegin, &padsEnd](
|
||||||
const std::shared_ptr<opset1::Constant>& constant,
|
const std::shared_ptr<ov::opset1::Constant>& constant,
|
||||||
const std::shared_ptr<opset1::Pad>& pad,
|
const std::shared_ptr<ov::opset1::Pad>& pad,
|
||||||
float padVal) {
|
float padVal) {
|
||||||
const auto constantShape = constant->get_shape();
|
const auto constantShape = constant->get_shape();
|
||||||
if (shape_size(constantShape) == 1ul) {
|
if (shape_size(constantShape) == 1ul) {
|
||||||
@ -110,11 +110,11 @@ bool PadTransformation::transform(TransformationContext& context, ngraph::patter
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (foldingIsNecessary) {
|
if (foldingIsNecessary) {
|
||||||
const auto beginConst = opset1::Constant::create(element::u32, { padsForConstantBegin.size() }, padsForConstantBegin);
|
const auto beginConst = ov::opset1::Constant::create(element::u32, { padsForConstantBegin.size() }, padsForConstantBegin);
|
||||||
const auto endConst = opset1::Constant::create(element::u32, { padsForConstantEnd.size() }, padsForConstantEnd);
|
const auto endConst = ov::opset1::Constant::create(element::u32, { padsForConstantEnd.size() }, padsForConstantEnd);
|
||||||
const auto padValueConstant = opset1::Constant::create(constant->get_element_type(), Shape{}, { padVal });
|
const auto padValueConstant = ov::opset1::Constant::create(constant->get_element_type(), Shape{}, { padVal });
|
||||||
const auto foldedConstant = fold<opset1::Pad>(constant, beginConst, endConst, padValueConstant, padMode);
|
const auto foldedConstant = fold<ov::opset1::Pad>(constant, beginConst, endConst, padValueConstant, padMode);
|
||||||
return ov::as_type_ptr<opset1::Constant>(foldedConstant);
|
return ov::as_type_ptr<ov::opset1::Constant>(foldedConstant);
|
||||||
} else {
|
} else {
|
||||||
return constant;
|
return constant;
|
||||||
}
|
}
|
||||||
@ -145,7 +145,7 @@ bool PadTransformation::transform(TransformationContext& context, ngraph::patter
|
|||||||
}
|
}
|
||||||
|
|
||||||
// we must convert pad value in low precision
|
// we must convert pad value in low precision
|
||||||
const auto convertedZero = opset1::Constant::create(dequantization.data.get_element_type(), Shape{}, { padConstantValue });
|
const auto convertedZero = ov::opset1::Constant::create(dequantization.data.get_element_type(), Shape{}, { padConstantValue });
|
||||||
pad->set_argument(3, convertedZero);
|
pad->set_argument(3, convertedZero);
|
||||||
|
|
||||||
moveDequantizationAfter(context, pad, dequantization, true);
|
moveDequantizationAfter(context, pad, dequantization, true);
|
||||||
@ -157,7 +157,7 @@ bool PadTransformation::canBeTransformed(const TransformationContext& context, s
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto pad = ov::as_type_ptr<opset1::Pad>(op);
|
const auto pad = ov::as_type_ptr<ov::opset1::Pad>(op);
|
||||||
if (!pad) {
|
if (!pad) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -169,7 +169,7 @@ bool PadTransformation::canBeTransformed(const TransformationContext& context, s
|
|||||||
|
|
||||||
const auto mode = pad->get_pad_mode();
|
const auto mode = pad->get_pad_mode();
|
||||||
if (mode == op::PadMode::CONSTANT) {
|
if (mode == op::PadMode::CONSTANT) {
|
||||||
auto padAndDqByTheSameDimension = [&](const std::shared_ptr<opset1::Constant>& deqConst) {
|
auto padAndDqByTheSameDimension = [&](const std::shared_ptr<ov::opset1::Constant>& deqConst) {
|
||||||
const auto padsBegin = pad->get_pads_begin();
|
const auto padsBegin = pad->get_pads_begin();
|
||||||
const auto padsEnd = pad->get_pads_end();
|
const auto padsEnd = pad->get_pads_end();
|
||||||
|
|
||||||
@ -231,7 +231,7 @@ bool PadTransformation::canBeTransformed(const TransformationContext& context, s
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto constant = ov::as_type_ptr<opset1::Constant>(pad->get_input_node_shared_ptr(3));
|
const auto constant = ov::as_type_ptr<ov::opset1::Constant>(pad->get_input_node_shared_ptr(3));
|
||||||
const auto constantValue = constant->cast_vector<float>()[0];
|
const auto constantValue = constant->cast_vector<float>()[0];
|
||||||
if (constantValue != 0.f && !padAndDqByTheSameDimension(dequantization.multiplyConstant)) {
|
if (constantValue != 0.f && !padAndDqByTheSameDimension(dequantization.multiplyConstant)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -20,7 +20,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
PReluTransformation::PReluTransformation(const Params& params) : LayerTransformation(params) {
|
PReluTransformation::PReluTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(PReluTransformation);
|
MATCHER_SCOPE(PReluTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::PRelu>({ pattern::wrap_type<opset1::Multiply>(), pattern::wrap_type<opset1::Constant>() });
|
auto matcher = pattern::wrap_type<ov::opset1::PRelu>({ pattern::wrap_type<ov::opset1::Multiply>(), pattern::wrap_type<ov::opset1::Constant>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
#include "low_precision/propagate_through_precision_preserved.hpp"
|
#include "low_precision/propagate_through_precision_preserved.hpp"
|
||||||
#include "low_precision/propagate_to_input.hpp"
|
#include "low_precision/propagate_to_input.hpp"
|
||||||
#include "itt.hpp"
|
#include "itt.hpp"
|
||||||
|
#include "openvino/pass/manager.hpp"
|
||||||
|
|
||||||
using namespace ngraph;
|
using namespace ngraph;
|
||||||
using namespace ngraph::pass::low_precision;
|
using namespace ngraph::pass::low_precision;
|
||||||
@ -20,7 +21,7 @@ ngraph::pass::low_precision::PropagatePrecisions::PropagatePrecisions(const Attr
|
|||||||
|
|
||||||
bool ngraph::pass::low_precision::PropagatePrecisions::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
|
bool ngraph::pass::low_precision::PropagatePrecisions::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
|
||||||
RUN_ON_FUNCTION_SCOPE(PropagatePrecisions);
|
RUN_ON_FUNCTION_SCOPE(PropagatePrecisions);
|
||||||
ngraph::pass::Manager manager;
|
ov::pass::Manager manager;
|
||||||
manager.set_per_pass_validation(false);
|
manager.set_per_pass_validation(false);
|
||||||
std::shared_ptr<ngraph::pass::GraphRewrite> precisionsPropagation = manager.register_pass<ngraph::pass::GraphRewrite>();
|
std::shared_ptr<ngraph::pass::GraphRewrite> precisionsPropagation = manager.register_pass<ngraph::pass::GraphRewrite>();
|
||||||
precisionsPropagation->add_matcher<low_precision::CreateAttribute<PrecisionsAttribute, opset1::FakeQuantize>>(params, AttributeSource::OutputPort);
|
precisionsPropagation->add_matcher<low_precision::CreateAttribute<PrecisionsAttribute, opset1::FakeQuantize>>(params, AttributeSource::OutputPort);
|
||||||
|
@ -86,7 +86,7 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& reshap
|
|||||||
std::make_shared<opset1::Convert>(newElementwiseValues, elementwiseValuesConvert->get_destination_type()) });
|
std::make_shared<opset1::Convert>(newElementwiseValues, elementwiseValuesConvert->get_destination_type()) });
|
||||||
|
|
||||||
replace_node(reshape, newElementwise);
|
replace_node(reshape, newElementwise);
|
||||||
copy_runtime_info({ elementwise, reshape }, { newReshape, newElementwise });
|
ov::copy_runtime_info({ elementwise, reshape }, { newReshape, newElementwise });
|
||||||
return newReshape;
|
return newReshape;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -94,7 +94,7 @@ std::shared_ptr<Node> moveThroughConvert(const std::shared_ptr<Node>& reshape, c
|
|||||||
const auto newReshape = reshape->clone_with_new_inputs({ convert->input_value(0), reshape->input_value(1) });
|
const auto newReshape = reshape->clone_with_new_inputs({ convert->input_value(0), reshape->input_value(1) });
|
||||||
const auto newConvert = convert->clone_with_new_inputs({ newReshape });
|
const auto newConvert = convert->clone_with_new_inputs({ newReshape });
|
||||||
replace_node(reshape, newConvert);
|
replace_node(reshape, newConvert);
|
||||||
copy_runtime_info({ convert, reshape }, { newReshape, newConvert });
|
ov::copy_runtime_info({ convert, reshape }, { newReshape, newConvert });
|
||||||
|
|
||||||
return newReshape;
|
return newReshape;
|
||||||
}
|
}
|
||||||
|
@ -62,7 +62,7 @@ std::shared_ptr<Node> moveThroughElementwise(const std::shared_ptr<Node>& transp
|
|||||||
std::make_shared<opset1::Convert>(newElementwiseValues, elementwiseValuesConvert->get_destination_type()) });
|
std::make_shared<opset1::Convert>(newElementwiseValues, elementwiseValuesConvert->get_destination_type()) });
|
||||||
|
|
||||||
replace_node(transpose, newElementwise);
|
replace_node(transpose, newElementwise);
|
||||||
copy_runtime_info({ elementwise, transpose }, { newTranspose, newElementwise });
|
ov::copy_runtime_info({ elementwise, transpose }, { newTranspose, newElementwise });
|
||||||
|
|
||||||
return newTranspose;
|
return newTranspose;
|
||||||
}
|
}
|
||||||
@ -71,7 +71,7 @@ std::shared_ptr<Node> moveThroughConvert(const std::shared_ptr<Node>& transpose,
|
|||||||
const auto newTranspose = transpose->clone_with_new_inputs({convert->input_value(0), transpose->input_value(1) });
|
const auto newTranspose = transpose->clone_with_new_inputs({convert->input_value(0), transpose->input_value(1) });
|
||||||
const auto newConvert = convert->clone_with_new_inputs({ newTranspose });
|
const auto newConvert = convert->clone_with_new_inputs({ newTranspose });
|
||||||
replace_node(transpose, newConvert);
|
replace_node(transpose, newConvert);
|
||||||
copy_runtime_info({ convert, transpose }, { newTranspose, newConvert });
|
ov::copy_runtime_info({ convert, transpose }, { newTranspose, newConvert });
|
||||||
|
|
||||||
return newTranspose;
|
return newTranspose;
|
||||||
}
|
}
|
||||||
|
@ -49,13 +49,13 @@ QuantizationDetails::QuantizationDetails(const size_t levels, const std::vector<
|
|||||||
outputLowValues(outputLowValues),
|
outputLowValues(outputLowValues),
|
||||||
outputHighValues(outputHighValues) {}
|
outputHighValues(outputHighValues) {}
|
||||||
|
|
||||||
bool QuantizationDetails::outputLayoutIsSupported(std::shared_ptr<opset1::FakeQuantize> quantize, bool isConvertExpected) {
|
bool QuantizationDetails::outputLayoutIsSupported(std::shared_ptr<ov::opset1::FakeQuantize> quantize, bool isConvertExpected) {
|
||||||
const auto inputs = quantize->inputs();
|
const auto inputs = quantize->inputs();
|
||||||
for (size_t i = 1; i < inputs.size(); ++i) {
|
for (size_t i = 1; i < inputs.size(); ++i) {
|
||||||
const auto node = inputs[i].get_source_output().get_node_shared_ptr();
|
const auto node = inputs[i].get_source_output().get_node_shared_ptr();
|
||||||
bool supported = ov::is_type<opset1::Constant>(node);
|
bool supported = ov::is_type<ov::opset1::Constant>(node);
|
||||||
if (!supported && isConvertExpected) {
|
if (!supported && isConvertExpected) {
|
||||||
supported = ov::is_type<op::Convert>(node) && ov::is_type<opset1::Constant>(node->get_input_node_ptr(0));
|
supported = ov::is_type<ov::opset1::Convert>(node) && ov::is_type<ov::opset1::Constant>(node->get_input_node_ptr(0));
|
||||||
}
|
}
|
||||||
if (!supported) {
|
if (!supported) {
|
||||||
return false;
|
return false;
|
||||||
@ -65,14 +65,14 @@ bool QuantizationDetails::outputLayoutIsSupported(std::shared_ptr<opset1::FakeQu
|
|||||||
}
|
}
|
||||||
|
|
||||||
void QuantizationDetails::getInputIntervals(
|
void QuantizationDetails::getInputIntervals(
|
||||||
std::shared_ptr<opset1::FakeQuantize> quantize,
|
std::shared_ptr<ov::opset1::FakeQuantize> quantize,
|
||||||
std::vector<float>& inputLowValues,
|
std::vector<float>& inputLowValues,
|
||||||
std::vector<float>& inputHighValues) {
|
std::vector<float>& inputHighValues) {
|
||||||
std::shared_ptr<opset1::Constant> inputLowLayer = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(1));
|
std::shared_ptr<ov::opset1::Constant> inputLowLayer = ov::as_type_ptr<ov::opset1::Constant>(quantize->get_input_node_shared_ptr(1));
|
||||||
const std::vector<float>& inputLowBlobValues = getBlobValue(inputLowLayer);
|
const std::vector<float>& inputLowBlobValues = getBlobValue(inputLowLayer);
|
||||||
inputLowValues.insert(inputLowValues.end(), inputLowBlobValues.begin(), inputLowBlobValues.end());
|
inputLowValues.insert(inputLowValues.end(), inputLowBlobValues.begin(), inputLowBlobValues.end());
|
||||||
|
|
||||||
std::shared_ptr<opset1::Constant> inputHighLayer = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(2));
|
std::shared_ptr<ov::opset1::Constant> inputHighLayer = ov::as_type_ptr<ov::opset1::Constant>(quantize->get_input_node_shared_ptr(2));
|
||||||
const std::vector<float> inputHighBlobValues = getBlobValue(inputHighLayer);
|
const std::vector<float> inputHighBlobValues = getBlobValue(inputHighLayer);
|
||||||
inputHighValues.insert(inputHighValues.end(), inputHighBlobValues.begin(), inputHighBlobValues.end());
|
inputHighValues.insert(inputHighValues.end(), inputHighBlobValues.begin(), inputHighBlobValues.end());
|
||||||
|
|
||||||
@ -83,14 +83,14 @@ void QuantizationDetails::getInputIntervals(
|
|||||||
|
|
||||||
|
|
||||||
void QuantizationDetails::getOutputIntervals(
|
void QuantizationDetails::getOutputIntervals(
|
||||||
std::shared_ptr<opset1::FakeQuantize> quantize,
|
std::shared_ptr<ov::opset1::FakeQuantize> quantize,
|
||||||
std::vector<float>& outputLowValues,
|
std::vector<float>& outputLowValues,
|
||||||
std::vector<float>& outputHighValues) {
|
std::vector<float>& outputHighValues) {
|
||||||
std::shared_ptr<opset1::Constant> outputLowLayer = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(3));
|
std::shared_ptr<ov::opset1::Constant> outputLowLayer = ov::as_type_ptr<ov::opset1::Constant>(quantize->get_input_node_shared_ptr(3));
|
||||||
const std::vector<float>& outputLowBlobValues = getBlobValue(outputLowLayer);
|
const std::vector<float>& outputLowBlobValues = getBlobValue(outputLowLayer);
|
||||||
outputLowValues.insert(outputLowValues.end(), outputLowBlobValues.begin(), outputLowBlobValues.end());
|
outputLowValues.insert(outputLowValues.end(), outputLowBlobValues.begin(), outputLowBlobValues.end());
|
||||||
|
|
||||||
std::shared_ptr<opset1::Constant> outputHighLayer = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(4));
|
std::shared_ptr<ov::opset1::Constant> outputHighLayer = ov::as_type_ptr<ov::opset1::Constant>(quantize->get_input_node_shared_ptr(4));
|
||||||
const std::vector<float> outputHighBlobValues = getBlobValue(outputHighLayer);
|
const std::vector<float> outputHighBlobValues = getBlobValue(outputHighLayer);
|
||||||
outputHighValues.insert(outputHighValues.end(), outputHighBlobValues.begin(), outputHighBlobValues.end());
|
outputHighValues.insert(outputHighValues.end(), outputHighBlobValues.begin(), outputHighBlobValues.end());
|
||||||
|
|
||||||
@ -99,16 +99,16 @@ void QuantizationDetails::getOutputIntervals(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
QuantizationDetails QuantizationDetails::getDetails(std::shared_ptr<opset1::FakeQuantize> quantize) {
|
QuantizationDetails QuantizationDetails::getDetails(std::shared_ptr<ov::opset1::FakeQuantize> quantize) {
|
||||||
if (!QuantizationDetails::outputLayoutIsSupported(quantize)) {
|
if (!QuantizationDetails::outputLayoutIsSupported(quantize)) {
|
||||||
return QuantizationDetails();
|
return QuantizationDetails();
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<float> inputLowValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(1))->cast_vector<float>();
|
const std::vector<float> inputLowValues = ov::as_type_ptr<ov::opset1::Constant>(quantize->get_input_node_shared_ptr(1))->cast_vector<float>();
|
||||||
const std::vector<float> inputHighValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(2))->cast_vector<float>();
|
const std::vector<float> inputHighValues = ov::as_type_ptr<ov::opset1::Constant>(quantize->get_input_node_shared_ptr(2))->cast_vector<float>();
|
||||||
|
|
||||||
const std::vector<float> outputLowValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(3))->cast_vector<float>();
|
const std::vector<float> outputLowValues = ov::as_type_ptr<ov::opset1::Constant>(quantize->get_input_node_shared_ptr(3))->cast_vector<float>();
|
||||||
const std::vector<float> outputHighValues = ov::as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(4))->cast_vector<float>();
|
const std::vector<float> outputHighValues = ov::as_type_ptr<ov::opset1::Constant>(quantize->get_input_node_shared_ptr(4))->cast_vector<float>();
|
||||||
|
|
||||||
return QuantizationDetails(
|
return QuantizationDetails(
|
||||||
quantize->get_levels(),
|
quantize->get_levels(),
|
||||||
@ -163,7 +163,7 @@ float QuantizationDetails::getOutputHighValue(const size_t index) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<float> QuantizationDetails::getBlobValue(std::shared_ptr<Node> constantLayer) {
|
std::vector<float> QuantizationDetails::getBlobValue(std::shared_ptr<Node> constantLayer) {
|
||||||
return ov::as_type_ptr<opset1::Constant>(constantLayer)->cast_vector<float>();
|
return ov::as_type_ptr<ov::opset1::Constant>(constantLayer)->cast_vector<float>();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool QuantizationDetails::empty() const noexcept {
|
bool QuantizationDetails::empty() const noexcept {
|
||||||
|
@ -36,7 +36,7 @@ bool ReduceBaseTransformation::canBeTransformed(const TransformationContext& con
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto axesConstant = ov::as_type_ptr<ngraph::opset1::Constant>(reduce->get_input_node_shared_ptr(1));
|
const auto axesConstant = ov::as_type_ptr<ov::opset1::Constant>(reduce->get_input_node_shared_ptr(1));
|
||||||
if (axesConstant == nullptr) {
|
if (axesConstant == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
ReduceMaxTransformation::ReduceMaxTransformation(const Params& params) : ReduceBaseTransformation(params) {
|
ReduceMaxTransformation::ReduceMaxTransformation(const Params& params) : ReduceBaseTransformation(params) {
|
||||||
MATCHER_SCOPE(ReduceMaxTransformation);
|
MATCHER_SCOPE(ReduceMaxTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::ReduceMax>({ pattern::wrap_type<opset1::Multiply>(), pattern::wrap_type<opset1::Constant>() });
|
auto matcher = pattern::wrap_type<ov::opset1::ReduceMax>({ pattern::wrap_type<ov::opset1::Multiply>(), pattern::wrap_type<ov::opset1::Constant>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -31,7 +31,7 @@ ReduceMaxTransformation::ReduceMaxTransformation(const Params& params) : ReduceB
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool ReduceMaxTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const {
|
bool ReduceMaxTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const {
|
||||||
if (!ov::is_type<opset1::ReduceMax>(reduce)) {
|
if (!ov::is_type<ov::opset1::ReduceMax>(reduce)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,7 +40,7 @@ bool ReduceMaxTransformation::canBeTransformed(const TransformationContext& cont
|
|||||||
}
|
}
|
||||||
|
|
||||||
const auto dequantization = NetworkHelper::getDequantization(reduce, defaultPrecisions);
|
const auto dequantization = NetworkHelper::getDequantization(reduce, defaultPrecisions);
|
||||||
const std::vector<float> scales = ov::as_type_ptr<opset1::Constant>(dequantization.multiplyConstant)->cast_vector<float>();
|
const std::vector<float> scales = ov::as_type_ptr<ov::opset1::Constant>(dequantization.multiplyConstant)->cast_vector<float>();
|
||||||
if (std::any_of(scales.begin(), scales.end(), [](const float value) { return value < 0.0; })) {
|
if (std::any_of(scales.begin(), scales.end(), [](const float value) { return value < 0.0; })) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
ReduceMeanTransformation::ReduceMeanTransformation(const Params& params) : ReduceBaseTransformation(params) {
|
ReduceMeanTransformation::ReduceMeanTransformation(const Params& params) : ReduceBaseTransformation(params) {
|
||||||
MATCHER_SCOPE(ReduceMeanTransformation);
|
MATCHER_SCOPE(ReduceMeanTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::ReduceMean>({ pattern::wrap_type<opset1::Multiply>(), pattern::wrap_type<opset1::Constant>() });
|
auto matcher = pattern::wrap_type<ov::opset1::ReduceMean>({ pattern::wrap_type<ov::opset1::Multiply>(), pattern::wrap_type<ov::opset1::Constant>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -31,7 +31,7 @@ ReduceMeanTransformation::ReduceMeanTransformation(const Params& params) : Reduc
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool ReduceMeanTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const {
|
bool ReduceMeanTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const {
|
||||||
return ov::is_type<opset1::ReduceMean>(reduce) ? ReduceBaseTransformation::canBeTransformed(context, reduce) : false;
|
return ov::is_type<ov::opset1::ReduceMean>(reduce) ? ReduceBaseTransformation::canBeTransformed(context, reduce) : false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ReduceMeanTransformation::isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept {
|
bool ReduceMeanTransformation::isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept {
|
||||||
|
@ -16,7 +16,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
ReduceMinTransformation::ReduceMinTransformation(const Params& params) : ReduceBaseTransformation(params) {
|
ReduceMinTransformation::ReduceMinTransformation(const Params& params) : ReduceBaseTransformation(params) {
|
||||||
MATCHER_SCOPE(ReduceMinTransformation);
|
MATCHER_SCOPE(ReduceMinTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::ReduceMin>({ pattern::wrap_type<opset1::Multiply>(), pattern::wrap_type<opset1::Constant>() });
|
auto matcher = pattern::wrap_type<ov::opset1::ReduceMin>({ pattern::wrap_type<ov::opset1::Multiply>(), pattern::wrap_type<ov::opset1::Constant>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -31,7 +31,7 @@ ReduceMinTransformation::ReduceMinTransformation(const Params& params) : ReduceB
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool ReduceMinTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const {
|
bool ReduceMinTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const {
|
||||||
if (!ov::is_type<opset1::ReduceMin>(reduce)) {
|
if (!ov::is_type<ov::opset1::ReduceMin>(reduce)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,7 +40,7 @@ bool ReduceMinTransformation::canBeTransformed(const TransformationContext& cont
|
|||||||
}
|
}
|
||||||
|
|
||||||
const auto dequantization = NetworkHelper::getDequantization(reduce, defaultPrecisions);
|
const auto dequantization = NetworkHelper::getDequantization(reduce, defaultPrecisions);
|
||||||
const std::vector<float> scales = ov::as_type_ptr<opset1::Constant>(dequantization.multiplyConstant)->cast_vector<float>();
|
const std::vector<float> scales = ov::as_type_ptr<ov::opset1::Constant>(dequantization.multiplyConstant)->cast_vector<float>();
|
||||||
if (std::any_of(scales.begin(), scales.end(), [](const float value) { return value < 0.0; })) {
|
if (std::any_of(scales.begin(), scales.end(), [](const float value) { return value < 0.0; })) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
ReduceSumTransformation::ReduceSumTransformation(const Params& params) : ReduceBaseTransformation(params) {
|
ReduceSumTransformation::ReduceSumTransformation(const Params& params) : ReduceBaseTransformation(params) {
|
||||||
MATCHER_SCOPE(ReduceSumTransformation);
|
MATCHER_SCOPE(ReduceSumTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::ReduceSum>({ pattern::wrap_type<opset1::Multiply>(), pattern::wrap_type<opset1::Constant>() });
|
auto matcher = pattern::wrap_type<ov::opset1::ReduceSum>({ pattern::wrap_type<ov::opset1::Multiply>(), pattern::wrap_type<ov::opset1::Constant>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -31,7 +31,7 @@ ReduceSumTransformation::ReduceSumTransformation(const Params& params) : ReduceB
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool ReduceSumTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const {
|
bool ReduceSumTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const {
|
||||||
const auto reduceSum = ov::as_type_ptr<opset1::ReduceSum>(reduce);
|
const auto reduceSum = ov::as_type_ptr<ov::opset1::ReduceSum>(reduce);
|
||||||
if (!reduceSum || !ReduceBaseTransformation::canBeTransformed(context, reduceSum)) {
|
if (!reduceSum || !ReduceBaseTransformation::canBeTransformed(context, reduceSum)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -57,7 +57,7 @@ void ReduceSumTransformation::changeDequantizationValues(
|
|||||||
ReduceBaseTransformation::changeDequantizationValues(reduce, dequantization);
|
ReduceBaseTransformation::changeDequantizationValues(reduce, dequantization);
|
||||||
|
|
||||||
if (dequantization.subtract) {
|
if (dequantization.subtract) {
|
||||||
const auto reduceSum = ov::as_type_ptr<opset1::ReduceSum>(reduce);
|
const auto reduceSum = ov::as_type_ptr<ov::opset1::ReduceSum>(reduce);
|
||||||
const auto reductionAxes = reduceSum->get_reduction_axes();
|
const auto reductionAxes = reduceSum->get_reduction_axes();
|
||||||
const auto inputShape = reduceSum->get_input_partial_shape(0);
|
const auto inputShape = reduceSum->get_input_partial_shape(0);
|
||||||
|
|
||||||
@ -68,11 +68,11 @@ void ReduceSumTransformation::changeDequantizationValues(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// (a1 - s) + (a2 - s) + ... + (an - s) = (a1 + a2 + ... + an) - n * s
|
// (a1 - s) + (a2 - s) + ... + (an - s) = (a1 + a2 + ... + an) - n * s
|
||||||
const auto reductionSizeConstant = opset1::Constant::create(deqPrecision, Shape{}, { static_cast<float>(reductionSize) });
|
const auto reductionSizeConstant = ov::opset1::Constant::create(deqPrecision, Shape{}, { static_cast<float>(reductionSize) });
|
||||||
const auto result = fold<opset1::Multiply>(dequantization.subtractConstant, reductionSizeConstant);
|
const auto result = fold<ov::opset1::Multiply>(dequantization.subtractConstant, reductionSizeConstant);
|
||||||
|
|
||||||
replace_node(dequantization.subtractConstant, result);
|
replace_node(dequantization.subtractConstant, result);
|
||||||
dequantization.subtractConstant = ov::as_type_ptr<opset1::Constant>(result);
|
dequantization.subtractConstant = ov::as_type_ptr<ov::opset1::Constant>(result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
ReluTransformation::ReluTransformation(const Params& params) : LayerTransformation(params) {
|
ReluTransformation::ReluTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(ReluTransformation);
|
MATCHER_SCOPE(ReluTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::Relu>({ pattern::wrap_type<opset1::Multiply>() });
|
auto matcher = pattern::wrap_type<ov::opset1::Relu>({ pattern::wrap_type<ov::opset1::Multiply>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
|
@ -24,15 +24,15 @@ namespace low_precision {
|
|||||||
|
|
||||||
ReshapeTransformation::ReshapeTransformation(const Params& params) : LayerTransformation(params) {
|
ReshapeTransformation::ReshapeTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(ReshapeTransformation);
|
MATCHER_SCOPE(ReshapeTransformation);
|
||||||
auto input = pattern::any_input();
|
auto input = ov::pass::pattern::any_input();
|
||||||
auto mul_const_m = pattern::wrap_type<opset1::Constant>();
|
auto mul_const_m = pattern::wrap_type<ov::opset1::Constant>();
|
||||||
auto mul_m = pattern::wrap_type<opset1::Multiply>({ input, mul_const_m });
|
auto mul_m = pattern::wrap_type<ov::opset1::Multiply>({ input, mul_const_m });
|
||||||
auto reshape_pattern_const = pattern::wrap_type<opset1::Constant>();
|
auto reshape_pattern_const = pattern::wrap_type<ov::opset1::Constant>();
|
||||||
auto reshape_pattern_nonconst = pattern::any_input();
|
auto reshape_pattern_nonconst = ov::pass::pattern::any_input();
|
||||||
auto reshape_pattern = std::make_shared<pattern::op::Or>(OutputVector{ reshape_pattern_const, reshape_pattern_nonconst });
|
auto reshape_pattern = std::make_shared<pattern::op::Or>(OutputVector{ reshape_pattern_const, reshape_pattern_nonconst });
|
||||||
auto matcher = pattern::wrap_type<opset1::Reshape>({ mul_m, reshape_pattern });
|
auto matcher = pattern::wrap_type<ov::opset1::Reshape>({ mul_m, reshape_pattern });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [=](pattern::Matcher& m) {
|
ov::graph_rewrite_callback callback = [=](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
if (transformation_callback(op)) {
|
if (transformation_callback(op)) {
|
||||||
return false;
|
return false;
|
||||||
@ -41,7 +41,7 @@ ReshapeTransformation::ReshapeTransformation(const Params& params) : LayerTransf
|
|||||||
// we can propagate only per-tensor dq through reshape with non-const reshape_pattern
|
// we can propagate only per-tensor dq through reshape with non-const reshape_pattern
|
||||||
const auto& pattern_map = m.get_pattern_value_map();
|
const auto& pattern_map = m.get_pattern_value_map();
|
||||||
if (pattern_map.count(reshape_pattern_nonconst)) {
|
if (pattern_map.count(reshape_pattern_nonconst)) {
|
||||||
const auto mul_const = as_type_ptr<opset1::Constant>(pattern_map.at(mul_const_m).get_node_shared_ptr());
|
const auto mul_const = as_type_ptr<ov::opset1::Constant>(pattern_map.at(mul_const_m).get_node_shared_ptr());
|
||||||
if (!mul_const || ngraph::shape_size(mul_const->get_shape()) != 1) {
|
if (!mul_const || ngraph::shape_size(mul_const->get_shape()) != 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -56,14 +56,14 @@ ReshapeTransformation::ReshapeTransformation(const Params& params) : LayerTransf
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
void reshapeDequantizationConstant(const std::shared_ptr<opset1::Reshape>& reshape, const std::vector<ngraph::element::Type>& defaultPrecisions) {
|
void reshapeDequantizationConstant(const std::shared_ptr<ov::opset1::Reshape>& reshape, const std::vector<ngraph::element::Type>& defaultPrecisions) {
|
||||||
// Reshape dequantization operation Constant.
|
// Reshape dequantization operation Constant.
|
||||||
// 1. Calculate result dequantization Constant shape for broadcast based on original dequantization Constant shape and Reshape output.
|
// 1. Calculate result dequantization Constant shape for broadcast based on original dequantization Constant shape and Reshape output.
|
||||||
// For example: dequantization shape {1, 3, 1, 1}, output Reshape shape {1, 12, 3, 3}, result for broadcast: {1, 3, 4, 1},
|
// For example: dequantization shape {1, 3, 1, 1}, output Reshape shape {1, 12, 3, 3}, result for broadcast: {1, 3, 4, 1},
|
||||||
// where '4' calculated for temporary broadcast before reshape.
|
// where '4' calculated for temporary broadcast before reshape.
|
||||||
// 2. Broadcast dequantization Constant, if channels are changed
|
// 2. Broadcast dequantization Constant, if channels are changed
|
||||||
// 3. Reshape and replace
|
// 3. Reshape and replace
|
||||||
auto replaceConstant = [](const std::shared_ptr<opset1::Reshape>& reshape, const std::shared_ptr<opset1::Constant>& originalConstant) {
|
auto replaceConstant = [](const std::shared_ptr<ov::opset1::Reshape>& reshape, const std::shared_ptr<ov::opset1::Constant>& originalConstant) {
|
||||||
// reshape for element-wise constant is not required
|
// reshape for element-wise constant is not required
|
||||||
auto constantShape = originalConstant->get_shape();
|
auto constantShape = originalConstant->get_shape();
|
||||||
if (NetworkHelper::isScalarLike(originalConstant)) {
|
if (NetworkHelper::isScalarLike(originalConstant)) {
|
||||||
@ -94,7 +94,7 @@ void reshapeDequantizationConstant(const std::shared_ptr<opset1::Reshape>& resha
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto getBCastedConst = [](const std::shared_ptr<opset1::Constant>& constant, size_t dimensionsToBroadcast) -> std::shared_ptr<Node> {
|
auto getBCastedConst = [](const std::shared_ptr<ov::opset1::Constant>& constant, size_t dimensionsToBroadcast) -> std::shared_ptr<Node> {
|
||||||
if (dimensionsToBroadcast == 1ul) {
|
if (dimensionsToBroadcast == 1ul) {
|
||||||
return constant;
|
return constant;
|
||||||
}
|
}
|
||||||
@ -107,24 +107,24 @@ void reshapeDequantizationConstant(const std::shared_ptr<opset1::Reshape>& resha
|
|||||||
newOperationConstantBroadcastedShape[2] = dimensionsToBroadcast;
|
newOperationConstantBroadcastedShape[2] = dimensionsToBroadcast;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto targetShapeConstant = opset1::Constant::create(
|
const auto targetShapeConstant = ov::opset1::Constant::create(
|
||||||
element::i32,
|
element::i32,
|
||||||
Shape{ newOperationConstantBroadcastedShape.size() },
|
Shape{ newOperationConstantBroadcastedShape.size() },
|
||||||
newOperationConstantBroadcastedShape);
|
newOperationConstantBroadcastedShape);
|
||||||
|
|
||||||
return fold<opset1::Broadcast>(constant, targetShapeConstant);
|
return fold<ov::opset1::Broadcast>(constant, targetShapeConstant);
|
||||||
};
|
};
|
||||||
|
|
||||||
const std::shared_ptr<Node> broadcastedConstant = getBCastedConst(originalConstant, dimensionsToBroadcast);
|
const std::shared_ptr<Node> broadcastedConstant = getBCastedConst(originalConstant, dimensionsToBroadcast);
|
||||||
|
|
||||||
std::vector<int> newReshapeConstValues(reshapeOutputRank.get_length(), 1ul);
|
std::vector<int> newReshapeConstValues(reshapeOutputRank.get_length(), 1ul);
|
||||||
newReshapeConstValues[1] = static_cast<int>(reshapeOutputPShape[1].get_length());
|
newReshapeConstValues[1] = static_cast<int>(reshapeOutputPShape[1].get_length());
|
||||||
const std::shared_ptr<opset1::Constant> newReshapeConstant = std::make_shared<opset1::Constant>(
|
const std::shared_ptr<ov::opset1::Constant> newReshapeConstant = std::make_shared<ov::opset1::Constant>(
|
||||||
element::i32,
|
element::i32,
|
||||||
Shape({ newReshapeConstValues.size() }),
|
Shape({ newReshapeConstValues.size() }),
|
||||||
newReshapeConstValues);
|
newReshapeConstValues);
|
||||||
|
|
||||||
const std::shared_ptr<Node> resultConstant = fold<opset1::Reshape>(
|
const std::shared_ptr<Node> resultConstant = fold<ov::opset1::Reshape>(
|
||||||
broadcastedConstant,
|
broadcastedConstant,
|
||||||
newReshapeConstant,
|
newReshapeConstant,
|
||||||
reshape->get_special_zero());
|
reshape->get_special_zero());
|
||||||
@ -146,7 +146,7 @@ void reshapeDequantizationConstant(const std::shared_ptr<opset1::Reshape>& resha
|
|||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
bool ReshapeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
bool ReshapeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
||||||
std::shared_ptr<opset1::Reshape> reshape = ov::as_type_ptr<opset1::Reshape>(m.get_match_root());
|
std::shared_ptr<ov::opset1::Reshape> reshape = ov::as_type_ptr<ov::opset1::Reshape>(m.get_match_root());
|
||||||
if (NetworkHelper::isConstantPath(reshape)) {
|
if (NetworkHelper::isConstantPath(reshape)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -155,7 +155,7 @@ bool ReshapeTransformation::transform(TransformationContext& context, ngraph::pa
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
reshape = ov::as_type_ptr<opset1::Reshape>(NetworkHelper::separateInStandaloneBranch(reshape, defaultPrecisions));
|
reshape = ov::as_type_ptr<ov::opset1::Reshape>(NetworkHelper::separateInStandaloneBranch(reshape, defaultPrecisions));
|
||||||
reshapeDequantizationConstant(reshape, defaultPrecisions);
|
reshapeDequantizationConstant(reshape, defaultPrecisions);
|
||||||
moveDequantizationAfter(context, reshape, NetworkHelper::getDequantization(reshape, defaultPrecisions, 0), false);
|
moveDequantizationAfter(context, reshape, NetworkHelper::getDequantization(reshape, defaultPrecisions, 0), false);
|
||||||
return true;
|
return true;
|
||||||
@ -200,7 +200,7 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex
|
|||||||
const auto inputs = op->get_output_target_inputs(0);
|
const auto inputs = op->get_output_target_inputs(0);
|
||||||
if (inputs.size() == 1ul) {
|
if (inputs.size() == 1ul) {
|
||||||
const auto consumer = inputs.begin()->get_node();
|
const auto consumer = inputs.begin()->get_node();
|
||||||
ignorePerTensorQuantizationCheck = ngraph::as_type<ngraph::opset1::MatMul>(consumer) != nullptr;
|
ignorePerTensorQuantizationCheck = ngraph::as_type<ov::opset1::MatMul>(consumer) != nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
|
|
||||||
#include "low_precision/network_helper.hpp"
|
#include "low_precision/network_helper.hpp"
|
||||||
#include "itt.hpp"
|
#include "itt.hpp"
|
||||||
|
#include "ngraph/validation_util.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
namespace pass {
|
namespace pass {
|
||||||
@ -16,9 +17,9 @@ namespace low_precision {
|
|||||||
|
|
||||||
SplitTransformation::SplitTransformation(const Params& params) : LayerTransformation(params) {
|
SplitTransformation::SplitTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(SplitTransformation);
|
MATCHER_SCOPE(SplitTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::Split>({ pattern::wrap_type<opset1::Multiply>(), pattern::wrap_type<opset1::Constant>() });
|
auto matcher = pattern::wrap_type<ov::opset1::Split>({ pattern::wrap_type<ov::opset1::Multiply>(), pattern::wrap_type<ov::opset1::Constant>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ov::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
if (transformation_callback(op)) {
|
if (transformation_callback(op)) {
|
||||||
return false;
|
return false;
|
||||||
@ -43,11 +44,11 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt
|
|||||||
|
|
||||||
const auto newSplit = split->clone_with_new_inputs(inputs);
|
const auto newSplit = split->clone_with_new_inputs(inputs);
|
||||||
newSplit->set_friendly_name(split->get_friendly_name());
|
newSplit->set_friendly_name(split->get_friendly_name());
|
||||||
ngraph::copy_runtime_info(split, newSplit);
|
ov::copy_runtime_info(split, newSplit);
|
||||||
|
|
||||||
const int64_t axis = ov::as_type_ptr<opset1::Constant>(split->get_input_node_shared_ptr(1))->cast_vector<int64_t>()[0];
|
const int64_t axis = ov::as_type_ptr<ov::opset1::Constant>(split->get_input_node_shared_ptr(1))->cast_vector<int64_t>()[0];
|
||||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||||
const size_t normalizedAxis = normalize_axis(split->get_friendly_name(), axis, split->get_input_partial_shape(0).rank());
|
const size_t normalizedAxis = ngraph::normalize_axis(split->get_friendly_name(), axis, split->get_input_partial_shape(0).rank());
|
||||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||||
const size_t outputSize = newSplit->get_output_size();
|
const size_t outputSize = newSplit->get_output_size();
|
||||||
|
|
||||||
@ -97,7 +98,7 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt
|
|||||||
parent = subtract;
|
parent = subtract;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto multiply = std::make_shared<ov::op::TypeRelaxed<opset1::Multiply>>(parent, splitedMul[i]);
|
const auto multiply = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Multiply>>(parent, splitedMul[i]);
|
||||||
NetworkHelper::setOutDataPrecisionForTypeRelaxed(multiply, dequantization.multiply->get_output_element_type(0));
|
NetworkHelper::setOutDataPrecisionForTypeRelaxed(multiply, dequantization.multiply->get_output_element_type(0));
|
||||||
copy_runtime_info({ newSplit, multiply }, multiply);
|
copy_runtime_info({ newSplit, multiply }, multiply);
|
||||||
|
|
||||||
@ -114,7 +115,7 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt
|
|||||||
// We do it to avoid dequantization propagation to the shapeOf subgraphs
|
// We do it to avoid dequantization propagation to the shapeOf subgraphs
|
||||||
for (size_t i = 0; i < replacement.size(); ++i) {
|
for (size_t i = 0; i < replacement.size(); ++i) {
|
||||||
for (const auto& input : replacement[i].get_target_inputs()) {
|
for (const auto& input : replacement[i].get_target_inputs()) {
|
||||||
if (const auto shapeOf = as_type_ptr<opset1::ShapeOf>(input.get_node()->shared_from_this())) {
|
if (const auto shapeOf = as_type_ptr<ov::opset1::ShapeOf>(input.get_node()->shared_from_this())) {
|
||||||
const auto newShapeOf = shapeOf->clone_with_new_inputs({ newSplit->output(i) });
|
const auto newShapeOf = shapeOf->clone_with_new_inputs({ newSplit->output(i) });
|
||||||
replace_node_update_name(shapeOf, newShapeOf);
|
replace_node_update_name(shapeOf, newShapeOf);
|
||||||
}
|
}
|
||||||
@ -139,7 +140,7 @@ void SplitTransformation::updateOutputs(
|
|||||||
const auto lastNode = lastNodes[i];
|
const auto lastNode = lastNodes[i];
|
||||||
for (auto output : lastNodes[i]->outputs()) {
|
for (auto output : lastNodes[i]->outputs()) {
|
||||||
for (auto input : output.get_target_inputs()) {
|
for (auto input : output.get_target_inputs()) {
|
||||||
if (ov::is_type<ngraph::opset1::Result>(input.get_node())) {
|
if (ov::is_type<ov::opset1::Result>(input.get_node())) {
|
||||||
originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix);
|
originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix);
|
||||||
lastNode->set_friendly_name(originalName + "." + std::to_string(i));
|
lastNode->set_friendly_name(originalName + "." + std::to_string(i));
|
||||||
break;
|
break;
|
||||||
|
@ -17,10 +17,10 @@ namespace low_precision {
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
std::shared_ptr<opset1::Constant> stridedSliceDeqConstant(
|
std::shared_ptr<ov::opset1::Constant> stridedSliceDeqConstant(
|
||||||
const std::shared_ptr<ngraph::Node> strSlice,
|
const std::shared_ptr<ngraph::Node> strSlice,
|
||||||
const std::shared_ptr<ngraph::Node> dequantizaitonConstant) {
|
const std::shared_ptr<ngraph::Node> dequantizaitonConstant) {
|
||||||
auto constant = ov::as_type_ptr<ngraph::opset1::Constant>(dequantizaitonConstant);
|
auto constant = ov::as_type_ptr<ov::opset1::Constant>(dequantizaitonConstant);
|
||||||
auto constantShape = constant->get_shape();
|
auto constantShape = constant->get_shape();
|
||||||
if (shape_size(constantShape) == 1ul) {
|
if (shape_size(constantShape) == 1ul) {
|
||||||
return NetworkHelper::toScalar(constant);
|
return NetworkHelper::toScalar(constant);
|
||||||
@ -43,13 +43,13 @@ std::shared_ptr<opset1::Constant> stridedSliceDeqConstant(
|
|||||||
}
|
}
|
||||||
constantShape = newConstantShape;
|
constantShape = newConstantShape;
|
||||||
|
|
||||||
const auto newConstant = fold<ngraph::opset1::Broadcast>(
|
const auto newConstant = fold<ov::opset1::Broadcast>(
|
||||||
constant,
|
constant,
|
||||||
ngraph::opset1::Constant::create(ngraph::element::i32, { newConstantShape.size() }, newConstantShape));
|
ov::opset1::Constant::create(ngraph::element::i32, { newConstantShape.size() }, newConstantShape));
|
||||||
constant = ov::as_type_ptr<ngraph::opset1::Constant>(newConstant);
|
constant = ov::as_type_ptr<ov::opset1::Constant>(newConstant);
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto stridedSlice = ov::as_type_ptr<ngraph::opset1::StridedSlice>(strSlice);
|
const auto stridedSlice = ov::as_type_ptr<ov::opset1::StridedSlice>(strSlice);
|
||||||
|
|
||||||
auto beginMask = stridedSlice->get_begin_mask();
|
auto beginMask = stridedSlice->get_begin_mask();
|
||||||
auto endMask = stridedSlice->get_end_mask();
|
auto endMask = stridedSlice->get_end_mask();
|
||||||
@ -64,7 +64,7 @@ std::shared_ptr<opset1::Constant> stridedSliceDeqConstant(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto result = fold<ngraph::opset1::StridedSlice>(
|
const auto result = fold<ov::opset1::StridedSlice>(
|
||||||
constant,
|
constant,
|
||||||
stridedSlice->input_value(1),
|
stridedSlice->input_value(1),
|
||||||
stridedSlice->input_value(2),
|
stridedSlice->input_value(2),
|
||||||
@ -75,14 +75,14 @@ std::shared_ptr<opset1::Constant> stridedSliceDeqConstant(
|
|||||||
stridedSlice->get_shrink_axis_mask(),
|
stridedSlice->get_shrink_axis_mask(),
|
||||||
stridedSlice->get_ellipsis_mask());
|
stridedSlice->get_ellipsis_mask());
|
||||||
|
|
||||||
return ov::as_type_ptr<opset1::Constant>(NetworkHelper::toScalarIfPossible(result));
|
return ov::as_type_ptr<ov::opset1::Constant>(NetworkHelper::toScalarIfPossible(result));
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
StridedSliceTransformation::StridedSliceTransformation(const Params& params) : LayerTransformation(params) {
|
StridedSliceTransformation::StridedSliceTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(StridedSliceTransformation);
|
MATCHER_SCOPE(StridedSliceTransformation);
|
||||||
auto matcher = ngraph::pattern::wrap_type<opset1::StridedSlice>();
|
auto matcher = ngraph::pattern::wrap_type<ov::opset1::StridedSlice>();
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -119,7 +119,7 @@ bool StridedSliceTransformation::transform(TransformationContext& context, ngrap
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool StridedSliceTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const {
|
bool StridedSliceTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> operation) const {
|
||||||
if (!ov::is_type<ngraph::opset1::StridedSlice>(operation)) {
|
if (!ov::is_type<ov::opset1::StridedSlice>(operation)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,10 +24,10 @@ namespace low_precision {
|
|||||||
|
|
||||||
SubtractTransformation::SubtractTransformation(const Params& params) : LayerTransformation(params) {
|
SubtractTransformation::SubtractTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(SubtractTransformation);
|
MATCHER_SCOPE(SubtractTransformation);
|
||||||
auto convert = pattern::wrap_type<opset1::Convert>();
|
auto convert = pattern::wrap_type<ov::opset1::Convert>();
|
||||||
auto multiply = pattern::wrap_type<opset1::Multiply>();
|
auto multiply = pattern::wrap_type<ov::opset1::Multiply>();
|
||||||
auto subParent = std::make_shared<pattern::op::Or>(OutputVector{ convert, multiply });
|
auto subParent = std::make_shared<pattern::op::Or>(OutputVector{ convert, multiply });
|
||||||
auto subtract = pattern::wrap_type<opset1::Subtract>({ subParent, pattern::wrap_type<opset1::Constant>() });
|
auto subtract = pattern::wrap_type<ov::opset1::Subtract>({ subParent, pattern::wrap_type<ov::opset1::Constant>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -42,7 +42,7 @@ SubtractTransformation::SubtractTransformation(const Params& params) : LayerTran
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool SubtractTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
bool SubtractTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
||||||
std::shared_ptr<opset1::Subtract> subtract = ov::as_type_ptr<opset1::Subtract>(m.get_match_root());
|
std::shared_ptr<ov::opset1::Subtract> subtract = ov::as_type_ptr<ov::opset1::Subtract>(m.get_match_root());
|
||||||
if (!canBeTransformed(context, subtract)) {
|
if (!canBeTransformed(context, subtract)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -54,9 +54,9 @@ bool SubtractTransformation::transform(TransformationContext& context, ngraph::p
|
|||||||
// before: Y = X * SC - SH, after: Y = (X - SH') * SC
|
// before: Y = X * SC - SH, after: Y = (X - SH') * SC
|
||||||
// X * SC - SH = X * SC - SH' * SC
|
// X * SC - SH = X * SC - SH' * SC
|
||||||
// SH' = SH / SC
|
// SH' = SH / SC
|
||||||
std::shared_ptr<opset1::Subtract> newSubtract = ov::as_type_ptr<opset1::Subtract>(subtract->clone_with_new_inputs({
|
std::shared_ptr<ov::opset1::Subtract> newSubtract = ov::as_type_ptr<ov::opset1::Subtract>(subtract->clone_with_new_inputs({
|
||||||
dequantization.multiply->input_value(0),
|
dequantization.multiply->input_value(0),
|
||||||
ngraph::pass::low_precision::fold<ngraph::opset1::Divide>(
|
ngraph::pass::low_precision::fold<ov::opset1::Divide>(
|
||||||
subtract->input_value(1),
|
subtract->input_value(1),
|
||||||
dequantization.multiply->input_value(1))
|
dequantization.multiply->input_value(1))
|
||||||
}));
|
}));
|
||||||
@ -71,9 +71,9 @@ bool SubtractTransformation::transform(TransformationContext& context, ngraph::p
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (dequantization.subtract != nullptr) {
|
if (dequantization.subtract != nullptr) {
|
||||||
std::shared_ptr<opset1::Subtract> newSubtract = ov::as_type_ptr<opset1::Subtract>(subtract->clone_with_new_inputs({
|
std::shared_ptr<ov::opset1::Subtract> newSubtract = ov::as_type_ptr<ov::opset1::Subtract>(subtract->clone_with_new_inputs({
|
||||||
dequantization.subtract->input_value(0),
|
dequantization.subtract->input_value(0),
|
||||||
fold<ngraph::opset1::Add>(subtract->input_value(1), dequantization.subtractConstant)
|
fold<ov::opset1::Add>(subtract->input_value(1), dequantization.subtractConstant)
|
||||||
}));
|
}));
|
||||||
|
|
||||||
replace_node(subtract, newSubtract);
|
replace_node(subtract, newSubtract);
|
||||||
@ -85,7 +85,7 @@ bool SubtractTransformation::transform(TransformationContext& context, ngraph::p
|
|||||||
// std::shared_ptr<Node> newSubtract = NetworkHelper::optimizeElementwise(subtract);
|
// std::shared_ptr<Node> newSubtract = NetworkHelper::optimizeElementwise(subtract);
|
||||||
subtract->set_output_type(0, originalPrecision, subtract->get_output_partial_shape(0));
|
subtract->set_output_type(0, originalPrecision, subtract->get_output_partial_shape(0));
|
||||||
|
|
||||||
replace_node(subtract, std::make_shared<ov::op::TypeRelaxed<opset1::Subtract>>(
|
replace_node(subtract, std::make_shared<ov::op::TypeRelaxed<ov::opset1::Subtract>>(
|
||||||
subtract->input_value(0),
|
subtract->input_value(0),
|
||||||
subtract->input_value(1)));
|
subtract->input_value(1)));
|
||||||
}
|
}
|
||||||
|
@ -19,7 +19,7 @@ namespace low_precision {
|
|||||||
|
|
||||||
TransposeTransformation::TransposeTransformation(const Params& params) : LayerTransformation(params) {
|
TransposeTransformation::TransposeTransformation(const Params& params) : LayerTransformation(params) {
|
||||||
MATCHER_SCOPE(TransposeTransformation);
|
MATCHER_SCOPE(TransposeTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::Transpose>({ pattern::wrap_type<opset1::Multiply>(), pattern::wrap_type<opset1::Constant>() });
|
auto matcher = pattern::wrap_type<ov::opset1::Transpose>({ pattern::wrap_type<ov::opset1::Multiply>(), pattern::wrap_type<ov::opset1::Constant>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
@ -45,7 +45,7 @@ void transposeDequantizationConstant(std::shared_ptr<Node>& transpose, const std
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto transposeDeqConstant = [](
|
auto transposeDeqConstant = [](
|
||||||
const std::shared_ptr<opset1::Constant>& dequantizationConstant,
|
const std::shared_ptr<ov::opset1::Constant>& dequantizationConstant,
|
||||||
const PartialShape& transposeOutputPShape,
|
const PartialShape& transposeOutputPShape,
|
||||||
const std::shared_ptr<Node>& transposeConstant) -> std::shared_ptr<Node> {
|
const std::shared_ptr<Node>& transposeConstant) -> std::shared_ptr<Node> {
|
||||||
const auto constantShape = dequantizationConstant->get_shape();
|
const auto constantShape = dequantizationConstant->get_shape();
|
||||||
@ -56,11 +56,11 @@ void transposeDequantizationConstant(std::shared_ptr<Node>& transpose, const std
|
|||||||
assert(transposeOutputPShape.rank().is_static());
|
assert(transposeOutputPShape.rank().is_static());
|
||||||
const size_t transposeOutRank = transposeOutputPShape.rank().get_length();
|
const size_t transposeOutRank = transposeOutputPShape.rank().get_length();
|
||||||
if (constantShape.size() != transposeOutRank) {
|
if (constantShape.size() != transposeOutRank) {
|
||||||
const auto unsqueezeConst = opset1::Constant::create(element::i32, Shape{ 1 }, std::vector<size_t>{ 0 });
|
const auto unsqueezeConst = ov::opset1::Constant::create(element::i32, Shape{ 1 }, std::vector<size_t>{ 0 });
|
||||||
const auto deqConstantWithBatch = fold<opset1::Unsqueeze>(dequantizationConstant, unsqueezeConst);
|
const auto deqConstantWithBatch = fold<ov::opset1::Unsqueeze>(dequantizationConstant, unsqueezeConst);
|
||||||
return fold<opset1::Transpose>(deqConstantWithBatch, transposeConstant);
|
return fold<ov::opset1::Transpose>(deqConstantWithBatch, transposeConstant);
|
||||||
} else {
|
} else {
|
||||||
return fold<opset1::Transpose>(dequantizationConstant, transposeConstant);
|
return fold<ov::opset1::Transpose>(dequantizationConstant, transposeConstant);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -104,7 +104,7 @@ bool TransposeTransformation::canBeTransformed(const TransformationContext& cont
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::shared_ptr<opset1::Constant> constant = ov::as_type_ptr<opset1::Constant>(op->get_input_node_shared_ptr(1));
|
const std::shared_ptr<ov::opset1::Constant> constant = ov::as_type_ptr<ov::opset1::Constant>(op->get_input_node_shared_ptr(1));
|
||||||
if (constant == nullptr) {
|
if (constant == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -133,7 +133,7 @@ bool TransposeTransformation::canBeTransformed(const TransformationContext& cont
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto checkShape = [](const std::shared_ptr<opset1::Constant>& dequantizationConstant, const PartialShape& transposeOutputShape) -> bool {
|
auto checkShape = [](const std::shared_ptr<ov::opset1::Constant>& dequantizationConstant, const PartialShape& transposeOutputShape) -> bool {
|
||||||
const auto dequantizationShape = dequantizationConstant->get_shape();
|
const auto dequantizationShape = dequantizationConstant->get_shape();
|
||||||
const auto rank = transposeOutputShape.rank();
|
const auto rank = transposeOutputShape.rank();
|
||||||
if (rank.is_dynamic()) {
|
if (rank.is_dynamic()) {
|
||||||
|
@ -16,12 +16,12 @@ namespace low_precision {
|
|||||||
|
|
||||||
VariadicSplitTransformation::VariadicSplitTransformation(const Params& params) : SplitTransformation(params) {
|
VariadicSplitTransformation::VariadicSplitTransformation(const Params& params) : SplitTransformation(params) {
|
||||||
MATCHER_SCOPE(VariadicSplitTransformation);
|
MATCHER_SCOPE(VariadicSplitTransformation);
|
||||||
auto matcher = pattern::wrap_type<opset1::VariadicSplit>({
|
auto matcher = pattern::wrap_type<ov::opset1::VariadicSplit>({
|
||||||
pattern::wrap_type<opset1::Multiply>(),
|
pattern::wrap_type<ov::opset1::Multiply>(),
|
||||||
pattern::wrap_type<opset1::Constant>(),
|
pattern::wrap_type<ov::opset1::Constant>(),
|
||||||
pattern::wrap_type<opset1::Constant>() });
|
pattern::wrap_type<ov::opset1::Constant>() });
|
||||||
|
|
||||||
ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
ov::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
|
||||||
auto op = m.get_match_root();
|
auto op = m.get_match_root();
|
||||||
if (transformation_callback(op)) {
|
if (transformation_callback(op)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -16,19 +16,19 @@ namespace low_precision {
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
std::vector<size_t> getWeightsDequantizationIdces(const std::shared_ptr<const Node> weightableLayer) {
|
std::vector<size_t> getWeightsDequantizationIdces(const std::shared_ptr<const Node> weightableLayer) {
|
||||||
if (ov::is_type<opset1::Convolution>(weightableLayer)) {
|
if (ov::is_type<ov::opset1::Convolution>(weightableLayer)) {
|
||||||
return std::vector<size_t>{0};
|
return std::vector<size_t>{0};
|
||||||
} else if (ov::is_type<opset1::ConvolutionBackpropData>(weightableLayer)) {
|
} else if (ov::is_type<ov::opset1::ConvolutionBackpropData>(weightableLayer)) {
|
||||||
return std::vector<size_t>{1};
|
return std::vector<size_t>{1};
|
||||||
} else if (ov::is_type<opset1::GroupConvolution>(weightableLayer)) {
|
} else if (ov::is_type<ov::opset1::GroupConvolution>(weightableLayer)) {
|
||||||
return ov::is_type<opset1::Reshape>(weightableLayer->get_input_node_shared_ptr(1)) ? std::vector<size_t>{0}
|
return ov::is_type<ov::opset1::Reshape>(weightableLayer->get_input_node_shared_ptr(1)) ? std::vector<size_t>{0}
|
||||||
: std::vector<size_t>{0, 1};
|
: std::vector<size_t>{0, 1};
|
||||||
} else {
|
} else {
|
||||||
THROW_IE_LPT_EXCEPTION(*weightableLayer) << "getWeightsDequantizationIdces is called for unexpected layer";
|
THROW_IE_LPT_EXCEPTION(*weightableLayer) << "getWeightsDequantizationIdces is called for unexpected layer";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool checkConstShape(const std::vector<size_t>& idcesToCheck, const std::shared_ptr<opset1::Constant> constant) {
|
bool checkConstShape(const std::vector<size_t>& idcesToCheck, const std::shared_ptr<ov::opset1::Constant> constant) {
|
||||||
const auto& shape = constant->get_shape();
|
const auto& shape = constant->get_shape();
|
||||||
if (shape_size(shape) == 1) {
|
if (shape_size(shape) == 1) {
|
||||||
return true;
|
return true;
|
||||||
@ -62,7 +62,7 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<opset1::Reshape> reshapeFromWeights = ov::as_type_ptr<opset1::Reshape>(layer->get_input_node_shared_ptr(1));
|
std::shared_ptr<ov::opset1::Reshape> reshapeFromWeights = ov::as_type_ptr<ov::opset1::Reshape>(layer->get_input_node_shared_ptr(1));
|
||||||
dequantization = reshapeFromWeights == nullptr ?
|
dequantization = reshapeFromWeights == nullptr ?
|
||||||
NetworkHelper::getDequantization(layer, defaultPrecisions, 1ul) :
|
NetworkHelper::getDequantization(layer, defaultPrecisions, 1ul) :
|
||||||
NetworkHelper::getDequantization(reshapeFromWeights, defaultPrecisions);
|
NetworkHelper::getDequantization(reshapeFromWeights, defaultPrecisions);
|
||||||
@ -162,20 +162,20 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
|
|||||||
|
|
||||||
// TODO Implement similar checks in other weightable operaitons
|
// TODO Implement similar checks in other weightable operaitons
|
||||||
|
|
||||||
const std::shared_ptr<opset1::Reshape> reshapeFromWeights = ov::as_type_ptr<opset1::Reshape>(layer->get_input_node_shared_ptr(1));
|
const std::shared_ptr<ov::opset1::Reshape> reshapeFromWeights = ov::as_type_ptr<ov::opset1::Reshape>(layer->get_input_node_shared_ptr(1));
|
||||||
|
|
||||||
std::shared_ptr<opset1::FakeQuantize> fqFromWeights;
|
std::shared_ptr<ov::opset1::FakeQuantize> fqFromWeights;
|
||||||
if (reshapeFromWeights == nullptr) {
|
if (reshapeFromWeights == nullptr) {
|
||||||
fqFromWeights = ov::as_type_ptr<opset1::FakeQuantize>(layer->get_input_node_shared_ptr(1));
|
fqFromWeights = ov::as_type_ptr<ov::opset1::FakeQuantize>(layer->get_input_node_shared_ptr(1));
|
||||||
if (fqFromWeights == nullptr) {
|
if (fqFromWeights == nullptr) {
|
||||||
const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(layer, defaultPrecisions, 1ul);
|
const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(layer, defaultPrecisions, 1ul);
|
||||||
fqFromWeights = ov::as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
|
fqFromWeights = ov::as_type_ptr<ov::opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
fqFromWeights = ov::as_type_ptr<opset1::FakeQuantize>(reshapeFromWeights->get_input_node_shared_ptr(0));
|
fqFromWeights = ov::as_type_ptr<ov::opset1::FakeQuantize>(reshapeFromWeights->get_input_node_shared_ptr(0));
|
||||||
if (fqFromWeights == nullptr) {
|
if (fqFromWeights == nullptr) {
|
||||||
const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(reshapeFromWeights, defaultPrecisions, 0ul);
|
const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(reshapeFromWeights, defaultPrecisions, 0ul);
|
||||||
fqFromWeights = ov::as_type_ptr<opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
|
fqFromWeights = ov::as_type_ptr<ov::opset1::FakeQuantize>(dequantization.data.get_node_shared_ptr());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -225,7 +225,7 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto weightsData = ov::as_type_ptr<opset1::Constant>(dequantizationOnWeights.data.get_node_shared_ptr());
|
const auto weightsData = ov::as_type_ptr<ov::opset1::Constant>(dequantizationOnWeights.data.get_node_shared_ptr());
|
||||||
if (weightsData == nullptr) {
|
if (weightsData == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -258,18 +258,18 @@ bool WeightableLayerTransformation::isQuantizedStatic(const std::shared_ptr<cons
|
|||||||
FakeQuantizeDequantization dequantizationOnWeights;
|
FakeQuantizeDequantization dequantizationOnWeights;
|
||||||
if (reshapeIsRequired) {
|
if (reshapeIsRequired) {
|
||||||
const auto reshape = layer->get_input_node_shared_ptr(1);
|
const auto reshape = layer->get_input_node_shared_ptr(1);
|
||||||
std::shared_ptr<Node> parent = ov::is_type<opset1::Reshape>(reshape) ?
|
std::shared_ptr<Node> parent = ov::is_type<ov::opset1::Reshape>(reshape) ?
|
||||||
reshape->get_input_node_shared_ptr(0) :
|
reshape->get_input_node_shared_ptr(0) :
|
||||||
reshape;
|
reshape;
|
||||||
|
|
||||||
const auto fq = ov::as_type_ptr<opset1::FakeQuantize>(parent);
|
const auto fq = ov::as_type_ptr<ov::opset1::FakeQuantize>(parent);
|
||||||
if (fq != nullptr) {
|
if (fq != nullptr) {
|
||||||
return NetworkHelper::isQuantizeSupported(fq);
|
return NetworkHelper::isQuantizeSupported(fq);
|
||||||
}
|
}
|
||||||
|
|
||||||
dequantizationOnWeights = NetworkHelper::getDequantization(parent, defaultPrecisions, 0, true);
|
dequantizationOnWeights = NetworkHelper::getDequantization(parent, defaultPrecisions, 0, true);
|
||||||
} else if (ov::is_type<opset1::FakeQuantize>(layer->get_input_node_shared_ptr(1))) {
|
} else if (ov::is_type<ov::opset1::FakeQuantize>(layer->get_input_node_shared_ptr(1))) {
|
||||||
const std::shared_ptr<opset1::FakeQuantize> fq = ov::as_type_ptr<opset1::FakeQuantize>(layer->get_input_node_shared_ptr(1));
|
const std::shared_ptr<ov::opset1::FakeQuantize> fq = ov::as_type_ptr<ov::opset1::FakeQuantize>(layer->get_input_node_shared_ptr(1));
|
||||||
return NetworkHelper::isQuantizeSupported(fq);
|
return NetworkHelper::isQuantizeSupported(fq);
|
||||||
} else {
|
} else {
|
||||||
// TODO: update NetworkHelper API later
|
// TODO: update NetworkHelper API later
|
||||||
@ -289,11 +289,11 @@ bool WeightableLayerTransformation::isQuantizedStatic(const std::shared_ptr<cons
|
|||||||
|
|
||||||
auto deqData = dequantizationOnWeights.data.get_node_shared_ptr();
|
auto deqData = dequantizationOnWeights.data.get_node_shared_ptr();
|
||||||
// Quantize/Dequantize case
|
// Quantize/Dequantize case
|
||||||
if (ov::is_type<ngraph::opset1::Convert>(deqData)) {
|
if (ov::is_type<ov::opset1::Convert>(deqData)) {
|
||||||
deqData = deqData->get_input_node_shared_ptr(0);
|
deqData = deqData->get_input_node_shared_ptr(0);
|
||||||
}
|
}
|
||||||
// TODO: LPT: is it possible to share with canBeTransformed?
|
// TODO: LPT: is it possible to share with canBeTransformed?
|
||||||
if (ov::is_type<opset1::Constant>(deqData)) {
|
if (ov::is_type<ov::opset1::Constant>(deqData)) {
|
||||||
const ngraph::element::Type weightsDataPrecision = dequantizationOnWeights.data.get_element_type();
|
const ngraph::element::Type weightsDataPrecision = dequantizationOnWeights.data.get_element_type();
|
||||||
if (!DataPrecision::isSupported(weightsDataPrecision)) {
|
if (!DataPrecision::isSupported(weightsDataPrecision)) {
|
||||||
return false;
|
return false;
|
||||||
@ -306,9 +306,9 @@ bool WeightableLayerTransformation::isQuantizedStatic(const std::shared_ptr<cons
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
} else if (auto fq = ov::as_type_ptr<opset1::FakeQuantize>(deqData)) {
|
} else if (auto fq = ov::as_type_ptr<ov::opset1::FakeQuantize>(deqData)) {
|
||||||
for (size_t i = 1; i < fq->get_input_size(); ++i) {
|
for (size_t i = 1; i < fq->get_input_size(); ++i) {
|
||||||
if (auto constant = ov::as_type_ptr<ngraph::opset1::Constant>(fq->get_input_node_shared_ptr(i))) {
|
if (auto constant = ov::as_type_ptr<ov::opset1::Constant>(fq->get_input_node_shared_ptr(i))) {
|
||||||
if (!checkConstShape(dqIdces, constant)) {
|
if (!checkConstShape(dqIdces, constant)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -358,7 +358,7 @@ bool WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const st
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ov::as_type_ptr<ngraph::opset1::Constant>(fqOnWeights) == nullptr) {
|
if (ov::as_type_ptr<ov::opset1::Constant>(fqOnWeights) == nullptr) {
|
||||||
THROW_IE_LPT_EXCEPTION(*fqOnWeights) << "FakeQuantize on weights was not folded to constant";
|
THROW_IE_LPT_EXCEPTION(*fqOnWeights) << "FakeQuantize on weights was not folded to constant";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -366,7 +366,7 @@ bool WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const st
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool WeightableLayerTransformation::isGroup(const std::shared_ptr<Node>& layer) {
|
bool WeightableLayerTransformation::isGroup(const std::shared_ptr<Node>& layer) {
|
||||||
if (!ov::is_type<opset1::Convolution>(layer) && !ov::is_type<opset1::GroupConvolution>(layer)) {
|
if (!ov::is_type<ov::opset1::Convolution>(layer) && !ov::is_type<ov::opset1::GroupConvolution>(layer)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -375,7 +375,7 @@ bool WeightableLayerTransformation::isGroup(const std::shared_ptr<Node>& layer)
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool WeightableLayerTransformation::isDepthwise(const std::shared_ptr<Node>& layer) {
|
bool WeightableLayerTransformation::isDepthwise(const std::shared_ptr<Node>& layer) {
|
||||||
if (!ov::as_type_ptr<opset1::Convolution>(layer) && !ov::as_type_ptr<opset1::GroupConvolution>(layer)) {
|
if (!ov::as_type_ptr<ov::opset1::Convolution>(layer) && !ov::as_type_ptr<ov::opset1::GroupConvolution>(layer)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -385,11 +385,11 @@ bool WeightableLayerTransformation::isDepthwise(const std::shared_ptr<Node>& lay
|
|||||||
return (group == inputChannelsCount) && (inputChannelsCount == outputChannelsCount);
|
return (group == inputChannelsCount) && (inputChannelsCount == outputChannelsCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node) {
|
std::shared_ptr<ov::opset1::FakeQuantize> WeightableLayerTransformation::getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node) {
|
||||||
auto fq = ov::as_type_ptr<opset1::FakeQuantize>(node->get_input_node_shared_ptr(1));
|
auto fq = ov::as_type_ptr<ov::opset1::FakeQuantize>(node->get_input_node_shared_ptr(1));
|
||||||
// TODO: temporary workaround
|
// TODO: temporary workaround
|
||||||
if (fq == nullptr) {
|
if (fq == nullptr) {
|
||||||
fq = ov::as_type_ptr<opset1::FakeQuantize>(node->get_input_node_ptr(1)->get_input_node_shared_ptr(0));
|
fq = ov::as_type_ptr<ov::opset1::FakeQuantize>(node->get_input_node_ptr(1)->get_input_node_shared_ptr(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
return fq;
|
return fq;
|
||||||
@ -417,7 +417,7 @@ bool WeightableLayerTransformation::isAsymmetricOnWeights(
|
|||||||
const std::vector<ngraph::element::Type>& defaultPrecisions) {
|
const std::vector<ngraph::element::Type>& defaultPrecisions) {
|
||||||
const auto n = const_cast<ngraph::Node*>(node.get())->shared_from_this();
|
const auto n = const_cast<ngraph::Node*>(node.get())->shared_from_this();
|
||||||
|
|
||||||
const auto reshapeFromWeights = ngraph::as_type_ptr<ngraph::opset1::Reshape>(n->get_input_node_shared_ptr(1));
|
const auto reshapeFromWeights = ngraph::as_type_ptr<ov::opset1::Reshape>(n->get_input_node_shared_ptr(1));
|
||||||
const auto dequantization = reshapeFromWeights == nullptr ?
|
const auto dequantization = reshapeFromWeights == nullptr ?
|
||||||
NetworkHelper::getDequantization(n, defaultPrecisions, 1ul) :
|
NetworkHelper::getDequantization(n, defaultPrecisions, 1ul) :
|
||||||
NetworkHelper::getDequantization(reshapeFromWeights, defaultPrecisions);
|
NetworkHelper::getDequantization(reshapeFromWeights, defaultPrecisions);
|
||||||
|
@ -105,7 +105,7 @@ public:
|
|||||||
testValues.actual.dequantization3);
|
testValues.actual.dequantization3);
|
||||||
|
|
||||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
ngraph::pass::low_precision::PrecisionsRestriction::create<ov::opset1::Convolution>({
|
||||||
{{0}, {ngraph::element::u8}},
|
{{0}, {ngraph::element::u8}},
|
||||||
{{1}, {ngraph::element::i8}}
|
{{1}, {ngraph::element::i8}}
|
||||||
})
|
})
|
||||||
@ -114,14 +114,14 @@ public:
|
|||||||
auto quantizationRestrictions = testValues.multiChannels ?
|
auto quantizationRestrictions = testValues.multiChannels ?
|
||||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
|
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
|
||||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0})
|
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ov::opset1::Convolution>({0})
|
||||||
});
|
});
|
||||||
|
|
||||||
SimpleLowPrecisionTransformer transform(supportedPrecisionsOnActivation, quantizationRestrictions);
|
SimpleLowPrecisionTransformer transform(supportedPrecisionsOnActivation, quantizationRestrictions);
|
||||||
transform.add<ngraph::pass::low_precision::ConcatTransformation, ngraph::opset1::Concat>(testValues.params);
|
transform.add<ngraph::pass::low_precision::ConcatTransformation, ov::opset1::Concat>(testValues.params);
|
||||||
transform.add<ngraph::pass::low_precision::ConvolutionTransformation, ngraph::opset1::Convolution>(testValues.params);
|
transform.add<ngraph::pass::low_precision::ConvolutionTransformation, ov::opset1::Convolution>(testValues.params);
|
||||||
transform.add<ngraph::pass::low_precision::FakeQuantizeDecompositionTransformation, ngraph::opset1::FakeQuantize>(testValues.params);
|
transform.add<ngraph::pass::low_precision::FakeQuantizeDecompositionTransformation, ov::opset1::FakeQuantize>(testValues.params);
|
||||||
transform.add<ngraph::pass::low_precision::MaxPoolTransformation, ngraph::opset1::MaxPool>(testValues.params);
|
transform.add<ngraph::pass::low_precision::MaxPoolTransformation, ov::opset1::MaxPool>(testValues.params);
|
||||||
transform.transform(actualFunction);
|
transform.transform(actualFunction);
|
||||||
|
|
||||||
referenceFunction = ngraph::builder::subgraph::PrecisionPropagationFunction::getReferenceWithNeighbors(
|
referenceFunction = ngraph::builder::subgraph::PrecisionPropagationFunction::getReferenceWithNeighbors(
|
||||||
@ -157,13 +157,13 @@ TEST_P(ConcatWithNeighborsWithConvolutionTransformation, CompareFunctions) {
|
|||||||
//auto res = compare_functions(actualFunction, referenceFunction, true, false, false);
|
//auto res = compare_functions(actualFunction, referenceFunction, true, false, false);
|
||||||
//ASSERT_TRUE(res.first) << res.second;
|
//ASSERT_TRUE(res.first) << res.second;
|
||||||
|
|
||||||
auto actualFakeQuantizes = LayerTransformation::get<opset1::FakeQuantize>(actualFunction);
|
auto actualFakeQuantizes = LayerTransformation::get<ov::opset1::FakeQuantize>(actualFunction);
|
||||||
ASSERT_EQ(3ul, actualFakeQuantizes.size()) << "unexpected FakeQuantize operations count " << actualFakeQuantizes.size();
|
ASSERT_EQ(3ul, actualFakeQuantizes.size()) << "unexpected FakeQuantize operations count " << actualFakeQuantizes.size();
|
||||||
|
|
||||||
ASSERT_TRUE(checkIfOutputAttributesSharedValuesAreTheSame<PrecisionsAttribute>(actualFakeQuantizes)) <<
|
ASSERT_TRUE(checkIfOutputAttributesSharedValuesAreTheSame<PrecisionsAttribute>(actualFakeQuantizes)) <<
|
||||||
"PrecisionsAttribute shared values are not the same";
|
"PrecisionsAttribute shared values are not the same";
|
||||||
|
|
||||||
auto actualConcatOperations = LayerTransformation::get<opset1::Concat>(actualFunction);
|
auto actualConcatOperations = LayerTransformation::get<ov::opset1::Concat>(actualFunction);
|
||||||
ASSERT_EQ(2ul, actualConcatOperations.size()) << "unexpected concat operations";
|
ASSERT_EQ(2ul, actualConcatOperations.size()) << "unexpected concat operations";
|
||||||
ASSERT_FALSE(ngraph::pass::low_precision::getAttribute<QuantizationAlignmentAttribute>(actualConcatOperations[0]).empty());
|
ASSERT_FALSE(ngraph::pass::low_precision::getAttribute<QuantizationAlignmentAttribute>(actualConcatOperations[0]).empty());
|
||||||
ASSERT_FALSE(ngraph::pass::low_precision::getAttribute<QuantizationAlignmentAttribute>(actualConcatOperations[1]).empty());
|
ASSERT_FALSE(ngraph::pass::low_precision::getAttribute<QuantizationAlignmentAttribute>(actualConcatOperations[1]).empty());
|
||||||
@ -172,7 +172,7 @@ TEST_P(ConcatWithNeighborsWithConvolutionTransformation, CompareFunctions) {
|
|||||||
ASSERT_TRUE(checkIfAttributesSharedValuesAreTheSame<IntervalsAlignmentAttribute>(actualConcatOperations)) <<
|
ASSERT_TRUE(checkIfAttributesSharedValuesAreTheSame<IntervalsAlignmentAttribute>(actualConcatOperations)) <<
|
||||||
"IntervalsAlignmentAttribute shared values are not the same";
|
"IntervalsAlignmentAttribute shared values are not the same";
|
||||||
|
|
||||||
auto convolutions = LayerTransformation::get<opset1::Convolution>(actualFunction);
|
auto convolutions = LayerTransformation::get<ov::opset1::Convolution>(actualFunction);
|
||||||
ASSERT_EQ(1ul, convolutions.size()) << "unexpected convolution operations";
|
ASSERT_EQ(1ul, convolutions.size()) << "unexpected convolution operations";
|
||||||
ASSERT_EQ(2ul, convolutions[0]->input(0).get_rt_info().size()) <<
|
ASSERT_EQ(2ul, convolutions[0]->input(0).get_rt_info().size()) <<
|
||||||
"unexpected input 0 attributes count: LowPrecision::PerTensorQuantization & LowPrecision::Precisions";
|
"unexpected input 0 attributes count: LowPrecision::PerTensorQuantization & LowPrecision::Precisions";
|
||||||
|
@ -33,7 +33,7 @@ public:
|
|||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
const auto testValues = GetParam();
|
const auto testValues = GetParam();
|
||||||
|
|
||||||
const auto input = std::make_shared<ngraph::opset1::Parameter>(testValues.precision, ngraph::Shape(testValues.shape));
|
const auto input = std::make_shared<ov::opset1::Parameter>(testValues.precision, ngraph::Shape(testValues.shape));
|
||||||
const auto fakeQuantize = ngraph::builder::subgraph::makeFakeQuantize(
|
const auto fakeQuantize = ngraph::builder::subgraph::makeFakeQuantize(
|
||||||
input,
|
input,
|
||||||
testValues.precision,
|
testValues.precision,
|
||||||
@ -44,7 +44,7 @@ public:
|
|||||||
replace_node(fakeQuantize->get_input_node_shared_ptr(3), input);
|
replace_node(fakeQuantize->get_input_node_shared_ptr(3), input);
|
||||||
}
|
}
|
||||||
|
|
||||||
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(fakeQuantize) };
|
ngraph::ResultVector results{ std::make_shared<ov::opset1::Result>(fakeQuantize) };
|
||||||
function = std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "IsFunctionQuantizedFunction");
|
function = std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "IsFunctionQuantizedFunction");
|
||||||
function->validate_nodes_and_infer_types();
|
function->validate_nodes_and_infer_types();
|
||||||
}
|
}
|
||||||
|
@ -83,7 +83,7 @@ public:
|
|||||||
pass.run_on_model(actualFunction);
|
pass.run_on_model(actualFunction);
|
||||||
|
|
||||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>(
|
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>(
|
||||||
{ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>(
|
{ngraph::pass::low_precision::PrecisionsRestriction::create<ov::opset1::Convolution>(
|
||||||
{{{0}, {ngraph::element::u8}}, {{1}, {ngraph::element::i8}}})});
|
{{{0}, {ngraph::element::u8}}, {{1}, {ngraph::element::i8}}})});
|
||||||
|
|
||||||
SimpleLowPrecisionTransformer transform(supportedPrecisionsOnActivation);
|
SimpleLowPrecisionTransformer transform(supportedPrecisionsOnActivation);
|
||||||
@ -132,7 +132,7 @@ TEST_P(MarkupAvgPoolPrecisionsTransformation, CompareFunctions) {
|
|||||||
ov::pass::InitNodeInfo().run_on_model(actualFunction);
|
ov::pass::InitNodeInfo().run_on_model(actualFunction);
|
||||||
actualFunction->validate_nodes_and_infer_types();
|
actualFunction->validate_nodes_and_infer_types();
|
||||||
|
|
||||||
const auto avgPoolOperations = LayerTransformation::get<opset1::AvgPool>(actualFunction);
|
const auto avgPoolOperations = LayerTransformation::get<ov::opset1::AvgPool>(actualFunction);
|
||||||
ASSERT_EQ(1ul, avgPoolOperations.size()) << "unexpected avgPoolOperations size: " << avgPoolOperations.size();
|
ASSERT_EQ(1ul, avgPoolOperations.size()) << "unexpected avgPoolOperations size: " << avgPoolOperations.size();
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -142,7 +142,7 @@ TEST_P(MarkupAvgPoolPrecisionsTransformation, CompareFunctions) {
|
|||||||
ASSERT_EQ(true, avgPoolPrecisioinPreservedAttribute.as<AvgPoolPrecisionPreservedAttribute>().value());
|
ASSERT_EQ(true, avgPoolPrecisioinPreservedAttribute.as<AvgPoolPrecisionPreservedAttribute>().value());
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto precisionPreserved = LayerTransformation::get<opset1::MaxPool>(actualFunction);
|
const auto precisionPreserved = LayerTransformation::get<ov::opset1::MaxPool>(actualFunction);
|
||||||
ASSERT_TRUE(checkIfAttributesAreTheSame<AvgPoolPrecisionPreservedAttribute>(precisionPreserved))
|
ASSERT_TRUE(checkIfAttributesAreTheSame<AvgPoolPrecisionPreservedAttribute>(precisionPreserved))
|
||||||
<< "AvgPoolPrecisionPreservedAttribute are not the same";
|
<< "AvgPoolPrecisionPreservedAttribute are not the same";
|
||||||
|
|
||||||
|
@ -18,6 +18,8 @@
|
|||||||
#include <set>
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include "openvino/util/log.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -211,7 +213,7 @@ public:
|
|||||||
|
|
||||||
bool add_callback(const std::function<bool(Mask::Ptr)>& receive_callback, Mask::Ptr mask) {
|
bool add_callback(const std::function<bool(Mask::Ptr)>& receive_callback, Mask::Ptr mask) {
|
||||||
if (m_callbacks.find(mask.get()) != m_callbacks.end())
|
if (m_callbacks.find(mask.get()) != m_callbacks.end())
|
||||||
NGRAPH_DEBUG << "Attempt to rewrite callback, could lead to unexpected behaviour";
|
OPENVINO_DEBUG << "Attempt to rewrite callback, could lead to unexpected behaviour";
|
||||||
|
|
||||||
m_callbacks[mask.get()] = receive_callback;
|
m_callbacks[mask.get()] = receive_callback;
|
||||||
m_dependencies.push_back(mask.get());
|
m_dependencies.push_back(mask.get());
|
||||||
|
@ -28,8 +28,9 @@ ngraph::pass::InitConstMask::InitConstMask(const ngraph::AxisSet& dims,
|
|||||||
|
|
||||||
for (const auto& dim : dims) {
|
for (const auto& dim : dims) {
|
||||||
if (dim >= shape.size()) {
|
if (dim >= shape.size()) {
|
||||||
NGRAPH_DEBUG << "[WARNING] Attemt to initialize masks on " << dim << " dimension which is out of shape "
|
OPENVINO_DEBUG << "[WARNING] Attemt to initialize masks on " << dim
|
||||||
<< shape << " for node (" << const_node->get_friendly_name() << ")";
|
<< " dimension which is out of shape " << shape << " for node ("
|
||||||
|
<< const_node->get_friendly_name() << ")";
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -61,7 +62,7 @@ ngraph::pass::InitConstMask::InitConstMask(const ngraph::AxisSet& dims,
|
|||||||
setInitMask(const_node, mask);
|
setInitMask(const_node, mask);
|
||||||
#endif
|
#endif
|
||||||
if (!mask->all_dims_are_empty()) {
|
if (!mask->all_dims_are_empty()) {
|
||||||
NGRAPH_DEBUG << "MASK (" << const_node->get_friendly_name() << ") " << *mask << std::endl;
|
OPENVINO_DEBUG << "MASK (" << const_node->get_friendly_name() << ") " << *mask << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -42,8 +42,8 @@ public:
|
|||||||
cur_node = cur_node->get_input_node_shared_ptr(0);
|
cur_node = cur_node->get_input_node_shared_ptr(0);
|
||||||
}
|
}
|
||||||
if (!ngraph::is_type<opset6::Constant>(cur_node)) {
|
if (!ngraph::is_type<opset6::Constant>(cur_node)) {
|
||||||
NGRAPH_DEBUG << "Can't find Constant weights for Convolution: "
|
OPENVINO_DEBUG << "Can't find Constant weights for Convolution: "
|
||||||
<< m_output.get_node()->get_friendly_name() << std::endl;
|
<< m_output.get_node()->get_friendly_name() << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -101,16 +101,17 @@ public:
|
|||||||
dim_order = new_order;
|
dim_order = new_order;
|
||||||
} else {
|
} else {
|
||||||
if (ngraph::is_type<opset6::Reshape>(cur_node) || ngraph::is_type<opset6::MatMul>(cur_node)) {
|
if (ngraph::is_type<opset6::Reshape>(cur_node) || ngraph::is_type<opset6::MatMul>(cur_node)) {
|
||||||
NGRAPH_DEBUG << "Can't init mask for MatMul: " << matmul->get_friendly_name()
|
OPENVINO_DEBUG << "Can't init mask for MatMul: " << matmul->get_friendly_name()
|
||||||
<< " because of node " << cur_node->get_friendly_name()
|
<< " because of node " << cur_node->get_friendly_name()
|
||||||
<< " in the way from weights to Matmul" << std::endl;
|
<< " in the way from weights to Matmul" << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
cur_node = cur_node->get_input_node_shared_ptr(0);
|
cur_node = cur_node->get_input_node_shared_ptr(0);
|
||||||
}
|
}
|
||||||
if (!ngraph::is_type<opset6::Constant>(cur_node)) {
|
if (!ngraph::is_type<opset6::Constant>(cur_node)) {
|
||||||
NGRAPH_DEBUG << "Can't find Constant weights for MatMul: " << matmul->get_friendly_name() << std::endl;
|
OPENVINO_DEBUG << "Can't find Constant weights for MatMul: " << matmul->get_friendly_name()
|
||||||
|
<< std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// 2. Get constant rank to set mask on last dimension
|
// 2. Get constant rank to set mask on last dimension
|
||||||
@ -118,7 +119,7 @@ public:
|
|||||||
const auto shape_rank = const_op->get_shape().size();
|
const auto shape_rank = const_op->get_shape().size();
|
||||||
const size_t shift = (matmul->get_transpose_b()) ? 2 : 1;
|
const size_t shift = (matmul->get_transpose_b()) ? 2 : 1;
|
||||||
if (shape_rank < shift) {
|
if (shape_rank < shift) {
|
||||||
NGRAPH_DEBUG << "Can't init mask for MatMul: " << matmul->get_friendly_name() << std::endl;
|
OPENVINO_DEBUG << "Can't init mask for MatMul: " << matmul->get_friendly_name() << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const auto idx = shape_rank - shift;
|
const auto idx = shape_rank - shift;
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include <ngraph/validation_util.hpp>
|
#include <ngraph/validation_util.hpp>
|
||||||
|
|
||||||
#include "mask_attribute.hpp"
|
#include "mask_attribute.hpp"
|
||||||
|
#include "openvino/util/log.hpp"
|
||||||
#include "pruning.hpp"
|
#include "pruning.hpp"
|
||||||
|
|
||||||
namespace ngraph {
|
namespace ngraph {
|
||||||
@ -78,17 +79,17 @@ public:
|
|||||||
auto b_mask = getMask(m_b);
|
auto b_mask = getMask(m_b);
|
||||||
|
|
||||||
if (!a_mask && !b_mask) {
|
if (!a_mask && !b_mask) {
|
||||||
NGRAPH_DEBUG << "No mask for any input of " << m_matmul.get_node()->get_friendly_name() << "\n";
|
OPENVINO_DEBUG << "No mask for any input of " << m_matmul.get_node()->get_friendly_name() << "\n";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!b_mask) {
|
if (!b_mask) {
|
||||||
NGRAPH_DEBUG << "No mask for input b of " << m_matmul.get_node()->get_friendly_name() << "\n";
|
OPENVINO_DEBUG << "No mask for input b of " << m_matmul.get_node()->get_friendly_name() << "\n";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto matmul_range = m_matmul.get_shape().size();
|
const auto matmul_range = m_matmul.get_shape().size();
|
||||||
if (matmul_range < 2) {
|
if (matmul_range < 2) {
|
||||||
NGRAPH_DEBUG << "Matmul operation with rank = 1 is not supported by pruning algo by now\n";
|
OPENVINO_DEBUG << "Matmul operation with rank = 1 is not supported by pruning algo by now\n";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -216,7 +217,7 @@ public:
|
|||||||
// Weights mask for convolution should be initialized in the InitMasks pass (and propagate after it).
|
// Weights mask for convolution should be initialized in the InitMasks pass (and propagate after it).
|
||||||
// If mask isn't initialized - this weights (and hence all convolution) can't be pruned for some reason.
|
// If mask isn't initialized - this weights (and hence all convolution) can't be pruned for some reason.
|
||||||
if (!weights_mask) {
|
if (!weights_mask) {
|
||||||
NGRAPH_DEBUG << "No weights mask for " << m_output.get_node()->get_friendly_name() << "\n";
|
OPENVINO_DEBUG << "No weights mask for " << m_output.get_node()->get_friendly_name() << "\n";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
auto weights_mask_row = weights_mask.get();
|
auto weights_mask_row = weights_mask.get();
|
||||||
@ -309,8 +310,8 @@ public:
|
|||||||
weights_mask = std::make_shared<Mask>(weights_shape.size());
|
weights_mask = std::make_shared<Mask>(weights_shape.size());
|
||||||
setMask(m_weights, weights_mask);
|
setMask(m_weights, weights_mask);
|
||||||
} else {
|
} else {
|
||||||
NGRAPH_DEBUG << "GroupConvolution: No weights mask and weights aren't constant for "
|
OPENVINO_DEBUG << "GroupConvolution: No weights mask and weights aren't constant for "
|
||||||
<< *m_output.get_node() << "\n";
|
<< *m_output.get_node() << "\n";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -401,7 +402,7 @@ public:
|
|||||||
const auto constant = get_constant_from_source(m_shape.get_node_shared_ptr());
|
const auto constant = get_constant_from_source(m_shape.get_node_shared_ptr());
|
||||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||||
if (!constant) {
|
if (!constant) {
|
||||||
NGRAPH_DEBUG << "Can't get constant from source node " << m_shape.get_node()->get_friendly_name();
|
OPENVINO_DEBUG << "Can't get constant from source node " << m_shape.get_node()->get_friendly_name();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
auto input_mask_row = input_mask.get();
|
auto input_mask_row = input_mask.get();
|
||||||
@ -479,8 +480,8 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (m_output.get_node_shared_ptr()->get_autob() != op::AutoBroadcastType::NUMPY) {
|
if (m_output.get_node_shared_ptr()->get_autob() != op::AutoBroadcastType::NUMPY) {
|
||||||
NGRAPH_DEBUG << "Can't propagate mask through " << m_output.get_node()->get_friendly_name()
|
OPENVINO_DEBUG << "Can't propagate mask through " << m_output.get_node()->get_friendly_name()
|
||||||
<< " because node is using unsupported broadcast mode." << std::endl;
|
<< " because node is using unsupported broadcast mode." << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Case when input masks should be united instead of intersection
|
// Case when input masks should be united instead of intersection
|
||||||
@ -541,8 +542,8 @@ public:
|
|||||||
|
|
||||||
// Prevent case when input_shape and weights_shape both has broadcasted dims
|
// Prevent case when input_shape and weights_shape both has broadcasted dims
|
||||||
if (input_shape_broadcasted_dims.size() && weights_shape_broadcasted_dims.size()) {
|
if (input_shape_broadcasted_dims.size() && weights_shape_broadcasted_dims.size()) {
|
||||||
NGRAPH_DEBUG << "Can't propagate mask through " << m_output.get_node()->get_friendly_name()
|
OPENVINO_DEBUG << "Can't propagate mask through " << m_output.get_node()->get_friendly_name()
|
||||||
<< " because both input shapes contains broadcasted dims." << std::endl;
|
<< " because both input shapes contains broadcasted dims." << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -562,14 +563,14 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!input_mask) {
|
if (!input_mask) {
|
||||||
NGRAPH_DEBUG << "No input mask for: " << m_output.get_node()->get_friendly_name() << std::endl;
|
OPENVINO_DEBUG << "No input mask for: " << m_output.get_node()->get_friendly_name() << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!weights_mask) {
|
if (!weights_mask) {
|
||||||
// Set dummy mask to weight input in case this input has no mask
|
// Set dummy mask to weight input in case this input has no mask
|
||||||
// and has broadcastable dimentions
|
// and has broadcastable dimentions
|
||||||
if (!weights_shape_broadcasted_dims.size()) {
|
if (!weights_shape_broadcasted_dims.size()) {
|
||||||
NGRAPH_DEBUG << "No weights mask for: " << m_output.get_node()->get_friendly_name() << std::endl;
|
OPENVINO_DEBUG << "No weights mask for: " << m_output.get_node()->get_friendly_name() << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
weights_mask = std::make_shared<Mask>(m_weights.get_partial_shape().rank().get_length());
|
weights_mask = std::make_shared<Mask>(m_weights.get_partial_shape().rank().get_length());
|
||||||
@ -664,7 +665,7 @@ public:
|
|||||||
|
|
||||||
// Input mask is the only source of pruning in FQ
|
// Input mask is the only source of pruning in FQ
|
||||||
if (!input_mask) {
|
if (!input_mask) {
|
||||||
NGRAPH_DEBUG << "FakeQuantize: No input mask for " << *m_output.get_node() << "\n";
|
OPENVINO_DEBUG << "FakeQuantize: No input mask for " << *m_output.get_node() << "\n";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1138,9 +1139,9 @@ public:
|
|||||||
constant = get_constant_from_source(m_weights.get_node_shared_ptr());
|
constant = get_constant_from_source(m_weights.get_node_shared_ptr());
|
||||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||||
if (!constant) {
|
if (!constant) {
|
||||||
NGRAPH_DEBUG << "Can't process reshape node " << m_output.get_node()->get_friendly_name()
|
OPENVINO_DEBUG << "Can't process reshape node " << m_output.get_node()->get_friendly_name()
|
||||||
<< " with no constant node " << m_weights.get_node()->get_friendly_name()
|
<< " with no constant node " << m_weights.get_node()->get_friendly_name()
|
||||||
<< " as shape input.";
|
<< " as shape input.";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1385,19 +1386,19 @@ public:
|
|||||||
const auto input_order_node = get_constant_from_source(m_weights.get_node_shared_ptr());
|
const auto input_order_node = get_constant_from_source(m_weights.get_node_shared_ptr());
|
||||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||||
if (!input_order_node) {
|
if (!input_order_node) {
|
||||||
NGRAPH_DEBUG << "Can't process transpose node " << m_output.get_node()->get_friendly_name()
|
OPENVINO_DEBUG << "Can't process transpose node " << m_output.get_node()->get_friendly_name()
|
||||||
<< " with no constant node " << m_weights.get_node()->get_friendly_name()
|
<< " with no constant node " << m_weights.get_node()->get_friendly_name()
|
||||||
<< " as input_order input.";
|
<< " as input_order input.";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto input_mask = getMask(m_input);
|
const auto input_mask = getMask(m_input);
|
||||||
if (!input_mask) {
|
if (!input_mask) {
|
||||||
NGRAPH_DEBUG << "No input mask for: " << m_output.get_node()->get_friendly_name() << std::endl;
|
OPENVINO_DEBUG << "No input mask for: " << m_output.get_node()->get_friendly_name() << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (static_cast<int64_t>(input_mask->size()) != m_output.get_partial_shape().rank().get_length()) {
|
if (static_cast<int64_t>(input_mask->size()) != m_output.get_partial_shape().rank().get_length()) {
|
||||||
NGRAPH_DEBUG << "Transpose which change tensor rank is not supported yet.";
|
OPENVINO_DEBUG << "Transpose which change tensor rank is not supported yet.";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1627,8 +1628,8 @@ public:
|
|||||||
|
|
||||||
// Invalidate current mask and its parent masks
|
// Invalidate current mask and its parent masks
|
||||||
output_mask->apply_callback(input_mask);
|
output_mask->apply_callback(input_mask);
|
||||||
NGRAPH_DEBUG << "Invalidate masks for " << *input.get_node() << " because " << node
|
OPENVINO_DEBUG << "Invalidate masks for " << *input.get_node() << " because " << node
|
||||||
<< " is in scope of stop ops.\n";
|
<< " is in scope of stop ops.\n";
|
||||||
any_input_with_masks = true;
|
any_input_with_masks = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -57,7 +57,7 @@ static bool maybe_adopt_reshape_node(std::shared_ptr<ov::Node> reshape, ngraph::
|
|||||||
const auto shape = reshape->input_value(1);
|
const auto shape = reshape->input_value(1);
|
||||||
const auto consumers = shape.get_node()->get_output_target_inputs(0);
|
const auto consumers = shape.get_node()->get_output_target_inputs(0);
|
||||||
if (shape.get_node()->outputs().size() != 1 || consumers.size() != 1) {
|
if (shape.get_node()->outputs().size() != 1 || consumers.size() != 1) {
|
||||||
NGRAPH_DEBUG << "Adoptation for node " << shape.get_node()->get_friendly_name() << " is not supported.";
|
OPENVINO_DEBUG << "Adoptation for node " << shape.get_node()->get_friendly_name() << " is not supported.";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -88,8 +88,8 @@ static bool maybe_adopt_reshape_node(std::shared_ptr<ov::Node> reshape, ngraph::
|
|||||||
consumers.begin()->replace_source_output(sub);
|
consumers.begin()->replace_source_output(sub);
|
||||||
copy_runtime_info(shape.get_node_shared_ptr(), {sub_const, sub});
|
copy_runtime_info(shape.get_node_shared_ptr(), {sub_const, sub});
|
||||||
|
|
||||||
NGRAPH_DEBUG << "Adopting values in (" << shape.get_node()->get_friendly_name() << ")"
|
OPENVINO_DEBUG << "Adopting values in (" << shape.get_node()->get_friendly_name() << ")"
|
||||||
<< " by substracting " << vec_to_str(sub_const_vector);
|
<< " by substracting " << vec_to_str(sub_const_vector);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -200,7 +200,7 @@ bool ngraph::pass::ShrinkWeights::run_on_model(const std::shared_ptr<ngraph::Fun
|
|||||||
#ifdef ENABLE_OPENVINO_DEBUG
|
#ifdef ENABLE_OPENVINO_DEBUG
|
||||||
auto init_mask = getInitMask(node->output(0));
|
auto init_mask = getInitMask(node->output(0));
|
||||||
if (!mask && init_mask)
|
if (!mask && init_mask)
|
||||||
NGRAPH_DEBUG << "Mask was ruined for node:" << node->get_friendly_name() << "\nInit mask: " << *init_mask;
|
OPENVINO_DEBUG << "Mask was ruined for node:" << node->get_friendly_name() << "\nInit mask: " << *init_mask;
|
||||||
#endif
|
#endif
|
||||||
if (is_static_reshape_op(node) && not_empty_mask(mask) &&
|
if (is_static_reshape_op(node) && not_empty_mask(mask) &&
|
||||||
!ov::op::util::is_constant(node->get_input_node_ptr(1)))
|
!ov::op::util::is_constant(node->get_input_node_ptr(1)))
|
||||||
@ -239,8 +239,8 @@ bool ngraph::pass::ShrinkWeights::run_on_model(const std::shared_ptr<ngraph::Fun
|
|||||||
dim_current_set.end(),
|
dim_current_set.end(),
|
||||||
dim_init_set.begin(),
|
dim_init_set.begin(),
|
||||||
dim_init_set.end())) {
|
dim_init_set.end())) {
|
||||||
NGRAPH_DEBUG << "Mask was ruined for node:" << const_node->get_friendly_name()
|
OPENVINO_DEBUG << "Mask was ruined for node:" << const_node->get_friendly_name()
|
||||||
<< "\nInit mask: " << *init_mask << "\nCurrent mask: " << *mask;
|
<< "\nInit mask: " << *init_mask << "\nCurrent mask: " << *mask;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -262,8 +262,8 @@ bool ngraph::pass::ShrinkWeights::run_on_model(const std::shared_ptr<ngraph::Fun
|
|||||||
ngraph::copy_runtime_info(const_node, new_const);
|
ngraph::copy_runtime_info(const_node, new_const);
|
||||||
ngraph::replace_node(const_node, new_const);
|
ngraph::replace_node(const_node, new_const);
|
||||||
|
|
||||||
NGRAPH_DEBUG << "Adjust value in (" << const_node->get_friendly_name() << "): " << vec_to_str(value)
|
OPENVINO_DEBUG << "Adjust value in (" << const_node->get_friendly_name() << "): " << vec_to_str(value)
|
||||||
<< " to " << vec_to_str(new_const_value);
|
<< " to " << vec_to_str(new_const_value);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto last_output = const_node->output(0);
|
auto last_output = const_node->output(0);
|
||||||
@ -282,8 +282,8 @@ bool ngraph::pass::ShrinkWeights::run_on_model(const std::shared_ptr<ngraph::Fun
|
|||||||
auto new_const = opset6::Constant::create(const_node->get_element_type(), Shape{res.size()}, res);
|
auto new_const = opset6::Constant::create(const_node->get_element_type(), Shape{res.size()}, res);
|
||||||
replace_node(const_node, new_const);
|
replace_node(const_node, new_const);
|
||||||
copy_runtime_info(const_node, new_const);
|
copy_runtime_info(const_node, new_const);
|
||||||
NGRAPH_DEBUG << "Transform shape like (" << last_output.get_node()->get_friendly_name()
|
OPENVINO_DEBUG << "Transform shape like (" << last_output.get_node()->get_friendly_name()
|
||||||
<< "): " << const_node->get_shape_val() << " to " << new_const->get_shape_val() << std::endl;
|
<< "): " << const_node->get_shape_val() << " to " << new_const->get_shape_val() << std::endl;
|
||||||
new_const->set_friendly_name(const_node->get_friendly_name());
|
new_const->set_friendly_name(const_node->get_friendly_name());
|
||||||
} else {
|
} else {
|
||||||
for (size_t dim = 0; dim < mask->size(); ++dim) {
|
for (size_t dim = 0; dim < mask->size(); ++dim) {
|
||||||
@ -308,13 +308,13 @@ bool ngraph::pass::ShrinkWeights::run_on_model(const std::shared_ptr<ngraph::Fun
|
|||||||
last_output,
|
last_output,
|
||||||
opset6::Constant::create(element::i64, Shape{dims_to_keep.size()}, dims_to_keep),
|
opset6::Constant::create(element::i64, Shape{dims_to_keep.size()}, dims_to_keep),
|
||||||
opset6::Constant::create(element::i64, Shape{}, {dim}));
|
opset6::Constant::create(element::i64, Shape{}, {dim}));
|
||||||
NGRAPH_DEBUG << "Transform(" << prev_name << "): " << prev_shape << " to "
|
OPENVINO_DEBUG << "Transform(" << prev_name << "): " << prev_shape << " to "
|
||||||
<< last_output.get_partial_shape();
|
<< last_output.get_partial_shape();
|
||||||
|
|
||||||
if (prev_shape.is_static() && last_output.get_partial_shape().is_static()) {
|
if (prev_shape.is_static() && last_output.get_partial_shape().is_static()) {
|
||||||
reduced_weights_count += shape_size(prev_shape.get_shape()) - shape_size(last_output.get_shape());
|
reduced_weights_count += shape_size(prev_shape.get_shape()) - shape_size(last_output.get_shape());
|
||||||
} else {
|
} else {
|
||||||
NGRAPH_DEBUG << "[ WARNING ] Can not find the number of reduced elements due to dynamic shapes.";
|
OPENVINO_DEBUG << "[ WARNING ] Can not find the number of reduced elements due to dynamic shapes.";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Trying to fold sequence of Gather ops to avoid additional constant folding.
|
// Trying to fold sequence of Gather ops to avoid additional constant folding.
|
||||||
@ -331,7 +331,7 @@ bool ngraph::pass::ShrinkWeights::run_on_model(const std::shared_ptr<ngraph::Fun
|
|||||||
copy_runtime_info(const_node, last_output.get_node_shared_ptr());
|
copy_runtime_info(const_node, last_output.get_node_shared_ptr());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
NGRAPH_DEBUG << "[ INFO ] TOTAL WEIGHTS: " << total_weights_count << std::endl;
|
OPENVINO_DEBUG << "[ INFO ] TOTAL WEIGHTS: " << total_weights_count << std::endl;
|
||||||
NGRAPH_DEBUG << "[ INFO ] REDUCED WEIGHTS: " << reduced_weights_count << std::endl;
|
OPENVINO_DEBUG << "[ INFO ] REDUCED WEIGHTS: " << reduced_weights_count << std::endl;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -91,7 +91,9 @@ LinearIR::container LinearIR::deep_copy_range(LinearIR::container::const_iterato
|
|||||||
for (auto it = begin; it != end; it++)
|
for (auto it = begin; it != end; it++)
|
||||||
original_nodes.push_back((*it)->get_node());
|
original_nodes.push_back((*it)->get_node());
|
||||||
ngraph::NodeMap node_map;
|
ngraph::NodeMap node_map;
|
||||||
|
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||||
ngraph::clone_nodes(original_nodes, node_map);
|
ngraph::clone_nodes(original_nodes, node_map);
|
||||||
|
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||||
for (auto it = begin; it != end; it++) {
|
for (auto it = begin; it != end; it++) {
|
||||||
// copy by value, so result shared_pointer point to new objects
|
// copy by value, so result shared_pointer point to new objects
|
||||||
Expression new_expr = **it;
|
Expression new_expr = **it;
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
#include "compare.hpp"
|
#include "compare.hpp"
|
||||||
#include "itt.hpp"
|
#include "itt.hpp"
|
||||||
|
#include "openvino/util/log.hpp"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace ov;
|
using namespace ov;
|
||||||
@ -38,7 +39,7 @@ static bool simplify_gather(shared_ptr<Node> node) {
|
|||||||
|
|
||||||
auto axis = gather->get_axis();
|
auto axis = gather->get_axis();
|
||||||
if (axis == opset3::Gather::AXIS_NOT_SET_VALUE) {
|
if (axis == opset3::Gather::AXIS_NOT_SET_VALUE) {
|
||||||
NGRAPH_DEBUG << "axis value not set";
|
OPENVINO_DEBUG << "axis value not set";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -111,7 +112,7 @@ static bool eliminate_reshape_v1(const shared_ptr<Node>& node) {
|
|||||||
|
|
||||||
// check if reshape is not identity op
|
// check if reshape is not identity op
|
||||||
if (input.get_partial_shape().is_dynamic() || node->get_output_partial_shape(0).is_dynamic()) {
|
if (input.get_partial_shape().is_dynamic() || node->get_output_partial_shape(0).is_dynamic()) {
|
||||||
NGRAPH_DEBUG << node << " has dynamic shapes.";
|
OPENVINO_DEBUG << node << " has dynamic shapes.";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// remove identity op
|
// remove identity op
|
||||||
|
@ -43,6 +43,7 @@ bool ov::pass::UselessStridedSliceEraser::run_on_model(const std::shared_ptr<ngr
|
|||||||
return rewritten;
|
return rewritten;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
ngraph::SlicePlan get_slice_plan(std::shared_ptr<opset1::StridedSlice> slice) {
|
ngraph::SlicePlan get_slice_plan(std::shared_ptr<opset1::StridedSlice> slice) {
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "itt.hpp"
|
#include "itt.hpp"
|
||||||
|
#include "openvino/util/log.hpp"
|
||||||
#include "transformations/utils/utils.hpp"
|
#include "transformations/utils/utils.hpp"
|
||||||
|
|
||||||
namespace ov {
|
namespace ov {
|
||||||
@ -69,18 +70,18 @@ public:
|
|||||||
const auto& input_pshape = input.get_partial_shape();
|
const auto& input_pshape = input.get_partial_shape();
|
||||||
const auto input_rank = input_pshape.rank();
|
const auto input_rank = input_pshape.rank();
|
||||||
if (input_rank.is_dynamic()) {
|
if (input_rank.is_dynamic()) {
|
||||||
NGRAPH_DEBUG << "Axis calculated to materialize RIC on input: input rank is dynamic";
|
OPENVINO_DEBUG << "Axis calculated to materialize RIC on input: input rank is dynamic";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto axis = get_axis();
|
const auto axis = get_axis();
|
||||||
// Despite of m_axis is signed integer this transformartion does not handle negative axes values
|
// Despite of m_axis is signed integer this transformartion does not handle negative axes values
|
||||||
if (axis < 0 || axis >= static_cast<int64_t>(input_pshape.size())) {
|
if (axis < 0 || axis >= static_cast<int64_t>(input_pshape.size())) {
|
||||||
NGRAPH_DEBUG << "Axis calculated to materialize RIC on input: " << input << " is out of range";
|
OPENVINO_DEBUG << "Axis calculated to materialize RIC on input: " << input << " is out of range";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto& axis_dim = input_pshape[axis];
|
const auto& axis_dim = input_pshape[axis];
|
||||||
if (axis_dim.is_dynamic()) {
|
if (axis_dim.is_dynamic()) {
|
||||||
NGRAPH_DEBUG << "Axis calculated to materialize RIC on input: " << input << " is dynamic";
|
OPENVINO_DEBUG << "Axis calculated to materialize RIC on input: " << input << " is dynamic";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
auto output = input.get_source_output();
|
auto output = input.get_source_output();
|
||||||
@ -572,7 +573,7 @@ public:
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
ric.set_can_be_fused(false);
|
ric.set_can_be_fused(false);
|
||||||
NGRAPH_DEBUG << "Node is unsupported by RIC Fusion: " << *m.get_match_root() << std::endl;
|
OPENVINO_DEBUG << "Node is unsupported by RIC Fusion: " << *m.get_match_root() << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -50,7 +50,9 @@ std::shared_ptr<opset6::Constant> get_reduced_order_constant(const std::shared_p
|
|||||||
std::shared_ptr<opset6::Constant> get_reversed_order_constant(const std::shared_ptr<opset6::Constant>& order_const) {
|
std::shared_ptr<opset6::Constant> get_reversed_order_constant(const std::shared_ptr<opset6::Constant>& order_const) {
|
||||||
const auto& order = order_const->cast_vector<size_t>();
|
const auto& order = order_const->cast_vector<size_t>();
|
||||||
const auto& rank = order.size();
|
const auto& rank = order.size();
|
||||||
|
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||||
const auto& default_order = ngraph::get_default_order(rank);
|
const auto& default_order = ngraph::get_default_order(rank);
|
||||||
|
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||||
std::vector<size_t> reverse_order(rank);
|
std::vector<size_t> reverse_order(rank);
|
||||||
for (size_t i = 0; i < rank; ++i)
|
for (size_t i = 0; i < rank; ++i)
|
||||||
reverse_order[order[i]] = default_order[i];
|
reverse_order[order[i]] = default_order[i];
|
||||||
|
@ -35,7 +35,7 @@ bool ov::pass::UnrollTensorIterator::run_on_model(const std::shared_ptr<ngraph::
|
|||||||
// Assign names to the created layers.
|
// Assign names to the created layers.
|
||||||
std::vector<std::shared_ptr<ngraph::Function>> body_functions(num_iter);
|
std::vector<std::shared_ptr<ngraph::Function>> body_functions(num_iter);
|
||||||
for (int64_t idx = 0; idx < num_iter; ++idx) {
|
for (int64_t idx = 0; idx < num_iter; ++idx) {
|
||||||
body_functions[idx] = ngraph::clone_function(*function);
|
body_functions[idx] = function->clone();
|
||||||
for (auto& node : body_functions[idx]->get_ops()) {
|
for (auto& node : body_functions[idx]->get_ops()) {
|
||||||
node->set_friendly_name(sub_graph_op->get_friendly_name() + "/" + std::to_string(idx + 1) + "/" +
|
node->set_friendly_name(sub_graph_op->get_friendly_name() + "/" + std::to_string(idx + 1) + "/" +
|
||||||
node->get_friendly_name());
|
node->get_friendly_name());
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user