[LPT] Introduce new granularity attribute instead of OperationPerTensorQuantizationRestriction (#11330)
This commit is contained in:
parent
29fb8c79b1
commit
5d821453ae
@ -9,10 +9,10 @@
|
||||
<tab type="user" title="Attributes" url="@ref openvino_docs_OV_UG_lpt_attributes">
|
||||
<tab type="user" title="AvgPoolPrecisionPreserved" url="@ref openvino_docs_OV_UG_lpt_AvgPoolPrecisionPreserved"/>
|
||||
<tab type="user" title="IntervalsAlignment" url="@ref openvino_docs_OV_UG_lpt_IntervalsAlignment"/>
|
||||
<tab type="user" title="PerTensorQuantization" url="@ref openvino_docs_OV_UG_lpt_PerTensorQuantization"/>
|
||||
<tab type="user" title="PrecisionPreserved" url="@ref openvino_docs_OV_UG_lpt_PrecisionPreserved"/>
|
||||
<tab type="user" title="Precisions" url="@ref openvino_docs_OV_UG_lpt_Precisions"/>
|
||||
<tab type="user" title="QuantizationAlignment" url="@ref openvino_docs_OV_UG_lpt_QuantizationAlignment"/>
|
||||
<tab type="user" title="QuantizationGranularity" url="@ref openvino_docs_OV_UG_lpt_QuantizationGranularity"/>
|
||||
</tab>
|
||||
<tab type="user" title="Step 1. Prerequisites transformations" url="@ref openvino_docs_OV_UG_lpt_step1_prerequisites">
|
||||
<tab type="user" title="LinOpSequenceFusion" url="@ref openvino_docs_OV_UG_lpt_LinOpSequenceFusion"/>
|
||||
|
@ -1,11 +0,0 @@
|
||||
# PerTensorQuantization attribute {#openvino_docs_OV_UG_lpt_PerTensorQuantization}
|
||||
|
||||
ngraph::PerTensorQuantizationAttribute class represents the `PerTensorQuantization` attribute.
|
||||
|
||||
The attribute defines if the operation input port requires per-tensor quantization.
|
||||
|
||||
| Property name | Values |
|
||||
|---------------|----------------------------------------------|
|
||||
| Required | Yes |
|
||||
| Defined | Operation, input ports |
|
||||
| Properties | |
|
@ -0,0 +1,11 @@
|
||||
# QuantizationGranularity attribute {#openvino_docs_OV_UG_lpt_QuantizationGranularity}
|
||||
|
||||
ngraph::QuantizationGranularityAttribute class represents the `QuantizationGranularity` attribute.
|
||||
|
||||
The attribute defines quantization granularity of operation inputs.
|
||||
|
||||
| Property name | Values |
|
||||
|---------------|----------------------------------------------|
|
||||
| Required | No |
|
||||
| Defined | Input ports |
|
||||
| Properties | Quantization granularity |
|
@ -8,29 +8,30 @@
|
||||
:hidden:
|
||||
|
||||
AvgPoolPrecisionPreserved <openvino_docs_OV_UG_lpt_AvgPoolPrecisionPreserved>
|
||||
IntervalsAlignment <openvino_docs_OV_UG_lpt_IntervalsAlignment>
|
||||
PerTensorQuantization <openvino_docs_OV_UG_lpt_PerTensorQuantization>
|
||||
IntervalsAlignment <openvino_docs_OV_UG_lpt_IntervalsAlignment>
|
||||
PrecisionPreserved <openvino_docs_OV_UG_lpt_PrecisionPreserved>
|
||||
Precisions <openvino_docs_OV_UG_lpt_Precisions>
|
||||
QuantizationAlignment <openvino_docs_OV_UG_lpt_QuantizationAlignment>
|
||||
QuantizationGranularity <openvino_docs_OV_UG_lpt_QuantizationGranularity>
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
## Introduction
|
||||
|
||||
| Name | Target | Required | Mutable |
|
||||
|-------------------------------------------------------------------------------------|------------------------|----------|---------|
|
||||
| [AvgPoolPrecisionPreserved](@ref openvino_docs_OV_UG_lpt_AvgPoolPrecisionPreserved) | Precision | No | Yes |
|
||||
| [IntervalsAlignment](@ref openvino_docs_OV_UG_lpt_IntervalsAlignment) | Quantization interval | Yes | Yes |
|
||||
| [PerTensorQuantization](@ref openvino_docs_OV_UG_lpt_PerTensorQuantization) | Precision | Yes | No |
|
||||
| [PrecisionPreserved](@ref openvino_docs_OV_UG_lpt_PrecisionPreserved) | Precision | Yes | Yes |
|
||||
| [Precisions](@ref openvino_docs_OV_UG_lpt_Precisions) | Precision | Yes | Yes |
|
||||
| [QuantizationAlignment](@ref openvino_docs_OV_UG_lpt_QuantizationAlignment) | Quantization alignment | Yes | Yes |
|
||||
| Name | Target | Required | Mutable |
|
||||
|-------------------------------------------------------------------------------------|--------------------------|----------|---------|
|
||||
| [AvgPoolPrecisionPreserved](@ref openvino_docs_OV_UG_lpt_AvgPoolPrecisionPreserved) | Precision | No | Yes |
|
||||
| [IntervalsAlignment](@ref openvino_docs_OV_UG_lpt_IntervalsAlignment) | Quantization interval | Yes | Yes |
|
||||
| [PrecisionPreserved](@ref openvino_docs_OV_UG_lpt_PrecisionPreserved) | Precision | Yes | Yes |
|
||||
| [Precisions](@ref openvino_docs_OV_UG_lpt_Precisions) | Precision | Yes | Yes |
|
||||
| [QuantizationAlignment](@ref openvino_docs_OV_UG_lpt_QuantizationAlignment) | Quantization granularity | Yes | Yes |
|
||||
| [QuantizationGranularity](@ref openvino_docs_OV_UG_lpt_QuantizationGranularity) | Quantization granularity | Yes | No |
|
||||
|
||||
> `Target` attribute group defines attribute usage during model transformation for the best performance:
|
||||
> - `Precision` - the attribute defines the most optimal output port precision.
|
||||
> - `Quantization interval` - the attribute defines quantization interval.
|
||||
> - `Quantization alignment` - the attribute defines quantization alignment: per-channel or per-tensor quantization.
|
||||
> - `Quantization alignment` - the attribute defines quantization granularity in runtime: per-channel or per-tensor quantization.
|
||||
> - `Quantization granularity` - the attribute is set by plugin to define quantization granularity: per-channel or per-tensor quantization.
|
||||
>
|
||||
> `Required` attribute group defines if attribute usage is required to get an optimal model during transformation:
|
||||
> - `Yes` - the attribute is used by all OpenVINO plugins for low-precision optimization.
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>
|
||||
|
||||
#include <low_precision/common/operation_per_tensor_quantization_restriction.hpp>
|
||||
#include <low_precision/common/quantization_granularity_restriction.hpp>
|
||||
#include <low_precision/convert_subtract_constant.hpp>
|
||||
#include <low_precision/convolution.hpp>
|
||||
#include <low_precision/convolution_backprop_data.hpp>
|
||||
@ -45,7 +45,7 @@ if (useLpt) {
|
||||
// nGraph common transformations happen here
|
||||
|
||||
if (useLpt) {
|
||||
// convert subtract constant to INT8 to prevent unnecessary FP16 to FP32 conversion
|
||||
// convert subtract constant to INT8 to prevent unnecessary FP16 to FP32 conversion
|
||||
manager.register_pass<ngraph::pass::low_precision::ConvertSubtractConstant>(defaultPrecisions);
|
||||
}
|
||||
|
||||
@ -70,29 +70,29 @@ manager.run_passes(nGraphFunc);
|
||||
using namespace ngraph::pass::low_precision;
|
||||
if (useLpt) {
|
||||
// Low precision transformations plugin specific configuration: restrictions definition
|
||||
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}},
|
||||
}),
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
|
||||
{0, {ngraph::element::u8, ngraph::element::i8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
}),
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::GroupConvolution>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::GroupConvolution>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
}),
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::Multiply>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::Multiply>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}},
|
||||
}),
|
||||
});
|
||||
|
||||
// Low precision transformations plugin specific configuration: per-tensor quantization operations definition
|
||||
auto perTensorQuantization = std::vector<OperationPerTensorQuantizationRestriction>({
|
||||
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0}),
|
||||
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
|
||||
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
|
||||
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0}),
|
||||
QuantizationGranularityRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
|
||||
});
|
||||
|
||||
// Low precision transformations instantiation and registration in pass manager
|
||||
@ -133,8 +133,8 @@ ngraph::pass::Manager manager;
|
||||
|
||||
using namespace ngraph::pass::low_precision;
|
||||
//! [lpt_supported_precisions]
|
||||
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}},
|
||||
}),
|
||||
@ -157,10 +157,10 @@ std::shared_ptr<ov::Model> nGraphFunc;
|
||||
//! [per_tensor_quantization]
|
||||
using namespace ngraph::pass::low_precision;
|
||||
|
||||
const std::vector<OperationPrecisionRestriction> emptyRestrictions;
|
||||
const std::vector<PrecisionsRestriction> emptyRestrictions;
|
||||
|
||||
auto perTensorQuantization = std::vector<OperationPerTensorQuantizationRestriction>({
|
||||
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0})
|
||||
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
|
||||
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0})
|
||||
});
|
||||
|
||||
ngraph::pass::Manager lptManager;
|
||||
@ -197,15 +197,15 @@ ngraph::pass::Manager manager;
|
||||
|
||||
using namespace ngraph::pass::low_precision;
|
||||
//! [lpt_markup_pipeline]
|
||||
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}},
|
||||
}),
|
||||
});
|
||||
|
||||
auto perTensorQuantization = std::vector<OperationPerTensorQuantizationRestriction>({
|
||||
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0})
|
||||
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
|
||||
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0})
|
||||
});
|
||||
|
||||
ngraph::pass::Manager lptManager;
|
||||
|
@ -1,56 +0,0 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class OperationPerTensorQuantizationRestriction {
|
||||
public:
|
||||
using RestrictedPorts = std::vector<size_t>;
|
||||
|
||||
ngraph::Node::type_info_t operationType;
|
||||
bool specifyVersion;
|
||||
std::vector<size_t> restrictedPorts;
|
||||
|
||||
OperationPerTensorQuantizationRestriction() = default;
|
||||
OperationPerTensorQuantizationRestriction(
|
||||
const ngraph::Node::type_info_t operationType,
|
||||
const bool specifyVersion,
|
||||
const RestrictedPorts& restrictedPorts) :
|
||||
operationType(operationType),
|
||||
specifyVersion(specifyVersion),
|
||||
restrictedPorts(restrictedPorts) {}
|
||||
|
||||
template <typename T>
|
||||
static OperationPerTensorQuantizationRestriction create(
|
||||
const RestrictedPorts& restrictedPorts = {},
|
||||
const bool specifyVersion = false) {
|
||||
return OperationPerTensorQuantizationRestriction(T::get_type_info_static(), specifyVersion, restrictedPorts);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static RestrictedPorts getPrecisionsByOperationType(std::vector<OperationPerTensorQuantizationRestriction>& restrictions) {
|
||||
for (const auto& restriction : restrictions) {
|
||||
if (restriction.operationType == T::get_type_info_static()) {
|
||||
return restriction.restrictedPorts;
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -0,0 +1,31 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/pass/pass.hpp>
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class LP_TRANSFORMATIONS_API PortQuantizationGranularityRestriction {
|
||||
public:
|
||||
PortQuantizationGranularityRestriction(const size_t port, QuantizationGranularityAttribute::Granularity granularity) :
|
||||
port(port),
|
||||
granularity(granularity) {}
|
||||
|
||||
size_t port;
|
||||
QuantizationGranularityAttribute::Granularity granularity;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -19,7 +19,7 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class OperationPrecisionRestriction {
|
||||
class PrecisionsRestriction {
|
||||
public:
|
||||
using PrecisionsByPort = std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>>;
|
||||
|
||||
@ -27,8 +27,8 @@ public:
|
||||
bool specifyVersion;
|
||||
std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>> precisionsByPort;
|
||||
|
||||
OperationPrecisionRestriction() = default;
|
||||
OperationPrecisionRestriction(
|
||||
PrecisionsRestriction() = default;
|
||||
PrecisionsRestriction(
|
||||
const ngraph::Node::type_info_t operationType,
|
||||
const bool specifyVersion,
|
||||
const PrecisionsByPort& precisionsByPort) :
|
||||
@ -37,14 +37,14 @@ public:
|
||||
precisionsByPort(precisionsByPort) {}
|
||||
|
||||
template <typename T>
|
||||
static OperationPrecisionRestriction create(
|
||||
static PrecisionsRestriction create(
|
||||
const PrecisionsByPort& precisionsByPort,
|
||||
const bool specifyVersion = false) {
|
||||
return OperationPrecisionRestriction(T::get_type_info_static(), specifyVersion, precisionsByPort);
|
||||
return PrecisionsRestriction(T::get_type_info_static(), specifyVersion, precisionsByPort);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static PrecisionsByPort getPrecisionsByOperationType(std::vector<OperationPrecisionRestriction>& restrictions) {
|
||||
static PrecisionsByPort getPrecisionsByOperationType(std::vector<PrecisionsRestriction>& restrictions) {
|
||||
for (const auto& restriction : restrictions) {
|
||||
if (restriction.operationType == T::get_type_info_static()) {
|
||||
return restriction.precisionsByPort;
|
@ -0,0 +1,71 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include <low_precision/rt_info/quantization_granularity_attribute.hpp>
|
||||
#include <low_precision/common/port_quantization_granularity_restriction.hpp>
|
||||
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class LP_TRANSFORMATIONS_API QuantizationGranularityRestriction {
|
||||
public:
|
||||
ngraph::Node::type_info_t operationType;
|
||||
bool specifyVersion;
|
||||
std::vector<PortQuantizationGranularityRestriction> restrictions;
|
||||
|
||||
QuantizationGranularityRestriction() = default;
|
||||
QuantizationGranularityRestriction(
|
||||
const ngraph::Node::type_info_t operationType,
|
||||
const bool specifyVersion,
|
||||
const std::vector<PortQuantizationGranularityRestriction>& restrictions) :
|
||||
operationType(operationType),
|
||||
specifyVersion(specifyVersion),
|
||||
restrictions(restrictions) {}
|
||||
|
||||
template <typename T>
|
||||
static QuantizationGranularityRestriction create(
|
||||
const std::vector<PortQuantizationGranularityRestriction>& restrictions,
|
||||
const bool specifyVersion) {
|
||||
return QuantizationGranularityRestriction(T::get_type_info_static(), specifyVersion, restrictions);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static QuantizationGranularityRestriction create(
|
||||
const std::vector<size_t>& restrictedPorts = {},
|
||||
const bool specifyVersion = false) {
|
||||
std::vector<PortQuantizationGranularityRestriction> restrictions;
|
||||
restrictions.reserve(restrictedPorts.size());
|
||||
for (auto i = 0ul; i < restrictedPorts.size(); ++i) {
|
||||
restrictions.push_back(PortQuantizationGranularityRestriction(
|
||||
restrictedPorts[i],
|
||||
ngraph::QuantizationGranularityAttribute::Granularity::PerTensor));
|
||||
}
|
||||
return QuantizationGranularityRestriction(T::get_type_info_static(), specifyVersion, restrictions);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static std::vector<PortQuantizationGranularityRestriction> getPrecisionsByOperationType(std::vector<QuantizationGranularityRestriction>& restrictions) {
|
||||
for (const auto& restriction : restrictions) {
|
||||
if (restriction.operationType == T::get_type_info_static()) {
|
||||
return restriction.restrictions;
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -21,7 +21,8 @@
|
||||
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include <low_precision/common/operation_per_tensor_quantization_restriction.hpp>
|
||||
#include <low_precision/common/quantization_granularity_restriction.hpp>
|
||||
#include <low_precision/common/precisions_restriction.hpp>
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
#include "low_precision/markup_precisions.hpp"
|
||||
|
||||
@ -41,13 +42,13 @@ class ngraph::pass::low_precision::MarkupOptimizations : public ngraph::pass::Fu
|
||||
public:
|
||||
OPENVINO_RTTI("MarkupOptimizations", "0");
|
||||
MarkupOptimizations(
|
||||
const std::vector<OperationPrecisionRestriction>& precisionRestrictions,
|
||||
const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions,
|
||||
const std::vector<PrecisionsRestriction>& precisionRestrictions,
|
||||
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions,
|
||||
const AttributeParameters& params);
|
||||
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
|
||||
private:
|
||||
const std::vector<OperationPrecisionRestriction>& precisionRestrictions;
|
||||
const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions;
|
||||
const std::vector<PrecisionsRestriction>& precisionRestrictions;
|
||||
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions;
|
||||
const AttributeParameters& params;
|
||||
};
|
||||
|
||||
@ -61,8 +62,8 @@ class ngraph::pass::low_precision::LowPrecision : public ngraph::pass::FunctionP
|
||||
public:
|
||||
OPENVINO_RTTI("LowPrecision", "0");
|
||||
LowPrecision(
|
||||
const std::vector<OperationPrecisionRestriction>& precisionRestrictions = {},
|
||||
const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions = {},
|
||||
const std::vector<PrecisionsRestriction>& precisionRestrictions = {},
|
||||
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions = {},
|
||||
const LayerTransformation::Params = LayerTransformation::Params());
|
||||
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
|
||||
|
||||
@ -70,8 +71,8 @@ public:
|
||||
static bool isFQLevelsPresent(const std::shared_ptr<const ngraph::Function>& function, const std::set<size_t>& levels);
|
||||
|
||||
protected:
|
||||
std::vector<OperationPrecisionRestriction> precisionRestrictions;
|
||||
std::vector<OperationPerTensorQuantizationRestriction> quantizationRestrictions;
|
||||
std::vector<PrecisionsRestriction> precisionRestrictions;
|
||||
std::vector<QuantizationGranularityRestriction> quantizationRestrictions;
|
||||
// remove
|
||||
LayerTransformation::Params params;
|
||||
};
|
||||
|
@ -10,7 +10,7 @@
|
||||
|
||||
#include <ngraph/pass/pass.hpp>
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
#include "low_precision/common/operation_precision_restriction.hpp"
|
||||
#include "low_precision/common/precisions_restriction.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
@ -48,7 +48,7 @@ public:
|
||||
};
|
||||
|
||||
OPENVINO_RTTI("MarkupPrecisions", "0");
|
||||
explicit MarkupPrecisions(const std::vector<OperationPrecisionRestriction>& restrictions = {},
|
||||
explicit MarkupPrecisions(const std::vector<PrecisionsRestriction>& restrictions = {},
|
||||
const std::vector<ngraph::element::Type>& defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 });
|
||||
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
|
||||
|
||||
|
@ -9,14 +9,15 @@
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/pass/pass.hpp>
|
||||
#include "common/operation_per_tensor_quantization_restriction.hpp"
|
||||
#include "low_precision/common/port_quantization_granularity_restriction.hpp"
|
||||
#include "low_precision/common/quantization_granularity_restriction.hpp"
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class LP_TRANSFORMATIONS_API MarkupPerTensorQuantization;
|
||||
class LP_TRANSFORMATIONS_API MarkupQuantizationGranularity;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
@ -31,21 +32,21 @@ class LP_TRANSFORMATIONS_API MarkupPerTensorQuantization;
|
||||
* [MarkupPerTensorQuantization](@ref openvino_docs_OV_UG_lpt_MarkupPerTensorQuantization) page
|
||||
* in the Inference Engine Developer Guide.
|
||||
*/
|
||||
class ngraph::pass::low_precision::MarkupPerTensorQuantization : public ngraph::pass::FunctionPass {
|
||||
class ngraph::pass::low_precision::MarkupQuantizationGranularity : public ngraph::pass::FunctionPass {
|
||||
public:
|
||||
class PerTensorQuantization {
|
||||
public:
|
||||
explicit PerTensorQuantization(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {}
|
||||
void add(const uint64_t version, const std::vector<size_t>& ports) {
|
||||
portsByVersion.emplace(version, ports);
|
||||
void add(const uint64_t version, const std::vector<PortQuantizationGranularityRestriction>& restrictions) {
|
||||
portsByVersion.emplace(version, restrictions);
|
||||
}
|
||||
|
||||
bool versionIsRequired;
|
||||
std::unordered_map<uint64_t, std::vector<size_t>> portsByVersion;
|
||||
std::unordered_map<uint64_t, std::vector<PortQuantizationGranularityRestriction>> portsByVersion;
|
||||
};
|
||||
|
||||
OPENVINO_RTTI("MarkupPerTensorQuantization", "0");
|
||||
explicit MarkupPerTensorQuantization(const std::vector<OperationPerTensorQuantizationRestriction>& restrictions = {});
|
||||
explicit MarkupQuantizationGranularity(const std::vector<QuantizationGranularityRestriction>& restrictions = {});
|
||||
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
|
||||
|
||||
private:
|
@ -7,7 +7,7 @@
|
||||
#include <memory>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
#include "common/operation_precision_restriction.hpp"
|
||||
#include "common/precisions_restriction.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
@ -26,7 +26,7 @@ public:
|
||||
OPENVINO_RTTI("MultiplyToGroupConvolutionTransformation", "0");
|
||||
MultiplyToGroupConvolutionTransformation(
|
||||
const Params& params = Params(),
|
||||
const OperationPrecisionRestriction::PrecisionsByPort& restrictions = {});
|
||||
const PrecisionsRestriction::PrecisionsByPort& restrictions = {});
|
||||
~MultiplyToGroupConvolutionTransformation() override {}
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
@ -39,7 +39,7 @@ public:
|
||||
void setGroupSize(const size_t groupSize);
|
||||
size_t getGroupSize() const;
|
||||
private:
|
||||
OperationPrecisionRestriction::PrecisionsByPort restrictions;
|
||||
PrecisionsRestriction::PrecisionsByPort restrictions;
|
||||
size_t groupSize;
|
||||
};
|
||||
|
||||
|
@ -18,7 +18,7 @@
|
||||
|
||||
#include "rt_info/shared_value_attribute.hpp"
|
||||
#include "rt_info/precisions_attribute.hpp"
|
||||
#include "rt_info/per_tensor_quantization_attribute.hpp"
|
||||
#include "rt_info/quantization_granularity_attribute.hpp"
|
||||
#include "rt_info/intervals_alignment_attribute.hpp"
|
||||
#include "transformation_context.hpp"
|
||||
#include "quantization_details.hpp"
|
||||
|
@ -27,6 +27,7 @@ public:
|
||||
OPENVINO_RTTI("LowPrecision::AvgPoolPrecisionPreserved", "", ov::RuntimeAttribute, 0);
|
||||
using PrecisionPreservedAttribute::PrecisionPreservedAttribute;
|
||||
void merge(std::vector<ov::Any>& attributes);
|
||||
bool is_skipped() const;
|
||||
std::string to_string() const override;
|
||||
};
|
||||
|
||||
|
@ -1,29 +0,0 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include "low_precision/rt_info/shared_value_attribute.hpp"
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
#include "attribute_parameters.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief PerTensorQuantizationAttribute defines if operation input port requires per-tensor quantization.
|
||||
*
|
||||
* For more details about the attribute, refer to
|
||||
* [PerTensorQuantizationAttribute](@ref openvino_docs_OV_UG_lpt_PerTensorQuantization) page in the Inference Engine Developer Guide.
|
||||
*/
|
||||
class LP_TRANSFORMATIONS_API PerTensorQuantizationAttribute : public ov::RuntimeAttribute {
|
||||
public:
|
||||
OPENVINO_RTTI("LowPrecision::PerTensorQuantization", "", ov::RuntimeAttribute, 0);
|
||||
~PerTensorQuantizationAttribute();
|
||||
};
|
||||
} // namespace ngraph
|
@ -0,0 +1,45 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include "low_precision/rt_info/shared_value_attribute.hpp"
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
#include "attribute_parameters.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief QuantizationGranularityAttribute defines quantization granularity of operation inputs.
|
||||
*
|
||||
* For more details about the attribute, refer to
|
||||
* [QuantizationGranularityAttribute](@ref openvino_docs_OV_UG_lpt_QuantizationGranularity) page in the Inference Engine Developer Guide.
|
||||
*/
|
||||
class LP_TRANSFORMATIONS_API QuantizationGranularityAttribute : public ov::RuntimeAttribute {
|
||||
public:
|
||||
OPENVINO_RTTI("LowPrecision::QuantizationGranularity", "", ov::RuntimeAttribute, 0);
|
||||
|
||||
enum class Granularity {
|
||||
PerChannel,
|
||||
PerTensor
|
||||
};
|
||||
|
||||
QuantizationGranularityAttribute() : granularity(Granularity::PerChannel) {}
|
||||
QuantizationGranularityAttribute(const Granularity granularity) : granularity(granularity) {}
|
||||
|
||||
bool operator==(const QuantizationGranularityAttribute& attribute) const {
|
||||
return this->granularity == attribute.granularity;
|
||||
}
|
||||
|
||||
bool is_skipped() const;
|
||||
std::string to_string() const override;
|
||||
|
||||
Granularity granularity;
|
||||
};
|
||||
} // namespace ngraph
|
@ -70,7 +70,13 @@ public:
|
||||
|
||||
for (auto input : node->inputs()) {
|
||||
if (needToCheckExpectedAttributeType) {
|
||||
if (getAttribute<ExpectedAttributeType>(input).empty()) {
|
||||
const auto& attribute = getAttribute<ExpectedAttributeType>(input);
|
||||
if (attribute.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& expectedAttribute = attribute.template as<ExpectedAttributeType>();
|
||||
if (expectedAttribute.is_skipped()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -6,10 +6,10 @@
|
||||
#include <memory>
|
||||
#include "low_precision/create_attribute.hpp"
|
||||
#include "low_precision/propagate_through_precision_preserved.hpp"
|
||||
#include "low_precision/rt_info/quantization_alignment_attribute.hpp"
|
||||
#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp"
|
||||
#include "low_precision/update_shared_precision_preserved.hpp"
|
||||
#include "low_precision/rt_info/attribute_parameters.hpp"
|
||||
#include "low_precision/rt_info/quantization_alignment_attribute.hpp"
|
||||
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
|
||||
#include "low_precision/update_shared_precision_preserved.hpp"
|
||||
#include "itt.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
@ -25,7 +25,7 @@ bool ngraph::pass::low_precision::AlignQuantizationParameters::run_on_model(cons
|
||||
std::shared_ptr<ngraph::pass::GraphRewrite> propagation = manager.register_pass<ngraph::pass::GraphRewrite>();
|
||||
propagation->add_matcher<low_precision::CreateAttribute<QuantizationAlignmentAttribute>>();
|
||||
propagation->add_matcher<low_precision::PropagateThroughPrecisionPreserved<QuantizationAlignmentAttribute>>();
|
||||
propagation->add_matcher<low_precision::UpdateSharedPrecisionPreserved<QuantizationAlignmentAttribute, PerTensorQuantizationAttribute>>();
|
||||
propagation->add_matcher<low_precision::UpdateSharedPrecisionPreserved<QuantizationAlignmentAttribute, QuantizationGranularityAttribute>>();
|
||||
manager.run_passes(f);
|
||||
return false;
|
||||
}
|
||||
|
@ -14,7 +14,6 @@
|
||||
#include <ngraph/opsets/opset4.hpp>
|
||||
#include <ngraph/opsets/opset6.hpp>
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <low_precision/markup_per_tensor_quantization.hpp>
|
||||
#include <low_precision/lpt_itt.hpp>
|
||||
|
||||
#include "low_precision/align_quantization_intervals.hpp"
|
||||
@ -22,6 +21,7 @@
|
||||
#include "low_precision/markup_precisions.hpp"
|
||||
#include "low_precision/markup_can_be_quantized.hpp"
|
||||
#include "low_precision/markup_avg_pool_precision_preserved.hpp"
|
||||
#include <low_precision/markup_quantization_granularity.hpp>
|
||||
#include "low_precision/propagate_precisions.hpp"
|
||||
#include "low_precision/align_quantization_parameters.hpp"
|
||||
|
||||
@ -81,8 +81,8 @@
|
||||
#include "itt.hpp"
|
||||
|
||||
ngraph::pass::low_precision::LowPrecision::LowPrecision(
|
||||
const std::vector<OperationPrecisionRestriction>& precisionRestrictions,
|
||||
const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions,
|
||||
const std::vector<PrecisionsRestriction>& precisionRestrictions,
|
||||
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions,
|
||||
const LayerTransformation::Params params) :
|
||||
precisionRestrictions(precisionRestrictions),
|
||||
quantizationRestrictions(quantizationRestrictions),
|
||||
@ -158,8 +158,8 @@ ngraph::pass::low_precision::TypeRelaxedReplacer::TypeRelaxedReplacer() {
|
||||
}
|
||||
|
||||
MarkupOptimizations::MarkupOptimizations(
|
||||
const std::vector<OperationPrecisionRestriction>& precisionRestrictions,
|
||||
const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions,
|
||||
const std::vector<PrecisionsRestriction>& precisionRestrictions,
|
||||
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions,
|
||||
const AttributeParameters& params) :
|
||||
precisionRestrictions(precisionRestrictions),
|
||||
quantizationRestrictions(quantizationRestrictions),
|
||||
@ -174,7 +174,7 @@ bool ngraph::pass::low_precision::MarkupOptimizations::run_on_model(const std::s
|
||||
markup.register_pass<low_precision::MarkupPrecisions>(precisionRestrictions, params.defaultPrecisions);
|
||||
}
|
||||
if (!quantizationRestrictions.empty()) {
|
||||
markup.register_pass<low_precision::MarkupPerTensorQuantization>(quantizationRestrictions);
|
||||
markup.register_pass<low_precision::MarkupQuantizationGranularity>(quantizationRestrictions);
|
||||
}
|
||||
if (ngraph::op::util::has_op_with_type<ngraph::opset1::AvgPool>(f)) {
|
||||
markup.register_pass<low_precision::MarkupAvgPoolPrecisionPreserved>(params.defaultPrecisions);
|
||||
@ -249,7 +249,7 @@ bool ngraph::pass::low_precision::LowPrecision::run_on_model(const std::shared_p
|
||||
// WA: precision restrictions for groupConv must be propagated to MultiplyToGroupConvolution transformation
|
||||
cleanup->add_matcher<ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation>(
|
||||
params,
|
||||
OperationPrecisionRestriction::getPrecisionsByOperationType<opset1::GroupConvolution>(precisionRestrictions));
|
||||
PrecisionsRestriction::getPrecisionsByOperationType<opset1::GroupConvolution>(precisionRestrictions));
|
||||
manager.register_pass<ngraph::pass::low_precision::FoldFakeQuantizeTransformation>(params);
|
||||
manager.register_pass<ngraph::pass::ConstantFolding>();
|
||||
|
||||
|
@ -20,7 +20,8 @@
|
||||
|
||||
using namespace ngraph;
|
||||
|
||||
ngraph::pass::low_precision::MarkupPrecisions::MarkupPrecisions(const std::vector<OperationPrecisionRestriction>& restrictions,
|
||||
ngraph::pass::low_precision::MarkupPrecisions::MarkupPrecisions(
|
||||
const std::vector<PrecisionsRestriction>& restrictions,
|
||||
const std::vector<ngraph::element::Type>& defaultPrecisions) : defaultPrecisions(defaultPrecisions) {
|
||||
for (const auto& restriction : restrictions) {
|
||||
const auto it = restrictionsByOperation.find(restriction.operationType.name);
|
||||
|
@ -2,54 +2,52 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/markup_per_tensor_quantization.hpp"
|
||||
#include "low_precision/markup_quantization_granularity.hpp"
|
||||
|
||||
#include <cassert>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <ngraph/node.hpp>
|
||||
#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp"
|
||||
#include "itt.hpp"
|
||||
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
|
||||
ngraph::pass::low_precision::MarkupPerTensorQuantization::MarkupPerTensorQuantization(
|
||||
const std::vector<OperationPerTensorQuantizationRestriction>& restrictions) {
|
||||
for (const OperationPerTensorQuantizationRestriction& restriction : restrictions) {
|
||||
ngraph::pass::low_precision::MarkupQuantizationGranularity::MarkupQuantizationGranularity(
|
||||
const std::vector<QuantizationGranularityRestriction>& restrictions) {
|
||||
for (const auto& restriction : restrictions) {
|
||||
const auto it = restrictionsByOperation.find(restriction.operationType.name);
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
if (it == restrictionsByOperation.end()) {
|
||||
PerTensorQuantization r(restriction.specifyVersion);
|
||||
r.portsByVersion.emplace(restriction.operationType.version, restriction.restrictedPorts);
|
||||
r.portsByVersion.emplace(restriction.operationType.version, restriction.restrictions);
|
||||
restrictionsByOperation.emplace(restriction.operationType.name, r);
|
||||
} else {
|
||||
it->second.add(restriction.operationType.version, restriction.restrictedPorts);
|
||||
it->second.add(restriction.operationType.version, restriction.restrictions);
|
||||
}
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
}
|
||||
|
||||
bool ngraph::pass::low_precision::MarkupPerTensorQuantization::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
|
||||
bool ngraph::pass::low_precision::MarkupQuantizationGranularity::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
|
||||
RUN_ON_FUNCTION_SCOPE(MarkupPerTensorQuantization);
|
||||
auto setRestriction = [](const std::shared_ptr<Node>& node, const std::vector<size_t>& restrictedPorts) {
|
||||
auto createAttribute = [](Input<Node>& input){
|
||||
auto setRestriction = [](const std::shared_ptr<Node>& node, const std::vector<PortQuantizationGranularityRestriction>& restrictedPorts) {
|
||||
auto createAttribute = [](Input<Node>& input, const QuantizationGranularityAttribute::Granularity granularity){
|
||||
auto &rt = input.get_rt_info();
|
||||
rt.emplace(
|
||||
PerTensorQuantizationAttribute::get_type_info_static(),
|
||||
PerTensorQuantizationAttribute());
|
||||
rt.emplace(QuantizationGranularityAttribute::get_type_info_static(), QuantizationGranularityAttribute(granularity));
|
||||
};
|
||||
|
||||
if (restrictedPorts.empty()) {
|
||||
// markup all ports
|
||||
// markup all ports with default granularity value
|
||||
for (size_t item = 0ul; item < node->get_input_size(); item++) {
|
||||
Input<Node> input = node->input(item);
|
||||
createAttribute(input);
|
||||
createAttribute(input, QuantizationGranularityAttribute::Granularity::PerTensor);
|
||||
}
|
||||
} else {
|
||||
// markup specific ports
|
||||
for (const size_t item : restrictedPorts) {
|
||||
Input<Node> input = node->input(item);
|
||||
createAttribute(input);
|
||||
for (const auto item : restrictedPorts) {
|
||||
Input<Node> input = node->input(item.port);
|
||||
createAttribute(input, item.granularity);
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -77,11 +75,11 @@ bool ngraph::pass::low_precision::MarkupPerTensorQuantization::run_on_model(cons
|
||||
continue;
|
||||
}
|
||||
|
||||
const std::vector<size_t>& restrictedPorts = it2->second;
|
||||
const std::vector<PortQuantizationGranularityRestriction>& restrictedPorts = it2->second;
|
||||
setRestriction(node, restrictedPorts);
|
||||
} else {
|
||||
assert(restriction.portsByVersion.size() == 1ul);
|
||||
const std::vector<size_t>& restrictedPorts = restriction.portsByVersion.begin()->second;
|
||||
const std::vector<PortQuantizationGranularityRestriction>& restrictedPorts = restriction.portsByVersion.begin()->second;
|
||||
setRestriction(node, restrictedPorts);
|
||||
}
|
||||
}
|
@ -15,7 +15,7 @@ namespace low_precision {
|
||||
|
||||
MultiplyToGroupConvolutionTransformation::MultiplyToGroupConvolutionTransformation(
|
||||
const Params& params,
|
||||
const OperationPrecisionRestriction::PrecisionsByPort& restrictions) : LayerTransformation(params), restrictions(restrictions), groupSize(1ul) {
|
||||
const PrecisionsRestriction::PrecisionsByPort& restrictions) : LayerTransformation(params), restrictions(restrictions), groupSize(1ul) {
|
||||
MATCHER_SCOPE(MultiplyToGroupConvolutionTransformation);
|
||||
auto matcher = pattern::wrap_type<opset1::Multiply>();
|
||||
|
||||
|
@ -15,6 +15,10 @@ using namespace ov;
|
||||
// Merge hook for AvgPoolPrecisionPreservedAttribute.
// The body is empty, so the `attributes` vector is left untouched and this
// attribute's own state is not modified.
void AvgPoolPrecisionPreservedAttribute::merge(std::vector<ov::Any>& attributes) {
|
||||
}
|
||||
|
||||
// Always returns false for this attribute.
// NOTE(review): how callers act on the "skipped" flag is not visible in this
// hunk - confirm against the code that queries is_skipped().
bool AvgPoolPrecisionPreservedAttribute::is_skipped() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string AvgPoolPrecisionPreservedAttribute::to_string() const {
|
||||
std::stringstream ss;
|
||||
ss << attribute->get_string();
|
||||
|
@ -1,10 +0,0 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace ov;
|
||||
|
||||
PerTensorQuantizationAttribute::~PerTensorQuantizationAttribute() = default;
|
@ -0,0 +1,34 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace ov;
|
||||
|
||||
// Returns true when the stored granularity is anything other than PerTensor,
// and false when it is PerTensor.
// The assert only checks (in builds where assert is active) that the value is
// one of the two known enumerators; with NDEBUG defined no check is performed.
// NOTE(review): `assert` requires <cassert>; no such include is visible in this
// file's hunk - presumably it arrives transitively via the header, confirm.
bool QuantizationGranularityAttribute::is_skipped() const {
|
||||
assert((granularity == Granularity::PerChannel) || (granularity == Granularity::PerTensor));
|
||||
return granularity != Granularity::PerTensor;
|
||||
}
|
||||
|
||||
// Returns a textual name for the stored granularity:
//   Granularity::PerChannel -> "PerChannel"
//   Granularity::PerTensor  -> "PerTensor"
//   any other value         -> "UNKNOWN"
// The leading assert rejects values outside the two known enumerators when
// asserts are enabled, so the "UNKNOWN" branch is only reachable in builds
// where assert is compiled out (NDEBUG).
// NOTE(review): std::stringstream requires <sstream> and assert requires
// <cassert>; neither include is visible in this file's hunk - confirm they are
// provided by the included header.
std::string QuantizationGranularityAttribute::to_string() const {
|
||||
assert((granularity == Granularity::PerChannel) || (granularity == Granularity::PerTensor));
|
||||
|
||||
std::stringstream ss;
|
||||
switch (granularity) {
|
||||
case Granularity::PerChannel: {
|
||||
ss << "PerChannel";
|
||||
break;
|
||||
}
|
||||
case Granularity::PerTensor: {
|
||||
ss << "PerTensor";
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
ss << "UNKNOWN";
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ss.str();
|
||||
|
||||
}
|
@ -96,7 +96,8 @@
|
||||
#include <transformations/common_optimizations/lin_op_sequence_fusion.hpp>
|
||||
|
||||
#include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>
|
||||
#include <low_precision/common/operation_per_tensor_quantization_restriction.hpp>
|
||||
#include <low_precision/common/quantization_granularity_restriction.hpp>
|
||||
#include <low_precision/common/precisions_restriction.hpp>
|
||||
#include <low_precision/convert_subtract_constant.hpp>
|
||||
#include <low_precision/convolution.hpp>
|
||||
#include <low_precision/convolution_backprop_data.hpp>
|
||||
@ -439,45 +440,45 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
|
||||
CPU_LPT_SCOPE(LowPrecisionTransformations_Part4);
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "LowPrecisionTransformations");
|
||||
|
||||
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}},
|
||||
}),
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
|
||||
{0, {ngraph::element::u8, ngraph::element::i8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
}),
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::GroupConvolution>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::GroupConvolution>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
}),
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::Multiply>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::Multiply>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}},
|
||||
}),
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::MatMul>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::MatMul>({
|
||||
{0, {ngraph::element::u8, ngraph::element::i8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
}),
|
||||
});
|
||||
|
||||
auto perTensorQuantization = std::vector<OperationPerTensorQuantizationRestriction>({
|
||||
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0}),
|
||||
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
|
||||
auto quantizationRestrictions = std::vector<QuantizationGranularityRestriction>({
|
||||
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0}),
|
||||
QuantizationGranularityRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
|
||||
});
|
||||
|
||||
// for GNA networks reference execution
|
||||
bool updatePrecision = true;
|
||||
if (hasINT16orINT32Levels) {
|
||||
updatePrecision = false;
|
||||
supportedPrecisions = std::vector<OperationPrecisionRestriction>({});
|
||||
supportedPrecisions = std::vector<PrecisionsRestriction>({});
|
||||
}
|
||||
|
||||
ngraph::pass::Manager lptManager;
|
||||
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(
|
||||
supportedPrecisions,
|
||||
perTensorQuantization,
|
||||
quantizationRestrictions,
|
||||
LayerTransformation::Params(updatePrecision, ngraph::element::f32, defaultPrecisions));
|
||||
lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MarkupPrecisions>([](const_node_ptr& node) -> bool {
|
||||
if (const auto mulitply = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
|
||||
|
@ -345,24 +345,24 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
|
||||
manager.run_passes(func);
|
||||
}
|
||||
|
||||
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, {ngraph::element::u8, ngraph::element::i8}},
|
||||
{1, {ngraph::element::i8}},
|
||||
}),
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
|
||||
{0, {ngraph::element::u8, ngraph::element::i8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
}),
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::GroupConvolution>({
|
||||
PrecisionsRestriction::create<ngraph::opset1::GroupConvolution>({
|
||||
{0, {ngraph::element::u8, ngraph::element::i8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
})
|
||||
});
|
||||
|
||||
auto perTensorQuantization = std::vector<OperationPerTensorQuantizationRestriction>({
|
||||
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0}),
|
||||
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0}),
|
||||
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
|
||||
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0}),
|
||||
QuantizationGranularityRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0}),
|
||||
});
|
||||
|
||||
ngraph::pass::Manager lptManager;
|
||||
|
@ -74,15 +74,15 @@ public:
|
||||
additionalLayer,
|
||||
testValues.actual.dequantization);
|
||||
|
||||
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
})
|
||||
});
|
||||
|
||||
auto perTensorQuantization = std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
|
||||
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0}),
|
||||
auto perTensorQuantization = std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0}),
|
||||
});
|
||||
|
||||
SimpleLowPrecisionTransformer transform(supportedPrecisions, perTensorQuantization);
|
||||
|
@ -85,8 +85,8 @@ public:
|
||||
testValues.actual.fakeQuantize1,
|
||||
testValues.actual.fakeQuantize2);
|
||||
|
||||
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, {ngraph::element::u8}}
|
||||
})
|
||||
});
|
||||
|
@ -12,9 +12,9 @@
|
||||
#include <low_precision/rt_info/precision_preserved_attribute.hpp>
|
||||
#include <low_precision/rt_info/intervals_alignment_attribute.hpp>
|
||||
#include <low_precision/rt_info/quantization_alignment_attribute.hpp>
|
||||
#include <low_precision/common/precisions_restriction.hpp>
|
||||
#include <low_precision/common/quantization_granularity_restriction.hpp>
|
||||
|
||||
#include <low_precision/common/operation_precision_restriction.hpp>
|
||||
#include <low_precision/common/operation_per_tensor_quantization_restriction.hpp>
|
||||
#include <low_precision/concat.hpp>
|
||||
#include <low_precision/fake_quantize_decomposition.hpp>
|
||||
#include <low_precision/fuse_subtract_to_fake_quantize.hpp>
|
||||
@ -146,14 +146,14 @@ public:
|
||||
{},
|
||||
testValues.axis,
|
||||
testValues.addNotPrecisionPreservedOperation);
|
||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
|
||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
|
||||
});
|
||||
|
||||
auto quantizationRestrictions = testValues.multiChannels ?
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
|
||||
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::AvgPool>()
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::AvgPool>()
|
||||
});
|
||||
|
||||
const auto params = TestTransformationParams::toParams(testValues.params);
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include "lpt_ngraph_functions/concat_function.hpp"
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
|
||||
#include "simple_low_precision_transformer.hpp"
|
||||
#include "low_precision/common/operation_per_tensor_quantization_restriction.hpp"
|
||||
#include "low_precision/common/quantization_granularity_restriction.hpp"
|
||||
|
||||
|
||||
using namespace testing;
|
||||
@ -92,9 +92,9 @@ public:
|
||||
testValues.actual.fakeQuantize2);
|
||||
|
||||
auto quantizationRestrictions = testValues.multiChannels ?
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
|
||||
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::AvgPool>()
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::AvgPool>()
|
||||
});
|
||||
|
||||
SimpleLowPrecisionTransformer transform({}, quantizationRestrictions);
|
||||
|
@ -89,14 +89,14 @@ public:
|
||||
testValues.actual.fakeQuantize1,
|
||||
testValues.actual.fakeQuantize2);
|
||||
|
||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
|
||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
|
||||
});
|
||||
|
||||
auto quantizationRestrictions = testValues.multiChannels ?
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
|
||||
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::AvgPool>()
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::AvgPool>()
|
||||
});
|
||||
|
||||
SimpleLowPrecisionTransformer transform(supportedPrecisionsOnActivation, quantizationRestrictions);
|
||||
|
@ -91,9 +91,9 @@ public:
|
||||
testValues.actual.fakeQuantize2);
|
||||
|
||||
auto quantizationRestrictions = testValues.multiChannels ?
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
|
||||
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>()
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>()
|
||||
});
|
||||
|
||||
SimpleLowPrecisionTransformer transform({}, quantizationRestrictions);
|
||||
|
@ -92,9 +92,9 @@ public:
|
||||
testValues.actual.fakeQuantize2);
|
||||
|
||||
auto quantizationRestrictions = testValues.multiChannels ?
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
|
||||
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::AvgPool>()
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::AvgPool>()
|
||||
});
|
||||
|
||||
SimpleLowPrecisionTransformer transform({}, quantizationRestrictions);
|
||||
|
@ -102,17 +102,17 @@ public:
|
||||
testValues.neighborType,
|
||||
testValues.additionalLayer);
|
||||
|
||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, testValues.params.precisionsOnActivations},
|
||||
{1, testValues.params.precisionsOnWeights}
|
||||
})
|
||||
});
|
||||
|
||||
auto quantizationRestrictions = testValues.multiChannels ?
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
|
||||
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>()
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>()
|
||||
});
|
||||
|
||||
SimpleLowPrecisionTransformer transform(supportedPrecisionsOnActivation, quantizationRestrictions);
|
||||
|
@ -104,17 +104,17 @@ public:
|
||||
testValues.actual.convert3,
|
||||
testValues.actual.dequantization3);
|
||||
|
||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
})
|
||||
});
|
||||
|
||||
auto quantizationRestrictions = testValues.multiChannels ?
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
|
||||
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0})
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0})
|
||||
});
|
||||
|
||||
SimpleLowPrecisionTransformer transform(supportedPrecisionsOnActivation, quantizationRestrictions);
|
||||
|
@ -22,7 +22,7 @@
|
||||
#include <low_precision/fuse_subtract_to_fake_quantize.hpp>
|
||||
#include <low_precision/fuse_multiply_to_fake_quantize.hpp>
|
||||
#include <low_precision/markup_can_be_quantized.hpp>
|
||||
#include <low_precision/markup_per_tensor_quantization.hpp>
|
||||
#include <low_precision/markup_quantization_granularity.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
#include "lpt_ngraph_functions/concat_function.hpp"
|
||||
@ -155,16 +155,16 @@ public:
|
||||
testValues.axis,
|
||||
testValues.addNotPrecisionPreservedOperation);
|
||||
|
||||
auto precisionsRestrictions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto precisionsRestrictions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
}),
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
|
||||
});
|
||||
|
||||
auto quantizationRestrictions = std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
|
||||
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0})
|
||||
auto quantizationRestrictions = std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0})
|
||||
});
|
||||
|
||||
const auto params = TestTransformationParams(testValues.params.updatePrecisions);
|
||||
@ -172,7 +172,7 @@ public:
|
||||
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::low_precision::MarkupPrecisions>(precisionsRestrictions);
|
||||
manager.register_pass<ngraph::pass::low_precision::MarkupPerTensorQuantization>(quantizationRestrictions);
|
||||
manager.register_pass<ngraph::pass::low_precision::MarkupQuantizationGranularity>(quantizationRestrictions);
|
||||
manager.register_pass<ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved>(params.defaultPrecisions);
|
||||
manager.register_pass<ngraph::pass::low_precision::PropagatePrecisions>();
|
||||
manager.register_pass<ngraph::pass::low_precision::AlignQuantizationIntervals>(params.defaultPrecisions);
|
||||
|
@ -20,8 +20,8 @@
|
||||
#include <low_precision/propagate_precisions.hpp>
|
||||
#include <low_precision/markup_avg_pool_precision_preserved.hpp>
|
||||
#include <low_precision/markup_precisions.hpp>
|
||||
#include <low_precision/markup_per_tensor_quantization.hpp>
|
||||
#include "low_precision/common/operation_precision_restriction.hpp"
|
||||
#include <low_precision/markup_quantization_granularity.hpp>
|
||||
#include "low_precision/common/precisions_restriction.hpp"
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
#include "lpt_ngraph_functions/concat_function.hpp"
|
||||
@ -99,17 +99,17 @@ public:
|
||||
testValues.actual.fakeQuantize2,
|
||||
addConvolution);
|
||||
|
||||
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, testValues.params.precisionsOnActivations},
|
||||
{1, testValues.params.precisionsOnWeights},
|
||||
})
|
||||
});
|
||||
|
||||
auto quantizationRestrictions = testValues.multiChannels ?
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
|
||||
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>()
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>()
|
||||
});
|
||||
|
||||
SimpleLowPrecisionTransformer transform(supportedPrecisions, quantizationRestrictions);
|
||||
|
@ -92,17 +92,17 @@ public:
|
||||
testValues.ssBeforeConcat,
|
||||
testValues.ssAfterConcat);
|
||||
|
||||
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, testValues.params.precisionsOnActivations},
|
||||
{1, testValues.params.precisionsOnWeights},
|
||||
})
|
||||
});
|
||||
|
||||
auto quantizationRestrictions = testValues.multiChannels ?
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
|
||||
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>()
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>()
|
||||
});
|
||||
|
||||
SimpleLowPrecisionTransformer transform(supportedPrecisions, quantizationRestrictions);
|
||||
|
@ -89,8 +89,8 @@ public:
|
||||
testValues.actual.fakeQuantizeOnWeights
|
||||
});
|
||||
|
||||
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, testValues.precisionsOnActivationForLimitedOperation},
|
||||
{1, { element::i8 }}
|
||||
})
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <low_precision/avg_pool.hpp>
|
||||
#include <low_precision/common/operation_precision_restriction.hpp>
|
||||
#include <low_precision/common/precisions_restriction.hpp>
|
||||
#include <low_precision/fake_quantize_decomposition.hpp>
|
||||
#include <low_precision/low_precision.hpp>
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
@ -88,8 +88,8 @@ public:
|
||||
fakeQuantizeOnData.actual,
|
||||
fakeQuantizeOnData.addNotPrecisionPreservedOperation);
|
||||
|
||||
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::AvgPool>({{0, params.precisionsOnActivations}})
|
||||
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::AvgPool>({{0, params.precisionsOnActivations}})
|
||||
});
|
||||
|
||||
SimpleLowPrecisionTransformer transform(supportedPrecisions, {}, { ngraph::element::f32, defaultPrecisions });
|
||||
|
@ -80,15 +80,15 @@ public:
|
||||
testValues.actual.dequantizationOnWeights,
|
||||
testValues.actual.dequantizationAfter);
|
||||
|
||||
auto precisionsRestrictions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto precisionsRestrictions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
})
|
||||
});
|
||||
|
||||
auto quantizationRestrictions = std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
|
||||
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>()
|
||||
auto quantizationRestrictions = std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>()
|
||||
});
|
||||
|
||||
SimpleLowPrecisionTransformer transformer(precisionsRestrictions, quantizationRestrictions);
|
||||
|
@ -85,8 +85,8 @@ public:
|
||||
ngraph::pass::low_precision::TypeRelaxedReplacer pass;
|
||||
pass.run_on_function(actualFunction);
|
||||
|
||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
})
|
||||
|
@ -154,14 +154,14 @@ public:
|
||||
testValues.axis,
|
||||
oneInputWithSplit);
|
||||
|
||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
|
||||
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
|
||||
});
|
||||
|
||||
auto quantizationRestrictions = testValues.multiChannels ?
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
|
||||
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::AvgPool>()
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
|
||||
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
|
||||
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::AvgPool>()
|
||||
});
|
||||
|
||||
const auto params = TestTransformationParams::toParams(testValues.params);
|
||||
|
@ -61,8 +61,8 @@ public:
|
||||
testValues.actual.dequantization,
|
||||
testValues.haveMultiplyWithNoConstBeforeDequantization);
|
||||
|
||||
auto precisionRestrictions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
|
||||
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Multiply>({
|
||||
auto precisionRestrictions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
|
||||
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Multiply>({
|
||||
{0, {ngraph::element::u8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
})
|
||||
|
@ -0,0 +1,107 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "layer_transformation.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <memory>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <low_precision/common/port_quantization_granularity_restriction.hpp>
|
||||
#include <low_precision/common/quantization_granularity_restriction.hpp>
|
||||
#include <low_precision/markup_quantization_granularity.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
#include "lpt_ngraph_functions/convolution_function.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ngraph;
|
||||
using namespace ngraph::pass;
|
||||
|
||||
class OperationQuantizationRestrictionTestValues {
|
||||
public:
|
||||
std::vector<ngraph::pass::low_precision::PortQuantizationGranularityRestriction> restrictions;
|
||||
};
|
||||
|
||||
typedef std::tuple<
|
||||
OperationQuantizationRestrictionTestValues,
|
||||
bool
|
||||
> OperationQuantizationRestrictionParams;
|
||||
|
||||
class OperationQuantizationRestrictionTest : public LayerTransformation, public testing::WithParamInterface<OperationQuantizationRestrictionParams> {
|
||||
public:
|
||||
void SetUp() override {
|
||||
const auto testValues = std::get<0>(GetParam());
|
||||
const auto explicitly = std::get<1>(GetParam());
|
||||
|
||||
std::vector<size_t> ports;
|
||||
if (!explicitly) {
|
||||
for (size_t i = 0; i < testValues.restrictions.size(); ++i) {
|
||||
ports.push_back(testValues.restrictions[i].port);
|
||||
}
|
||||
}
|
||||
|
||||
actualFunction = ngraph::builder::subgraph::ConvolutionFunction::get(
|
||||
Shape({ 1, 3, 16, 16 }),
|
||||
element::f32,
|
||||
{ 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
|
||||
std::vector<float>({ 1.f }),
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } });
|
||||
|
||||
ngraph::pass::Manager manager;
|
||||
const auto quantizationRestrictions = std::vector<low_precision::QuantizationGranularityRestriction>({
|
||||
explicitly ?
|
||||
low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>(testValues.restrictions, false) :
|
||||
low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>(ports)
|
||||
});
|
||||
manager.register_pass<ngraph::pass::low_precision::MarkupQuantizationGranularity>(quantizationRestrictions);
|
||||
manager.run_passes(actualFunction);
|
||||
|
||||
referenceFunction = ngraph::builder::subgraph::ConvolutionFunction::get(
|
||||
Shape({ 1, 3, 16, 16 }),
|
||||
element::f32,
|
||||
{ 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
|
||||
std::vector<float>({ 1.f }),
|
||||
{ 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
|
||||
quantizationRestrictions);
|
||||
}
|
||||
|
||||
static std::string getTestCaseName(testing::TestParamInfo<OperationQuantizationRestrictionParams> obj) {
|
||||
const auto testValues = std::get<0>(obj.param);
|
||||
const auto explicitly = std::get<1>(obj.param);
|
||||
|
||||
std::ostringstream result;
|
||||
result << testValues.restrictions.size() << "_" << explicitly;
|
||||
return result.str();
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(OperationQuantizationRestrictionTest, CompareFunctions) {
|
||||
auto res = compare_functions(actualFunction, referenceFunction, true, true, true, true);
|
||||
ASSERT_TRUE(res.first) << res.second;
|
||||
}
|
||||
|
||||
const std::vector<OperationQuantizationRestrictionTestValues> testValues = {
|
||||
{
|
||||
{}
|
||||
},
|
||||
{
|
||||
{{0, QuantizationGranularityAttribute::Granularity::PerTensor}}
|
||||
},
|
||||
{
|
||||
{{0, QuantizationGranularityAttribute::Granularity::PerTensor}, {1, QuantizationGranularityAttribute::Granularity::PerChannel}}
|
||||
}
|
||||
};
|
||||
|
||||
const std::vector<bool> explicitly = { true, false };
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_LPT,
|
||||
OperationQuantizationRestrictionTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(testValues),
|
||||
::testing::ValuesIn(explicitly)),
|
||||
OperationQuantizationRestrictionTest::getTestCaseName);
|
@ -12,7 +12,7 @@
|
||||
#include <low_precision/transformation_context.hpp>
|
||||
#include <low_precision/low_precision.hpp>
|
||||
#include <low_precision/align_quantization_parameters.hpp>
|
||||
#include <low_precision/markup_per_tensor_quantization.hpp>
|
||||
#include <low_precision/markup_quantization_granularity.hpp>
|
||||
#include <low_precision/markup_can_be_quantized.hpp>
|
||||
|
||||
using namespace testing;
|
||||
@ -21,8 +21,8 @@ using namespace ngraph::pass;
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
|
||||
SimpleLowPrecisionTransformer::SimpleLowPrecisionTransformer(
|
||||
const std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>& precisionRestrictions,
|
||||
const std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>& quantizationRestrictions,
|
||||
const std::vector<ngraph::pass::low_precision::PrecisionsRestriction>& precisionRestrictions,
|
||||
const std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>& quantizationRestrictions,
|
||||
const AttributeParameters& params) {
|
||||
auto passConfig = get_pass_config();
|
||||
|
||||
@ -30,7 +30,7 @@ SimpleLowPrecisionTransformer::SimpleLowPrecisionTransformer(
|
||||
markup = std::make_shared<ngraph::pass::Manager>(passConfig);
|
||||
markup->register_pass<ngraph::pass::low_precision::MarkupCanBeQuantized>(params.defaultPrecisions);
|
||||
markup->register_pass<ngraph::pass::low_precision::MarkupPrecisions>(precisionRestrictions, params.defaultPrecisions);
|
||||
markup->register_pass<ngraph::pass::low_precision::MarkupPerTensorQuantization>(quantizationRestrictions);
|
||||
markup->register_pass<ngraph::pass::low_precision::MarkupQuantizationGranularity>(quantizationRestrictions);
|
||||
markup->register_pass<ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved>(params.defaultPrecisions);
|
||||
markup->register_pass<ngraph::pass::low_precision::PropagatePrecisions>(params);
|
||||
markup->register_pass<ngraph::pass::low_precision::AlignQuantizationIntervals>(params.defaultPrecisions);
|
||||
|
@ -11,14 +11,14 @@
|
||||
#include "layer_transformation.hpp"
|
||||
#include "common_test_utils/test_common.hpp"
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
#include "low_precision/common/operation_precision_restriction.hpp"
|
||||
#include "low_precision/common/operation_per_tensor_quantization_restriction.hpp"
|
||||
#include "low_precision/common/precisions_restriction.hpp"
|
||||
#include "low_precision/common/quantization_granularity_restriction.hpp"
|
||||
|
||||
class SimpleLowPrecisionTransformer : public ngraph::pass::FunctionPass{
|
||||
public:
|
||||
SimpleLowPrecisionTransformer(
|
||||
const std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>& precisionRestrictions = {},
|
||||
const std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>& quantizationRestrictions = {},
|
||||
const std::vector<ngraph::pass::low_precision::PrecisionsRestriction>& precisionRestrictions = {},
|
||||
const std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>& quantizationRestrictions = {},
|
||||
const AttributeParameters& params = AttributeParameters());
|
||||
|
||||
template <class T, class Operation>
|
||||
|
@ -8,6 +8,8 @@
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
|
||||
#include <low_precision/common/quantization_granularity_restriction.hpp>
|
||||
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
|
||||
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
|
||||
@ -70,7 +72,8 @@ public:
|
||||
const ngraph::element::Type precision,
|
||||
const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData,
|
||||
const std::vector<float>& weightsValues,
|
||||
const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights);
|
||||
const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
|
||||
const std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>& restrictions = {});
|
||||
};
|
||||
} // namespace subgraph
|
||||
} // namespace builder
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <ngraph_ops/type_relaxed.hpp>
|
||||
#include "ngraph_functions/subgraph_builders.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
|
||||
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
|
||||
@ -321,7 +322,8 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::get(
|
||||
const ngraph::element::Type precision,
|
||||
const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData,
|
||||
const std::vector<float>& weightsValues,
|
||||
const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights) {
|
||||
const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
|
||||
const std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>& restrictions) {
|
||||
const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));
|
||||
input->set_friendly_name("input");
|
||||
|
||||
@ -369,6 +371,14 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::get(
|
||||
convolutionOriginal,
|
||||
std::vector<element::Type>{ element::f32, element::f32 },
|
||||
std::vector<element::Type>{});
|
||||
convolution->set_friendly_name("convolution");
|
||||
|
||||
for (const auto& r : restrictions) {
|
||||
for (const auto& restrictedPort : r.restrictions) {
|
||||
auto& rt = convolution->input(restrictedPort.port).get_rt_info();
|
||||
rt[QuantizationGranularityAttribute::get_type_info_static()] = QuantizationGranularityAttribute(restrictedPort.granularity);
|
||||
}
|
||||
}
|
||||
|
||||
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(convolution) };
|
||||
return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "ConvolutionFunction");
|
||||
|
Loading…
Reference in New Issue
Block a user