[LPT] Introduce new granularity attribute instead of OperationPerTensorQuantizationRestriction (#11330)

This commit is contained in:
Edward Shogulin 2022-04-03 19:35:04 +03:00 committed by GitHub
parent 29fb8c79b1
commit 5d821453ae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
51 changed files with 523 additions and 303 deletions

View File

@ -9,10 +9,10 @@
<tab type="user" title="Attributes" url="@ref openvino_docs_OV_UG_lpt_attributes">
<tab type="user" title="AvgPoolPrecisionPreserved" url="@ref openvino_docs_OV_UG_lpt_AvgPoolPrecisionPreserved"/>
<tab type="user" title="IntervalsAlignment" url="@ref openvino_docs_OV_UG_lpt_IntervalsAlignment"/>
<tab type="user" title="PerTensorQuantization" url="@ref openvino_docs_OV_UG_lpt_PerTensorQuantization"/>
<tab type="user" title="PrecisionPreserved" url="@ref openvino_docs_OV_UG_lpt_PrecisionPreserved"/>
<tab type="user" title="Precisions" url="@ref openvino_docs_OV_UG_lpt_Precisions"/>
<tab type="user" title="QuantizationAlignment" url="@ref openvino_docs_OV_UG_lpt_QuantizationAlignment"/>
<tab type="user" title="QuantizationGranularity" url="@ref openvino_docs_OV_UG_lpt_QuantizationGranularity"/>
</tab>
<tab type="user" title="Step 1. Prerequisites transformations" url="@ref openvino_docs_OV_UG_lpt_step1_prerequisites">
<tab type="user" title="LinOpSequenceFusion" url="@ref openvino_docs_OV_UG_lpt_LinOpSequenceFusion"/>

View File

@ -1,11 +0,0 @@
# PerTensorQuantization attribute {#openvino_docs_OV_UG_lpt_PerTensorQuantization}
ngraph::PerTensorQuantizationAttribute class represents the `PerTensorQuantization` attribute.
The attribute defines if the operation input port requires per-tensor quantization.
| Property name | Values |
|---------------|----------------------------------------------|
| Required | Yes |
| Defined | Operation, input ports |
| Properties | |

View File

@ -0,0 +1,11 @@
# QuantizationGranularity attribute {#openvino_docs_OV_UG_lpt_QuantizationGranularity}
ngraph::QuantizationGranularityAttribute class represents the `QuantizationGranularity` attribute.
The attribute defines quantization granularity of operation inputs.
| Property name | Values |
|---------------|----------------------------------------------|
| Required | No |
| Defined | Input ports |
| Properties | Quantization granularity |

View File

@ -8,29 +8,30 @@
:hidden:
AvgPoolPrecisionPreserved <openvino_docs_OV_UG_lpt_AvgPoolPrecisionPreserved>
IntervalsAlignment <openvino_docs_OV_UG_lpt_IntervalsAlignment>
PerTensorQuantization <openvino_docs_OV_UG_lpt_PerTensorQuantization>
IntervalsAlignment <openvino_docs_OV_UG_lpt_IntervalsAlignment>
PrecisionPreserved <openvino_docs_OV_UG_lpt_PrecisionPreserved>
Precisions <openvino_docs_OV_UG_lpt_Precisions>
QuantizationAlignment <openvino_docs_OV_UG_lpt_QuantizationAlignment>
QuantizationGranularity <openvino_docs_OV_UG_lpt_QuantizationGranularity>
@endsphinxdirective
## Introduction
| Name | Target | Required | Mutable |
|-------------------------------------------------------------------------------------|------------------------|----------|---------|
| [AvgPoolPrecisionPreserved](@ref openvino_docs_OV_UG_lpt_AvgPoolPrecisionPreserved) | Precision | No | Yes |
| [IntervalsAlignment](@ref openvino_docs_OV_UG_lpt_IntervalsAlignment) | Quantization interval | Yes | Yes |
| [PerTensorQuantization](@ref openvino_docs_OV_UG_lpt_PerTensorQuantization) | Precision | Yes | No |
| [PrecisionPreserved](@ref openvino_docs_OV_UG_lpt_PrecisionPreserved) | Precision | Yes | Yes |
| [Precisions](@ref openvino_docs_OV_UG_lpt_Precisions) | Precision | Yes | Yes |
| [QuantizationAlignment](@ref openvino_docs_OV_UG_lpt_QuantizationAlignment) | Quantization alignment | Yes | Yes |
| Name | Target | Required | Mutable |
|-------------------------------------------------------------------------------------|--------------------------|----------|---------|
| [AvgPoolPrecisionPreserved](@ref openvino_docs_OV_UG_lpt_AvgPoolPrecisionPreserved) | Precision | No | Yes |
| [IntervalsAlignment](@ref openvino_docs_OV_UG_lpt_IntervalsAlignment) | Quantization interval | Yes | Yes |
| [PrecisionPreserved](@ref openvino_docs_OV_UG_lpt_PrecisionPreserved) | Precision | Yes | Yes |
| [Precisions](@ref openvino_docs_OV_UG_lpt_Precisions) | Precision | Yes | Yes |
| [QuantizationAlignment](@ref openvino_docs_OV_UG_lpt_QuantizationAlignment) | Quantization granularity | Yes | Yes |
| [QuantizationGranularity](@ref openvino_docs_OV_UG_lpt_QuantizationGranularity) | Quantization granularity | Yes | No |
> `Target` attribute group defines attribute usage during model transformation for the best performance:
> - `Precision` - the attribute defines the most optimal output port precision.
> - `Quantization interval` - the attribute defines quantization interval.
> - `Quantization alignment` - the attribute defines quantization alignment: per-channel or per-tensor quantization.
> - `Quantization alignment` - the attribute defines quantization granularity in runtime: per-channel or per-tensor quantization.
> - `Quantization granularity` - the attribute is set by plugin to define quantization granularity: per-channel or per-tensor quantization.
>
> `Required` attribute group defines if attribute usage is required to get an optimal model during transformation:
> - `Yes` - the attribute is used by all OpenVINO plugins for low-precision optimization.

View File

@ -2,7 +2,7 @@
#include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>
#include <low_precision/common/operation_per_tensor_quantization_restriction.hpp>
#include <low_precision/common/quantization_granularity_restriction.hpp>
#include <low_precision/convert_subtract_constant.hpp>
#include <low_precision/convolution.hpp>
#include <low_precision/convolution_backprop_data.hpp>
@ -45,7 +45,7 @@ if (useLpt) {
// nGraph common transformations happen here
if (useLpt) {
// convert subtract constant to INT8 to prevent unnecessary FP16 to FP32 conversion
// convert subtract constant to INT8 to prevent unnecessary FP16 to FP32 conversion
manager.register_pass<ngraph::pass::low_precision::ConvertSubtractConstant>(defaultPrecisions);
}
@ -70,29 +70,29 @@ manager.run_passes(nGraphFunc);
using namespace ngraph::pass::low_precision;
if (useLpt) {
// Low precision transformations plugin specific configuration: restrictions definition
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}},
}),
OperationPrecisionRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
PrecisionsRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
{0, {ngraph::element::u8, ngraph::element::i8}},
{1, {ngraph::element::i8}}
}),
OperationPrecisionRestriction::create<ngraph::opset1::GroupConvolution>({
PrecisionsRestriction::create<ngraph::opset1::GroupConvolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}}
}),
OperationPrecisionRestriction::create<ngraph::opset1::Multiply>({
PrecisionsRestriction::create<ngraph::opset1::Multiply>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}},
}),
});
// Low precision transformations plugin specific configuration: per-tensor quantization operations definition
auto perTensorQuantization = std::vector<OperationPerTensorQuantizationRestriction>({
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0}),
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0}),
QuantizationGranularityRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
});
// Low precision transformations instantiation and registration in pass manager
@ -133,8 +133,8 @@ ngraph::pass::Manager manager;
using namespace ngraph::pass::low_precision;
//! [lpt_supported_precisions]
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}},
}),
@ -157,10 +157,10 @@ std::shared_ptr<ov::Model> nGraphFunc;
//! [per_tensor_quantization]
using namespace ngraph::pass::low_precision;
const std::vector<OperationPrecisionRestriction> emptyRestrictions;
const std::vector<PrecisionsRestriction> emptyRestrictions;
auto perTensorQuantization = std::vector<OperationPerTensorQuantizationRestriction>({
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0})
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0})
});
ngraph::pass::Manager lptManager;
@ -197,15 +197,15 @@ ngraph::pass::Manager manager;
using namespace ngraph::pass::low_precision;
//! [lpt_markup_pipeline]
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}},
}),
});
auto perTensorQuantization = std::vector<OperationPerTensorQuantizationRestriction>({
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0})
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0})
});
ngraph::pass::Manager lptManager;

View File

@ -1,56 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>
#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
namespace ngraph {
namespace pass {
namespace low_precision {
// Describes, for one operation type, the set of input ports that require per-tensor quantization.
class OperationPerTensorQuantizationRestriction {
public:
using RestrictedPorts = std::vector<size_t>;
// Operation type the restriction applies to.
ngraph::Node::type_info_t operationType;
// Whether the operation version must also match — consumed by the markup pass; TODO confirm exact matching rules there.
bool specifyVersion;
// Input port indices restricted to per-tensor quantization; an empty list means all ports are restricted.
std::vector<size_t> restrictedPorts;
OperationPerTensorQuantizationRestriction() = default;
OperationPerTensorQuantizationRestriction(
const ngraph::Node::type_info_t operationType,
const bool specifyVersion,
const RestrictedPorts& restrictedPorts) :
operationType(operationType),
specifyVersion(specifyVersion),
restrictedPorts(restrictedPorts) {}
// Factory: builds a restriction for operation type T (type info taken from T's static type info).
template <typename T>
static OperationPerTensorQuantizationRestriction create(
const RestrictedPorts& restrictedPorts = {},
const bool specifyVersion = false) {
return OperationPerTensorQuantizationRestriction(T::get_type_info_static(), specifyVersion, restrictedPorts);
}
// Returns the restricted ports registered for operation type T, or an empty list if none.
template <typename T>
static RestrictedPorts getPrecisionsByOperationType(std::vector<OperationPerTensorQuantizationRestriction>& restrictions) {
for (const auto& restriction : restrictions) {
if (restriction.operationType == T::get_type_info_static()) {
return restriction.restrictedPorts;
}
}
return {};
}
};
} // namespace low_precision
} // namespace pass
} // namespace ngraph

View File

@ -0,0 +1,31 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <unordered_map>
#include <vector>
#include <ngraph/pass/pass.hpp>
#include "low_precision/lpt_visibility.hpp"
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
class LP_TRANSFORMATIONS_API PortQuantizationGranularityRestriction {
public:
PortQuantizationGranularityRestriction(const size_t port, QuantizationGranularityAttribute::Granularity granularity) :
port(port),
granularity(granularity) {}
size_t port;
QuantizationGranularityAttribute::Granularity granularity;
};
} // namespace low_precision
} // namespace pass
} // namespace ngraph

View File

@ -19,7 +19,7 @@ namespace ngraph {
namespace pass {
namespace low_precision {
class OperationPrecisionRestriction {
class PrecisionsRestriction {
public:
using PrecisionsByPort = std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>>;
@ -27,8 +27,8 @@ public:
bool specifyVersion;
std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>> precisionsByPort;
OperationPrecisionRestriction() = default;
OperationPrecisionRestriction(
PrecisionsRestriction() = default;
PrecisionsRestriction(
const ngraph::Node::type_info_t operationType,
const bool specifyVersion,
const PrecisionsByPort& precisionsByPort) :
@ -37,14 +37,14 @@ public:
precisionsByPort(precisionsByPort) {}
template <typename T>
static OperationPrecisionRestriction create(
static PrecisionsRestriction create(
const PrecisionsByPort& precisionsByPort,
const bool specifyVersion = false) {
return OperationPrecisionRestriction(T::get_type_info_static(), specifyVersion, precisionsByPort);
return PrecisionsRestriction(T::get_type_info_static(), specifyVersion, precisionsByPort);
}
template <typename T>
static PrecisionsByPort getPrecisionsByOperationType(std::vector<OperationPrecisionRestriction>& restrictions) {
static PrecisionsByPort getPrecisionsByOperationType(std::vector<PrecisionsRestriction>& restrictions) {
for (const auto& restriction : restrictions) {
if (restriction.operationType == T::get_type_info_static()) {
return restriction.precisionsByPort;

View File

@ -0,0 +1,71 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>
#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <low_precision/rt_info/quantization_granularity_attribute.hpp>
#include <low_precision/common/port_quantization_granularity_restriction.hpp>
namespace ngraph {
namespace pass {
namespace low_precision {
/**
 * @ingroup ie_transformation_common_api
 * @brief QuantizationGranularityRestriction collects, for one operation type, the per-port
 * quantization granularity restrictions consumed by the MarkupQuantizationGranularity pass.
 */
class LP_TRANSFORMATIONS_API QuantizationGranularityRestriction {
public:
    // Operation type the restriction applies to.
    ngraph::Node::type_info_t operationType;
    // Whether the operation version must also match — consumed by the markup pass; TODO confirm exact matching rules there.
    bool specifyVersion;
    // Per-port granularity restrictions; an empty list means all ports are restricted.
    std::vector<PortQuantizationGranularityRestriction> restrictions;
    QuantizationGranularityRestriction() = default;
    QuantizationGranularityRestriction(
        const ngraph::Node::type_info_t operationType,
        const bool specifyVersion,
        const std::vector<PortQuantizationGranularityRestriction>& restrictions) :
        operationType(operationType),
        specifyVersion(specifyVersion),
        restrictions(restrictions) {}
    // Factory: explicit per-port granularity restrictions for operation type T.
    template <typename T>
    static QuantizationGranularityRestriction create(
        const std::vector<PortQuantizationGranularityRestriction>& restrictions,
        const bool specifyVersion) {
        return QuantizationGranularityRestriction(T::get_type_info_static(), specifyVersion, restrictions);
    }
    // Convenience factory: every listed port defaults to per-tensor quantization.
    template <typename T>
    static QuantizationGranularityRestriction create(
        const std::vector<size_t>& restrictedPorts = {},
        const bool specifyVersion = false) {
        std::vector<PortQuantizationGranularityRestriction> restrictions;
        restrictions.reserve(restrictedPorts.size());
        for (const auto port : restrictedPorts) {
            restrictions.emplace_back(port, ngraph::QuantizationGranularityAttribute::Granularity::PerTensor);
        }
        return QuantizationGranularityRestriction(T::get_type_info_static(), specifyVersion, restrictions);
    }
    // Returns the restrictions registered for operation type T, or an empty list if none.
    // NOTE(review): name kept for API compatibility with PrecisionsRestriction — it returns
    // port granularity restrictions, not precisions.
    template <typename T>
    static std::vector<PortQuantizationGranularityRestriction> getPrecisionsByOperationType(std::vector<QuantizationGranularityRestriction>& restrictions) {
        for (const auto& restriction : restrictions) {
            if (restriction.operationType == T::get_type_info_static()) {
                return restriction.restrictions;
            }
        }
        return {};
    }
};
} // namespace low_precision
} // namespace pass
} // namespace ngraph

View File

@ -21,7 +21,8 @@
#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include <low_precision/common/operation_per_tensor_quantization_restriction.hpp>
#include <low_precision/common/quantization_granularity_restriction.hpp>
#include <low_precision/common/precisions_restriction.hpp>
#include "low_precision/layer_transformation.hpp"
#include "low_precision/markup_precisions.hpp"
@ -41,13 +42,13 @@ class ngraph::pass::low_precision::MarkupOptimizations : public ngraph::pass::Fu
public:
OPENVINO_RTTI("MarkupOptimizations", "0");
MarkupOptimizations(
const std::vector<OperationPrecisionRestriction>& precisionRestrictions,
const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions,
const std::vector<PrecisionsRestriction>& precisionRestrictions,
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions,
const AttributeParameters& params);
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
private:
const std::vector<OperationPrecisionRestriction>& precisionRestrictions;
const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions;
const std::vector<PrecisionsRestriction>& precisionRestrictions;
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions;
const AttributeParameters& params;
};
@ -61,8 +62,8 @@ class ngraph::pass::low_precision::LowPrecision : public ngraph::pass::FunctionP
public:
OPENVINO_RTTI("LowPrecision", "0");
LowPrecision(
const std::vector<OperationPrecisionRestriction>& precisionRestrictions = {},
const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions = {},
const std::vector<PrecisionsRestriction>& precisionRestrictions = {},
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions = {},
const LayerTransformation::Params = LayerTransformation::Params());
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
@ -70,8 +71,8 @@ public:
static bool isFQLevelsPresent(const std::shared_ptr<const ngraph::Function>& function, const std::set<size_t>& levels);
protected:
std::vector<OperationPrecisionRestriction> precisionRestrictions;
std::vector<OperationPerTensorQuantizationRestriction> quantizationRestrictions;
std::vector<PrecisionsRestriction> precisionRestrictions;
std::vector<QuantizationGranularityRestriction> quantizationRestrictions;
// remove
LayerTransformation::Params params;
};

View File

@ -10,7 +10,7 @@
#include <ngraph/pass/pass.hpp>
#include "low_precision/lpt_visibility.hpp"
#include "low_precision/common/operation_precision_restriction.hpp"
#include "low_precision/common/precisions_restriction.hpp"
namespace ngraph {
namespace pass {
@ -48,7 +48,7 @@ public:
};
OPENVINO_RTTI("MarkupPrecisions", "0");
explicit MarkupPrecisions(const std::vector<OperationPrecisionRestriction>& restrictions = {},
explicit MarkupPrecisions(const std::vector<PrecisionsRestriction>& restrictions = {},
const std::vector<ngraph::element::Type>& defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 });
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;

View File

@ -9,14 +9,15 @@
#include <vector>
#include <ngraph/pass/pass.hpp>
#include "common/operation_per_tensor_quantization_restriction.hpp"
#include "low_precision/common/port_quantization_granularity_restriction.hpp"
#include "low_precision/common/quantization_granularity_restriction.hpp"
#include "low_precision/lpt_visibility.hpp"
namespace ngraph {
namespace pass {
namespace low_precision {
class LP_TRANSFORMATIONS_API MarkupPerTensorQuantization;
class LP_TRANSFORMATIONS_API MarkupQuantizationGranularity;
} // namespace low_precision
} // namespace pass
@ -31,21 +32,21 @@ class LP_TRANSFORMATIONS_API MarkupPerTensorQuantization;
* [MarkupPerTensorQuantization](@ref openvino_docs_OV_UG_lpt_MarkupPerTensorQuantization) page
* in the Inference Engine Developer Guide.
*/
class ngraph::pass::low_precision::MarkupPerTensorQuantization : public ngraph::pass::FunctionPass {
class ngraph::pass::low_precision::MarkupQuantizationGranularity : public ngraph::pass::FunctionPass {
public:
class PerTensorQuantization {
public:
explicit PerTensorQuantization(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {}
void add(const uint64_t version, const std::vector<size_t>& ports) {
portsByVersion.emplace(version, ports);
void add(const uint64_t version, const std::vector<PortQuantizationGranularityRestriction>& restrictions) {
portsByVersion.emplace(version, restrictions);
}
bool versionIsRequired;
std::unordered_map<uint64_t, std::vector<size_t>> portsByVersion;
std::unordered_map<uint64_t, std::vector<PortQuantizationGranularityRestriction>> portsByVersion;
};
OPENVINO_RTTI("MarkupPerTensorQuantization", "0");
explicit MarkupPerTensorQuantization(const std::vector<OperationPerTensorQuantizationRestriction>& restrictions = {});
explicit MarkupQuantizationGranularity(const std::vector<QuantizationGranularityRestriction>& restrictions = {});
bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
private:

View File

@ -7,7 +7,7 @@
#include <memory>
#include <ngraph/ngraph.hpp>
#include "low_precision/layer_transformation.hpp"
#include "common/operation_precision_restriction.hpp"
#include "common/precisions_restriction.hpp"
namespace ngraph {
namespace pass {
@ -26,7 +26,7 @@ public:
OPENVINO_RTTI("MultiplyToGroupConvolutionTransformation", "0");
MultiplyToGroupConvolutionTransformation(
const Params& params = Params(),
const OperationPrecisionRestriction::PrecisionsByPort& restrictions = {});
const PrecisionsRestriction::PrecisionsByPort& restrictions = {});
~MultiplyToGroupConvolutionTransformation() override {}
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
@ -39,7 +39,7 @@ public:
void setGroupSize(const size_t groupSize);
size_t getGroupSize() const;
private:
OperationPrecisionRestriction::PrecisionsByPort restrictions;
PrecisionsRestriction::PrecisionsByPort restrictions;
size_t groupSize;
};

View File

@ -18,7 +18,7 @@
#include "rt_info/shared_value_attribute.hpp"
#include "rt_info/precisions_attribute.hpp"
#include "rt_info/per_tensor_quantization_attribute.hpp"
#include "rt_info/quantization_granularity_attribute.hpp"
#include "rt_info/intervals_alignment_attribute.hpp"
#include "transformation_context.hpp"
#include "quantization_details.hpp"

View File

@ -27,6 +27,7 @@ public:
OPENVINO_RTTI("LowPrecision::AvgPoolPrecisionPreserved", "", ov::RuntimeAttribute, 0);
using PrecisionPreservedAttribute::PrecisionPreservedAttribute;
void merge(std::vector<ov::Any>& attributes);
bool is_skipped() const;
std::string to_string() const override;
};

View File

@ -1,29 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>
#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include "low_precision/rt_info/shared_value_attribute.hpp"
#include "low_precision/layer_transformation.hpp"
#include "attribute_parameters.hpp"
namespace ngraph {
/**
* @ingroup ie_transformation_common_api
* @brief PerTensorQuantizationAttribute defines if operation input port requires per-tensor quantization.
*
* For more details about the attribute, refer to
* [PerTensorQuantizationAttribute](@ref openvino_docs_OV_UG_lpt_PerTensorQuantization) page in the Inference Engine Developer Guide.
*/
// Marker attribute: its presence on an operation input port means the port requires
// per-tensor quantization (see the class doc comment above).
class LP_TRANSFORMATIONS_API PerTensorQuantizationAttribute : public ov::RuntimeAttribute {
public:
OPENVINO_RTTI("LowPrecision::PerTensorQuantization", "", ov::RuntimeAttribute, 0);
~PerTensorQuantizationAttribute();
};
} // namespace ngraph

View File

@ -0,0 +1,45 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>
#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include "low_precision/rt_info/shared_value_attribute.hpp"
#include "low_precision/layer_transformation.hpp"
#include "attribute_parameters.hpp"
namespace ngraph {
/**
* @ingroup ie_transformation_common_api
* @brief QuantizationGranularityAttribute defines quantization granularity of operation inputs.
*
* For more details about the attribute, refer to
* [QuantizationGranularityAttribute](@ref openvino_docs_OV_UG_lpt_QuantizationGranularity) page in the Inference Engine Developer Guide.
*/
class LP_TRANSFORMATIONS_API QuantizationGranularityAttribute : public ov::RuntimeAttribute {
public:
OPENVINO_RTTI("LowPrecision::QuantizationGranularity", "", ov::RuntimeAttribute, 0);
// Quantization granularity of an operation input port.
enum class Granularity {
PerChannel,
PerTensor
};
// Default-constructed attribute uses per-channel granularity.
QuantizationGranularityAttribute() : granularity(Granularity::PerChannel) {}
QuantizationGranularityAttribute(const Granularity granularity) : granularity(granularity) {}
// Attributes compare equal when their granularity values match.
bool operator==(const QuantizationGranularityAttribute& attribute) const {
return this->granularity == attribute.granularity;
}
bool is_skipped() const;
std::string to_string() const override;
// Stored granularity value; set by the plugin via the markup pass.
Granularity granularity;
};
} // namespace ngraph

View File

@ -70,7 +70,13 @@ public:
for (auto input : node->inputs()) {
if (needToCheckExpectedAttributeType) {
if (getAttribute<ExpectedAttributeType>(input).empty()) {
const auto& attribute = getAttribute<ExpectedAttributeType>(input);
if (attribute.empty()) {
return false;
}
const auto& expectedAttribute = attribute.template as<ExpectedAttributeType>();
if (expectedAttribute.is_skipped()) {
return false;
}
}

View File

@ -6,10 +6,10 @@
#include <memory>
#include "low_precision/create_attribute.hpp"
#include "low_precision/propagate_through_precision_preserved.hpp"
#include "low_precision/rt_info/quantization_alignment_attribute.hpp"
#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp"
#include "low_precision/update_shared_precision_preserved.hpp"
#include "low_precision/rt_info/attribute_parameters.hpp"
#include "low_precision/rt_info/quantization_alignment_attribute.hpp"
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
#include "low_precision/update_shared_precision_preserved.hpp"
#include "itt.hpp"
using namespace ngraph;
@ -25,7 +25,7 @@ bool ngraph::pass::low_precision::AlignQuantizationParameters::run_on_model(cons
std::shared_ptr<ngraph::pass::GraphRewrite> propagation = manager.register_pass<ngraph::pass::GraphRewrite>();
propagation->add_matcher<low_precision::CreateAttribute<QuantizationAlignmentAttribute>>();
propagation->add_matcher<low_precision::PropagateThroughPrecisionPreserved<QuantizationAlignmentAttribute>>();
propagation->add_matcher<low_precision::UpdateSharedPrecisionPreserved<QuantizationAlignmentAttribute, PerTensorQuantizationAttribute>>();
propagation->add_matcher<low_precision::UpdateSharedPrecisionPreserved<QuantizationAlignmentAttribute, QuantizationGranularityAttribute>>();
manager.run_passes(f);
return false;
}

View File

@ -14,7 +14,6 @@
#include <ngraph/opsets/opset4.hpp>
#include <ngraph/opsets/opset6.hpp>
#include <transformations/utils/utils.hpp>
#include <low_precision/markup_per_tensor_quantization.hpp>
#include <low_precision/lpt_itt.hpp>
#include "low_precision/align_quantization_intervals.hpp"
@ -22,6 +21,7 @@
#include "low_precision/markup_precisions.hpp"
#include "low_precision/markup_can_be_quantized.hpp"
#include "low_precision/markup_avg_pool_precision_preserved.hpp"
#include <low_precision/markup_quantization_granularity.hpp>
#include "low_precision/propagate_precisions.hpp"
#include "low_precision/align_quantization_parameters.hpp"
@ -81,8 +81,8 @@
#include "itt.hpp"
ngraph::pass::low_precision::LowPrecision::LowPrecision(
const std::vector<OperationPrecisionRestriction>& precisionRestrictions,
const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions,
const std::vector<PrecisionsRestriction>& precisionRestrictions,
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions,
const LayerTransformation::Params params) :
precisionRestrictions(precisionRestrictions),
quantizationRestrictions(quantizationRestrictions),
@ -158,8 +158,8 @@ ngraph::pass::low_precision::TypeRelaxedReplacer::TypeRelaxedReplacer() {
}
MarkupOptimizations::MarkupOptimizations(
const std::vector<OperationPrecisionRestriction>& precisionRestrictions,
const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions,
const std::vector<PrecisionsRestriction>& precisionRestrictions,
const std::vector<QuantizationGranularityRestriction>& quantizationRestrictions,
const AttributeParameters& params) :
precisionRestrictions(precisionRestrictions),
quantizationRestrictions(quantizationRestrictions),
@ -174,7 +174,7 @@ bool ngraph::pass::low_precision::MarkupOptimizations::run_on_model(const std::s
markup.register_pass<low_precision::MarkupPrecisions>(precisionRestrictions, params.defaultPrecisions);
}
if (!quantizationRestrictions.empty()) {
markup.register_pass<low_precision::MarkupPerTensorQuantization>(quantizationRestrictions);
markup.register_pass<low_precision::MarkupQuantizationGranularity>(quantizationRestrictions);
}
if (ngraph::op::util::has_op_with_type<ngraph::opset1::AvgPool>(f)) {
markup.register_pass<low_precision::MarkupAvgPoolPrecisionPreserved>(params.defaultPrecisions);
@ -249,7 +249,7 @@ bool ngraph::pass::low_precision::LowPrecision::run_on_model(const std::shared_p
// WA: precision restrictions for groupConv must be propagated to MultiplyToGroupConvolution transformation
cleanup->add_matcher<ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation>(
params,
OperationPrecisionRestriction::getPrecisionsByOperationType<opset1::GroupConvolution>(precisionRestrictions));
PrecisionsRestriction::getPrecisionsByOperationType<opset1::GroupConvolution>(precisionRestrictions));
manager.register_pass<ngraph::pass::low_precision::FoldFakeQuantizeTransformation>(params);
manager.register_pass<ngraph::pass::ConstantFolding>();

View File

@ -20,7 +20,8 @@
using namespace ngraph;
ngraph::pass::low_precision::MarkupPrecisions::MarkupPrecisions(const std::vector<OperationPrecisionRestriction>& restrictions,
ngraph::pass::low_precision::MarkupPrecisions::MarkupPrecisions(
const std::vector<PrecisionsRestriction>& restrictions,
const std::vector<ngraph::element::Type>& defaultPrecisions) : defaultPrecisions(defaultPrecisions) {
for (const auto& restriction : restrictions) {
const auto it = restrictionsByOperation.find(restriction.operationType.name);

View File

@ -2,54 +2,52 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision/markup_per_tensor_quantization.hpp"
#include "low_precision/markup_quantization_granularity.hpp"
#include <cassert>
#include <memory>
#include <vector>
#include <ngraph/node.hpp>
#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp"
#include "itt.hpp"
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
using namespace ngraph;
ngraph::pass::low_precision::MarkupPerTensorQuantization::MarkupPerTensorQuantization(
const std::vector<OperationPerTensorQuantizationRestriction>& restrictions) {
for (const OperationPerTensorQuantizationRestriction& restriction : restrictions) {
ngraph::pass::low_precision::MarkupQuantizationGranularity::MarkupQuantizationGranularity(
const std::vector<QuantizationGranularityRestriction>& restrictions) {
for (const auto& restriction : restrictions) {
const auto it = restrictionsByOperation.find(restriction.operationType.name);
OPENVINO_SUPPRESS_DEPRECATED_START
if (it == restrictionsByOperation.end()) {
PerTensorQuantization r(restriction.specifyVersion);
r.portsByVersion.emplace(restriction.operationType.version, restriction.restrictedPorts);
r.portsByVersion.emplace(restriction.operationType.version, restriction.restrictions);
restrictionsByOperation.emplace(restriction.operationType.name, r);
} else {
it->second.add(restriction.operationType.version, restriction.restrictedPorts);
it->second.add(restriction.operationType.version, restriction.restrictions);
}
OPENVINO_SUPPRESS_DEPRECATED_END
}
}
bool ngraph::pass::low_precision::MarkupPerTensorQuantization::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
bool ngraph::pass::low_precision::MarkupQuantizationGranularity::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
RUN_ON_FUNCTION_SCOPE(MarkupPerTensorQuantization);
auto setRestriction = [](const std::shared_ptr<Node>& node, const std::vector<size_t>& restrictedPorts) {
auto createAttribute = [](Input<Node>& input){
auto setRestriction = [](const std::shared_ptr<Node>& node, const std::vector<PortQuantizationGranularityRestriction>& restrictedPorts) {
auto createAttribute = [](Input<Node>& input, const QuantizationGranularityAttribute::Granularity granularity){
auto &rt = input.get_rt_info();
rt.emplace(
PerTensorQuantizationAttribute::get_type_info_static(),
PerTensorQuantizationAttribute());
rt.emplace(QuantizationGranularityAttribute::get_type_info_static(), QuantizationGranularityAttribute(granularity));
};
if (restrictedPorts.empty()) {
// markup all ports
// markup all ports with default granularity value
for (size_t item = 0ul; item < node->get_input_size(); item++) {
Input<Node> input = node->input(item);
createAttribute(input);
createAttribute(input, QuantizationGranularityAttribute::Granularity::PerTensor);
}
} else {
// markup specific ports
for (const size_t item : restrictedPorts) {
Input<Node> input = node->input(item);
createAttribute(input);
for (const auto item : restrictedPorts) {
Input<Node> input = node->input(item.port);
createAttribute(input, item.granularity);
}
}
};
@ -77,11 +75,11 @@ bool ngraph::pass::low_precision::MarkupPerTensorQuantization::run_on_model(cons
continue;
}
const std::vector<size_t>& restrictedPorts = it2->second;
const std::vector<PortQuantizationGranularityRestriction>& restrictedPorts = it2->second;
setRestriction(node, restrictedPorts);
} else {
assert(restriction.portsByVersion.size() == 1ul);
const std::vector<size_t>& restrictedPorts = restriction.portsByVersion.begin()->second;
const std::vector<PortQuantizationGranularityRestriction>& restrictedPorts = restriction.portsByVersion.begin()->second;
setRestriction(node, restrictedPorts);
}
}

View File

@ -15,7 +15,7 @@ namespace low_precision {
MultiplyToGroupConvolutionTransformation::MultiplyToGroupConvolutionTransformation(
const Params& params,
const OperationPrecisionRestriction::PrecisionsByPort& restrictions) : LayerTransformation(params), restrictions(restrictions), groupSize(1ul) {
const PrecisionsRestriction::PrecisionsByPort& restrictions) : LayerTransformation(params), restrictions(restrictions), groupSize(1ul) {
MATCHER_SCOPE(MultiplyToGroupConvolutionTransformation);
auto matcher = pattern::wrap_type<opset1::Multiply>();

View File

@ -15,6 +15,10 @@ using namespace ov;
void AvgPoolPrecisionPreservedAttribute::merge(std::vector<ov::Any>& attributes) {
}
bool AvgPoolPrecisionPreservedAttribute::is_skipped() const {
return false;
}
std::string AvgPoolPrecisionPreservedAttribute::to_string() const {
std::stringstream ss;
ss << attribute->get_string();

View File

@ -1,10 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp"
using namespace ngraph;
using namespace ov;
PerTensorQuantizationAttribute::~PerTensorQuantizationAttribute() = default;

View File

@ -0,0 +1,34 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
using namespace ngraph;
using namespace ov;
bool QuantizationGranularityAttribute::is_skipped() const {
assert((granularity == Granularity::PerChannel) || (granularity == Granularity::PerTensor));
return granularity != Granularity::PerTensor;
}
std::string QuantizationGranularityAttribute::to_string() const {
assert((granularity == Granularity::PerChannel) || (granularity == Granularity::PerTensor));
std::stringstream ss;
switch (granularity) {
case Granularity::PerChannel: {
ss << "PerChannel";
break;
}
case Granularity::PerTensor: {
ss << "PerTensor";
break;
}
default: {
ss << "UNKNOWN";
break;
}
}
return ss.str();
}

View File

@ -96,7 +96,8 @@
#include <transformations/common_optimizations/lin_op_sequence_fusion.hpp>
#include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>
#include <low_precision/common/operation_per_tensor_quantization_restriction.hpp>
#include <low_precision/common/quantization_granularity_restriction.hpp>
#include <low_precision/common/precisions_restriction.hpp>
#include <low_precision/convert_subtract_constant.hpp>
#include <low_precision/convolution.hpp>
#include <low_precision/convolution_backprop_data.hpp>
@ -439,45 +440,45 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
CPU_LPT_SCOPE(LowPrecisionTransformations_Part4);
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "LowPrecisionTransformations");
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}},
}),
OperationPrecisionRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
PrecisionsRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
{0, {ngraph::element::u8, ngraph::element::i8}},
{1, {ngraph::element::i8}}
}),
OperationPrecisionRestriction::create<ngraph::opset1::GroupConvolution>({
PrecisionsRestriction::create<ngraph::opset1::GroupConvolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}}
}),
OperationPrecisionRestriction::create<ngraph::opset1::Multiply>({
PrecisionsRestriction::create<ngraph::opset1::Multiply>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}},
}),
OperationPrecisionRestriction::create<ngraph::opset1::MatMul>({
PrecisionsRestriction::create<ngraph::opset1::MatMul>({
{0, {ngraph::element::u8, ngraph::element::i8}},
{1, {ngraph::element::i8}}
}),
});
auto perTensorQuantization = std::vector<OperationPerTensorQuantizationRestriction>({
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0}),
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
auto quantizationRestrictions = std::vector<QuantizationGranularityRestriction>({
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0}),
QuantizationGranularityRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0})
});
// for GNA networks reference execution
bool updatePrecision = true;
if (hasINT16orINT32Levels) {
updatePrecision = false;
supportedPrecisions = std::vector<OperationPrecisionRestriction>({});
supportedPrecisions = std::vector<PrecisionsRestriction>({});
}
ngraph::pass::Manager lptManager;
lptManager.register_pass<ngraph::pass::low_precision::LowPrecision>(
supportedPrecisions,
perTensorQuantization,
quantizationRestrictions,
LayerTransformation::Params(updatePrecision, ngraph::element::f32, defaultPrecisions));
lptManager.get_pass_config()->set_callback<ngraph::pass::low_precision::MarkupPrecisions>([](const_node_ptr& node) -> bool {
if (const auto mulitply = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {

View File

@ -345,24 +345,24 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
manager.run_passes(func);
}
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisions = std::vector<PrecisionsRestriction>({
PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8, ngraph::element::i8}},
{1, {ngraph::element::i8}},
}),
OperationPrecisionRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
PrecisionsRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
{0, {ngraph::element::u8, ngraph::element::i8}},
{1, {ngraph::element::i8}}
}),
OperationPrecisionRestriction::create<ngraph::opset1::GroupConvolution>({
PrecisionsRestriction::create<ngraph::opset1::GroupConvolution>({
{0, {ngraph::element::u8, ngraph::element::i8}},
{1, {ngraph::element::i8}}
})
});
auto perTensorQuantization = std::vector<OperationPerTensorQuantizationRestriction>({
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0}),
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0}),
auto perTensorQuantization = std::vector<QuantizationGranularityRestriction>({
QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0}),
QuantizationGranularityRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0}),
});
ngraph::pass::Manager lptManager;

View File

@ -74,15 +74,15 @@ public:
additionalLayer,
testValues.actual.dequantization);
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}}
})
});
auto perTensorQuantization = std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0}),
auto perTensorQuantization = std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0}),
});
SimpleLowPrecisionTransformer transform(supportedPrecisions, perTensorQuantization);

View File

@ -85,8 +85,8 @@ public:
testValues.actual.fakeQuantize1,
testValues.actual.fakeQuantize2);
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}}
})
});

View File

@ -12,9 +12,9 @@
#include <low_precision/rt_info/precision_preserved_attribute.hpp>
#include <low_precision/rt_info/intervals_alignment_attribute.hpp>
#include <low_precision/rt_info/quantization_alignment_attribute.hpp>
#include <low_precision/common/precisions_restriction.hpp>
#include <low_precision/common/quantization_granularity_restriction.hpp>
#include <low_precision/common/operation_precision_restriction.hpp>
#include <low_precision/common/operation_per_tensor_quantization_restriction.hpp>
#include <low_precision/concat.hpp>
#include <low_precision/fake_quantize_decomposition.hpp>
#include <low_precision/fuse_subtract_to_fake_quantize.hpp>
@ -146,14 +146,14 @@ public:
{},
testValues.axis,
testValues.addNotPrecisionPreservedOperation);
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
});
auto quantizationRestrictions = testValues.multiChannels ?
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::AvgPool>()
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::AvgPool>()
});
const auto params = TestTransformationParams::toParams(testValues.params);

View File

@ -19,7 +19,7 @@
#include "lpt_ngraph_functions/concat_function.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "simple_low_precision_transformer.hpp"
#include "low_precision/common/operation_per_tensor_quantization_restriction.hpp"
#include "low_precision/common/quantization_granularity_restriction.hpp"
using namespace testing;
@ -92,9 +92,9 @@ public:
testValues.actual.fakeQuantize2);
auto quantizationRestrictions = testValues.multiChannels ?
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::AvgPool>()
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::AvgPool>()
});
SimpleLowPrecisionTransformer transform({}, quantizationRestrictions);

View File

@ -89,14 +89,14 @@ public:
testValues.actual.fakeQuantize1,
testValues.actual.fakeQuantize2);
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
});
auto quantizationRestrictions = testValues.multiChannels ?
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::AvgPool>()
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::AvgPool>()
});
SimpleLowPrecisionTransformer transform(supportedPrecisionsOnActivation, quantizationRestrictions);

View File

@ -91,9 +91,9 @@ public:
testValues.actual.fakeQuantize2);
auto quantizationRestrictions = testValues.multiChannels ?
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>()
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>()
});
SimpleLowPrecisionTransformer transform({}, quantizationRestrictions);

View File

@ -92,9 +92,9 @@ public:
testValues.actual.fakeQuantize2);
auto quantizationRestrictions = testValues.multiChannels ?
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::AvgPool>()
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::AvgPool>()
});
SimpleLowPrecisionTransformer transform({}, quantizationRestrictions);

View File

@ -102,17 +102,17 @@ public:
testValues.neighborType,
testValues.additionalLayer);
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, testValues.params.precisionsOnActivations},
{1, testValues.params.precisionsOnWeights}
})
});
auto quantizationRestrictions = testValues.multiChannels ?
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>()
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>()
});
SimpleLowPrecisionTransformer transform(supportedPrecisionsOnActivation, quantizationRestrictions);

View File

@ -104,17 +104,17 @@ public:
testValues.actual.convert3,
testValues.actual.dequantization3);
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}}
})
});
auto quantizationRestrictions = testValues.multiChannels ?
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0})
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0})
});
SimpleLowPrecisionTransformer transform(supportedPrecisionsOnActivation, quantizationRestrictions);

View File

@ -22,7 +22,7 @@
#include <low_precision/fuse_subtract_to_fake_quantize.hpp>
#include <low_precision/fuse_multiply_to_fake_quantize.hpp>
#include <low_precision/markup_can_be_quantized.hpp>
#include <low_precision/markup_per_tensor_quantization.hpp>
#include <low_precision/markup_quantization_granularity.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
#include "lpt_ngraph_functions/concat_function.hpp"
@ -155,16 +155,16 @@ public:
testValues.axis,
testValues.addNotPrecisionPreservedOperation);
auto precisionsRestrictions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto precisionsRestrictions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}}
}),
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
});
auto quantizationRestrictions = std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0})
auto quantizationRestrictions = std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>({0})
});
const auto params = TestTransformationParams(testValues.params.updatePrecisions);
@ -172,7 +172,7 @@ public:
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::low_precision::MarkupPrecisions>(precisionsRestrictions);
manager.register_pass<ngraph::pass::low_precision::MarkupPerTensorQuantization>(quantizationRestrictions);
manager.register_pass<ngraph::pass::low_precision::MarkupQuantizationGranularity>(quantizationRestrictions);
manager.register_pass<ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved>(params.defaultPrecisions);
manager.register_pass<ngraph::pass::low_precision::PropagatePrecisions>();
manager.register_pass<ngraph::pass::low_precision::AlignQuantizationIntervals>(params.defaultPrecisions);

View File

@ -20,8 +20,8 @@
#include <low_precision/propagate_precisions.hpp>
#include <low_precision/markup_avg_pool_precision_preserved.hpp>
#include <low_precision/markup_precisions.hpp>
#include <low_precision/markup_per_tensor_quantization.hpp>
#include "low_precision/common/operation_precision_restriction.hpp"
#include <low_precision/markup_quantization_granularity.hpp>
#include "low_precision/common/precisions_restriction.hpp"
#include "common_test_utils/ngraph_test_utils.hpp"
#include "lpt_ngraph_functions/concat_function.hpp"
@ -99,17 +99,17 @@ public:
testValues.actual.fakeQuantize2,
addConvolution);
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, testValues.params.precisionsOnActivations},
{1, testValues.params.precisionsOnWeights},
})
});
auto quantizationRestrictions = testValues.multiChannels ?
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>()
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>()
});
SimpleLowPrecisionTransformer transform(supportedPrecisions, quantizationRestrictions);

View File

@ -92,17 +92,17 @@ public:
testValues.ssBeforeConcat,
testValues.ssAfterConcat);
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, testValues.params.precisionsOnActivations},
{1, testValues.params.precisionsOnWeights},
})
});
auto quantizationRestrictions = testValues.multiChannels ?
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>()
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>()
});
SimpleLowPrecisionTransformer transform(supportedPrecisions, quantizationRestrictions);

View File

@ -89,8 +89,8 @@ public:
testValues.actual.fakeQuantizeOnWeights
});
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, testValues.precisionsOnActivationForLimitedOperation},
{1, { element::i8 }}
})

View File

@ -12,7 +12,7 @@
#include <gtest/gtest.h>
#include <low_precision/avg_pool.hpp>
#include <low_precision/common/operation_precision_restriction.hpp>
#include <low_precision/common/precisions_restriction.hpp>
#include <low_precision/fake_quantize_decomposition.hpp>
#include <low_precision/low_precision.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
@ -88,8 +88,8 @@ public:
fakeQuantizeOnData.actual,
fakeQuantizeOnData.addNotPrecisionPreservedOperation);
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::AvgPool>({{0, params.precisionsOnActivations}})
auto supportedPrecisions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::AvgPool>({{0, params.precisionsOnActivations}})
});
SimpleLowPrecisionTransformer transform(supportedPrecisions, {}, { ngraph::element::f32, defaultPrecisions });

View File

@ -80,15 +80,15 @@ public:
testValues.actual.dequantizationOnWeights,
testValues.actual.dequantizationAfter);
auto precisionsRestrictions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto precisionsRestrictions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}}
})
});
auto quantizationRestrictions = std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>()
auto quantizationRestrictions = std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>()
});
SimpleLowPrecisionTransformer transformer(precisionsRestrictions, quantizationRestrictions);

View File

@ -85,8 +85,8 @@ public:
ngraph::pass::low_precision::TypeRelaxedReplacer pass;
pass.run_on_function(actualFunction);
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Convolution>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}}
})

View File

@ -154,14 +154,14 @@ public:
testValues.axis,
oneInputWithSplit);
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
});
auto quantizationRestrictions = testValues.multiChannels ?
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>() :
std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>({
ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction::create<ngraph::opset1::AvgPool>()
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>() :
std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>({
ngraph::pass::low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::AvgPool>()
});
const auto params = TestTransformationParams::toParams(testValues.params);

View File

@ -61,8 +61,8 @@ public:
testValues.actual.dequantization,
testValues.haveMultiplyWithNoConstBeforeDequantization);
auto precisionRestrictions = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::Multiply>({
auto precisionRestrictions = std::vector<ngraph::pass::low_precision::PrecisionsRestriction>({
ngraph::pass::low_precision::PrecisionsRestriction::create<ngraph::opset1::Multiply>({
{0, {ngraph::element::u8}},
{1, {ngraph::element::i8}}
})

View File

@ -0,0 +1,107 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "layer_transformation.hpp"
#include <string>
#include <sstream>
#include <memory>
#include <gtest/gtest.h>
#include <low_precision/common/port_quantization_granularity_restriction.hpp>
#include <low_precision/common/quantization_granularity_restriction.hpp>
#include <low_precision/markup_quantization_granularity.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
#include "lpt_ngraph_functions/convolution_function.hpp"
using namespace testing;
using namespace ngraph;
using namespace ngraph::pass;
class OperationQuantizationRestrictionTestValues {
public:
std::vector<ngraph::pass::low_precision::PortQuantizationGranularityRestriction> restrictions;
};
typedef std::tuple<
OperationQuantizationRestrictionTestValues,
bool
> OperationQuantizationRestrictionParams;
// Fixture verifying that MarkupQuantizationGranularity attaches the expected
// QuantizationGranularity runtime attributes to a quantized Convolution.
class OperationQuantizationRestrictionTest : public LayerTransformation, public testing::WithParamInterface<OperationQuantizationRestrictionParams> {
public:
    void SetUp() override {
        const auto& testValues = std::get<0>(GetParam());
        const auto explicitly = std::get<1>(GetParam());

        // In the implicit mode only the port indices are forwarded to
        // `create`; the granularity values from the test case are not used.
        std::vector<size_t> ports;
        if (!explicitly) {
            for (const auto& restriction : testValues.restrictions) {
                ports.push_back(restriction.port);
            }
        }

        // Quantized Convolution subgraph used as the pass input.
        actualFunction = ngraph::builder::subgraph::ConvolutionFunction::get(
            Shape({ 1, 3, 16, 16 }),
            element::f32,
            { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
            std::vector<float>({ 1.f }),
            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } });

        // Build the restriction either explicitly (ports + granularities) or
        // implicitly (port indices only).
        const auto quantizationRestrictions = std::vector<low_precision::QuantizationGranularityRestriction>({
            explicitly ?
                low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>(testValues.restrictions, false) :
                low_precision::QuantizationGranularityRestriction::create<ngraph::opset1::Convolution>(ports)
        });

        ngraph::pass::Manager manager;
        manager.register_pass<ngraph::pass::low_precision::MarkupQuantizationGranularity>(quantizationRestrictions);
        manager.run_passes(actualFunction);

        // Reference: the same subgraph with the restriction attributes
        // pre-applied by the builder.
        referenceFunction = ngraph::builder::subgraph::ConvolutionFunction::get(
            Shape({ 1, 3, 16, 16 }),
            element::f32,
            { 256ul, {}, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
            std::vector<float>({ 1.f }),
            { 255ul, Shape({ 1, 1, 1, 1 }), { 0.f }, { 254.f }, { -1.27f }, { 1.27f } },
            quantizationRestrictions);
    }

    // Test name: "<number of restricted ports>_<explicitly flag>".
    static std::string getTestCaseName(testing::TestParamInfo<OperationQuantizationRestrictionParams> obj) {
        std::ostringstream name;
        name << std::get<0>(obj.param).restrictions.size() << "_" << std::get<1>(obj.param);
        return name.str();
    }
};
// Functions must match including runtime-info attributes (the last two flags
// enable attribute comparison), i.e. the markup pass produced exactly the
// attributes the reference builder placed.
TEST_P(OperationQuantizationRestrictionTest, CompareFunctions) {
    const auto comparison = compare_functions(actualFunction, referenceFunction, true, true, true, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
// Three scenarios:
//   1. no restrictions at all,
//   2. a single per-tensor restriction on port 0,
//   3. mixed granularity: per-tensor on port 0, per-channel on port 1.
const std::vector<OperationQuantizationRestrictionTestValues> testValues = {
    {
        {}
    },
    {
        {{0, QuantizationGranularityAttribute::Granularity::PerTensor}}
    },
    {
        {{0, QuantizationGranularityAttribute::Granularity::PerTensor}, {1, QuantizationGranularityAttribute::Granularity::PerChannel}}
    }
};
// Run every test case in both creation modes: explicit (ports + granularities)
// and implicit (port indices only).
const std::vector<bool> explicitly = { true, false };

INSTANTIATE_TEST_SUITE_P(
    smoke_LPT,
    OperationQuantizationRestrictionTest,
    ::testing::Combine(
        ::testing::ValuesIn(testValues),
        ::testing::ValuesIn(explicitly)),
    OperationQuantizationRestrictionTest::getTestCaseName);

View File

@ -12,7 +12,7 @@
#include <low_precision/transformation_context.hpp>
#include <low_precision/low_precision.hpp>
#include <low_precision/align_quantization_parameters.hpp>
#include <low_precision/markup_per_tensor_quantization.hpp>
#include <low_precision/markup_quantization_granularity.hpp>
#include <low_precision/markup_can_be_quantized.hpp>
using namespace testing;
@ -21,8 +21,8 @@ using namespace ngraph::pass;
OPENVINO_SUPPRESS_DEPRECATED_START
SimpleLowPrecisionTransformer::SimpleLowPrecisionTransformer(
const std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>& precisionRestrictions,
const std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>& quantizationRestrictions,
const std::vector<ngraph::pass::low_precision::PrecisionsRestriction>& precisionRestrictions,
const std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>& quantizationRestrictions,
const AttributeParameters& params) {
auto passConfig = get_pass_config();
@ -30,7 +30,7 @@ SimpleLowPrecisionTransformer::SimpleLowPrecisionTransformer(
markup = std::make_shared<ngraph::pass::Manager>(passConfig);
markup->register_pass<ngraph::pass::low_precision::MarkupCanBeQuantized>(params.defaultPrecisions);
markup->register_pass<ngraph::pass::low_precision::MarkupPrecisions>(precisionRestrictions, params.defaultPrecisions);
markup->register_pass<ngraph::pass::low_precision::MarkupPerTensorQuantization>(quantizationRestrictions);
markup->register_pass<ngraph::pass::low_precision::MarkupQuantizationGranularity>(quantizationRestrictions);
markup->register_pass<ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved>(params.defaultPrecisions);
markup->register_pass<ngraph::pass::low_precision::PropagatePrecisions>(params);
markup->register_pass<ngraph::pass::low_precision::AlignQuantizationIntervals>(params.defaultPrecisions);

View File

@ -11,14 +11,14 @@
#include "layer_transformation.hpp"
#include "common_test_utils/test_common.hpp"
#include "low_precision/layer_transformation.hpp"
#include "low_precision/common/operation_precision_restriction.hpp"
#include "low_precision/common/operation_per_tensor_quantization_restriction.hpp"
#include "low_precision/common/precisions_restriction.hpp"
#include "low_precision/common/quantization_granularity_restriction.hpp"
class SimpleLowPrecisionTransformer : public ngraph::pass::FunctionPass{
public:
SimpleLowPrecisionTransformer(
const std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>& precisionRestrictions = {},
const std::vector<ngraph::pass::low_precision::OperationPerTensorQuantizationRestriction>& quantizationRestrictions = {},
const std::vector<ngraph::pass::low_precision::PrecisionsRestriction>& precisionRestrictions = {},
const std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>& quantizationRestrictions = {},
const AttributeParameters& params = AttributeParameters());
template <class T, class Operation>

View File

@ -8,6 +8,8 @@
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>
#include <low_precision/common/quantization_granularity_restriction.hpp>
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
@ -70,7 +72,8 @@ public:
const ngraph::element::Type precision,
const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData,
const std::vector<float>& weightsValues,
const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights);
const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
const std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>& restrictions = {});
};
} // namespace subgraph
} // namespace builder

View File

@ -8,6 +8,7 @@
#include <ngraph_ops/type_relaxed.hpp>
#include "ngraph_functions/subgraph_builders.hpp"
#include "low_precision/network_helper.hpp"
#include "low_precision/rt_info/quantization_granularity_attribute.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_weights.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
@ -321,7 +322,8 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::get(
const ngraph::element::Type precision,
const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantizeOnData,
const std::vector<float>& weightsValues,
const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights) {
const ngraph::builder::subgraph::FakeQuantizeOnWeights& fakeQuantizeOnWeights,
const std::vector<ngraph::pass::low_precision::QuantizationGranularityRestriction>& restrictions) {
const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));
input->set_friendly_name("input");
@ -369,6 +371,14 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::get(
convolutionOriginal,
std::vector<element::Type>{ element::f32, element::f32 },
std::vector<element::Type>{});
convolution->set_friendly_name("convolution");
for (const auto& r : restrictions) {
for (const auto& restrictedPort : r.restrictions) {
auto& rt = convolution->input(restrictedPort.port).get_rt_info();
rt[QuantizationGranularityAttribute::get_type_info_static()] = QuantizationGranularityAttribute(restrictedPort.granularity);
}
}
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(convolution) };
return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "ConvolutionFunction");