[LPT] Refactoring: PoC (#5226)
* LPT fix for Windows * LPT fix for Windows * Remove inference_engine_transformations_EXPORTS * [nGraph] Register new node in GraphRewrite * [LPT] nGraph alignment * [LPT] nGraph alignment: tests Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
This commit is contained in:
parent
0d9212683f
commit
c3c26b4807
@ -70,9 +70,12 @@
|
||||
#include <transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp>
|
||||
#include <low_precision/pull_reshape_through_dequantization.hpp>
|
||||
#include <low_precision/pull_transpose_through_dequantization.hpp>
|
||||
#include <low_precision/transformer.hpp>
|
||||
#include <low_precision/convolution.hpp>
|
||||
#include <low_precision/convolution_backprop_data.hpp>
|
||||
#include <low_precision/group_convolution.hpp>
|
||||
#include <low_precision/low_precision.hpp>
|
||||
#include <low_precision/mat_mul.hpp>
|
||||
#include <low_precision/multiply_to_group_convolution.hpp>
|
||||
#include <low_precision/strided_slice.hpp>
|
||||
#include <low_precision/network_helper.hpp>
|
||||
|
||||
@ -151,10 +154,12 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
|
||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork");
|
||||
auto nGraphFunc = clonedNetwork.getFunction();
|
||||
|
||||
using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
|
||||
|
||||
bool enableInt8;
|
||||
{
|
||||
ngraph::pass::Manager manager;
|
||||
enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc);
|
||||
enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(nGraphFunc);
|
||||
if (enableInt8) {
|
||||
manager.register_pass<ngraph::pass::DisableConvertConstantFoldingOnConstPath>(
|
||||
std::vector<ngraph::element::Type>{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 });
|
||||
@ -208,8 +213,6 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
|
||||
|
||||
auto pass_config = manager.get_pass_config();
|
||||
|
||||
using const_node_ptr = const std::shared_ptr<const ngraph::Node>;
|
||||
|
||||
// SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5
|
||||
pass_config->set_callback<ngraph::pass::ConvertSpaceToDepth,
|
||||
ngraph::pass::ConvertDepthToSpace>(
|
||||
@ -391,28 +394,78 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
|
||||
if (!config.enable_fp16_for_quantized_models) {
|
||||
manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
|
||||
}
|
||||
auto lptPrerequisites = manager.register_pass<ngraph::pass::GraphRewrite>();
|
||||
const std::vector<ngraph::element::Type> supportedTypes = { ngraph::element::i8, ngraph::element::u8 };
|
||||
lptPrerequisites->add_matcher<PullReshapeThroughDequantization>(supportedTypes);
|
||||
lptPrerequisites->add_matcher<PullTransposeThroughDequantization>(supportedTypes);
|
||||
lptPrerequisites->add_matcher<ngraph::pass::LinOpSequenceFusion>();
|
||||
manager.run_passes(nGraphFunc);
|
||||
|
||||
auto params = LayerTransformation::Params(true, // updatePrecisions
|
||||
LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations
|
||||
LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
|
||||
true); // supportAsymmetricQuantization
|
||||
LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params)
|
||||
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params)
|
||||
.setSupportAsymmetricQuantization(false)
|
||||
.setSupport3DTensorOnActivations(false))
|
||||
.add<ConvolutionBackpropDataTransformation, ngraph::opset1::ConvolutionBackpropData>(LayerTransformation::Params(params)
|
||||
.setSupportAsymmetricQuantization(false)
|
||||
.setDeconvolutionSpecificChannelsRatio(true))
|
||||
// INT8 StridedSlice not supported
|
||||
.remove<StridedSliceTransformation, ngraph::opset1::StridedSlice>());
|
||||
auto supportedPrecisions = std::vector<OperationPrecisionRestriction>({
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
|
||||
{0, {ngraph::element::u8, ngraph::element::i8}},
|
||||
{1, {ngraph::element::i8}},
|
||||
}),
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::ConvolutionBackpropData>({
|
||||
{0, {ngraph::element::u8, ngraph::element::i8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
}),
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::GroupConvolution>({
|
||||
{0, {ngraph::element::u8, ngraph::element::i8}},
|
||||
{1, {ngraph::element::i8}}
|
||||
}),
|
||||
OperationPrecisionRestriction::create<ngraph::opset1::StridedSlice>({})
|
||||
});
|
||||
|
||||
transformer.transform(nGraphFunc);
|
||||
auto perTensorQuantization = std::vector<OperationPerTensorQuantizationRestriction>({
|
||||
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0}),
|
||||
OperationPerTensorQuantizationRestriction::create<ngraph::opset1::ConvolutionBackpropData>({0}),
|
||||
});
|
||||
|
||||
ngraph::pass::Manager lptManager;
|
||||
|
||||
auto lptPassConfig = lptManager.get_pass_config();
|
||||
lptPassConfig->disable<ngraph::pass::low_precision::StridedSliceTransformation>();
|
||||
// MarkupPrecisions callback: only Multiply nodes can yield `true`, and they do so exactly when
// MultiplyToGroupConvolutionTransformation reports that they cannot be turned into a GroupConvolution.
// Every other node type yields `false`.
lptPassConfig->set_callback<ngraph::pass::low_precision::MarkupPrecisions>([](const_node_ptr& node) -> bool {
    const auto multiply = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node);
    if (multiply == nullptr) {
        return false;
    }
    return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(multiply);
});
|
||||
// Callback registered for ConvolutionBackpropDataTransformation.
// Returns true when the channel dimension of input 0 or output 0 is not statically known,
// when inputChannels is not a multiple of 4 or outputChannels is not a multiple of 16,
// or when the node uses asymmetric quantization (on activations or on weights).
// NOTE(review): a `true` result presumably tells the pass manager to skip the node — confirm
// against the pass_config callback contract.
lptPassConfig->set_callback<ConvolutionBackpropDataTransformation>([](const_node_ptr& node) -> bool {
|
||||
// Helper: writes the static length of dimension 1 of `shape` into `channel`.
// Returns false (leaving `channel` untouched) if the rank is dynamic, the rank is below 2,
// or dimension 1 itself is dynamic.
auto fillStaticChannel = [](const ngraph::PartialShape& shape, size_t& channel) -> bool {
|
||||
const auto rank = shape.rank();
|
||||
if (rank.is_dynamic()) {
|
||||
return false;
|
||||
}
|
||||
if (rank.get_length() < 2ul) {
|
||||
return false;
|
||||
}
|
||||
// Dimension index 1 is read as the channel dimension.
const auto dimension = shape[1];
|
||||
if (dimension.is_dynamic()) {
|
||||
return false;
|
||||
}
|
||||
channel = dimension.get_length();
|
||||
return true;
|
||||
};
|
||||
|
||||
// Left uninitialized on purpose: only read after fillStaticChannel succeeded.
size_t inputChannels;
|
||||
if (!fillStaticChannel(node->get_input_partial_shape(0), inputChannels)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Same pattern for the first output.
size_t outputChannels;
|
||||
if (!fillStaticChannel(node->get_output_partial_shape(0), outputChannels)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// Channel-count alignment requirement (4 on input, 16 on output).
// NOTE(review): presumably a constraint of the plugin's INT8 deconvolution kernels — confirm.
if ((inputChannels % 4 != 0) || (outputChannels % 16 != 0)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node);
|
||||
});
|
||||
// Callback registered for MatMulTransformation: forwards the node to
// MatMulTransformation::is3DTensorOnActivations and returns its result unchanged.
// NOTE(review): presumably this makes the pass skip MatMul nodes with 3D activations — confirm
// against the pass_config callback contract.
lptPassConfig->set_callback<MatMulTransformation>([](const_node_ptr& node) -> bool {
|
||||
return MatMulTransformation::is3DTensorOnActivations(node);
|
||||
});
|
||||
|
||||
lptManager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization);
|
||||
lptManager.run_passes(nGraphFunc);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -28,8 +28,6 @@ ie_faster_build(${TARGET_NAME}
|
||||
ie_add_vs_version_file(NAME ${TARGET_NAME}
|
||||
FILEDESCRIPTION "Inference Engine LP transformations library")
|
||||
|
||||
target_compile_definitions(${TARGET_NAME} PRIVATE inference_engine_transformations_EXPORTS)
|
||||
|
||||
target_link_libraries(${TARGET_NAME} PUBLIC inference_engine_transformations
|
||||
PRIVATE openvino::itt)
|
||||
|
||||
|
@ -11,12 +11,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API AddTransformation : public EltwiseBaseTransformation {
|
||||
class LP_TRANSFORMATIONS_API AddTransformation : public EltwiseBaseTransformation {
|
||||
public:
|
||||
AddTransformation(const Params& params) : EltwiseBaseTransformation(params) {}
|
||||
~AddTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
AddTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
};
|
||||
|
||||
|
@ -0,0 +1,25 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <ngraph/pass/pass.hpp>
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class LP_TRANSFORMATIONS_API AlignQuantizationIntervals;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
// Function-level pass: derives from ngraph::pass::FunctionPass and overrides run_on_function.
// Only the declaration is visible in this header; the definition lives elsewhere.
// NOTE(review): judging by the name, the pass presumably aligns quantization intervals between
// related operations — confirm against the implementation.
class ngraph::pass::low_precision::AlignQuantizationIntervals : public ngraph::pass::FunctionPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
// Runs the pass on function `f`.
// NOTE(review): the bool result presumably follows the FunctionPass convention — confirm.
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
|
||||
};
|
@ -0,0 +1,26 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <ngraph/pass/pass.hpp>
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class LP_TRANSFORMATIONS_API AlignQuantizationParameters;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
// Function-level pass: derives from ngraph::pass::FunctionPass and overrides run_on_function.
// Only the declaration is visible in this header; the definition lives elsewhere.
// NOTE(review): judging by the name, the pass presumably aligns quantization parameters between
// related operations — confirm against the implementation.
class ngraph::pass::low_precision::AlignQuantizationParameters : public ngraph::pass::FunctionPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
// Runs the pass on function `f`.
// NOTE(review): the bool result presumably follows the FunctionPass convention — confirm.
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
|
||||
};
|
@ -11,11 +11,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API AvgPoolTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API AvgPoolTransformation : public LayerTransformation {
|
||||
public:
|
||||
AvgPoolTransformation(const Params& params);
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
AvgPoolTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
};
|
||||
|
@ -0,0 +1,24 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include "rt_info/attribute_parameters.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class LP_TRANSFORMATIONS_API BaseMatcherPass;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
// Common base for low-precision matcher passes: a ngraph::pass::MatcherPass that additionally
// carries an AttributeParameters value. Derived passes in this library (e.g. CreateAttribute)
// read `params` from inside their matcher callbacks.
class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::BaseMatcherPass : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
// `params` defaults to a default-constructed AttributeParameters.
// NOTE(review): the constructor is defined elsewhere; presumably it stores the argument in the
// `params` member — confirm.
BaseMatcherPass(const AttributeParameters& params = AttributeParameters());
|
||||
// Public, mutable parameter set available to derived passes.
AttributeParameters params;
|
||||
};
|
@ -12,11 +12,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ClampTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API ClampTransformation : public LayerTransformation {
|
||||
public:
|
||||
ClampTransformation(const Params& params);
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
ClampTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
@ -13,7 +13,7 @@
|
||||
#include <ngraph/check.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
|
||||
#include "transformations_visibility.hpp"
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
#include "transformations/rt_info/dequantization_attribute.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
@ -21,7 +21,7 @@ namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
// template<typename BaseOp2>
|
||||
// class TRANSFORMATIONS_API DequantizationOp : public BaseOp2 {
|
||||
// class LP_TRANSFORMATIONS_API DequantizationOp : public BaseOp2 {
|
||||
// public:
|
||||
// template <typename ... Args>
|
||||
// DequantizationOp(Args&&... args) : BaseOp2(std::forward<Args>(args)...) {
|
||||
@ -63,7 +63,7 @@ void copyRuntimeInfo(const ngraph::Node& from, ngraph::Node& to) {
|
||||
|
||||
} // namespace
|
||||
|
||||
class TRANSFORMATIONS_API DequantizationConvert : public ngraph::opset1::Convert {
|
||||
class LP_TRANSFORMATIONS_API DequantizationConvert : public ngraph::opset1::Convert {
|
||||
public:
|
||||
DequantizationConvert(const ngraph::Output<Node>& arg, const ngraph::element::Type& destination_type) :
|
||||
ngraph::opset1::Convert(arg, destination_type) {
|
||||
@ -77,7 +77,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class TRANSFORMATIONS_API DequantizationSubtract : public ngraph::opset1::Subtract {
|
||||
class LP_TRANSFORMATIONS_API DequantizationSubtract : public ngraph::opset1::Subtract {
|
||||
public:
|
||||
DequantizationSubtract(
|
||||
const ngraph::Output<Node>& arg0,
|
||||
@ -94,7 +94,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class TRANSFORMATIONS_API DequantizationMultiply : public ngraph::opset1::Multiply {
|
||||
class LP_TRANSFORMATIONS_API DequantizationMultiply : public ngraph::opset1::Multiply {
|
||||
public:
|
||||
DequantizationMultiply(
|
||||
const Output<Node>& arg0,
|
||||
@ -116,7 +116,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class TRANSFORMATIONS_API DequantizationAdd : public ngraph::opset1::Add {
|
||||
class LP_TRANSFORMATIONS_API DequantizationAdd : public ngraph::opset1::Add {
|
||||
public:
|
||||
DequantizationAdd(
|
||||
const ngraph::Output<Node>& arg0,
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <tuple>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
@ -15,7 +16,7 @@ namespace low_precision {
|
||||
|
||||
typedef std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> FakeQuantizeDequantizationValues;
|
||||
|
||||
class FakeQuantizeDequantization {
|
||||
class LP_TRANSFORMATIONS_API FakeQuantizeDequantization {
|
||||
public:
|
||||
FakeQuantizeDequantization();
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <exception>
|
||||
#include <string>
|
||||
#include <ngraph/node.hpp>
|
||||
#include <transformations_visibility.hpp>
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
|
||||
/**
|
||||
* @def THROW_TRANSFORMATION_EXCEPTION_LPT
|
||||
@ -19,7 +19,7 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API Exception : std::exception {
|
||||
class LP_TRANSFORMATIONS_API Exception : std::exception {
|
||||
std::shared_ptr<std::ostringstream> buffer;
|
||||
mutable std::string buffer_str;
|
||||
public:
|
||||
@ -42,7 +42,7 @@ public:
|
||||
#define THROW_TRANSFORMATION_EXCEPTION throw ::ngraph::pass::low_precision::Exception() << __FILE__ << ":" << __LINE__ << " "
|
||||
|
||||
|
||||
class TRANSFORMATIONS_API InferenceEngineLptException : public Exception {
|
||||
class LP_TRANSFORMATIONS_API InferenceEngineLptException : public Exception {
|
||||
public:
|
||||
InferenceEngineLptException(const std::string& filename, const size_t line, const Node& node) {
|
||||
*this
|
||||
|
@ -0,0 +1,56 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class OperationPerTensorQuantizationRestriction {
|
||||
public:
|
||||
using RestrictedPorts = std::vector<size_t>;
|
||||
|
||||
ngraph::Node::type_info_t operationType;
|
||||
bool specifyVersion;
|
||||
std::vector<size_t> restrictedPorts;
|
||||
|
||||
OperationPerTensorQuantizationRestriction() = default;
|
||||
OperationPerTensorQuantizationRestriction(
|
||||
const ngraph::Node::type_info_t operationType,
|
||||
const bool specifyVersion,
|
||||
const RestrictedPorts& restrictedPorts) :
|
||||
operationType(operationType),
|
||||
specifyVersion(specifyVersion),
|
||||
restrictedPorts(restrictedPorts) {}
|
||||
|
||||
template <typename T>
|
||||
static OperationPerTensorQuantizationRestriction create(
|
||||
const RestrictedPorts& restrictedPorts = {},
|
||||
const bool specifyVersion = false) {
|
||||
return OperationPerTensorQuantizationRestriction(T::get_type_info_static(), specifyVersion, restrictedPorts);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static RestrictedPorts getPrecisionsByOperationType(std::vector<OperationPerTensorQuantizationRestriction>& restrictions) {
|
||||
for (const auto& restriction : restrictions) {
|
||||
if (restriction.operationType == T::get_type_info_static()) {
|
||||
return restriction.restrictedPorts;
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -0,0 +1,59 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class OperationPrecisionRestriction {
|
||||
public:
|
||||
using PrecisionsByPort = std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>>;
|
||||
|
||||
ngraph::Node::type_info_t operationType;
|
||||
bool specifyVersion;
|
||||
std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>> precisionsByPort;
|
||||
|
||||
OperationPrecisionRestriction() = default;
|
||||
OperationPrecisionRestriction(
|
||||
const ngraph::Node::type_info_t operationType,
|
||||
const bool specifyVersion,
|
||||
const PrecisionsByPort& precisionsByPort) :
|
||||
operationType(operationType),
|
||||
specifyVersion(specifyVersion),
|
||||
precisionsByPort(precisionsByPort) {}
|
||||
|
||||
template <typename T>
|
||||
static OperationPrecisionRestriction create(
|
||||
const PrecisionsByPort& precisionsByPort,
|
||||
const bool specifyVersion = false) {
|
||||
return OperationPrecisionRestriction(T::get_type_info_static(), specifyVersion, precisionsByPort);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static PrecisionsByPort getPrecisionsByOperationType(std::vector<OperationPrecisionRestriction>& restrictions) {
|
||||
for (const auto& restriction : restrictions) {
|
||||
if (restriction.operationType == T::get_type_info_static()) {
|
||||
return restriction.precisionsByPort;
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -1,42 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/check.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include "../ilayer_transformations_manager.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
// Collects nodes related to a Concat operation into three public containers
// (quantizationLayers, concatLayers, layers). All member functions are defined elsewhere.
// NOTE(review): std::unordered_set is used in the signatures below, but <unordered_set> is not
// among this header's direct includes — it is presumably pulled in transitively; confirm.
class Subgraph {
|
||||
public:
|
||||
// Takes a raw pointer to the transformations manager.
// NOTE(review): ownership is not visible here; presumably non-owning — confirm.
Subgraph(ngraph::pass::ILayerTransformationsManager* layerTransformationsManager);
|
||||
|
||||
// Public entry point for filling the subgraph starting from `concat`.
// `handledLayers` is passed by non-const reference, so the call may update it.
// NOTE(review): the meaning of the bool result is defined by the implementation — confirm.
bool fillSubgraphForConcat(const std::shared_ptr<ngraph::opset1::Concat>& concat, std::unordered_set<std::string>& handledLayers);
|
||||
bool empty() const;
|
||||
|
||||
// Collected nodes, exposed directly to callers.
std::vector<std::shared_ptr<ngraph::Node>> quantizationLayers;
|
||||
std::vector<std::shared_ptr<ngraph::opset1::Concat>> concatLayers;
|
||||
// NOTE(review): string key is presumably the node name — confirm.
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>> layers;
|
||||
|
||||
private:
|
||||
// Internal helpers used while filling the subgraph; each shares the same in/out
// `handledLayers` set where it takes one.
bool atLeastOneIsIntermediate(const std::shared_ptr<ngraph::Node>& node) const;
|
||||
bool fillSubgraphForQuantization(const std::shared_ptr<ngraph::opset1::FakeQuantize>& fakeQuantize, std::unordered_set<std::string>& handledLayers);
|
||||
bool fillSubgraphForIntermediate(const std::shared_ptr<ngraph::Node>& intermediate, std::unordered_set<std::string>& handledLayers);
|
||||
bool fill(const std::shared_ptr<ngraph::Node>& concat, std::unordered_set<std::string>& handledLayers);
|
||||
// Manager supplied at construction; the pointee is const from this class's point of view.
const ngraph::pass::ILayerTransformationsManager* layerTransformationsManager;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -13,32 +13,21 @@
|
||||
#include <ngraph/ngraph.hpp>
|
||||
|
||||
#include "layer_transformation.hpp"
|
||||
#include "common/subgraph.hpp"
|
||||
#include "common/fake_quantize_dequantization.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ConcatTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API ConcatTransformation : public LayerTransformation {
|
||||
public:
|
||||
ConcatTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~ConcatTransformation() override {};
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
ConcatTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
|
||||
protected:
|
||||
void addDequantizationLayers(
|
||||
TransformationContext& context,
|
||||
ngraph::pass::low_precision::Subgraph& subgraph,
|
||||
std::function<void(
|
||||
std::shared_ptr<ngraph::Node> layer,
|
||||
std::shared_ptr<ngraph::Node> child,
|
||||
const std::string originalLayerName,
|
||||
std::vector<FakeQuantizeDequantization>& dequantizationsToConcatenate)> getLayerDequantizationCallback) const;
|
||||
|
||||
static bool isHandled(
|
||||
const TransformationContext& context,
|
||||
const std::vector<std::shared_ptr<ngraph::Node>>& quantizationOperations);
|
||||
@ -51,14 +40,6 @@ protected:
|
||||
NodeVector& multiplyNodes) const;
|
||||
|
||||
std::shared_ptr<Node> concatenateDeqNodes(NodeVector& nodes) const;
|
||||
|
||||
private:
|
||||
size_t getMinQuantizationLevels(
|
||||
const DataPrecision& dataPrecision,
|
||||
const float maxOutputInterval,
|
||||
const std::vector<QuantizationDetails>& quantizationLayersDetails,
|
||||
const float outputLowValue,
|
||||
const float outputHighValue) const;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
|
@ -1,51 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <ngraph/ngraph.hpp>
|
||||
|
||||
#include "concat.hpp"
|
||||
#include "common/subgraph.hpp"
|
||||
#include "common/fake_quantize_dequantization.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
// Specialization of ConcatTransformation: it overrides registerMatcherIn, transform and
// isPrecisionPreserved, and adds private helpers that work with FakeQuantizeDequantization
// values. All non-trivial members are defined elsewhere.
// NOTE(review): judging by the name, it presumably handles Concat with per-channel
// (multi-channel) dequantization — confirm against the implementation.
class TRANSFORMATIONS_API ConcatMultiChannelsTransformation : public ConcatTransformation {
|
||||
public:
|
||||
// Forwards `params` to the ConcatTransformation base.
ConcatMultiChannelsTransformation(const Params& params) : ConcatTransformation(params) {}
|
||||
~ConcatMultiChannelsTransformation() override {};
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
|
||||
private:
|
||||
// Go through the parent elements of the layer and fill dequantization collection
|
||||
// with Dq operations that should be inserted before the layer.
|
||||
// `dequantization` is the output collection (non-const reference).
// NOTE(review): the map's string key is presumably a FakeQuantize node name — confirm.
void fillDequantization(
|
||||
const std::shared_ptr<ngraph::Node> layer,
|
||||
const std::unordered_map<std::string, FakeQuantizeDequantization>& dequantizationByFakeQuantize,
|
||||
std::vector<FakeQuantizeDequantization>& dequantization) const;
|
||||
|
||||
// Produces a single FakeQuantizeDequantization for `concat` from the given collection.
FakeQuantizeDequantization getConcatenatedDequantization(
|
||||
const std::shared_ptr<ngraph::opset1::Concat> concat,
|
||||
const std::vector<FakeQuantizeDequantization>& dequantization) const;
|
||||
|
||||
// Static helper: derives a FakeQuantizeDequantization for `operation` from `dequantization`.
// NOTE(review): `sourceOutputIdx` presumably selects the output of `operation` to fold
// for — confirm.
static FakeQuantizeDequantization getFoldedDequantization(
|
||||
const std::shared_ptr<ngraph::Node> operation,
|
||||
const FakeQuantizeDequantization& dequantization,
|
||||
const size_t sourceOutputIdx);
|
||||
|
||||
bool isMultiChannel(const std::vector<std::shared_ptr<ngraph::opset1::Concat>>& concatLayers) const noexcept;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -11,12 +11,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ConvertTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API ConvertTransformation : public LayerTransformation {
|
||||
public:
|
||||
ConvertTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~ConvertTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
ConvertTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
||||
|
@ -7,14 +7,14 @@
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include <transformations_visibility.hpp>
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ConvertSubtractConstant;
|
||||
class LP_TRANSFORMATIONS_API ConvertSubtractConstant;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
|
@ -11,12 +11,13 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ConvolutionTransformation : public WeightableLayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API ConvolutionTransformation : public WeightableLayerTransformation {
|
||||
public:
|
||||
ConvolutionTransformation(const Params& params);
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
ConvolutionTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept override;
|
||||
static bool isQuantizedStatic(const std::shared_ptr<const Node>& layer) noexcept;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
|
@ -11,13 +11,13 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation {
|
||||
public:
|
||||
ConvolutionBackpropDataTransformation(const Params& params);
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
ConvolutionBackpropDataTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
||||
bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept override;
|
||||
static bool isQuantizedStatic(const std::shared_ptr<const Node>& layer) noexcept;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
|
@ -0,0 +1,61 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
#include "low_precision/base_matcher_pass.hpp"
|
||||
#include "low_precision/lpt_itt.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
template <typename AttributeType, typename OperationType>
|
||||
class CreateAttribute;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
// Selector passed to the CreateAttribute constructor (default: Node).
// Declared at global scope: the ngraph::pass::low_precision namespaces are closed above.
// NOTE(review): the enumerators presumably name where the attribute is taken from or attached
// (the node itself vs. its output port) — confirm against CreateAttribute's users.
enum class AttributeSource {
|
||||
Node,
|
||||
OutputPort
|
||||
};
|
||||
|
||||
template <typename AttributeType, typename OperationType = ngraph::pattern::op::Label>
|
||||
class ngraph::pass::low_precision::CreateAttribute : public ngraph::pass::low_precision::BaseMatcherPass {
|
||||
public:
|
||||
CreateAttribute(const AttributeSource source = AttributeSource::Node) {
|
||||
assert((source == AttributeSource::Node) || (source == AttributeSource::OutputPort));
|
||||
auto operation = std::is_same<OperationType, pattern::op::Label>::value ?
|
||||
pattern::any_input() :
|
||||
pattern::wrap_type<OperationType>();
|
||||
|
||||
ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) {
|
||||
auto op = m.get_match_root();
|
||||
if (transformation_callback(op)) {
|
||||
return false;
|
||||
}
|
||||
{
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "CreateAttribute");
|
||||
const auto attribute = ngraph::VariantWrapper<AttributeType>::create(op, params);
|
||||
if (attribute == nullptr) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
auto matcher = std::make_shared<ngraph::pattern::Matcher>(operation, "CreateAttribute");
|
||||
this->register_matcher(matcher, callback);
|
||||
}
|
||||
};
|
@ -0,0 +1,70 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include "rt_info/precision_preserved_attribute.hpp"
|
||||
#include "network_helper.hpp"
|
||||
#include "lpt_itt.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
template <typename AttributeType, typename OperationType>
|
||||
class CreatePrecisionsDependentAttribute;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
template <typename AttributeType, typename OperationType>
|
||||
class ngraph::pass::low_precision::CreatePrecisionsDependentAttribute : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
CreatePrecisionsDependentAttribute() {
|
||||
auto operation = pattern::wrap_type<OperationType>();
|
||||
|
||||
ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) {
|
||||
auto node = m.get_match_root();
|
||||
if (transformation_callback(node)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
{
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "CreatePrecisionsDependentAttribute");
|
||||
auto &rt = node->get_rt_info();
|
||||
|
||||
const auto precisionPreservedAttribute = std::make_shared<ngraph::VariantWrapper<PrecisionPreservedAttributePtr>>(
|
||||
std::make_shared<PrecisionPreservedAttribute>(false));
|
||||
rt[ngraph::VariantWrapper<PrecisionPreservedAttributePtr>::type_info.name] = precisionPreservedAttribute;
|
||||
const auto &targetSharedValue = precisionPreservedAttribute->get()->sharedValue;
|
||||
|
||||
const auto attribute = std::make_shared<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>>(
|
||||
std::make_shared<AttributeType>());
|
||||
rt[ngraph::VariantWrapper<std::shared_ptr<AttributeType>>::type_info.name] = attribute;
|
||||
|
||||
ngraph::pass::low_precision::NetworkHelper::reassign<PrecisionPreservedSharedValue, PrecisionPreservedAttribute>(
|
||||
targetSharedValue,
|
||||
{
|
||||
std::dynamic_pointer_cast<PrecisionPreservedAttribute>(attribute->get()),
|
||||
std::dynamic_pointer_cast<PrecisionPreservedAttribute>(precisionPreservedAttribute->get())
|
||||
});
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
auto matcher = std::make_shared<ngraph::pattern::Matcher>(operation, "CreatePrecisionsDependentAttribute");
|
||||
this->register_matcher(matcher, callback);
|
||||
}
|
||||
};
|
@ -10,12 +10,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API DepthToSpaceTransformation : public TransparentBaseTransformation {
|
||||
class LP_TRANSFORMATIONS_API DepthToSpaceTransformation : public TransparentBaseTransformation {
|
||||
public:
|
||||
DepthToSpaceTransformation(const Params& params) : TransparentBaseTransformation(params) {}
|
||||
~DepthToSpaceTransformation() override {}
|
||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override;
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
DepthToSpaceTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
};
|
||||
|
@ -12,7 +12,7 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API EltwiseBaseTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API EltwiseBaseTransformation : public LayerTransformation {
|
||||
public:
|
||||
EltwiseBaseTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
|
@ -13,17 +13,20 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API FakeQuantizeTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API FakeQuantizeTransformation : public LayerTransformation {
|
||||
public:
|
||||
FakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
FakeQuantizeTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
|
||||
static bool checkElementwise(const std::shared_ptr<Node>& eltwise);
|
||||
|
||||
private:
|
||||
std::shared_ptr<opset1::FakeQuantize> fuseElementwise(TransformationContext& context, const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize) const;
|
||||
std::shared_ptr<opset1::FakeQuantize> fuseElementwise(
|
||||
TransformationContext& context,
|
||||
MatcherPass* matcherPass,
|
||||
const std::shared_ptr<opset1::FakeQuantize>& fakeQuantize) const;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
|
@ -13,11 +13,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API FakeQuantizeDecompositionTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API FakeQuantizeDecompositionTransformation : public LayerTransformation {
|
||||
public:
|
||||
FakeQuantizeDecompositionTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
FakeQuantizeDecompositionTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
||||
|
@ -12,12 +12,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API FoldConvertTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API FoldConvertTransformation : public LayerTransformation {
|
||||
public:
|
||||
FoldConvertTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~FoldConvertTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
FoldConvertTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
@ -0,0 +1,25 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
// Low-precision layer transformation declared on top of LayerTransformation; only the
// declaration is visible in this header.
// NOTE(review): judging by the name it presumably folds FakeQuantize operations - confirm
// against the implementation file.
class LP_TRANSFORMATIONS_API FoldFakeQuantizeTransformation : public LayerTransformation {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
// Constructible without arguments: `params` defaults to a default-constructed Params.
FoldFakeQuantizeTransformation(const Params& params = Params());
|
||||
// Overrides of the LayerTransformation virtual interface.
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -12,12 +12,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API FuseConvertTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API FuseConvertTransformation : public LayerTransformation {
|
||||
public:
|
||||
FuseConvertTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~FuseConvertTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
FuseConvertTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
@ -12,12 +12,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API FuseFakeQuantizeTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API FuseFakeQuantizeTransformation : public LayerTransformation {
|
||||
public:
|
||||
FuseFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~FuseFakeQuantizeTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
FuseFakeQuantizeTransformation(const Params& params);
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
|
||||
private:
|
||||
|
@ -12,12 +12,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API FuseMultiplyToFakeQuantizeTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API FuseMultiplyToFakeQuantizeTransformation : public LayerTransformation {
|
||||
public:
|
||||
FuseMultiplyToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~FuseMultiplyToFakeQuantizeTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
FuseMultiplyToFakeQuantizeTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
@ -12,12 +12,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API FuseSubtractToFakeQuantizeTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API FuseSubtractToFakeQuantizeTransformation : public LayerTransformation {
|
||||
public:
|
||||
FuseSubtractToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~FuseSubtractToFakeQuantizeTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
FuseSubtractToFakeQuantizeTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
@ -11,12 +11,13 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API GroupConvolutionTransformation : public ConvolutionTransformation {
|
||||
class LP_TRANSFORMATIONS_API GroupConvolutionTransformation : public ConvolutionTransformation {
|
||||
public:
|
||||
GroupConvolutionTransformation(const Params& params);
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
GroupConvolutionTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept override;
|
||||
static bool isQuantizedStatic(const std::shared_ptr<const Node>& layer) noexcept;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
|
@ -1,24 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <ngraph/node.hpp>
|
||||
#include "transformations_visibility.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
|
||||
/**
|
||||
* @brief low precision transformation component interface.
|
||||
*/
|
||||
class TRANSFORMATIONS_API ILayerTransformationsManager {
|
||||
public:
|
||||
virtual bool isQuantized(const std::shared_ptr<Node>& layer) const noexcept = 0;
|
||||
virtual bool isPrecisionPreserved(const std::shared_ptr<Node>& layer) const noexcept = 0;
|
||||
};
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -10,12 +10,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API InterpolateTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API InterpolateTransformation : public LayerTransformation {
|
||||
public:
|
||||
InterpolateTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~InterpolateTransformation() override {}
|
||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override;
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
InterpolateTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
};
|
||||
|
@ -1,24 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <transformations_visibility.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
|
||||
/**
|
||||
* @brief low precision transformation component interface.
|
||||
*/
|
||||
class TRANSFORMATIONS_API IParamsManager {
|
||||
public:
|
||||
// TODO FIXME: it is not correct to have a string as a key here, try to use NodeTypeInfo
|
||||
virtual std::vector<element::Type> getPrecisionsOnActivations(const Node& op) const noexcept = 0;
|
||||
};
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -13,8 +13,6 @@
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
#include "iparams_manager.hpp"
|
||||
#include "ilayer_transformations_manager.hpp"
|
||||
#include "transformation_context.hpp"
|
||||
#include "quantization_details.hpp"
|
||||
#include "low_precision/common/ie_lpt_exception.hpp"
|
||||
@ -41,7 +39,7 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API DataPrecision {
|
||||
class LP_TRANSFORMATIONS_API DataPrecision {
|
||||
public:
|
||||
DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {}
|
||||
|
||||
@ -108,6 +106,17 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// Return maximum value for quantization level. Quantization level is maximum value for precision.
|
||||
static float getMaxValue(const size_t maxLevelsForPrecision) {
|
||||
if (maxLevelsForPrecision == 255ul) {
|
||||
return 254.f;
|
||||
} else if (maxLevelsForPrecision == 256ul) {
|
||||
return 255.f;
|
||||
} else {
|
||||
THROW_TRANSFORMATION_EXCEPTION << "unexpected quantization level " << maxLevelsForPrecision;
|
||||
}
|
||||
}
|
||||
|
||||
static bool hasNegativeValues(const std::vector<float>& values) {
|
||||
for (const float value : values) {
|
||||
if (value < 0.0) {
|
||||
@ -148,92 +157,28 @@ inline std::ostream &operator << (std::ostream &os, const DataPrecision& value)
|
||||
}
|
||||
|
||||
// Base class for all LP transformations, holds some common data structures
|
||||
class TRANSFORMATIONS_API LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API LayerTransformation : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
enum QuantizedTensorAlignment {
|
||||
None,
|
||||
UpdateLevel
|
||||
};
|
||||
|
||||
class Params {
|
||||
public:
|
||||
Params(
|
||||
const bool updatePrecisions = true,
|
||||
const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations = QuantizedTensorAlignment::UpdateLevel,
|
||||
const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights = QuantizedTensorAlignment::None,
|
||||
bool supportAsymmetricQuantization = false,
|
||||
std::vector<element::Type> precisionsOnActivations = { element::u8, element::i8 },
|
||||
std::vector<element::Type> precisionsOnWeights = { element::i8 },
|
||||
element::Type deqPrecision = element::f32,
|
||||
bool support3DTensorOnActivations = true,
|
||||
bool deconvolutionSpecificChannelsRatio = false) :
|
||||
updatePrecisions(updatePrecisions),
|
||||
quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations),
|
||||
quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights),
|
||||
supportAsymmetricQuantization(supportAsymmetricQuantization),
|
||||
precisionsOnActivations(precisionsOnActivations),
|
||||
precisionsOnWeights(precisionsOnWeights),
|
||||
deqPrecision(deqPrecision),
|
||||
support3DTensorOnActivations(support3DTensorOnActivations),
|
||||
deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) {
|
||||
if (precisionsOnActivations.size() == 0ul) {
|
||||
THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed";
|
||||
}
|
||||
|
||||
if (precisionsOnWeights.size() == 0ul) {
|
||||
THROW_TRANSFORMATION_EXCEPTION << "precisions on weights are not specisifed";
|
||||
}
|
||||
}
|
||||
const bool updatePrecisions = true,
|
||||
element::Type deqPrecision = element::f32) :
|
||||
updatePrecisions(updatePrecisions),
|
||||
deqPrecision(deqPrecision) {}
|
||||
|
||||
Params& setUpdatePrecisions(const bool updatePrecisions) {
|
||||
this->updatePrecisions = updatePrecisions;
|
||||
return *this;
|
||||
}
|
||||
|
||||
Params& setQuantizedTensorAlignmentOnActivations(const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations) {
|
||||
this->quantizedTensorAlignmentOnActivations = quantizedTensorAlignmentOnActivations;
|
||||
return *this;
|
||||
}
|
||||
|
||||
Params& setQuantizedTensorAlignmentOnWeights(const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights) {
|
||||
this->quantizedTensorAlignmentOnWeights = quantizedTensorAlignmentOnWeights;
|
||||
return *this;
|
||||
}
|
||||
|
||||
Params& setSupportAsymmetricQuantization(const bool supportAsymmetricQuantization) {
|
||||
this->supportAsymmetricQuantization = supportAsymmetricQuantization;
|
||||
return *this;
|
||||
}
|
||||
|
||||
Params& setPrecisionsOnActivations(const std::vector<element::Type>& precisionsOnActivations) {
|
||||
this->precisionsOnActivations = precisionsOnActivations;
|
||||
return *this;
|
||||
}
|
||||
|
||||
Params& setPrecisionsOnWeights(const std::vector<element::Type>& precisionsOnWeights) {
|
||||
this->precisionsOnWeights = precisionsOnWeights;
|
||||
return *this;
|
||||
}
|
||||
|
||||
Params& setSupport3DTensorOnActivations(const bool support3DTensorOnActivations) {
|
||||
this->support3DTensorOnActivations = support3DTensorOnActivations;
|
||||
return *this;
|
||||
}
|
||||
|
||||
Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) {
|
||||
this->deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio;
|
||||
Params& setDeqPrecision(const element::Type& deqPrecision) {
|
||||
this->deqPrecision = deqPrecision;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool updatePrecisions;
|
||||
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
|
||||
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
|
||||
bool supportAsymmetricQuantization;
|
||||
std::vector<element::Type> precisionsOnActivations;
|
||||
std::vector<element::Type> precisionsOnWeights;
|
||||
element::Type deqPrecision;
|
||||
bool support3DTensorOnActivations;
|
||||
bool deconvolutionSpecificChannelsRatio;
|
||||
};
|
||||
|
||||
class PrecisionDetails {
|
||||
@ -243,55 +188,49 @@ public:
|
||||
hasNegativeOutput(hasNegativeOutput),
|
||||
hasZeroPoint(hasZeroPoint) {}
|
||||
|
||||
const element::Type precision;
|
||||
const bool hasNegativeOutput;
|
||||
const bool hasZeroPoint;
|
||||
element::Type precision;
|
||||
bool hasNegativeOutput;
|
||||
bool hasZeroPoint;
|
||||
};
|
||||
|
||||
LayerTransformation(const Params& params);
|
||||
virtual ~LayerTransformation() = default;
|
||||
virtual void registerMatcherIn(ngraph::pass::GraphRewrite& pass, TransformationContext& context) const = 0;
|
||||
virtual bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const = 0;
|
||||
virtual bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) = 0;
|
||||
|
||||
void setParamsManager(IParamsManager* paramsManager) noexcept;
|
||||
void setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept;
|
||||
void setContext(TransformationContext* context) noexcept;
|
||||
|
||||
void setUpdatePrecisions(const bool updatePrecisions);
|
||||
void setQuantizedTensorAlignmentOnActivations(const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations);
|
||||
void setQuantizedTensorAlignmentOnWeights(const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights);
|
||||
|
||||
void setQuantizationIntervalAsymmetryThreshold(const float value);
|
||||
void setZeroThreshold(const float value);
|
||||
void setMinQuantizationLevels(const size_t levels);
|
||||
|
||||
const std::vector<element::Type>& getPrecisionsOnActivations() const;
|
||||
const std::vector<element::Type>& getPrecisionsOnWeights() const;
|
||||
|
||||
virtual bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const;
|
||||
|
||||
bool canSubtractBeHandled(const std::shared_ptr<Node>& op, const size_t parentIndex = 0ul) const;
|
||||
static bool canBeTransformedStatic(const std::shared_ptr<Node>& layer);
|
||||
|
||||
bool canSubtractBeHandled(const std::shared_ptr<Node>& op, const FakeQuantizeDequantization& dequantization) const;
|
||||
|
||||
PrecisionDetails getPrecisionDetails(const QuantizationDetails& quantizationDetails) const;
|
||||
// Get precision based on FakeQuantize operation.
|
||||
// Undefined value is expected. In this case the accuracy has to be defined by the calling code.
|
||||
// TODO: LPT: INT8 specific here
|
||||
static PrecisionDetails getPrecisionDetails(
|
||||
const size_t quantizationLevels,
|
||||
const std::vector<float>& outputLowValues,
|
||||
const std::vector<float>& outputHighValues);
|
||||
static PrecisionDetails getPrecisionDetails(const QuantizationDetails& quantizationDetails);
|
||||
|
||||
static bool isAsymmetricQuantization(const std::shared_ptr<const Node>& node);
|
||||
|
||||
// return true if operation can be quantized and false otherwise
|
||||
// for example: if convolution operation weights are not quantized, then isQuantize returns false and true otherwise
|
||||
// note: dequantization operations on activations are absent during method execution
|
||||
virtual bool isQuantized(std::shared_ptr<Node> layer) const noexcept;
|
||||
virtual bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept;
|
||||
|
||||
// return true if operation can be preserved for precision
|
||||
// note: dequantization operations on activations are absent during method execution
|
||||
virtual bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept = 0;
|
||||
|
||||
DataPrecision getDataPrecision(
|
||||
std::shared_ptr<Node> layer,
|
||||
// weights specific
|
||||
static DataPrecision getDataPrecision(
|
||||
const std::shared_ptr<Node>& layer,
|
||||
const QuantizationDetails& quantizationDetails,
|
||||
const bool onWeights) const;
|
||||
|
||||
void fillAvailablePrecisions(std::shared_ptr<Node> layer, std::vector<element::Type>& availablePrecisions) const;
|
||||
|
||||
std::vector<std::shared_ptr<Node>> getChildrenRecursivelyExceptPrecisionPreserved(const std::shared_ptr<Node>& op) const noexcept;
|
||||
const std::vector<element::Type>& precisions);
|
||||
|
||||
protected:
|
||||
#ifdef LPT_PRINT_DEQUANTIZATION_INFO
|
||||
@ -303,24 +242,10 @@ protected:
|
||||
#endif
|
||||
|
||||
bool updatePrecisions;
|
||||
QuantizedTensorAlignment quantizedTensorAlignmentOnActivations;
|
||||
QuantizedTensorAlignment quantizedTensorAlignmentOnWeights;
|
||||
bool supportAsymmetricQuantization;
|
||||
std::vector<element::Type> precisionsOnActivations;
|
||||
std::vector<element::Type> precisionsOnWeights;
|
||||
element::Type deqPrecision;
|
||||
bool support3DTensorOnActivations;
|
||||
bool deconvolutionSpecificChannelsRatio;
|
||||
|
||||
// absolute value, used to determine quantization interval asymmetry
|
||||
float quantizationIntervalAsymmetryThreshold;
|
||||
// absolute value, used to determine zero
|
||||
float zeroThreshold;
|
||||
size_t minQuantizationLevels;
|
||||
|
||||
static const char originalLayerPostfix[];
|
||||
IParamsManager* paramsManager;
|
||||
ILayerTransformationsManager* layerTransformationsManager;
|
||||
TransformationContext* context;
|
||||
|
||||
protected:
|
||||
std::shared_ptr<ngraph::Node> moveDequantizationAfter(
|
||||
@ -340,7 +265,7 @@ protected:
|
||||
std::shared_ptr<ngraph::Node> lastNode,
|
||||
std::string originalName) const;
|
||||
|
||||
void addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr<Node> patternRoot) const;
|
||||
void addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr<Node> patternRoot);
|
||||
|
||||
//TODO: replace with canBeTransformed when quantization by special dimension is supported for all transformations
|
||||
bool canBeTransformedSpatialDimension(const TransformationContext& context, std::shared_ptr<Node> layer) const;
|
||||
@ -358,38 +283,6 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
// Writes the symbolic name of a QuantizedTensorAlignment value to the stream;
// values without a dedicated name are written as their integer representation.
inline std::ostream &operator << (std::ostream &os, const LayerTransformation::QuantizedTensorAlignment& value) {
    if (value == LayerTransformation::QuantizedTensorAlignment::None) {
        os << "None";
        return os;
    }

    if (value == LayerTransformation::QuantizedTensorAlignment::UpdateLevel) {
        os << "UpdateLevel";
        return os;
    }

    os << static_cast<int>(value);
    return os;
}
|
||||
|
||||
// Writes a list of element types as "{a, b, c}".
// The ", " separator is emitted before every element except the first one
// (the previous condition was inverted and produced "{, ab}"-style output).
inline std::ostream &operator << (std::ostream &os, const std::vector<element::Type>& values) {
    os << "{";
    for (size_t i = 0; i < values.size(); ++i) {
        if (i > 0) {
            os << ", ";
        }
        os << values[i];
    }
    os << "}";
    return os;
}
|
||||
|
||||
// Shared-ownership handle to a LayerTransformation.
using LayerTransformationPtr = std::shared_ptr<LayerTransformation>;
|
||||
|
||||
} // namespace low_precision
|
||||
|
@ -0,0 +1,74 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
// one place to include all Low Precision Transformations from ngraph::pass::low_precision
|
||||
#include <low_precision/rt_info/intervals_alignment_attribute.hpp>
|
||||
#include <low_precision/rt_info/quantization_alignment_attribute.hpp>
|
||||
#include <low_precision/rt_info/precisions_attribute.hpp>
|
||||
#include <low_precision/rt_info/precision_preserved_attribute.hpp>
|
||||
|
||||
#include <low_precision/markup_precisions.hpp>
|
||||
#include <low_precision/markup_avg_pool_precision_preserved.hpp>
|
||||
#include <low_precision/propagate_precisions.hpp>
|
||||
#include <low_precision/align_quantization_intervals.hpp>
|
||||
|
||||
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include <low_precision/common/operation_per_tensor_quantization_restriction.hpp>
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
#include "low_precision/markup_precisions.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class LP_TRANSFORMATIONS_API TypeRelaxedReplacer;
|
||||
class LP_TRANSFORMATIONS_API MarkupOptimizations;
|
||||
class LP_TRANSFORMATIONS_API LowPrecision;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
// Function-level pass constructed from precision restrictions and per-tensor quantization
// restrictions; the work itself is done in run_on_function(), defined outside this header.
class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::MarkupOptimizations : public ngraph::pass::FunctionPass {
public:
    NGRAPH_RTTI_DECLARATION;
    MarkupOptimizations(
        const std::vector<OperationPrecisionRestriction>& precisionRestrictions,
        const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions);
    bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
private:
    // Stored by value (like LowPrecision does): reference members would dangle when the pass
    // is constructed from temporaries and executed later.
    std::vector<OperationPrecisionRestriction> precisionRestrictions;
    std::vector<OperationPerTensorQuantizationRestriction> quantizationRestrictions;
};
|
||||
|
||||
// GraphRewrite-based pass exported from the LP transformations library.
// Only the default constructor is declared here; the matchers it registers live in the
// implementation file.
// NOTE(review): judging by the name it swaps operations for their TypeRelaxed
// counterparts - confirm against the .cpp.
class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::TypeRelaxedReplacer : public ngraph::pass::GraphRewrite {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
// Default constructor; defined out of line.
TypeRelaxedReplacer();
|
||||
};
|
||||
|
||||
// Function pass that is the entry point of the low precision transformations.
// It is configured with optional precision restrictions, optional per-tensor quantization
// restrictions and LayerTransformation parameters; all three are stored by value.
class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::LowPrecision : public ngraph::pass::FunctionPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
// All arguments are optional: empty restriction lists and default-constructed Params.
// The third (unnamed) parameter carries the LayerTransformation parameters.
LowPrecision(
|
||||
const std::vector<OperationPrecisionRestriction>& precisionRestrictions = {},
|
||||
const std::vector<OperationPerTensorQuantizationRestriction>& quantizationRestrictions = {},
|
||||
const LayerTransformation::Params = LayerTransformation::Params());
|
||||
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
|
||||
|
||||
// Static query usable without constructing the pass.
// NOTE(review): presumably reports whether the function contains quantization
// operations - confirm against the implementation.
static bool isFunctionQuantized(const std::shared_ptr<const ngraph::Function>& function);
|
||||
|
||||
protected:
|
||||
std::vector<OperationPrecisionRestriction> precisionRestrictions;
|
||||
std::vector<OperationPerTensorQuantizationRestriction> quantizationRestrictions;
|
||||
// NOTE(review): the original author marked this member for removal ("remove") -
// confirm it is still needed before adding new uses.
|
||||
LayerTransformation::Params params;
|
||||
};
|
@ -4,11 +4,12 @@
|
||||
|
||||
/**
|
||||
* @brief Defines openvino domains for tracing
|
||||
* @file lpt_itt.h
|
||||
* @file lpt_itt.hpp
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
|
||||
#include <openvino/itt.hpp>
|
||||
|
||||
namespace ngraph {
|
@ -0,0 +1,18 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ngraph/visibility.hpp"
|
||||
|
||||
/**
|
||||
* @file lpt_visibility.hpp
|
||||
* @brief Defines visibility settings for Inference Engine LP Transformations library
|
||||
*/
|
||||
|
||||
#ifdef inference_engine_lp_transformations_EXPORTS
|
||||
#define LP_TRANSFORMATIONS_API NGRAPH_HELPER_DLL_EXPORT
|
||||
#else
|
||||
#define LP_TRANSFORMATIONS_API NGRAPH_HELPER_DLL_IMPORT
|
||||
#endif
|
@ -1,36 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <ie_api.h>
|
||||
|
||||
#include <ngraph/ngraph.hpp>
|
||||
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include <low_precision/ilayer_transformations_manager.hpp>
|
||||
#include <low_precision/iparams_manager.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
|
||||
// Graph rewrite pass that also implements the IParamsManager and
// ILayerTransformationsManager interfaces.
// Note: only GraphRewrite is inherited publicly; the two interfaces carry no access
// specifier and are therefore private bases of this class.
class TRANSFORMATIONS_API LowPrecisionTransformations: public ngraph::pass::GraphRewrite, IParamsManager, ILayerTransformationsManager {
|
||||
public:
|
||||
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
|
||||
|
||||
// IParamsManager interface implementation
|
||||
// Takes the operation type info (the parameter is named layerName) and returns a list of element types.
std::vector<element::Type> getPrecisionsOnActivations(const NodeTypeInfo& layerName) const noexcept override;
|
||||
|
||||
// ILayerTransformationsManager interface implementation
|
||||
bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
||||
}// namespace pass
|
||||
}// namespace ngraph
|
@ -0,0 +1,25 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <ngraph/pass/pass.hpp>
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class LP_TRANSFORMATIONS_API MarkupAvgPoolPrecisionPreserved;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
class ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved : public ngraph::pass::FunctionPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
|
||||
};
|
@ -0,0 +1,25 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <ngraph/pass/pass.hpp>
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class LP_TRANSFORMATIONS_API MarkupCanBeQuantized;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
class ngraph::pass::low_precision::MarkupCanBeQuantized : public ngraph::pass::FunctionPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
|
||||
};
|
@ -0,0 +1,44 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/pass/pass.hpp>
|
||||
#include "common/operation_per_tensor_quantization_restriction.hpp"
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class LP_TRANSFORMATIONS_API MarkupPerTensorQuantization;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
class ngraph::pass::low_precision::MarkupPerTensorQuantization : public ngraph::pass::FunctionPass {
|
||||
public:
|
||||
class PerTensorQuantization {
|
||||
public:
|
||||
explicit PerTensorQuantization(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {}
|
||||
void add(const uint64_t version, const std::vector<size_t>& ports) {
|
||||
portsByVersion.emplace(version, ports);
|
||||
}
|
||||
|
||||
bool versionIsRequired;
|
||||
std::unordered_map<uint64_t, std::vector<size_t>> portsByVersion;
|
||||
};
|
||||
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
explicit MarkupPerTensorQuantization(const std::vector<OperationPerTensorQuantizationRestriction>& restrictions = {});
|
||||
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string, PerTensorQuantization> restrictionsByOperation;
|
||||
};
|
@ -0,0 +1,47 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/pass/pass.hpp>
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
#include "low_precision/common/operation_precision_restriction.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class LP_TRANSFORMATIONS_API MarkupPrecisions;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
// Transformation is used to add customization options runtime
|
||||
class ngraph::pass::low_precision::MarkupPrecisions : public ngraph::pass::FunctionPass {
|
||||
public:
|
||||
class Restriction {
|
||||
public:
|
||||
explicit Restriction(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {}
|
||||
void add(const uint64_t version, const std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>>& precisions) {
|
||||
precisionsByVersion.emplace(version, precisions);
|
||||
}
|
||||
|
||||
bool versionIsRequired;
|
||||
std::unordered_map<uint64_t, std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>>> precisionsByVersion;
|
||||
};
|
||||
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
explicit MarkupPrecisions(const std::vector<OperationPrecisionRestriction>& restrictions = {});
|
||||
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
|
||||
|
||||
private:
|
||||
static bool isPrecisionPreserved(const std::shared_ptr<Node>& node);
|
||||
static bool isSupported(const std::shared_ptr<Node>& node);
|
||||
std::unordered_map<std::string, Restriction> restrictionsByOperation;
|
||||
};
|
@ -11,14 +11,14 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API MatMulTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API MatMulTransformation : public LayerTransformation {
|
||||
public:
|
||||
MatMulTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~MatMulTransformation() override {}
|
||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override;
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
MatMulTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
static bool is3DTensorOnActivations(const std::shared_ptr<const Node>& node);
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
|
@ -12,12 +12,12 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API MaxPoolTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API MaxPoolTransformation : public LayerTransformation {
|
||||
public:
|
||||
MaxPoolTransformation(const Params& params);
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
MaxPoolTransformation(const Params& params = Params());
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
||||
|
@ -11,12 +11,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API MultiplyTransformation : public EltwiseBaseTransformation {
|
||||
class LP_TRANSFORMATIONS_API MultiplyTransformation : public EltwiseBaseTransformation {
|
||||
public:
|
||||
MultiplyTransformation(const Params& params) : EltwiseBaseTransformation(params) {}
|
||||
~MultiplyTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
MultiplyTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
|
@ -7,24 +7,29 @@
|
||||
#include <memory>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
#include "common/operation_precision_restriction.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API MultiplyToGroupConvolutionTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API MultiplyToGroupConvolutionTransformation : public LayerTransformation {
|
||||
public:
|
||||
MultiplyToGroupConvolutionTransformation(const Params& params) : LayerTransformation(params), groupSize(1ul) {}
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
MultiplyToGroupConvolutionTransformation(
|
||||
const Params& params = Params(),
|
||||
const OperationPrecisionRestriction::PrecisionsByPort& restrictions = {});
|
||||
~MultiplyToGroupConvolutionTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool isQuantized(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool isQuantized(const std::shared_ptr<const Node>& layer) const noexcept override;
|
||||
static bool canBeTransformedToGroupConvolution(const std::shared_ptr<const Node>& layer) noexcept;
|
||||
|
||||
void setGroupSize(const size_t groupSize);
|
||||
size_t getGroupSize() const;
|
||||
private:
|
||||
OperationPrecisionRestriction::PrecisionsByPort restrictions;
|
||||
size_t groupSize;
|
||||
};
|
||||
|
||||
|
@ -10,11 +10,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API MVNTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API MVNTransformation : public LayerTransformation {
|
||||
public:
|
||||
MVNTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
MVNTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
@ -16,6 +16,10 @@
|
||||
#include "ngraph_ops/type_relaxed.hpp"
|
||||
#include <ngraph/rt_info.hpp>
|
||||
|
||||
#include "rt_info/shared_value_attribute.hpp"
|
||||
#include "rt_info/precisions_attribute.hpp"
|
||||
#include "rt_info/per_tensor_quantization_attribute.hpp"
|
||||
#include "rt_info/intervals_alignment_attribute.hpp"
|
||||
#include "transformation_context.hpp"
|
||||
#include "quantization_details.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
@ -30,7 +34,7 @@ namespace low_precision {
|
||||
/**
|
||||
* @brief NetworkHelper class encapsulates manipulations with nGraph function.
|
||||
*/
|
||||
class TRANSFORMATIONS_API NetworkHelper {
|
||||
class LP_TRANSFORMATIONS_API NetworkHelper {
|
||||
public:
|
||||
// Returns true if `type` can be cast to at least one of `types`
|
||||
static bool is_castable_to_one_of(NodeTypeInfo type, const std::unordered_set<NodeTypeInfo>& types);
|
||||
@ -76,6 +80,10 @@ public:
|
||||
|
||||
static std::shared_ptr<Node> swapMultiplyAndAdd(std::shared_ptr<opset1::Add> addAfterMultiply, const int multiplyBranch);
|
||||
|
||||
static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::vector<std::shared_ptr<Node>>& targets);
|
||||
|
||||
static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::shared_ptr<Node>& target);
|
||||
|
||||
static void copyInfo(const std::shared_ptr<Node>& source, const std::shared_ptr<Node>& target);
|
||||
|
||||
static void cleanRunTimeInfo(const std::shared_ptr<Node>& layer);
|
||||
@ -116,7 +124,8 @@ public:
|
||||
std::shared_ptr<opset1::FakeQuantize> fq,
|
||||
element::Type precision,
|
||||
float min,
|
||||
float max);
|
||||
float max,
|
||||
const bool replace = true);
|
||||
|
||||
static FakeQuantizeDequantization makeDequantization(
|
||||
const float dequantizationMul,
|
||||
@ -124,7 +133,8 @@ public:
|
||||
const ngraph::element::Type originalPrecision,
|
||||
const ngraph::PartialShape dataNodeOutputShape,
|
||||
element::Type precision,
|
||||
const element::Type deqPrecision = element::f32);
|
||||
const element::Type deqPrecision = element::f32,
|
||||
std::shared_ptr<ngraph::Node> input = nullptr);
|
||||
|
||||
static FakeQuantizeDequantization createDequantizationFromFakeQuantize(
|
||||
std::shared_ptr<opset1::FakeQuantize> fq,
|
||||
@ -143,7 +153,7 @@ public:
|
||||
|
||||
static FakeQuantizeDequantization getDequantization(const std::shared_ptr<Node>& node, const size_t parentIndex = 0ul, const bool inPlace = false);
|
||||
|
||||
static FakeQuantizeDequantization getDequantizationBelow(const std::shared_ptr<Node>& node);
|
||||
static FakeQuantizeDequantization getDequantizationBelow(const std::shared_ptr<Node>& node, const bool convertIsMandatory = false);
|
||||
|
||||
static FakeQuantizeDequantization normalizeDequantization(FakeQuantizeDequantization dequantization);
|
||||
|
||||
@ -200,6 +210,115 @@ public:
|
||||
|
||||
static bool isDQByDynamicDimension(const std::shared_ptr<Node>& layer, size_t inputIdx = 0);
|
||||
|
||||
static bool isPrecisionPreserved(const std::shared_ptr<ngraph::Node>& node);
|
||||
|
||||
static void replaceAttributeInNodes(
|
||||
std::shared_ptr<ngraph::Function> f,
|
||||
const std::string& name,
|
||||
const std::shared_ptr<ngraph::Variant> newAttribute,
|
||||
const std::shared_ptr<ngraph::Variant> oldAttribute,
|
||||
const std::shared_ptr<ngraph::Node>& initialNode) {
|
||||
std::set<std::shared_ptr<Node>> visited;
|
||||
std::deque<std::shared_ptr<Node>> nodes;
|
||||
nodes.emplace_back(initialNode);
|
||||
|
||||
while (!nodes.empty()) {
|
||||
auto node = nodes.front();
|
||||
nodes.pop_front();
|
||||
|
||||
if (visited.count(node) || is_type<op::Constant>(node)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
visited.insert(node);
|
||||
|
||||
bool handleConnectedNodes = false;
|
||||
if (NetworkHelper::isPrecisionPreserved(node) || is_type<opset1::FakeQuantize>(node)) {
|
||||
auto& rt = node->get_rt_info();
|
||||
|
||||
if (node == initialNode) {
|
||||
rt[name] = newAttribute;
|
||||
handleConnectedNodes = true;
|
||||
} else {
|
||||
auto it = rt.find(name);
|
||||
if (it != rt.end()) {
|
||||
const auto currentAttribute = it->second;
|
||||
if (oldAttribute.get() == currentAttribute.get()) {
|
||||
rt[name] = newAttribute;
|
||||
}
|
||||
handleConnectedNodes = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!handleConnectedNodes) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!is_type<opset1::FakeQuantize>(node)) {
|
||||
for (size_t index = 0ul; index < node->get_input_size(); ++index) {
|
||||
auto getInput = [](const std::shared_ptr<ngraph::Node>& node, const size_t index) {
|
||||
const auto dequantization = NetworkHelper::getDequantization(node, index);
|
||||
if (!dequantization.empty() &&
|
||||
(is_type<opset1::Convert>(dequantization.data.get_node())) &&
|
||||
is_type<opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
|
||||
const auto input = dequantization.data.get_node()->input(0);
|
||||
return input;
|
||||
}
|
||||
return node->input(index);
|
||||
};
|
||||
|
||||
const auto& input = getInput(node, index);
|
||||
const auto& input_node = input.get_source_output().get_node_shared_ptr();
|
||||
|
||||
//const auto& input_node = input.get_source_output().get_node_shared_ptr();
|
||||
if (visited.count(input_node) || is_type<op::Constant>(input_node)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
nodes.push_front(input_node);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& output : node->outputs()) {
|
||||
for (auto& input_value : output.get_target_inputs()) {
|
||||
const auto& output_node = input_value.get_node()->shared_from_this();
|
||||
if (visited.count(output_node) || is_type<op::Constant>(output_node)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
nodes.push_front(output_node);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Re-points every still-alive attribute in `attributes` to `sharedValue`.
 *
 * For each weak pointer that can be locked, the attribute's `sharedValue` is set to the
 * given value and the attribute is appended to `sharedValue->attributes`. Expired weak
 * pointers are skipped.
 *
 * @param sharedValue shared value the attributes have to refer to
 * @param attributes  weak references to the attributes to re-point
 */
template <typename SharedValueType, typename SharedAttributeType>
static void reassign(
    const std::shared_ptr<SharedValueType>& sharedValue,
    const std::vector<std::weak_ptr<SharedAttributeType>>& attributes) {
    // Iterate by reference: copying a weak_ptr costs an atomic ref-count update per element.
    for (const auto& attributeWeakPtr : attributes) {
        const auto attribute = attributeWeakPtr.lock();
        if (attribute == nullptr) {
            continue;
        }
        attribute->sharedValue = sharedValue;
        sharedValue->attributes.push_back(attribute);
    }
}
|
||||
|
||||
static size_t calculateLevels(
|
||||
const float dataPrecisionMin,
|
||||
const float dataPrecisionMax,
|
||||
const float combinedIntervalLow,
|
||||
const float combinedIntervalHigh,
|
||||
const float minIntervalLow,
|
||||
const float minIntervalHigh,
|
||||
float& dequantizationMul,
|
||||
float& dequantizationSub,
|
||||
float& updatedOutputLowValue,
|
||||
float& updatedOutputHighValue);
|
||||
|
||||
private:
|
||||
static std::shared_ptr<Node> foldFakeQuantize(
|
||||
const std::shared_ptr<opset1::FakeQuantize>& fq,
|
||||
@ -292,6 +411,54 @@ std::shared_ptr<Node> fold_reshape(Args&&... args) {
|
||||
return node;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::shared_ptr<ngraph::VariantWrapper<T>> getAttribute(const std::shared_ptr<Node>& inputNode) {
|
||||
auto& rt = inputNode->get_rt_info();
|
||||
auto it = rt.find(ngraph::VariantWrapper<T>::type_info.name);
|
||||
if (it == rt.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto attribute = std::dynamic_pointer_cast<ngraph::VariantWrapper<T>>(it->second);
|
||||
assert(attribute != nullptr);
|
||||
return attribute;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::shared_ptr<ngraph::VariantWrapper<T>> getAttribute(const Input<Node>& input) {
|
||||
auto& rt = input.get_rt_info();
|
||||
auto it = rt.find(ngraph::VariantWrapper<T>::type_info.name);
|
||||
if (it == rt.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto attribute = std::dynamic_pointer_cast<ngraph::VariantWrapper<T>>(it->second);
|
||||
assert(attribute != nullptr);
|
||||
return attribute;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::shared_ptr<ngraph::VariantWrapper<T>> getAttributeFromOutput(const Output<Node>& output) {
|
||||
auto& rt = output.get_rt_info();
|
||||
auto it = rt.find(ngraph::VariantWrapper<T>::type_info.name);
|
||||
if (it == rt.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto attribute = std::dynamic_pointer_cast<ngraph::VariantWrapper<T>>(it->second);
|
||||
assert(attribute != nullptr);
|
||||
return attribute;
|
||||
}
|
||||
|
||||
bool isDisabled(const std::shared_ptr<Node>& node);
|
||||
|
||||
/**
 * @brief Creates a T from the forwarded arguments and registers the new object in the
 * `attributes` list of its own `sharedValue`.
 * @return the newly created attribute
 */
template <typename T, typename ... Args>
std::shared_ptr<T> make_shared_attribute(Args&& ... args) {
    auto created = std::make_shared<T>(std::forward<Args>(args)...);
    auto& owners = created->sharedValue->attributes;
    owners.push_back(created);
    return created;
}
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
@ -10,11 +10,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API NormalizeL2Transformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API NormalizeL2Transformation : public LayerTransformation {
|
||||
public:
|
||||
NormalizeL2Transformation(const Params& params) : LayerTransformation(params) {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
NormalizeL2Transformation(const Params& params = Params());
|
||||
bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
@ -12,12 +12,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API PReluTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API PReluTransformation : public LayerTransformation {
|
||||
public:
|
||||
PReluTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~PReluTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
PReluTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
||||
};
|
||||
|
@ -0,0 +1,29 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class LP_TRANSFORMATIONS_API PropagatePrecisions;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
class ngraph::pass::low_precision::PropagatePrecisions : public ngraph::pass::FunctionPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
|
||||
};
|
@ -0,0 +1,164 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <assert.h>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "lpt_itt.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
template <class AttributeType>
|
||||
class LP_TRANSFORMATIONS_API PropagateSharedValue;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
template <class AttributeType>
|
||||
class ngraph::pass::low_precision::PropagateSharedValue : public ngraph::pass::FunctionPass {
|
||||
public:
|
||||
bool run_on_function(std::shared_ptr<ngraph::Function> f) override {
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "PropagateSharedValue");
|
||||
|
||||
std::vector<std::shared_ptr<ngraph::Node>> nodes(f->get_ordered_ops());
|
||||
for (auto it = nodes.begin(); it != nodes.end(); it++) {
|
||||
const std::shared_ptr<Node> node = *it;
|
||||
if (is_type<opset1::FakeQuantize>(node)) {
|
||||
assert(node->get_output_size() == 1ul);
|
||||
auto& outputRtInfo = node->output(0).get_rt_info();
|
||||
|
||||
auto attribute = make_shared_attribute<AttributeType>(std::set<element::Type>{element::u8, element::i8});
|
||||
|
||||
auto attributeWrapper = std::make_shared<ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>(attribute);
|
||||
outputRtInfo[ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>::type_info.name] = attributeWrapper;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!NetworkHelper::isPrecisionPreserved(node)) {
|
||||
for (auto& input : node->inputs()) {
|
||||
auto parentNode = input.get_source_output().get_node_shared_ptr();
|
||||
|
||||
auto getAttributes = [](const Input<Node>& nodeInput) {
|
||||
const std::string name = ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>::type_info.name;
|
||||
|
||||
auto node = nodeInput.get_source_output().get_node_shared_ptr();
|
||||
std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>> attributes;
|
||||
if (is_type<opset1::FakeQuantize>(node)) {
|
||||
// output
|
||||
auto& rt = nodeInput.get_source_output().get_rt_info();
|
||||
auto it = rt.find(name);
|
||||
if (it != rt.end()) {
|
||||
const auto& attribute = std::dynamic_pointer_cast<ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>(it->second);
|
||||
attributes.push_back(attribute);
|
||||
}
|
||||
}
|
||||
|
||||
return attributes;
|
||||
};
|
||||
|
||||
auto& nodeRt = input.get_rt_info();
|
||||
|
||||
const std::string name = ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>::type_info.name;
|
||||
const auto it = nodeRt.find(name);
|
||||
if (it == nodeRt.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto& attribute = std::dynamic_pointer_cast<ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>(it->second);
|
||||
std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>> attributes{ attribute };
|
||||
|
||||
auto parentAttributes = getAttributes(input);
|
||||
if (parentAttributes.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (auto& parentAttribute : parentAttributes) {
|
||||
parentAttribute->merge(attributes);
|
||||
}
|
||||
|
||||
nodeRt[name] = parentAttributes[0];
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
handle(f, node);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>> getParentInputRestrictions(
|
||||
const std::shared_ptr<ngraph::Node> node) {
|
||||
std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>> parentAttributes;
|
||||
for (size_t index = 0ul; index < node->get_input_size(); index++) {
|
||||
const Input<Node>& input = node->input(index);
|
||||
auto inputNode = input.get_source_output().get_node()->shared_from_this();
|
||||
|
||||
const auto dequantization = NetworkHelper::getDequantization(node, index);
|
||||
if (!dequantization.empty() &&
|
||||
(is_type<opset1::Convert>(dequantization.data.get_node())) &&
|
||||
is_type<opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
|
||||
inputNode = dequantization.data.get_node()->get_input_node_shared_ptr(0);
|
||||
}
|
||||
|
||||
if (NetworkHelper::isPrecisionPreserved(inputNode)) {
|
||||
auto& inputRtInfo = inputNode->get_rt_info();
|
||||
auto inputAttributeIt = inputRtInfo.find(ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>::type_info.name);
|
||||
if (inputAttributeIt != inputRtInfo.end()) {
|
||||
const auto attribute = std::dynamic_pointer_cast<ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>(inputAttributeIt->second);
|
||||
parentAttributes.push_back(attribute);
|
||||
}
|
||||
} else if (is_type<opset1::FakeQuantize>(inputNode)) {
|
||||
const auto& outputPortRtInfo = inputNode->outputs()[0].get_rt_info();
|
||||
auto attributeIt = outputPortRtInfo.find(ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>::type_info.name);
|
||||
if (attributeIt != outputPortRtInfo.end()) {
|
||||
const auto attribute = std::dynamic_pointer_cast<ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>(attributeIt->second);
|
||||
parentAttributes.push_back(attribute);
|
||||
}
|
||||
}
|
||||
}
|
||||
return parentAttributes;
|
||||
}
|
||||
|
||||
// Propagates the precisions restriction through a precision preserved operation.
//
// Does nothing unless NetworkHelper::isPrecisionPreserved(node) is true and at least one parent
// attribute is found. Otherwise the first parent attribute becomes the result: the remaining parent
// attributes are merged into it, their shared values are re-pointed to the result's shared value
// and the result is stored in the node rt_info.
//
// f    - not used by this method
// node - operation to handle
void handle(std::shared_ptr<ngraph::Function> f, const std::shared_ptr<ngraph::Node>& node) {
    if (!NetworkHelper::isPrecisionPreserved(node)) {
        return;
    }

    const auto parentRestrictions = getParentInputRestrictions(node);
    if (parentRestrictions.empty()) {
        return;
    }

    // one operation - one output precision
    // merge parent inputs to one current output
    auto resultAttribute = parentRestrictions[0];

    // merge() takes a non-const reference, so the tail of the list is materialized as a named vector
    std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>> toMerge(
        parentRestrictions.begin() + 1,
        parentRestrictions.end());
    resultAttribute->merge(toMerge);

    for (size_t index = 1ul; index < parentRestrictions.size(); index++) {
        NetworkHelper::reassign<PrecisionsSharedValue, PrecisionsAttribute>(
            resultAttribute->get()->sharedValue,
            parentRestrictions[index]->get()->sharedValue->attributes);
    }

    auto& rt = node->get_rt_info();
    rt[ngraph::VariantWrapper<std::shared_ptr<PrecisionsAttribute>>::type_info.name] = resultAttribute;
}
|
||||
};
|
||||
|
@ -0,0 +1,118 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/lpt_itt.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
template <typename AttributeType>
|
||||
class PropagateThroughPrecisionPreserved;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
template <typename AttributeType>
|
||||
class ngraph::pass::low_precision::PropagateThroughPrecisionPreserved : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
PropagateThroughPrecisionPreserved() {
|
||||
ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) {
|
||||
auto node = m.get_match_root();
|
||||
if (transformation_callback(node)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
{
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "PropagateThroughPrecisionPreserved");
|
||||
|
||||
if (!ngraph::pass::low_precision::NetworkHelper::isPrecisionPreserved(node)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto parentRestrictions = getParentInputRestrictions(node);
|
||||
if (parentRestrictions.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto resultAttribute = parentRestrictions[0];
|
||||
|
||||
std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>>> toMerge = parentRestrictions;
|
||||
// TODO: LPT: handle pointer on itself in VariantWrapper<IntervalsAlignmentAttributePtr>::merge and remove erase, task #59498
|
||||
toMerge.erase(toMerge.begin());
|
||||
resultAttribute->merge(toMerge);
|
||||
|
||||
for (size_t index = 1ul; index < parentRestrictions.size(); index++) {
|
||||
const auto attributes = parentRestrictions[index]->get()->sharedValue->attributes;
|
||||
for (const auto attributeWeakPtr : attributes) {
|
||||
auto attribute = attributeWeakPtr.lock();
|
||||
if (attribute == nullptr) {
|
||||
continue;
|
||||
}
|
||||
attribute->sharedValue = resultAttribute->get()->sharedValue;
|
||||
resultAttribute->get()->sharedValue->attributes.push_back(attribute);
|
||||
}
|
||||
}
|
||||
|
||||
auto &rt = node->get_rt_info();
|
||||
rt[ngraph::VariantWrapper<std::shared_ptr<AttributeType>>::type_info.name] = resultAttribute;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
auto matcher = std::make_shared<ngraph::pattern::Matcher>(pattern::any_input(), "PropagateThroughPrecisionPreserved");
|
||||
this->register_matcher(matcher, callback);
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>> getSourceOutputAttribute(const Input<Node>& input) {
|
||||
auto input2 = input;
|
||||
auto output = input2.get_source_output();
|
||||
std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>> attribute = getAttributeFromOutput<std::shared_ptr<AttributeType>>(output);
|
||||
if (attribute == nullptr) {
|
||||
attribute = getAttribute<std::shared_ptr<AttributeType>>(output.get_node_shared_ptr());
|
||||
}
|
||||
return attribute;
|
||||
}
|
||||
|
||||
// TODO: possible duplicate: PropagateToInput::getSourceOutputAttribute
|
||||
std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>>> getParentInputRestrictions(
|
||||
const std::shared_ptr<ngraph::Node> node) {
|
||||
std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>>> parentAttributes;
|
||||
auto getInput = [](const std::shared_ptr<ngraph::Node>& node, const size_t index) -> Input<Node> {
|
||||
const auto dequantization = NetworkHelper::getDequantization(node, index);
|
||||
if (!dequantization.empty() &&
|
||||
is_type<opset1::Convert>(dequantization.data.get_node()) &&
|
||||
(dequantization.data.get_node()->get_input_size() == 1ul) &&
|
||||
is_type<opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
|
||||
return dequantization.data.get_node()->input(0);
|
||||
}
|
||||
|
||||
return node->input(index);
|
||||
};
|
||||
|
||||
for (size_t index = 0ul; index < node->get_input_size(); index++) {
|
||||
const Input<Node>& input = getInput(node, index);
|
||||
const auto attribute = getSourceOutputAttribute(input);
|
||||
if (attribute != nullptr) {
|
||||
parentAttributes.push_back(attribute);
|
||||
}
|
||||
}
|
||||
|
||||
return parentAttributes;
|
||||
}
|
||||
};
|
@ -0,0 +1,105 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include "network_helper.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
template <typename AttributeType>
|
||||
class PropagateToInput;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
template <typename AttributeType>
|
||||
class ngraph::pass::low_precision::PropagateToInput : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
PropagateToInput() {
|
||||
ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) {
|
||||
auto node = m.get_match_root();
|
||||
if (transformation_callback(node)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
{
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "PropagateToInput");
|
||||
|
||||
for (auto input : node->inputs()) {
|
||||
auto parentAttribute = getSourceOutputAttribute(input);
|
||||
if (parentAttribute == nullptr) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto attribute = getAttribute<std::shared_ptr<AttributeType>>(input);
|
||||
if (attribute != nullptr) {
|
||||
if ((attribute->get()->sharedValue != nullptr) && (attribute->get()->sharedValue->precisions.empty())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<AttributeType>>>> attributes = { attribute };
|
||||
parentAttribute->merge(attributes);
|
||||
}
|
||||
|
||||
auto& rt = input.get_rt_info();
|
||||
rt[ngraph::VariantWrapper<std::shared_ptr<AttributeType>>::type_info.name] = parentAttribute;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
auto matcher = std::make_shared<ngraph::pattern::Matcher>(pattern::any_input(), "PropagateThroughPrecisionPreserved");
|
||||
this->register_matcher(matcher, callback);
|
||||
}
|
||||
|
||||
private:
|
||||
// TODO: possible duplicate: PropagateThroughPrecisionPreserved::getParentInputRestrictions
|
||||
std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>> getSourceOutputAttribute(const Input<Node>& input) {
|
||||
auto getInput = [](const Input<Node>& input) {
|
||||
const auto dequantization = NetworkHelper::getDequantization(input.get_node()->shared_from_this(), input.get_index());
|
||||
if (!dequantization.empty() &&
|
||||
is_type<opset1::Convert>(dequantization.data.get_node()) &&
|
||||
(dequantization.data.get_node()->get_input_size() == 1ul) &&
|
||||
is_type<opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
|
||||
return dequantization.data.get_node()->input(0);
|
||||
}
|
||||
|
||||
return input;
|
||||
};
|
||||
|
||||
auto input2 = getInput(input);
|
||||
auto output = input2.get_source_output();
|
||||
std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>> attribute = getAttributeFromOutput<std::shared_ptr<AttributeType>>(output);
|
||||
if (attribute == nullptr) {
|
||||
attribute = getAttribute<std::shared_ptr<AttributeType>>(output.get_node_shared_ptr());
|
||||
}
|
||||
return attribute;
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>>> getParentInputRestrictions(
|
||||
const std::shared_ptr<ngraph::Node> node) {
|
||||
std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AttributeType>>>> parentAttributes;
|
||||
for (size_t index = 0ul; index < node->get_input_size(); index++) {
|
||||
const Input<Node>& input = node->input(index);
|
||||
const auto attribute = getSourceOutputAttribute(input);
|
||||
if (attribute != nullptr) {
|
||||
parentAttributes.push_back(attribute);
|
||||
}
|
||||
}
|
||||
return parentAttributes;
|
||||
}
|
||||
};
|
@ -6,14 +6,14 @@
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <transformations_visibility.hpp>
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API PullReshapeThroughDequantization;
|
||||
class LP_TRANSFORMATIONS_API PullReshapeThroughDequantization;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
|
@ -6,14 +6,14 @@
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <transformations_visibility.hpp>
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API PullTransposeThroughDequantization;
|
||||
class LP_TRANSFORMATIONS_API PullTransposeThroughDequantization;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
|
@ -8,7 +8,7 @@
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
|
||||
#include <transformations_visibility.hpp>
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
@ -18,7 +18,7 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API QuantizationDetails {
|
||||
class LP_TRANSFORMATIONS_API QuantizationDetails {
|
||||
public:
|
||||
QuantizationDetails();
|
||||
QuantizationDetails(const QuantizationDetails& quantizationDetails);
|
||||
@ -27,33 +27,25 @@ public:
|
||||
const std::vector<float>& inputLowValues,
|
||||
const std::vector<float>& inputHighValues,
|
||||
const std::vector<float>& outputLowValues,
|
||||
const std::vector<float>& outputHighValues,
|
||||
const size_t inputIntervalsCount,
|
||||
const size_t outputIntervalsCount,
|
||||
const size_t outputChannelsCount);
|
||||
const std::vector<float>& outputHighValues);
|
||||
|
||||
static bool outputLayoutIsSupported(std::shared_ptr<opset1::FakeQuantize> quantize);
|
||||
|
||||
static void getInputIntervals(
|
||||
std::shared_ptr<opset1::FakeQuantize> quantize,
|
||||
std::vector<float>& inputLowValues,
|
||||
std::vector<float>& inputHighValues,
|
||||
size_t& inputIntervalsCount);
|
||||
std::vector<float>& inputHighValues);
|
||||
|
||||
static void getOutputIntervals(
|
||||
std::shared_ptr<opset1::FakeQuantize> quantize,
|
||||
std::vector<float>& outputLowValues,
|
||||
std::vector<float>& outputHighValues,
|
||||
size_t& outputIntervalsCount);
|
||||
std::vector<float>& outputHighValues);
|
||||
|
||||
static QuantizationDetails getDetails(std::shared_ptr<opset1::FakeQuantize>);
|
||||
bool hasNegativeOutput() const;
|
||||
float maxOutput(const size_t channel) const;
|
||||
float maxInput(const size_t channel) const;
|
||||
|
||||
float maxOutputHigh() const;
|
||||
float minOutputLow() const;
|
||||
|
||||
float getInputLowValue(const size_t channel) const;
|
||||
float getInputHighValue(const size_t channel) const;
|
||||
float getOutputLowValue(const size_t channel) const;
|
||||
@ -66,19 +58,15 @@ public:
|
||||
const std::vector<float> inputHighValues;
|
||||
const std::vector<float> outputLowValues;
|
||||
const std::vector<float> outputHighValues;
|
||||
const size_t inputIntervalsCount;
|
||||
const size_t outputIntervalsCount;
|
||||
const size_t outputChannelsCount;
|
||||
|
||||
private:
|
||||
static void validate(std::shared_ptr<Node> constantLayer);
|
||||
static std::vector<float> getBlobValue(std::shared_ptr<Node> constantLayer);
|
||||
};
|
||||
|
||||
inline std::ostream &operator << (std::ostream &os, const QuantizationDetails& value) {
|
||||
os << "levels: " << value.levels <<
|
||||
", input 1/" << value.inputIntervalsCount << ": [" << value.getInputLowValue(0) << " : " << value.getInputHighValue(0) << "], " <<
|
||||
", output 1/" << value.outputIntervalsCount << ": [" << value.getOutputLowValue(0) << " : " << value.getOutputHighValue(0) << "]";
|
||||
", input 1/" << value.inputLowValues.size() << ": [" << value.getInputLowValue(0) << " : " << value.getInputHighValue(0) << "], " <<
|
||||
", output 1/" << value.outputLowValues.size() << ": [" << value.getOutputLowValue(0) << " : " << value.getOutputHighValue(0) << "]";
|
||||
return os;
|
||||
}
|
||||
|
||||
|
@ -19,10 +19,10 @@ namespace low_precision {
|
||||
*
|
||||
*/
|
||||
|
||||
class TRANSFORMATIONS_API ReduceBaseTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API ReduceBaseTransformation : public LayerTransformation {
|
||||
public:
|
||||
ReduceBaseTransformation(const Params& params);
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override;
|
||||
ReduceBaseTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;
|
||||
|
||||
protected:
|
||||
|
@ -14,11 +14,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ReduceMaxTransformation : public ReduceBaseTransformation {
|
||||
class LP_TRANSFORMATIONS_API ReduceMaxTransformation : public ReduceBaseTransformation {
|
||||
public:
|
||||
ReduceMaxTransformation(const Params& params);
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
ReduceMaxTransformation(const Params& params = Params());
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept override;
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;
|
||||
|
||||
protected:
|
||||
|
@ -14,11 +14,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ReduceMeanTransformation : public ReduceBaseTransformation {
|
||||
class LP_TRANSFORMATIONS_API ReduceMeanTransformation : public ReduceBaseTransformation {
|
||||
public:
|
||||
ReduceMeanTransformation(const Params& params);
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
ReduceMeanTransformation(const Params& params = Params());
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept override;
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;
|
||||
|
||||
protected:
|
||||
|
@ -14,11 +14,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ReduceMinTransformation : public ReduceBaseTransformation {
|
||||
class LP_TRANSFORMATIONS_API ReduceMinTransformation : public ReduceBaseTransformation {
|
||||
public:
|
||||
ReduceMinTransformation(const Params& params);
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
ReduceMinTransformation(const Params& params = Params());
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept override;
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;
|
||||
|
||||
protected:
|
||||
|
@ -14,11 +14,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ReduceSumTransformation : public ReduceBaseTransformation {
|
||||
class LP_TRANSFORMATIONS_API ReduceSumTransformation : public ReduceBaseTransformation {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
ReduceSumTransformation(const Params& params);
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept override;
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;
|
||||
|
||||
protected:
|
||||
|
@ -12,12 +12,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ReluTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API ReluTransformation : public LayerTransformation {
|
||||
public:
|
||||
ReluTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~ReluTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
ReluTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
||||
};
|
||||
|
@ -11,12 +11,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ReshapeTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API ReshapeTransformation : public LayerTransformation {
|
||||
public:
|
||||
ReshapeTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~ReshapeTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
ReshapeTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
||||
|
||||
|
@ -0,0 +1,14 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/type/element_type.hpp>
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
|
||||
class LP_TRANSFORMATIONS_API AttributeParameters {
|
||||
public:
|
||||
AttributeParameters(const ngraph::element::Type deqPrecision = ngraph::element::f32) : deqPrecision(deqPrecision) {}
|
||||
ngraph::element::Type deqPrecision;
|
||||
};
|
@ -0,0 +1,39 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
#include "low_precision/rt_info/precision_preserved_attribute.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
// Distinct attribute type which adds nothing to PrecisionPreservedAttribute; it only gives the
// attribute its own type and thus its own rt_info wrapper.
class LP_TRANSFORMATIONS_API AvgPoolPrecisionPreservedAttribute : public PrecisionPreservedAttribute {};
|
||||
|
||||
using AvgPoolPrecisionPreservedAttributePtr = std::shared_ptr<AvgPoolPrecisionPreservedAttribute>;
|
||||
|
||||
extern template class LP_TRANSFORMATIONS_API VariantImpl<AvgPoolPrecisionPreservedAttributePtr>;
|
||||
|
||||
template<>
|
||||
class LP_TRANSFORMATIONS_API VariantWrapper<AvgPoolPrecisionPreservedAttributePtr> : public VariantImpl<AvgPoolPrecisionPreservedAttributePtr> {
|
||||
public:
|
||||
static constexpr VariantTypeInfo type_info{ "LowPrecision::AvgPoolPrecisionPreserved", 0 };
|
||||
|
||||
const VariantTypeInfo& get_type_info() const override {
|
||||
return type_info;
|
||||
}
|
||||
|
||||
VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
|
||||
|
||||
AvgPoolPrecisionPreservedAttributePtr get() { return this->m_value; }
|
||||
|
||||
void merge(std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AvgPoolPrecisionPreservedAttribute>>>>& attributes);
|
||||
std::string to_string() override;
|
||||
};
|
||||
} // namespace ngraph
|
@ -0,0 +1,88 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include "low_precision/rt_info/shared_value_attribute.hpp"
|
||||
#include "low_precision/rt_info/attribute_parameters.hpp"
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
class IntervalsAlignmentAttribute;
|
||||
|
||||
// Value shared between all IntervalsAlignmentAttribute instances which refer to it.
class LP_TRANSFORMATIONS_API IntervalsAlignmentSharedValue : public SharedValue<IntervalsAlignmentAttribute> {
public:
    // Pair of interval boundaries.
    class Interval {
    public:
        Interval() = default;
        Interval(const float low, const float high) : low(low), high(high) {}

        // zero-initialized so that a default constructed interval never holds indeterminate values
        float low = 0.f;
        float high = 0.f;
    };

    IntervalsAlignmentSharedValue() = default;
    IntervalsAlignmentSharedValue(
        const Interval& combinedInterval,
        const Interval& minInterval,
        const size_t minLevels) :
        combinedInterval(combinedInterval),
        minInterval(minInterval),
        minLevels(minLevels) {}

    Interval combinedInterval;
    Interval minInterval;
    // zero-initialized so that a default constructed value never holds an indeterminate level count
    size_t minLevels = 0ul;
    // precisions which the affected quantization operations prefer in order to avoid zero points
    std::set<element::Type> preferablePrecisions;

#ifdef LPT_DEBUG
    std::string minLevelsOperation;
#endif
};
|
||||
|
||||
// Attribute which refers to an IntervalsAlignmentSharedValue and additionally keeps the original
// levels of its subgraph. The non-default constructors are defined out of line.
class LP_TRANSFORMATIONS_API IntervalsAlignmentAttribute : public SharedValueAttribute<IntervalsAlignmentSharedValue> {
public:
    IntervalsAlignmentAttribute() = default;
    IntervalsAlignmentAttribute(IntervalsAlignmentSharedValue::Interval combinedInterval, size_t levels);
    IntervalsAlignmentAttribute(
        const IntervalsAlignmentSharedValue::Interval combinedInterval,
        const size_t levels,
        const IntervalsAlignmentSharedValue::Interval minInterval,
        const size_t minLevels);

    // specify subgraph original levels
    // zero-initialized so that a default constructed attribute never holds an indeterminate value
    size_t levels = 0ul;
};
|
||||
|
||||
using IntervalsAlignmentAttributePtr = std::shared_ptr<IntervalsAlignmentAttribute>;
|
||||
|
||||
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<IntervalsAlignmentAttributePtr>;
|
||||
|
||||
template<>
|
||||
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>> :
|
||||
public VariantImpl<std::shared_ptr<IntervalsAlignmentAttribute>> {
|
||||
public:
|
||||
static constexpr VariantTypeInfo type_info{ "LowPrecision::IntervalsAlignment", 0 };
|
||||
|
||||
const VariantTypeInfo& get_type_info() const override {
|
||||
return type_info;
|
||||
}
|
||||
|
||||
VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
|
||||
|
||||
std::shared_ptr<IntervalsAlignmentAttribute> get() const { return this->m_value; }
|
||||
|
||||
static std::shared_ptr<VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>>> create(
|
||||
const std::shared_ptr<ngraph::Node>& node,
|
||||
const AttributeParameters& params);
|
||||
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<IntervalsAlignmentAttribute>>>>& attributes);
|
||||
std::string to_string() override;
|
||||
};
|
||||
} // namespace ngraph
|
@ -0,0 +1,33 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include "low_precision/rt_info/shared_value_attribute.hpp"
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
#include "attribute_parameters.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
// Marker attribute without any data members: it carries no value, only its type.
class LP_TRANSFORMATIONS_API PerTensorQuantizationAttribute {};
|
||||
|
||||
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<PerTensorQuantizationAttribute>;
|
||||
|
||||
template<>
|
||||
class LP_TRANSFORMATIONS_API VariantWrapper<PerTensorQuantizationAttribute> : public VariantImpl<PerTensorQuantizationAttribute> {
|
||||
public:
|
||||
static constexpr VariantTypeInfo type_info { "LowPrecision::PerTensorQuantization", 0 };
|
||||
|
||||
VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
|
||||
|
||||
const VariantTypeInfo& get_type_info() const override {
|
||||
return type_info;
|
||||
}
|
||||
};
|
||||
} // namespace ngraph
|
@ -0,0 +1,51 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
#include "low_precision/rt_info/shared_value_attribute.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
|
||||
class LP_TRANSFORMATIONS_API PrecisionPreservedAttribute;
|
||||
|
||||
// Value shared between all PrecisionPreservedAttribute instances which refer to it.
class LP_TRANSFORMATIONS_API PrecisionPreservedSharedValue : public SharedValue<PrecisionPreservedAttribute> {
public:
    PrecisionPreservedSharedValue() = default;
    PrecisionPreservedSharedValue(const bool value) : value(value) {}

    // initialized to false (the same default as QuantizationAlignmentSharedValue) so that a default
    // constructed value never holds an indeterminate flag
    bool value = false;
};
|
||||
|
||||
// Attribute which refers to a PrecisionPreservedSharedValue.
class LP_TRANSFORMATIONS_API PrecisionPreservedAttribute : public SharedValueAttribute<PrecisionPreservedSharedValue> {
public:
    // Defined out of line.
    PrecisionPreservedAttribute(const bool value);

    PrecisionPreservedAttribute() = default;
};
|
||||
|
||||
using PrecisionPreservedAttributePtr = std::shared_ptr<PrecisionPreservedAttribute>;
|
||||
|
||||
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<PrecisionPreservedAttributePtr>;
|
||||
|
||||
template<>
|
||||
class LP_TRANSFORMATIONS_API VariantWrapper<PrecisionPreservedAttributePtr> : public VariantImpl<PrecisionPreservedAttributePtr> {
|
||||
public:
|
||||
static constexpr VariantTypeInfo type_info{ "LowPrecision::PrecisionPreserved", 0 };
|
||||
|
||||
const VariantTypeInfo& get_type_info() const override {
|
||||
return type_info;
|
||||
}
|
||||
|
||||
VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
|
||||
|
||||
PrecisionPreservedAttributePtr get() { return this->m_value; }
|
||||
|
||||
std::string to_string() override;
|
||||
};
|
||||
} // namespace ngraph
|
@ -0,0 +1,64 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
#include "low_precision/rt_info/attribute_parameters.hpp"
|
||||
#include "low_precision/rt_info/shared_value_attribute.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
|
||||
class PrecisionsAttribute;
|
||||
|
||||
// Value shared between all PrecisionsAttribute instances which refer to it.
class LP_TRANSFORMATIONS_API PrecisionsSharedValue : public SharedValue<PrecisionsAttribute> {
public:
    // element types kept by the attributes which share this value
    std::vector<ngraph::element::Type> precisions;
};
|
||||
|
||||
using PrecisionsAttributePtr = std::shared_ptr<PrecisionsAttribute>;
|
||||
|
||||
// Attribute which refers to a PrecisionsSharedValue.
class LP_TRANSFORMATIONS_API PrecisionsAttribute : public SharedValueAttribute<PrecisionsSharedValue> {
public:
    // precisions used when the constructor is called without an argument (defined out of line)
    static const std::vector<ngraph::element::Type> defaultPrecisions;

    // Defined out of line.
    PrecisionsAttribute(const std::vector<ngraph::element::Type>& precisions = defaultPrecisions);
};
|
||||
|
||||
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<std::shared_ptr<PrecisionsAttribute>>;
|
||||
|
||||
template<>
|
||||
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<PrecisionsAttribute>> : public VariantImpl<std::shared_ptr<PrecisionsAttribute>> {
|
||||
public:
|
||||
static constexpr VariantTypeInfo type_info{ "LowPrecision::Precisions", 0 };
|
||||
|
||||
const VariantTypeInfo& get_type_info() const override {
|
||||
return type_info;
|
||||
}
|
||||
|
||||
VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
|
||||
|
||||
std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override;
|
||||
|
||||
std::shared_ptr<PrecisionsAttribute> get() { return this->m_value; }
|
||||
|
||||
// create attribute instance for node
|
||||
static std::shared_ptr<VariantWrapper<std::shared_ptr<PrecisionsAttribute>>> create(
|
||||
const std::shared_ptr<ngraph::Node>& node,
|
||||
const AttributeParameters& params);
|
||||
// merge attribute instances which can be got from different sources: node, input port or output port
|
||||
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>>& attributes);
|
||||
// vizualize shared attributes details in VizualizeTree pass
|
||||
std::string to_string() override;
|
||||
};
|
||||
} // namespace ngraph
|
@ -0,0 +1,60 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include <low_precision/lpt_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include "shared_value_attribute.hpp"
|
||||
#include "attribute_parameters.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
class QuantizationAlignmentAttribute;
|
||||
|
||||
// Shared payload of QuantizationAlignmentAttribute: a single boolean flag.
class LP_TRANSFORMATIONS_API QuantizationAlignmentSharedValue : public SharedValue<QuantizationAlignmentAttribute> {
|
||||
public:
|
||||
// the flag is false unless a value is passed explicitly
QuantizationAlignmentSharedValue(const bool value = false) : value(value) {}
|
||||
// public flag, read and written directly by users of the shared value
bool value;
|
||||
};
|
||||
|
||||
// Attribute built on SharedValueAttribute with QuantizationAlignmentSharedValue as the shared payload type.
class LP_TRANSFORMATIONS_API QuantizationAlignmentAttribute : public SharedValueAttribute<QuantizationAlignmentSharedValue>{
|
||||
public:
|
||||
// `value` defaults to false; the constructor is implemented out of line
QuantizationAlignmentAttribute(const bool value = false);
|
||||
};
|
||||
|
||||
using QuantizationAlignmentAttributePtr = std::shared_ptr<QuantizationAlignmentAttribute>;
|
||||
|
||||
extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<QuantizationAlignmentAttributePtr>;
|
||||
|
||||
// Variant wrapper specialization that stores a std::shared_ptr<QuantizationAlignmentAttribute>
// under the type name "LowPrecision::QuantizationAlignment".
template<>
|
||||
class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>> :
|
||||
public VariantImpl<std::shared_ptr<QuantizationAlignmentAttribute>> {
|
||||
public:
|
||||
// type name and version reported by get_type_info()
static constexpr VariantTypeInfo type_info{ "LowPrecision::QuantizationAlignment", 0 };
|
||||
|
||||
const VariantTypeInfo& get_type_info() const override {
|
||||
return type_info;
|
||||
}
|
||||
|
||||
// wrap an existing attribute pointer
VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
|
||||
|
||||
// Variant::init override; implemented out of line
std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override;
|
||||
|
||||
// returns the wrapped attribute pointer (m_value)
std::shared_ptr<QuantizationAlignmentAttribute> get() { return this->m_value; }
|
||||
|
||||
// factory taking the node and attribute parameters; implemented out of line
static std::shared_ptr<VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>>> create(
|
||||
const std::shared_ptr<ngraph::Node>& node,
|
||||
const AttributeParameters& params);
|
||||
// merges the given attribute instances; implemented out of line
void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>>>>& attributes);
|
||||
// textual representation of the attribute; implemented out of line
std::string to_string() override;
|
||||
};
|
||||
} // namespace ngraph
|
@ -0,0 +1,59 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_set>
#include <vector>

#include <ngraph/node.hpp>
#include <ngraph/variant.hpp>

#include <low_precision/lpt_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
|
||||
template <class SharedAttributeType>
|
||||
class LP_TRANSFORMATIONS_API SharedValue;
|
||||
|
||||
template <class SharedValueType>
|
||||
class LP_TRANSFORMATIONS_API SharedValueAttribute {
|
||||
public:
|
||||
SharedValueAttribute() : sharedValue(std::make_shared<SharedValueType>()) {}
|
||||
virtual ~SharedValueAttribute() = default;
|
||||
std::shared_ptr<SharedValueType> sharedValue;
|
||||
std::string get_string() {
|
||||
std::stringstream ss;
|
||||
|
||||
const size_t rawPointer = (size_t)this;
|
||||
ss << rawPointer << ": ";
|
||||
|
||||
const size_t sharedValueRawPointer = (size_t)sharedValue.get();
|
||||
ss << "sharedValue: " << sharedValueRawPointer;
|
||||
|
||||
bool firstAttribute = true;
|
||||
ss << ", attributes: {";
|
||||
for (auto& attributeWeakPtr : sharedValue->attributes) {
|
||||
auto attribute = attributeWeakPtr.lock();
|
||||
if (attribute == nullptr) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!firstAttribute) {
|
||||
ss << ", ";
|
||||
}
|
||||
ss << (size_t)attribute.get();
|
||||
firstAttribute = false;
|
||||
}
|
||||
ss << "}, ";
|
||||
return ss.str();
|
||||
}
|
||||
};
|
||||
|
||||
// Base class for the payload shared between attribute instances.
// SharedValueAttributeType is the attribute type that refers to this value.
template <class SharedValueAttributeType>
|
||||
class LP_TRANSFORMATIONS_API SharedValue {
|
||||
public:
|
||||
virtual ~SharedValue() = default;
|
||||
// non-owning references to attributes; entries may be expired, so lock() results must be checked
std::vector<std::weak_ptr<SharedValueAttributeType>> attributes;
|
||||
};
|
@ -11,11 +11,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API ShuffleChannelsTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API ShuffleChannelsTransformation : public LayerTransformation {
|
||||
public:
|
||||
ShuffleChannelsTransformation(const Params& params);
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
ShuffleChannelsTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
||||
};
|
||||
|
@ -13,11 +13,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API SplitTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API SplitTransformation : public LayerTransformation {
|
||||
public:
|
||||
SplitTransformation(const Params& params);
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
SplitTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
void updateOutputs(
|
||||
|
@ -11,11 +11,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API SqueezeTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API SqueezeTransformation : public LayerTransformation {
|
||||
public:
|
||||
SqueezeTransformation(const Params& params);
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
SqueezeTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
};
|
||||
|
@ -12,11 +12,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API StridedSliceTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API StridedSliceTransformation : public LayerTransformation {
|
||||
public:
|
||||
StridedSliceTransformation(const Params& params);
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
StridedSliceTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
@ -11,12 +11,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API SubtractTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API SubtractTransformation : public LayerTransformation {
|
||||
public:
|
||||
SubtractTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~SubtractTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
SubtractTransformation(const Params& params);
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
|
@ -12,12 +12,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API SubtractMultiplyToMultiplyAddTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API SubtractMultiplyToMultiplyAddTransformation : public LayerTransformation {
|
||||
public:
|
||||
SubtractMultiplyToMultiplyAddTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~SubtractMultiplyToMultiplyAddTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
SubtractMultiplyToMultiplyAddTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
@ -13,8 +13,9 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API TransformationContext {
|
||||
class LP_TRANSFORMATIONS_API TransformationContext {
|
||||
public:
|
||||
TransformationContext();
|
||||
explicit TransformationContext(std::shared_ptr<Function> function);
|
||||
std::shared_ptr<Function> function;
|
||||
|
||||
|
@ -1,316 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph_ops/type_relaxed.hpp>
|
||||
|
||||
#include "layer_transformation.hpp"
|
||||
#include "iparams_manager.hpp"
|
||||
#include "ilayer_transformations_manager.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
// Entry of the standalone cleanup list kept by LowPrecisionTransformations.
struct StandaloneCleanup {
|
||||
// operation type name, filled from getType<Operation>() in addStandaloneCleanup()
std::string typeName;
|
||||
// typeid(Transformation).name() of the registered transformation
std::string typeId;
|
||||
// the transformation instance itself
LayerTransformationPtr transformation;
|
||||
};
|
||||
|
||||
class TRANSFORMATIONS_API LowPrecisionTransformations {
|
||||
public:
|
||||
LowPrecisionTransformations() {}
|
||||
LowPrecisionTransformations(
|
||||
const std::map<std::string, LayerTransformationPtr>& branchSpecificTransformations,
|
||||
const std::map<std::string, LayerTransformationPtr>& decompositionTransformations,
|
||||
const std::map<std::string, LayerTransformationPtr>& transformations,
|
||||
const std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>>& cleanupTransformations,
|
||||
const std::vector<StandaloneCleanup>& standaloneCleanupTransformations);
|
||||
|
||||
void setUpdatePrecisions(const bool updatePrecisions);
|
||||
void setQuantizedTensorAlignmentOnActivations(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnActivations);
|
||||
void setQuantizedTensorAlignmentOnWeights(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnWeights);
|
||||
|
||||
/**
|
||||
* Remove branch specific transformation. Transformation type and operation type are required.
|
||||
* Operation type is used to find transformation by operation during precision definition.
|
||||
*/
|
||||
template <class Transformation, class Operation>
|
||||
LowPrecisionTransformations& removeBranchSpecific() {
|
||||
const std::string operationType = getType<Operation>();
|
||||
const std::string transformationType = typeid(Transformation).name();
|
||||
|
||||
for (auto it = branchSpecificTransformations.begin(); it != branchSpecificTransformations.end(); ++it) {
|
||||
const auto& tranformationPtr = *it->second;
|
||||
if ((it->first == operationType) && (typeid(tranformationPtr).name() == transformationType)) {
|
||||
branchSpecificTransformations.erase(it);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove transformation. Transformation type and operation type are required.
|
||||
* Operation type is used to find transformation by operation during precision definition.
|
||||
*/
|
||||
template <class Transformation, class Operation>
|
||||
LowPrecisionTransformations& remove() {
|
||||
const std::string operationType = getType<Operation>();
|
||||
const std::string transformationType = typeid(Transformation).name();
|
||||
|
||||
for (auto it = transformations.begin(); it != transformations.end(); ++it) {
|
||||
const auto& tranformationPtr = *it->second;
|
||||
if ((it->first == operationType) && (typeid(tranformationPtr).name() == transformationType)) {
|
||||
transformations.erase(it);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove cleanup transformation. Transformation type and operation type are required.
|
||||
* Operation type is used to find transformation by operation during precision definition.
|
||||
*/
|
||||
template <class Transformation, class Operation>
|
||||
LowPrecisionTransformations& removeCleanup() {
|
||||
const std::string operationType = getType<Operation>();
|
||||
const std::string transformationType = typeid(Transformation).name();
|
||||
|
||||
const auto it = cleanupTransformations.find(operationType);
|
||||
if (it != cleanupTransformations.end()) {
|
||||
const auto it1 = std::find_if(it->second.begin(), it->second.end(),
|
||||
[&](const std::pair<std::string, LayerTransformationPtr>& transformation) {
|
||||
return transformation.first == transformationType;
|
||||
});
|
||||
if (it1 != it->second.end()) {
|
||||
it->second.erase(it1);
|
||||
if (it->second.empty()) {
|
||||
cleanupTransformations.erase(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove standalone cleanup transformation. Transformation type and operation type are required.
|
||||
* Operation type is used to find transformation by operation during precision definition.
|
||||
*/
|
||||
template <class Transformation, class Operation>
|
||||
LowPrecisionTransformations& removeStandaloneCleanup() {
|
||||
const std::string operationType = getType<Operation>();
|
||||
const std::string transformationType = typeid(Transformation).name();
|
||||
|
||||
for (auto it = standaloneCleanupTransformations.begin(); it != standaloneCleanupTransformations.end(); ++it) {
|
||||
const auto& standaloneCleanup = *it;
|
||||
if ((operationType == standaloneCleanup.typeName) && (transformationType == standaloneCleanup.typeId)) {
|
||||
standaloneCleanupTransformations.erase(it);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <class Transformation, class Operation>
|
||||
LowPrecisionTransformations& removeAll() {
|
||||
removeBranchSpecific<Transformation, Operation>();
|
||||
remove<Transformation, Operation>();
|
||||
removeCleanup<Transformation, Operation>();
|
||||
removeStandaloneCleanup<Transformation, Operation>();
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add branch specific transformation. Transformation type and operation type are required.
|
||||
* Operation type is used to find transformation by operation during precision definition.
|
||||
*/
|
||||
template <class Transformation, class Operation>
|
||||
LowPrecisionTransformations& addBranchSpecific(const LayerTransformation::Params& params) {
|
||||
const std::string typeName = getType<Operation>();
|
||||
const auto it = branchSpecificTransformations.find(typeName);
|
||||
if (it != branchSpecificTransformations.end()) {
|
||||
branchSpecificTransformations.erase(it);
|
||||
}
|
||||
|
||||
branchSpecificTransformations.emplace(typeName, std::make_shared<Transformation>(params));
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add decomposition transformation. Transformation type and operation type are required.
|
||||
* Operation type is used to find transformation by operation during precision definition.
|
||||
*/
|
||||
template <class Transformation, class Operation>
|
||||
LowPrecisionTransformations& addDecomposition(const LayerTransformation::Params& params) {
|
||||
const std::string typeName = getType<Operation>();
|
||||
const auto it = decompositionTransformations.find(typeName);
|
||||
if (it != decompositionTransformations.end()) {
|
||||
decompositionTransformations.erase(it);
|
||||
}
|
||||
|
||||
decompositionTransformations.emplace(typeName, std::make_shared<Transformation>(params));
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add transformation. Transformation type and operation type are required.
|
||||
* Operation type is used to find transformation by operation during precision definition.
|
||||
*/
|
||||
template <class Transformation, class Operation>
|
||||
LowPrecisionTransformations& add(const LayerTransformation::Params& params) {
|
||||
const std::string typeName = getType<Operation>();
|
||||
const auto it = transformations.find(typeName);
|
||||
if (it != transformations.end()) {
|
||||
transformations.erase(it);
|
||||
}
|
||||
|
||||
transformations.emplace(typeName, std::make_shared<Transformation>(params));
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add cleanup transformation. Transformation type and operation type are required.
|
||||
* Operation type is used to find transformation by operation during precision definition.
|
||||
*/
|
||||
template <class Transformation, class Operation>
|
||||
LowPrecisionTransformations& addCleanup(const LayerTransformation::Params& params) {
|
||||
const std::string typeName = getType<Operation>();
|
||||
const std::string typeId = typeid(Transformation).name();
|
||||
const auto it = cleanupTransformations.find(typeName);
|
||||
if (it == cleanupTransformations.end()) {
|
||||
cleanupTransformations.emplace(typeName,
|
||||
std::vector<std::pair<std::string, LayerTransformationPtr>>{ std::make_pair(typeId, std::make_shared<Transformation>(params)) });
|
||||
} else {
|
||||
const auto it1 = std::find_if(it->second.begin(), it->second.end(),
|
||||
[&](const std::pair<std::string, LayerTransformationPtr>& transformation) {
|
||||
return transformation.first == typeName;
|
||||
});
|
||||
if (it1 != it->second.end()) {
|
||||
it->second.erase(it1);
|
||||
}
|
||||
it->second.emplace_back(std::make_pair(typeId, std::make_shared<Transformation>(params)));
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add cleanup transformation. Transformation type and operation type are required.
|
||||
* Operation type is used to find transformation by operation during precision definition.
|
||||
*/
|
||||
template <class Transformation, class Operation>
|
||||
LowPrecisionTransformations& addStandaloneCleanup(const LayerTransformation::Params& params) {
|
||||
const std::string typeName = getType<Operation>();
|
||||
const std::string typeId = typeid(Transformation).name();
|
||||
const auto it = std::find_if(standaloneCleanupTransformations.begin(), standaloneCleanupTransformations.end(),
|
||||
[&](const StandaloneCleanup& transformation) {
|
||||
return transformation.typeName == typeName && transformation.typeId == typeId;
|
||||
});
|
||||
if (it == standaloneCleanupTransformations.end()) {
|
||||
standaloneCleanupTransformations.emplace_back(StandaloneCleanup{ typeName, typeId, std::make_shared<Transformation>(params) });
|
||||
} else {
|
||||
*it = { typeName, typeId, std::make_shared<Transformation>(params) };
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <class Operation>
|
||||
static std::string getType() {
|
||||
return Operation::get_type_info_static().name;
|
||||
}
|
||||
|
||||
static std::string getType(const Node& operation) {
|
||||
return operation.get_type_name();
|
||||
}
|
||||
|
||||
std::vector<LayerTransformationPtr> find(const std::string& transformationName) const;
|
||||
|
||||
template <class Operation>
|
||||
std::vector<LayerTransformationPtr> find() const {
|
||||
const std::string transformationKey = getType<Operation>();
|
||||
return find(transformationKey);
|
||||
}
|
||||
|
||||
void setParamsManager(IParamsManager* paramsManager) noexcept;
|
||||
void setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept;
|
||||
|
||||
// Key is not a layer type, but just a name of transformation
|
||||
// Layer type (or a pattern) is defined by transformation itself as an ngraph matcher
|
||||
std::map<std::string, LayerTransformationPtr> branchSpecificTransformations;
|
||||
std::map<std::string, LayerTransformationPtr> decompositionTransformations;
|
||||
std::map<std::string, LayerTransformationPtr> transformations;
|
||||
std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> cleanupTransformations;
|
||||
std::vector<StandaloneCleanup> standaloneCleanupTransformations;
|
||||
|
||||
private:
|
||||
static void setParamsManager(IParamsManager* paramsManager, std::map<std::string, LayerTransformationPtr>& transformations) noexcept;
|
||||
static void setParamsManager(
|
||||
IParamsManager* paramsManager,
|
||||
std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>>& transformations) noexcept;
|
||||
static void setParamsManager(IParamsManager* paramsManager, std::vector<StandaloneCleanup>& transformations) noexcept;
|
||||
static void setLayerTransformationsManager(
|
||||
ILayerTransformationsManager* layerTransformationsManager,
|
||||
std::map<std::string, LayerTransformationPtr>& transformations) noexcept;
|
||||
static void setLayerTransformationsManager(
|
||||
ILayerTransformationsManager* layerTransformationsManager,
|
||||
std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>>& transformations) noexcept;
|
||||
static void setLayerTransformationsManager(
|
||||
ILayerTransformationsManager* layerTransformationsManager,
|
||||
std::vector<StandaloneCleanup>& transformations) noexcept;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief low precision transformation component.
|
||||
*/
|
||||
// Applies a LowPrecisionTransformations set to a function and implements the
// IParamsManager / ILayerTransformationsManager callbacks.
// NOTE(review): only IParamsManager is inherited publicly; ILayerTransformationsManager has no access
// specifier and is therefore a private base of this class - confirm this is intended.
class TRANSFORMATIONS_API LowPrecisionTransformer : public IParamsManager, ILayerTransformationsManager {
|
||||
public:
|
||||
// returns a transformation set built from `params` (implemented out of line)
static LowPrecisionTransformations getAllTransformations(const LayerTransformation::Params& params = LayerTransformation::Params());
|
||||
|
||||
// implemented out of line; used by callers to decide whether to run low precision transformations
static bool isFunctionQuantized(const std::shared_ptr<const Function>& function);
|
||||
|
||||
LowPrecisionTransformer();
|
||||
LowPrecisionTransformer(const LowPrecisionTransformations& transformations);
|
||||
// runs the transformations on `network` (implemented out of line)
void transform(std::shared_ptr<Function> network);
|
||||
|
||||
// IParamsManager interface implementation
|
||||
std::vector<element::Type> getPrecisionsOnActivations(const Node& op) const noexcept override;
|
||||
|
||||
// ILayerTransformationsManager interface implementation
|
||||
bool isQuantized(const std::shared_ptr<Node>& layer) const noexcept override;
|
||||
bool isPrecisionPreserved(const std::shared_ptr<Node>& layer) const noexcept override;
|
||||
|
||||
private:
|
||||
// transformation set used by this transformer
LowPrecisionTransformations transformations;
|
||||
|
||||
// NOTE(review): both overloads take the map by value, so every call copies it - confirm this is intended
void registerAllMatchers(
|
||||
std::map<std::string, LayerTransformationPtr> transformations,
|
||||
GraphRewrite& pass,
|
||||
TransformationContext& context);
|
||||
|
||||
void registerAllMatchers(
|
||||
std::map<std::string, std::vector<std::pair<std::string, LayerTransformationPtr>>> transformations,
|
||||
GraphRewrite& pass,
|
||||
TransformationContext& context);
|
||||
};
|
||||
|
||||
// GraphRewrite pass; its matchers are set up in the constructor, which is implemented out of line.
// NOTE(review): presumably replaces operations with their TypeRelaxed counterparts - confirm in the .cpp.
class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite {
|
||||
public:
|
||||
TypeRelaxedReplacer();
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -12,11 +12,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API TransparentBaseTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API TransparentBaseTransformation : public LayerTransformation {
|
||||
public:
|
||||
TransparentBaseTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~TransparentBaseTransformation() override {};
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
};
|
||||
|
||||
|
@ -12,12 +12,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API TransposeTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API TransposeTransformation : public LayerTransformation {
|
||||
public:
|
||||
TransposeTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
~TransposeTransformation() override {}
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
TransposeTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
|
||||
};
|
||||
|
@ -11,11 +11,11 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API UnsqueezeTransformation : public LayerTransformation {
|
||||
class LP_TRANSFORMATIONS_API UnsqueezeTransformation : public LayerTransformation {
|
||||
public:
|
||||
UnsqueezeTransformation(const Params& params);
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
UnsqueezeTransformation(const Params& params = Params());
|
||||
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
};
|
||||
|
@ -0,0 +1,107 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/pass/pass.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/lpt_itt.hpp"
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
template <typename AttributeType, typename OperationType>
|
||||
class UpdateSharedPrecisionPreserved;
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
||||
template <typename AttributeType, typename ExpectedAttributeType = AttributeType>
|
||||
class ngraph::pass::low_precision::UpdateSharedPrecisionPreserved : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
UpdateSharedPrecisionPreserved() {
|
||||
ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) {
|
||||
auto node = m.get_match_root();
|
||||
|
||||
const bool needToCheckExpectedAttributeType = !std::is_same<ExpectedAttributeType, AttributeType>::value;
|
||||
if (!needToCheckExpectedAttributeType) {
|
||||
// expected attribute is ignored, set attributes for node inputs except Result & FakeQuantize operations
|
||||
if (is_type<ngraph::opset1::Result>(node) ||
|
||||
is_type<ngraph::opset1::FakeQuantize>(node) ||
|
||||
transformation_callback(node)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (ngraph::pass::low_precision::NetworkHelper::isPrecisionPreserved(node) || is_type<opset1::FakeQuantize>(node)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
{
|
||||
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "UpdateSharedPrecisionPreserved");
|
||||
|
||||
// TODO: check if node can be quantized, if not, then doesn't update
|
||||
for (auto input : node->inputs()) {
|
||||
auto precisionsAttributeWrapper = getAttribute<PrecisionsAttributePtr>(input);
|
||||
if (precisionsAttributeWrapper != nullptr) {
|
||||
const auto precisionsAttribute = precisionsAttributeWrapper->get();
|
||||
assert(precisionsAttribute != nullptr);
|
||||
if (precisionsAttribute->sharedValue->precisions.empty()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto input : node->inputs()) {
|
||||
if (needToCheckExpectedAttributeType) {
|
||||
if (getAttribute<ExpectedAttributeType>(input) == nullptr) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
auto parentAttribute = getSourceAttribute(input);
|
||||
if (parentAttribute == nullptr) {
|
||||
continue;
|
||||
}
|
||||
|
||||
parentAttribute->get()->sharedValue->value = true;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
auto matcher = std::make_shared<ngraph::pattern::Matcher>(pattern::any_input(), "PropagateThroughPrecisionPreserved");
|
||||
this->register_matcher(matcher, callback);
|
||||
}
|
||||
|
||||
private:
|
||||
Input<Node> getDequantizationInput(const Input<Node>& input) {
|
||||
const auto dequantization = NetworkHelper::getDequantization(input.get_node()->shared_from_this(), input.get_index());
|
||||
if (!dequantization.empty() &&
|
||||
(is_type<opset1::Convert>(dequantization.data.get_node())) &&
|
||||
is_type<opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
|
||||
assert(dequantization.data.get_target_inputs().size() == 1ul);
|
||||
return *dequantization.data.get_target_inputs().begin();
|
||||
}
|
||||
return input;
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::VariantWrapper<AttributeType>> getSourceAttribute(const Input<Node>& input) {
|
||||
const auto dequantizationInput = getDequantizationInput(input);
|
||||
const auto output = dequantizationInput.get_source_output();
|
||||
auto attribute = ngraph::pass::low_precision::getAttribute<AttributeType>(output.get_node()->shared_from_this());
|
||||
if (attribute == nullptr) {
|
||||
attribute = ngraph::pass::low_precision::getAttribute<AttributeType>(output.get_node_shared_ptr());
|
||||
}
|
||||
return attribute;
|
||||
}
|
||||
};
|
@ -13,10 +13,10 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API VariadicSplitTransformation : public SplitTransformation {
|
||||
class LP_TRANSFORMATIONS_API VariadicSplitTransformation : public SplitTransformation {
|
||||
public:
|
||||
VariadicSplitTransformation(const Params& params);
|
||||
void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
VariadicSplitTransformation(const Params& params = Params());
|
||||
};
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
|
@ -13,21 +13,30 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
class TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransformation{
|
||||
class LP_TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransformation{
|
||||
public:
|
||||
WeightableLayerTransformation(const Params& params);
|
||||
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
bool canConvolutionBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const;
|
||||
bool isQuantized(std::shared_ptr<Node> layer, bool reshapeIsRequired) const noexcept;
|
||||
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
|
||||
// Precision check for the activation input of a weightable layer.
// Currently a stub: neither `node` nor `supportedPrecisionsOnActivations`
// is inspected and the result is always true.
static bool checkPrecisionOnActivation(
|
||||
const std::shared_ptr<const ngraph::Node>& node,
|
||||
const std::vector<ngraph::element::Type>& supportedPrecisionsOnActivations) {
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool isQuantizedStatic(const std::shared_ptr<const Node>& layer, const bool reshapeIsRequired) noexcept;
|
||||
|
||||
protected:
|
||||
void decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& weightableLayer, size_t outChannelsShapeIndex = 0ul) const;
|
||||
bool decomposeFakeQuantizeForWeightsPath(const std::shared_ptr<Node>& weightableLayer, size_t outChannelsShapeIndex = 0ul) const;
|
||||
static bool isGroup(const std::shared_ptr<Node>& node);
|
||||
static bool isDepthwise(const std::shared_ptr<Node>& node);
|
||||
|
||||
std::shared_ptr<opset1::FakeQuantize> getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node) const;
|
||||
DataPrecision getDataPrecisionOnWeights(const std::shared_ptr<Node>& node) const;
|
||||
public:
|
||||
static std::shared_ptr<opset1::FakeQuantize> getFakeQuantizeOnWeights(const std::shared_ptr<Node>& node);
|
||||
static DataPrecision getDataPrecisionOnWeights(const std::shared_ptr<Node>& node);
|
||||
static bool isAsymmetricOnWeights(const std::shared_ptr<const Node>& node);
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include "ngraph_ops/type_relaxed.hpp"
|
||||
|
||||
#include "low_precision/common/ie_lpt_exception.hpp"
|
||||
@ -20,6 +21,8 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(AddTransformation, "AddTransformation", 0);
|
||||
|
||||
std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>& op) {
|
||||
// TODO: separate this part to standalone transformation: AddToSubtractTransformation
|
||||
// motivation:
|
||||
@ -88,11 +91,22 @@ std::shared_ptr<opset1::Subtract> fuseWithSubtract(const std::shared_ptr<Node>&
|
||||
return newSubtract;
|
||||
}
|
||||
|
||||
void AddTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
|
||||
addSingleNodePattern<opset1::Add>(pass, context);
|
||||
// Builds the matcher pass: every opset1::Add node is offered to transform(),
// unless transformation_callback() returns true for it.
AddTransformation::AddTransformation(const Params& params) : EltwiseBaseTransformation(params) {
    auto addPattern = ngraph::pattern::wrap_type<opset1::Add>();

    ngraph::graph_rewrite_callback onMatch = [this](pattern::Matcher& match) {
        const auto root = match.get_match_root();
        // A true result from transformation_callback means "skip this node".
        return !transformation_callback(root) && transform(*context, match);
    };

    auto addMatcher = std::make_shared<ngraph::pattern::Matcher>(addPattern, "AddTransformation");
    this->register_matcher(addMatcher, onMatch);
}
|
||||
|
||||
bool AddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
|
||||
bool AddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
||||
std::shared_ptr<opset1::Add> op = as_type_ptr<opset1::Add>(m.get_match_root());
|
||||
if ((op == nullptr) || (!canBeTransformed(context, op))) {
|
||||
return false;
|
||||
|
@ -0,0 +1,25 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/align_quantization_intervals.hpp"
|
||||
#include <memory>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include "low_precision/create_attribute.hpp"
|
||||
#include "low_precision/propagate_through_precision_preserved.hpp"
|
||||
#include "low_precision/rt_info/intervals_alignment_attribute.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace ngraph::pass::low_precision;
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::AlignQuantizationIntervals, "AlignQuantizationIntervals", 0);
|
||||
|
||||
// Runs a nested pass manager over `f` with a single GraphRewrite that holds two
// matchers: one creating IntervalsAlignmentAttributePtr for opset1::FakeQuantize
// and one propagating IntervalsAlignmentAttribute through precision-preserved
// operations. Per-pass validation is switched off for the nested manager.
//
// Always returns false, whatever the nested passes did.
bool ngraph::pass::low_precision::AlignQuantizationIntervals::run_on_function(std::shared_ptr<ngraph::Function> f) {
    ngraph::pass::Manager nestedManager;
    nestedManager.set_per_pass_validation(false);

    const std::shared_ptr<ngraph::pass::GraphRewrite> rewrite = nestedManager.register_pass<ngraph::pass::GraphRewrite>();
    // Registration order is kept: attribute creation first, propagation second.
    rewrite->add_matcher<low_precision::CreateAttribute<IntervalsAlignmentAttributePtr, opset1::FakeQuantize>>();
    rewrite->add_matcher<low_precision::PropagateThroughPrecisionPreserved<IntervalsAlignmentAttribute>>();

    nestedManager.run_passes(f);
    return false;
}
|
@ -0,0 +1,27 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/align_quantization_parameters.hpp"
|
||||
#include <memory>
|
||||
#include "low_precision/create_attribute.hpp"
|
||||
#include "low_precision/propagate_through_precision_preserved.hpp"
|
||||
#include "low_precision/rt_info/quantization_alignment_attribute.hpp"
|
||||
#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp"
|
||||
#include "low_precision/update_shared_precision_preserved.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace ngraph::pass::low_precision;
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::AlignQuantizationParameters, "AlignQuantizationParameters", 0);
|
||||
|
||||
// Runs a nested pass manager over `f` with a single GraphRewrite that holds
// three matchers, registered in this order:
//   1. CreateAttribute<QuantizationAlignmentAttributePtr>
//   2. PropagateThroughPrecisionPreserved<QuantizationAlignmentAttribute>
//   3. UpdateSharedPrecisionPreserved<QuantizationAlignmentAttributePtr, PerTensorQuantizationAttribute>
// Per-pass validation is switched off for the nested manager.
//
// Always returns false, whatever the nested passes did.
bool ngraph::pass::low_precision::AlignQuantizationParameters::run_on_function(std::shared_ptr<ngraph::Function> f) {
    ngraph::pass::Manager nestedManager;
    nestedManager.set_per_pass_validation(false);

    const std::shared_ptr<ngraph::pass::GraphRewrite> rewrite = nestedManager.register_pass<ngraph::pass::GraphRewrite>();
    rewrite->add_matcher<low_precision::CreateAttribute<QuantizationAlignmentAttributePtr>>();
    rewrite->add_matcher<low_precision::PropagateThroughPrecisionPreserved<QuantizationAlignmentAttribute>>();
    rewrite->add_matcher<low_precision::UpdateSharedPrecisionPreserved<QuantizationAlignmentAttributePtr, PerTensorQuantizationAttribute>>();

    nestedManager.run_passes(f);
    return false;
}
|
@ -7,39 +7,39 @@
|
||||
#include <memory>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/rt_info/precision_preserved_attribute.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::AvgPoolTransformation, "AvgPoolTransformation", 0);
|
||||
|
||||
// Builds the matcher pass: an opset1::AvgPool whose input is an opset1::Multiply
// is offered to transform(), unless transformation_callback() returns true for it.
AvgPoolTransformation::AvgPoolTransformation(const Params& params) : LayerTransformation(params) {
    auto avgPoolPattern = pattern::wrap_type<opset1::AvgPool>({ pattern::wrap_type<opset1::Multiply>() });

    ngraph::graph_rewrite_callback onMatch = [this](pattern::Matcher& match) {
        const auto root = match.get_match_root();
        // A true result from transformation_callback means "skip this node".
        return !transformation_callback(root) && transform(*context, match);
    };

    auto avgPoolMatcher = std::make_shared<ngraph::pattern::Matcher>(avgPoolPattern, "AvgPoolTransformation");
    this->register_matcher(avgPoolMatcher, onMatch);
}
|
||||
|
||||
void AvgPoolTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
|
||||
addPattern(
|
||||
pass,
|
||||
context,
|
||||
make_op_pattern<opset1::AvgPool>({ make_op_label<opset1::Multiply>() }));
|
||||
}
|
||||
|
||||
bool AvgPoolTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
|
||||
bool AvgPoolTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
||||
if (!canBeTransformed(context, m.get_match_root())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const std::shared_ptr<Node> pooling = NetworkHelper::separateInStandaloneBranch(m.get_match_root());
|
||||
|
||||
const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(pooling);
|
||||
|
||||
bool updatePrecision;
|
||||
if ((children.size() == 1ul) && (!this->layerTransformationsManager->isQuantized(children[0]))) {
|
||||
updatePrecision = false;
|
||||
} else {
|
||||
updatePrecision = NetworkHelper::notAllChildrensAreFQ(children);
|
||||
}
|
||||
|
||||
const bool updatePrecision = isPrecisionPreserved(pooling);
|
||||
moveDequantizationAfter(context, pooling, NetworkHelper::getDequantization(pooling), updatePrecision);
|
||||
return true;
|
||||
}
|
||||
@ -55,8 +55,7 @@ bool AvgPoolTransformation::canBeTransformed(const TransformationContext& contex
|
||||
}
|
||||
|
||||
bool AvgPoolTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
|
||||
const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(layer);
|
||||
return NetworkHelper::notAllChildrensAreFQ(children);
|
||||
return NetworkHelper::isPrecisionPreserved(layer);
|
||||
}
|
||||
|
||||
} // namespace low_precision
|
||||
|
@ -0,0 +1,13 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/base_matcher_pass.hpp"
|
||||
#include <ngraph/node.hpp>
|
||||
#include "low_precision/rt_info/attribute_parameters.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace ngraph::pass::low_precision;
|
||||
|
||||
// Stores the supplied attribute parameters in the `params` member; no other work is done.
ngraph::pass::low_precision::BaseMatcherPass::BaseMatcherPass(const AttributeParameters& params) : params(params) {
|
||||
}
|
@ -6,21 +6,32 @@
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
ClampTransformation::ClampTransformation(const Params& params) : LayerTransformation(params) {}
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ClampTransformation, "ClampTransformation", 0);
|
||||
|
||||
void ClampTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const {
|
||||
addPattern(pass,
|
||||
context,
|
||||
make_op_pattern<opset1::Clamp>({ make_op_label<opset1::Multiply>() }));
|
||||
// Builds the matcher pass: an opset1::Clamp whose input is an opset1::Multiply
// is offered to transform(), unless transformation_callback() returns true for it.
ClampTransformation::ClampTransformation(const Params& params) : LayerTransformation(params) {
    auto clampPattern = pattern::wrap_type<opset1::Clamp>({ pattern::wrap_type<opset1::Multiply>() });

    ngraph::graph_rewrite_callback onMatch = [this](pattern::Matcher& match) {
        const auto root = match.get_match_root();
        // A true result from transformation_callback means "skip this node".
        return !transformation_callback(root) && transform(*context, match);
    };

    auto clampMatcher = std::make_shared<ngraph::pattern::Matcher>(clampPattern, "ClampTransformation");
    this->register_matcher(clampMatcher, onMatch);
}
|
||||
|
||||
bool ClampTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) const {
|
||||
bool ClampTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) {
|
||||
auto subWithTheSameValues = [](std::shared_ptr<ngraph::opset1::Subtract> sub) {
|
||||
if (sub == nullptr) {
|
||||
return false;
|
||||
|
@ -0,0 +1,19 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/common/operation_precision_restriction.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_set>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/pattern/op/or.hpp>
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/rt_info/precisions_attribute.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
|
@ -11,11 +11,11 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
|
||||
#include "low_precision/common/fake_quantize_dequantization.hpp"
|
||||
#include "low_precision/common/ie_lpt_exception.hpp"
|
||||
#include "low_precision/common/subgraph.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
@ -23,218 +23,155 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
void ConcatTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const {
|
||||
addSingleNodePattern<opset1::Concat>(pass, context);
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConcatTransformation, "ConcatTransformation", 0);
|
||||
|
||||
// Builds the matcher pass: every opset1::Concat node is offered to transform(),
// unless transformation_callback() returns true for it.
ConcatTransformation::ConcatTransformation(const Params& params) : LayerTransformation(params) {
    auto concatPattern = ngraph::pattern::wrap_type<opset1::Concat>();

    ngraph::graph_rewrite_callback onMatch = [this](pattern::Matcher& match) {
        const auto root = match.get_match_root();
        // A true result from transformation_callback means "skip this node".
        return !transformation_callback(root) && transform(*context, match);
    };

    auto concatMatcher = std::make_shared<ngraph::pattern::Matcher>(concatPattern, "ConcatTransformation");
    this->register_matcher(concatMatcher, onMatch);
}
|
||||
|
||||
bool ConcatTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
|
||||
bool ConcatTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
||||
std::shared_ptr<ngraph::opset1::Concat> concat = ngraph::as_type_ptr<ngraph::opset1::Concat>(m.get_match_root());
|
||||
if (!canBeTransformed(context, concat)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ngraph::pass::low_precision::Subgraph subgraph(layerTransformationsManager);
|
||||
std::unordered_set<std::string> handledLayers;
|
||||
if (!subgraph.fillSubgraphForConcat(concat, handledLayers)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (subgraph.quantizationLayers.empty() || isHandled(context, subgraph.quantizationLayers)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Concat operations precision is defined:
|
||||
// 1. consumers after Concat
|
||||
// 2. FakeQuantize precisions without zero point
|
||||
ngraph::Node& quantizationLayer = *subgraph.quantizationLayers[0];
|
||||
std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(quantizationLayer.shared_from_this());
|
||||
if (!NetworkHelper::isQuantizeSupported(fq)) {
|
||||
return false;
|
||||
}
|
||||
DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);
|
||||
if (dataPrecision.precision == ngraph::element::undefined) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<element::Type> concatChildrenPrecisions = precisionsOnActivations;
|
||||
|
||||
for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
|
||||
fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
|
||||
if (fq == nullptr) {
|
||||
std::vector<FakeQuantizeDequantization> layerDequantizations;
|
||||
layerDequantizations.reserve(concat->get_input_size());
|
||||
for (size_t parentIndex = 0ul; parentIndex < concat->get_input_size(); parentIndex++) {
|
||||
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(concat, parentIndex);
|
||||
if (dequantization.empty()) {
|
||||
return false;
|
||||
}
|
||||
layerDequantizations.push_back(dequantization);
|
||||
}
|
||||
|
||||
if (!NetworkHelper::isQuantizeSupported(fq)) {
|
||||
return false;
|
||||
bool allDequantizationShiftAreZero = true;
|
||||
bool allDequantizationMultiplyAreZero = true;
|
||||
for (const auto& dequantization : layerDequantizations) {
|
||||
if (dequantization.subtract != nullptr) {
|
||||
allDequantizationShiftAreZero = false;
|
||||
}
|
||||
|
||||
const QuantizationDetails& quantizationDetails = QuantizationDetails::getDetails(fq);
|
||||
|
||||
// per tensor scale is supported only
|
||||
if (quantizationDetails.inputHighValues.size() != 1ul) {
|
||||
return false;
|
||||
if (dequantization.multiply != nullptr) {
|
||||
allDequantizationMultiplyAreZero = false;
|
||||
}
|
||||
|
||||
// define concatenation operation consumers precisions
|
||||
std::vector<element::Type> fqChildrenPrecisions = precisionsOnActivations;
|
||||
fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrenPrecisions);
|
||||
concatChildrenPrecisions = NetworkHelper::precisionIntersection(concatChildrenPrecisions, fqChildrenPrecisions);
|
||||
if (concatChildrenPrecisions.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// define FakeQuantize precisions without zero point
|
||||
const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false);
|
||||
if (dataPrecision2.precision == ngraph::element::undefined) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dataPrecision.precision != dataPrecision2.precision) {
|
||||
dataPrecision = dataPrecision.precision.is_signed() ? dataPrecision : dataPrecision2;
|
||||
if (!allDequantizationShiftAreZero && !allDequantizationMultiplyAreZero) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (std::find(concatChildrenPrecisions.begin(), concatChildrenPrecisions.end(), dataPrecision.precision) == concatChildrenPrecisions.end()) {
|
||||
dataPrecision = DataPrecision(concatChildrenPrecisions[0]);
|
||||
}
|
||||
auto broadcastElementWiseConst = [](
|
||||
// FakeQuantize constant shape must be broadcastable to the shape on data.
|
||||
std::shared_ptr<ngraph::opset1::Constant> operation,
|
||||
const ngraph::Shape targetShape) -> std::shared_ptr<Node> {
|
||||
auto targetShapeConst = std::make_shared<ngraph::opset1::Constant>(
|
||||
element::i64, ngraph::Shape{ targetShape.size() },
|
||||
targetShape);
|
||||
|
||||
std::vector<QuantizationDetails> quantizationLayersDetails;
|
||||
for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
|
||||
std::shared_ptr<opset1::FakeQuantize> fakeQuantize = as_type_ptr<opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
|
||||
auto newFakeQuantize = NetworkHelper::fuseConvert(fakeQuantize);
|
||||
if (newFakeQuantize == nullptr) {
|
||||
subgraph.quantizationLayers[i] = fakeQuantize;
|
||||
quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize));
|
||||
continue;
|
||||
}
|
||||
auto broadcast = ngraph::pass::low_precision::fold<ngraph::opset1::Broadcast>(
|
||||
operation,
|
||||
targetShapeConst,
|
||||
ngraph::op::AutoBroadcastType::NUMPY);
|
||||
|
||||
fakeQuantize = newFakeQuantize;
|
||||
newFakeQuantize = NetworkHelper::composeFakeQuantize(fakeQuantize);
|
||||
if (newFakeQuantize == nullptr) {
|
||||
subgraph.quantizationLayers[i] = fakeQuantize;
|
||||
quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize));
|
||||
continue;
|
||||
}
|
||||
|
||||
fakeQuantize = newFakeQuantize;
|
||||
subgraph.quantizationLayers[i] = fakeQuantize;
|
||||
quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize));
|
||||
}
|
||||
|
||||
FakeQuantizeDequantization dequantization;
|
||||
|
||||
if ((quantizationLayersDetails[0].inputHighValues.size() == 1)) {
|
||||
float outputLowValue = quantizationLayersDetails[0].outputLowValues[0];
|
||||
float outputHighValue = quantizationLayersDetails[0].outputHighValues[0];
|
||||
|
||||
for (size_t index = 0lu; index < subgraph.quantizationLayers.size(); index++) {
|
||||
const QuantizationDetails& quantizationDetails = quantizationLayersDetails[index];
|
||||
if (outputLowValue > quantizationDetails.outputLowValues[0]) {
|
||||
outputLowValue = quantizationDetails.outputLowValues[0];
|
||||
}
|
||||
if (outputHighValue < quantizationDetails.outputHighValues[0]) {
|
||||
outputHighValue = quantizationDetails.outputHighValues[0];
|
||||
}
|
||||
}
|
||||
|
||||
if ((outputLowValue == 0.f) && (outputHighValue == 0.f)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const float maxOutputInterval = outputHighValue - outputLowValue;
|
||||
if (quantizedTensorAlignmentOnActivations == QuantizedTensorAlignment::UpdateLevel) {
|
||||
const size_t minLevels = getMinQuantizationLevels(
|
||||
dataPrecision,
|
||||
maxOutputInterval,
|
||||
quantizationLayersDetails,
|
||||
outputLowValue,
|
||||
outputHighValue);
|
||||
if (minLevels < this->minQuantizationLevels) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// FQ -> SUB_quantization -> MUL_quantization -[INT8]-> SUB_dequantization -> MUL_dequantization ->
|
||||
const float quantizationMul = (dataPrecision.max - dataPrecision.min) / maxOutputInterval;
|
||||
const float dequantizationMul = maxOutputInterval / (dataPrecision.max - dataPrecision.min);
|
||||
|
||||
// FQ outputLowValue = dataPrecision.min * dequantizationMul - quantizationSub
|
||||
const float quantizationSub = outputLowValue - dataPrecision.min * dequantizationMul;
|
||||
const float dequantizationSub = std::round(-quantizationSub * quantizationMul);
|
||||
|
||||
// 1. get data for dequantization. Dequantization data will be used several times later.
|
||||
dequantization = ngraph::pass::low_precision::NetworkHelper::makeDequantization(
|
||||
dequantizationMul,
|
||||
dequantizationSub,
|
||||
subgraph.quantizationLayers[0]->get_output_element_type(0),
|
||||
subgraph.quantizationLayers[0]->get_output_partial_shape(0),
|
||||
updatePrecisions ? dataPrecision.precision : subgraph.quantizationLayers[0]->get_output_element_type(0),
|
||||
deqPrecision);
|
||||
|
||||
for (size_t index = 0; index < subgraph.quantizationLayers.size(); index++) {
|
||||
std::shared_ptr<ngraph::opset1::FakeQuantize> fakeQuantizeLayer = as_type_ptr<ngraph::opset1::FakeQuantize>(
|
||||
subgraph.quantizationLayers[index]->shared_from_this());
|
||||
|
||||
const QuantizationDetails& quantizationDetails = quantizationLayersDetails[index];
|
||||
|
||||
switch (quantizedTensorAlignmentOnActivations) {
|
||||
case QuantizedTensorAlignment::None: {
|
||||
THROW_TRANSFORMATION_EXCEPTION << "not implemented: " << quantizedTensorAlignmentOnActivations;
|
||||
}
|
||||
case QuantizedTensorAlignment::UpdateLevel: {
|
||||
const float updatedOutputLowValue = (quantizationDetails.outputLowValues[0] - quantizationSub) * quantizationMul;
|
||||
const float updatedOutputHighValue = (quantizationDetails.outputHighValues[0] - quantizationSub) * quantizationMul;
|
||||
|
||||
// 2. update FakeQuantize - one time action
|
||||
std::shared_ptr<opset1::FakeQuantize> newFakeQuantizeLayer = ngraph::pass::low_precision::NetworkHelper::updateFakeQuantize(
|
||||
fakeQuantizeLayer,
|
||||
updatePrecisions ? dataPrecision.precision : fakeQuantizeLayer->get_output_element_type(0),
|
||||
roundf(updatedOutputLowValue),
|
||||
roundf(updatedOutputHighValue));
|
||||
|
||||
const size_t levels = static_cast<size_t>(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0);
|
||||
newFakeQuantizeLayer->set_levels(levels);
|
||||
|
||||
subgraph.quantizationLayers[index] = newFakeQuantizeLayer;
|
||||
subgraph.layers[fakeQuantizeLayer->get_friendly_name()] = newFakeQuantizeLayer;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
THROW_TRANSFORMATION_EXCEPTION << "unexpected value " << quantizedTensorAlignmentOnActivations;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto dequantizationValuesCallback = [&](
|
||||
std::shared_ptr<ngraph::Node> layer,
|
||||
std::shared_ptr<ngraph::Node> child,
|
||||
const std::string originalLayerName,
|
||||
std::vector<FakeQuantizeDequantization>& dequantizationsToConcatenate) {
|
||||
dequantizationsToConcatenate.push_back(dequantization);
|
||||
return broadcast;
|
||||
};
|
||||
|
||||
addDequantizationLayers(context, subgraph, dequantizationValuesCallback);
|
||||
bool someDqInLowPrecision = std::any_of(
|
||||
layerDequantizations.begin(),
|
||||
layerDequantizations.end(),
|
||||
[](const FakeQuantizeDequantization& value) { return value.isLowPrecision(); });
|
||||
|
||||
if (updatePrecisions) {
|
||||
for (const auto it : subgraph.layers) {
|
||||
const std::shared_ptr<ngraph::Node>& node = it.second;
|
||||
if (std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(node) != nullptr) {
|
||||
ngraph::pass::low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(node->shared_from_this(), dataPrecision.precision);
|
||||
} else {
|
||||
// set precision to explicitly to have updated precision during transformation
|
||||
for (size_t i = 0; i < node->get_output_size(); ++i) {
|
||||
node->set_output_type(i, dataPrecision.precision, node->get_output_partial_shape(i));
|
||||
}
|
||||
}
|
||||
bool someDqInFpPrecision = std::any_of(
|
||||
layerDequantizations.begin(),
|
||||
layerDequantizations.end(),
|
||||
[](const FakeQuantizeDequantization& value) { return !value.isLowPrecision(); });
|
||||
|
||||
bool DqWithDifferentPrecision = someDqInLowPrecision && someDqInFpPrecision;
|
||||
|
||||
OutputVector dataNodes;
|
||||
NodeVector convertNodes;
|
||||
NodeVector subtractNodes;
|
||||
NodeVector multiplyNodes;
|
||||
for (size_t i = 0; i < layerDequantizations.size(); ++i) {
|
||||
const auto& dequantization = layerDequantizations[i];
|
||||
|
||||
if (DqWithDifferentPrecision && dequantization.isLowPrecision()) {
|
||||
dataNodes.push_back(dequantization.convert);
|
||||
} else {
|
||||
dataNodes.push_back(dequantization.data);
|
||||
}
|
||||
|
||||
if (dequantization.convert != nullptr) {
|
||||
convertNodes.push_back(dequantization.convert);
|
||||
}
|
||||
|
||||
Shape targetShape(concat->get_input_partial_shape(i).rank().get_length(), 1ul);
|
||||
targetShape[1] = concat->get_input_partial_shape(i)[1].get_length();
|
||||
|
||||
if (!allDequantizationShiftAreZero) {
|
||||
subtractNodes.push_back(dequantization.subtract == nullptr ?
|
||||
std::make_shared<ngraph::opset1::Constant>(deqPrecision, targetShape, std::vector<float>({ 0.f })) :
|
||||
broadcastElementWiseConst(dequantization.subtractConstant, targetShape));
|
||||
}
|
||||
|
||||
if (!allDequantizationMultiplyAreZero) {
|
||||
multiplyNodes.push_back(dequantization.multiply == nullptr ?
|
||||
std::make_shared<ngraph::opset1::Constant>(deqPrecision, targetShape, std::vector<float>({ 1.0f })) :
|
||||
broadcastElementWiseConst(dequantization.multiplyConstant, targetShape));
|
||||
}
|
||||
}
|
||||
|
||||
for (const std::shared_ptr<ngraph::Node>& quantizationLayer : subgraph.quantizationLayers) {
|
||||
context.quantizedFakeQuantizeNames.insert(quantizationLayer->get_friendly_name());
|
||||
const auto newConcat = concat->clone_with_new_inputs(dataNodes);
|
||||
|
||||
std::shared_ptr<ngraph::Node> lastDequantization = newConcat;
|
||||
if (!convertNodes.empty()) {
|
||||
const auto convert = convertNodes[0]->clone_with_new_inputs({ newConcat });
|
||||
|
||||
NetworkHelper::copyInfo({ concat, convert }, convert);
|
||||
lastDequantization = convert;
|
||||
}
|
||||
|
||||
// concatenation axis is 1
|
||||
if (!subtractNodes.empty()) {
|
||||
const auto subtract = std::make_shared<DequantizationSubtract>(
|
||||
lastDequantization,
|
||||
NetworkHelper::toScalarIfPossible(subtractNodes.size() == 1ul ?
|
||||
subtractNodes[0] :
|
||||
ngraph::pass::low_precision::fold<ngraph::opset1::Concat>(subtractNodes, 1)));
|
||||
|
||||
NetworkHelper::copyInfo({ concat, subtract }, subtract);
|
||||
lastDequantization = subtract;
|
||||
}
|
||||
|
||||
if (!multiplyNodes.empty()) {
|
||||
const auto multiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
DequantizationMultiply(
|
||||
lastDequantization,
|
||||
NetworkHelper::toScalarIfPossible(multiplyNodes.size() == 1ul ?
|
||||
multiplyNodes[0] :
|
||||
ngraph::pass::low_precision::fold<ngraph::opset1::Concat>(multiplyNodes, 1))),
|
||||
layerDequantizations[0].multiply->get_output_element_type(0));
|
||||
|
||||
NetworkHelper::copyInfo({ concat, multiply }, multiply);
|
||||
lastDequantization = multiply;
|
||||
}
|
||||
|
||||
replace_node(concat, lastDequantization);
|
||||
NetworkHelper::copyInfo(concat, newConcat);
|
||||
updateOutput(context, lastDequantization, newConcat);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -251,6 +188,8 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context
|
||||
const auto axis = concat->get_axis();
|
||||
const auto outPShape = concat->get_output_partial_shape(0);
|
||||
const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, outPShape.rank());
|
||||
|
||||
// TODO: LPT: to support current flow: #58269
|
||||
if (normalizedAxis != 1ul) {
|
||||
return false;
|
||||
}
|
||||
@ -259,6 +198,27 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool perTensorQuantizationIsRequired = normalizedAxis != 1ul;
|
||||
|
||||
element::Type precision;
|
||||
for (size_t i = 0ul; i < concat->get_input_size(); i++) {
|
||||
const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(concat, i);
|
||||
if (dequantization.empty() || (updatePrecisions && !dequantization.isLowPrecision())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (precision == element::undefined) {
|
||||
precision = dequantization.data.get_element_type();
|
||||
} else if (precision != dequantization.data.get_element_type()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (perTensorQuantizationIsRequired &&
|
||||
(((dequantization.subtractConstant != nullptr) && !NetworkHelper::isScalarLike(dequantization.subtractConstant)) ||
|
||||
((dequantization.multiplyConstant != nullptr) && !NetworkHelper::isScalarLike(dequantization.multiplyConstant)))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -338,115 +298,6 @@ std::shared_ptr<Node> ConcatTransformation::concatenateDeqNodes(NodeVector& node
|
||||
return nodes.size() == 1ul ? nodes[0] : fold<ngraph::opset1::Concat>(nodes, 1);
|
||||
}
|
||||
|
||||
void ConcatTransformation::addDequantizationLayers(
|
||||
TransformationContext& context,
|
||||
ngraph::pass::low_precision::Subgraph& subgraph,
|
||||
std::function<void(
|
||||
std::shared_ptr<ngraph::Node> layer,
|
||||
std::shared_ptr<ngraph::Node> child,
|
||||
const std::string originalLayerName,
|
||||
std::vector<FakeQuantizeDequantization>& dequantizationsToConcatenate)> getLayerDequantizationCallback) const {
|
||||
std::unordered_map<std::string, ngraph::Node*> outputs;
|
||||
for (size_t i = 0; i < context.function->get_output_size(); ++i) {
|
||||
ngraph::Node* node = context.function->get_output_op(i).get();
|
||||
if (node->get_input_size() != 1ul) {
|
||||
THROW_IE_LPT_EXCEPTION(*node) << "unexpected inputs count for result node";
|
||||
}
|
||||
|
||||
outputs.emplace(node->get_input_node_shared_ptr(0)->get_friendly_name(), node);
|
||||
}
|
||||
|
||||
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>> notHandledSubgraphLayers = subgraph.layers;
|
||||
while (notHandledSubgraphLayers.size() != 0ul) {
|
||||
const auto layerIt = notHandledSubgraphLayers.begin();
|
||||
std::shared_ptr<ngraph::Node> layer = layerIt->second;
|
||||
notHandledSubgraphLayers.erase(layerIt);
|
||||
|
||||
std::vector<FakeQuantizeDequantization> layerDequantizations;
|
||||
|
||||
for (size_t i = 0; i < layer->get_output_size(); ++i) {
|
||||
const auto childInputs = layer->get_output_target_inputs(i);
|
||||
for (const auto childInput : childInputs) {
|
||||
ngraph::Node& child = *childInput.get_node();
|
||||
|
||||
if (subgraph.layers.find(child.get_friendly_name()) == subgraph.layers.end()) {
|
||||
std::shared_ptr<ngraph::Node> source = layer;
|
||||
const std::shared_ptr<ngraph::Node> destination = child.shared_from_this();
|
||||
|
||||
if (layerDequantizations.size() == 0ul) {
|
||||
// fill layerDequantizations collection
|
||||
getLayerDequantizationCallback(source, destination, source->get_friendly_name(), layerDequantizations);
|
||||
}
|
||||
|
||||
{
|
||||
NodeVector convertNodes;
|
||||
NodeVector subtractNodes;
|
||||
NodeVector multiplyNodes;
|
||||
|
||||
// forming nodes for concatenation
|
||||
fillDequantizationNodes(layerDequantizations, layer, convertNodes, subtractNodes, multiplyNodes);
|
||||
|
||||
// TODO: the second place (first is FQ decomposition) where dequantization operations are inserted
|
||||
if (!convertNodes.empty()) {
|
||||
const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination);
|
||||
std::shared_ptr<ngraph::Node> convert =
|
||||
convertNodes[0]->clone_with_new_inputs({ destination->get_input_source_output(sourceOutputIdx) });
|
||||
|
||||
insert_new_node_between(source, destination, convert);
|
||||
ngraph::copy_runtime_info({ layer, convert }, convert);
|
||||
source = convert;
|
||||
}
|
||||
|
||||
// concatenation axis is 1
|
||||
if (!subtractNodes.empty()) {
|
||||
const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination);
|
||||
std::shared_ptr<ngraph::opset1::Subtract> subtract = std::make_shared<DequantizationSubtract>(
|
||||
destination->get_input_source_output(sourceOutputIdx),
|
||||
NetworkHelper::toScalarIfPossible(concatenateDeqNodes(subtractNodes)));
|
||||
|
||||
insert_new_node_between(source, destination, subtract);
|
||||
ngraph::copy_runtime_info({ layer, subtract }, subtract);
|
||||
source = subtract;
|
||||
}
|
||||
|
||||
if (!multiplyNodes.empty()) {
|
||||
const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination);
|
||||
std::shared_ptr<ngraph::opset1::Multiply> multiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
DequantizationMultiply(
|
||||
destination->get_input_source_output(sourceOutputIdx),
|
||||
NetworkHelper::toScalarIfPossible(concatenateDeqNodes(multiplyNodes))),
|
||||
layerDequantizations[0].multiply->get_output_element_type(0));
|
||||
|
||||
insert_new_node_between(source, destination, multiply);
|
||||
ngraph::copy_runtime_info({ layer, multiply }, multiply);
|
||||
source = multiply;
|
||||
}
|
||||
}
|
||||
|
||||
// first input is used
|
||||
const ngraph::element::Type precision = layerDequantizations[0].data.get_element_type();
|
||||
layer->set_output_type(0, precision, layer->get_output_partial_shape(0));
|
||||
|
||||
const auto it = outputs.find(layer->get_friendly_name());
|
||||
if (it != outputs.end() && is_type<ngraph::opset1::Result>(child.shared_from_this())) {
|
||||
const std::string originalName = layer->get_friendly_name();
|
||||
const std::string newName = layer->get_friendly_name() + LayerTransformation::originalLayerPostfix;
|
||||
layer->set_friendly_name(newName);
|
||||
|
||||
// Split & VariadicSplit have other naming rules
|
||||
if (is_type<opset1::Split>(layer) || is_type<opset1::VariadicSplit>(layer)) {
|
||||
source->set_friendly_name(originalName + "." + std::to_string(i));
|
||||
} else {
|
||||
source->set_friendly_name(originalName);
|
||||
}
|
||||
subgraph.layers[layer->get_friendly_name()] = layer;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool ConcatTransformation::isHandled(const TransformationContext& context, const std::vector<std::shared_ptr<ngraph::Node>>& quantizationOperations) {
|
||||
for (const std::shared_ptr<ngraph::Node>& quantizationLayer : quantizationOperations) {
|
||||
if (context.quantizedFakeQuantizeNames.find(quantizationLayer->get_friendly_name()) != context.quantizedFakeQuantizeNames.end()) {
|
||||
@ -457,32 +308,6 @@ bool ConcatTransformation::isHandled(const TransformationContext& context, const
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t ConcatTransformation::getMinQuantizationLevels(
|
||||
const DataPrecision& dataPrecision,
|
||||
const float maxOutputInterval,
|
||||
const std::vector<QuantizationDetails>& quantizationLayersDetails,
|
||||
const float outputLowValue,
|
||||
const float outputHighValue) const {
|
||||
size_t minLevels = std::numeric_limits<std::size_t>::max();
|
||||
for (const QuantizationDetails quantizationDetails : quantizationLayersDetails) {
|
||||
// if there is negative part then calculation is based on `outputLowValue` if not then on `outputHighValue` only
|
||||
const float updatedOutputLowValue = outputLowValue != 0.f ?
|
||||
(quantizationDetails.outputLowValues[0] / outputLowValue) * dataPrecision.min :
|
||||
(quantizationDetails.outputLowValues[0] / outputHighValue) * dataPrecision.max;
|
||||
|
||||
// if there is positive part then calculation is based on `outputHighValue` if not then on `outputLowValue` only
|
||||
const float updatedOutputHighValue = outputHighValue != 0.f ?
|
||||
(quantizationDetails.outputHighValues[0] / outputHighValue) * dataPrecision.max :
|
||||
(quantizationDetails.outputHighValues[0] / outputLowValue) * dataPrecision.min;
|
||||
|
||||
const size_t levels = static_cast<size_t>(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0);
|
||||
if (minLevels > levels) {
|
||||
minLevels = levels;
|
||||
}
|
||||
}
|
||||
return minLevels;
|
||||
}
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
||||
|
@ -1,334 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/concat_multi_channels.hpp"
|
||||
|
||||
#include <queue>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
|
||||
#include "low_precision/common/fake_quantize_dequantization.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
#include "low_precision/common/ie_lpt_exception.hpp"
|
||||
#include "low_precision/common/subgraph.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
// Returns false as soon as any of the given concat layers has, among the nodes returned by
// getChildrenRecursivelyExceptPrecisionPreserved, a Convolution or ConvolutionBackpropData that
// the layer transformations manager reports as quantized; returns true otherwise.
bool ConcatMultiChannelsTransformation::isMultiChannel(const std::vector<std::shared_ptr<ngraph::opset1::Concat>>& concatLayers) const noexcept {
    // the manager is queried only for convolution-like nodes
    const auto isQuantizedConvolution = [this](const std::shared_ptr<ngraph::Node>& node) {
        const bool convolutionLike =
            is_type<ngraph::opset1::Convolution>(node.get()) ||
            is_type<ngraph::opset1::ConvolutionBackpropData>(node.get());
        return convolutionLike && this->layerTransformationsManager->isQuantized(node);
    };

    for (const std::shared_ptr<ngraph::opset1::Concat>& concatLayer : concatLayers) {
        const std::vector<std::shared_ptr<ngraph::Node>> consumers = getChildrenRecursivelyExceptPrecisionPreserved(concatLayer);
        for (const std::shared_ptr<ngraph::Node>& consumer : consumers) {
            if (isQuantizedConvolution(consumer)) {
                return false;
            }
        }
    }

    return true;
}
|
||||
|
||||
// Forwards `pass` and `context` to addSingleNodePattern instantiated for opset1::Concat.
// NOTE(review): presumably this registers the transformation for every single Concat node in the
// graph - confirm against addSingleNodePattern, which is defined outside this file.
void ConcatMultiChannelsTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const {
|
||||
addSingleNodePattern<opset1::Concat>(pass, context);
|
||||
}
|
||||
|
||||
// Handles a matched Concat together with its quantization subgraph in "multi channel" mode:
//   1. collects the subgraph around the Concat and bails out if it is empty or already handled;
//   2. delegates to ConcatTransformation::transform (and reports false) when the subgraph is not
//      multi-channel according to isMultiChannel;
//   3. selects one common data precision for all FakeQuantize layers of the subgraph;
//   4. for every FakeQuantize: stores its dequantization and replaces it with the updated node;
//   5. inserts dequantization operations on the subgraph boundaries (addDequantizationLayers);
//   6. when `updatePrecisions` is set, assigns the selected precision to all subgraph layers;
//   7. records the handled FakeQuantize names in `context.quantizedFakeQuantizeNames`.
//
// Returns true only when the multi-channel path was applied; false in every other case.
bool ConcatMultiChannelsTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
    std::shared_ptr<ngraph::opset1::Concat> concat = ngraph::as_type_ptr<ngraph::opset1::Concat>(m.get_match_root());
    if (!canBeTransformed(context, concat)) {
        return false;
    }

    ngraph::pass::low_precision::Subgraph subgraph(layerTransformationsManager);
    std::unordered_set<std::string> handledLayers;
    if (!subgraph.fillSubgraphForConcat(concat, handledLayers)) {
        return false;
    }

    if (subgraph.quantizationLayers.empty() || isHandled(context, subgraph.quantizationLayers)) {
        return false;
    }

    if (!isMultiChannel(subgraph.concatLayers)) {
        // the result of the base transformation is intentionally not propagated
        ConcatTransformation::transform(context, m);
        return false;
    }

    DataPrecision dataPrecision;
    {
        std::vector<element::Type> concatChildrenPrecisions = precisionsOnActivations;
        for (auto quantizationLayer : subgraph.quantizationLayers) {
            std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(quantizationLayer->shared_from_this());
            // The cast yields nullptr for a non-FakeQuantize layer, so it has to be checked before
            // `fq` is handed to any helper. Every quantization layer is validated here exactly
            // once, which is why no separate validation pass follows this loop.
            if (fq == nullptr) {
                return false;
            }

            if (!NetworkHelper::isQuantizeSupported(fq)) {
                return false;
            }

            // define concatenation operation consumers precisions
            std::vector<element::Type> fqChildrenPrecisions = precisionsOnActivations;
            fillAvailablePrecisions(quantizationLayer, fqChildrenPrecisions);
            concatChildrenPrecisions = NetworkHelper::precisionIntersection(concatChildrenPrecisions, fqChildrenPrecisions);
            if (concatChildrenPrecisions.empty()) {
                return false;
            }

            // define FakeQuantize precisions without zero point
            const DataPrecision tmp = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);
            if (dataPrecision.precision == ngraph::element::undefined) {
                dataPrecision = tmp;
                continue;
            }

            // u8 wins over a previously selected different precision
            if ((tmp.precision != dataPrecision.precision) && (tmp.precision == ngraph::element::u8)) {
                dataPrecision = tmp;
            }
        }

        // fall back to the first precision supported by all consumers
        if (std::find(concatChildrenPrecisions.begin(), concatChildrenPrecisions.end(), dataPrecision.precision) == concatChildrenPrecisions.end()) {
            dataPrecision = DataPrecision(concatChildrenPrecisions[0]);
        }
    }

    std::unordered_map<std::string, ngraph::pass::low_precision::FakeQuantizeDequantization> dequantizations;

    for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
        // NOTE(review): this is a reference to the vector element that is reassigned at the end of
        // the iteration, so the final `subgraph.layers[...]` key is read from the updated node.
        const std::shared_ptr<ngraph::Node>& fakeQuantizeLayer = subgraph.quantizationLayers[i];

        std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(fakeQuantizeLayer->shared_from_this());
        assert(fq);

        auto newFakeQuantize = NetworkHelper::fuseConvert(fq);
        if (newFakeQuantize != nullptr) {
            fq = newFakeQuantize;
        }

        newFakeQuantize = NetworkHelper::composeFakeQuantize(fq);
        if (newFakeQuantize != nullptr) {
            fq = newFakeQuantize;
        }

        const DataPrecision currentDataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);

        // 1. get data for dequantization. Dequantization data will be used several times later.
        const FakeQuantizeDequantization fakeQuantizeDequantization = ngraph::pass::low_precision::NetworkHelper::createDequantizationFromFakeQuantize(
            fq,
            dataPrecision.precision,
            dataPrecision.min,
            dataPrecision.max,
            dataPrecision.precision == currentDataPrecision.precision ? currentDataPrecision.hasZeroPoint : true,
            updatePrecisions,
            deqPrecision);
        dequantizations[fakeQuantizeLayer->get_friendly_name()] = fakeQuantizeDequantization;

        // 2. update FakeQuantize - one time action
        const std::shared_ptr<opset1::FakeQuantize> newFakeQuantizeLayer = ngraph::pass::low_precision::NetworkHelper::updateFakeQuantize(
            fq,
            updatePrecisions ? dataPrecision.precision : fakeQuantizeLayer->get_output_element_type(0),
            roundf(dataPrecision.min),
            roundf(dataPrecision.max));

        subgraph.quantizationLayers[i] = newFakeQuantizeLayer;
        subgraph.layers[fakeQuantizeLayer->get_friendly_name()] = newFakeQuantizeLayer;
    }

    auto dequantizationValuesCallback = [&](
        std::shared_ptr<ngraph::Node> layer,
        std::shared_ptr<ngraph::Node> child,
        const std::string originalLayerName,
        std::vector<FakeQuantizeDequantization>& dequantizationsToConcatenate) {
        // the layer could have been renamed: move its dequantization under the current name
        if (layer->get_friendly_name() != originalLayerName) {
            const auto update = [](
                const std::string& originalLayerName,
                const std::string& newLayerName,
                std::unordered_map<std::string, FakeQuantizeDequantization>& dequantizationLayers) {
                auto it = dequantizationLayers.find(originalLayerName);
                if (it != dequantizationLayers.end()) {
                    dequantizationLayers.emplace(newLayerName, it->second);
                    dequantizationLayers.erase(it);
                }
            };
            update(originalLayerName, layer->get_friendly_name(), dequantizations);
        }

        fillDequantization(
            layer,
            dequantizations,
            dequantizationsToConcatenate);

        if (!is_type<ngraph::opset1::Concat>(layer)) {
            // for intermediate layers we should get Dq operations to be inserted between layer and child
            assert(dequantizationsToConcatenate.size() == 1ul);
            const size_t sourceOutputIdx = NetworkHelper::getParentOutputIndex(layer, child);
            if (layer->get_input_partial_shape(0)[1] != layer->get_output_partial_shape(sourceOutputIdx)[1]) {
                dequantizationsToConcatenate[0] = getFoldedDequantization(layer, dequantizationsToConcatenate[0], sourceOutputIdx);
            }
        }
    };

    addDequantizationLayers(context, subgraph, dequantizationValuesCallback);

    if (updatePrecisions) {
        // iterate by reference: each map entry holds a string key and a shared_ptr
        for (const auto& it : subgraph.layers) {
            const std::shared_ptr<ngraph::Node> node = it.second;
            if (std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(node)) {
                ngraph::pass::low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(node->shared_from_this(), dataPrecision.precision);
            } else {
                // set the precision explicitly to have the updated precision during transformation
                for (size_t i = 0; i < node->get_output_size(); ++i) {
                    node->set_output_type(i, dataPrecision.precision, node->get_output_partial_shape(i));
                }
            }
        }
    }

    for (const std::shared_ptr<ngraph::Node>& quantizationLayer : subgraph.quantizationLayers) {
        context.quantizedFakeQuantizeNames.insert(quantizationLayer->get_friendly_name());
    }
    return true;
}
|
||||
|
||||
// Unconditionally returns true; the node argument is unnamed and never inspected.
bool ConcatMultiChannelsTransformation::isPrecisionPreserved(std::shared_ptr<Node>) const noexcept {
|
||||
return true;
|
||||
}
|
||||
|
||||
void ConcatMultiChannelsTransformation::fillDequantization(
|
||||
const std::shared_ptr<ngraph::Node> layer,
|
||||
const std::unordered_map<std::string, FakeQuantizeDequantization>& dequantizationByFakeQuantize,
|
||||
std::vector<FakeQuantizeDequantization>& dequantization) const {
|
||||
const auto fillDqByFakeQuantize = [&](const std::shared_ptr<ngraph::Node>& fq) {
|
||||
const auto it = dequantizationByFakeQuantize.find(fq->get_friendly_name());
|
||||
if (it == dequantizationByFakeQuantize.end()) {
|
||||
THROW_IE_LPT_EXCEPTION(*fq) << "dequantization scale values are not found";
|
||||
}
|
||||
|
||||
const FakeQuantizeDequantization& fakeQuantizeDequantization = it->second;
|
||||
dequantization.push_back(fakeQuantizeDequantization);
|
||||
};
|
||||
|
||||
if (is_type<ngraph::opset1::FakeQuantize>(layer)) {
|
||||
fillDqByFakeQuantize(layer);
|
||||
} else {
|
||||
for (size_t i = 0; i < layer->get_input_size(); ++i) {
|
||||
std::shared_ptr<ngraph::Node> parent = layer->get_input_node_shared_ptr(i);
|
||||
if (as_type_ptr<ngraph::opset1::Constant>(parent)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto fakeQuantize = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(parent);
|
||||
if (fakeQuantize) {
|
||||
fillDqByFakeQuantize(fakeQuantize);
|
||||
} else {
|
||||
const auto concat = ngraph::as_type_ptr<ngraph::opset1::Concat>(parent);
|
||||
if (concat) {
|
||||
std::vector<FakeQuantizeDequantization> dequantizationToConcatenate;
|
||||
fillDequantization(concat, dequantizationByFakeQuantize, dequantizationToConcatenate);
|
||||
|
||||
// add concatenated dequantization operations to dequantization collection
|
||||
dequantization.push_back(getConcatenatedDequantization(concat, dequantizationToConcatenate));
|
||||
} else {
|
||||
const size_t sourceOutputIdx = NetworkHelper::getParentOutputIndex(parent, layer);
|
||||
if (parent->get_input_partial_shape(0)[1] != parent->get_output_partial_shape(sourceOutputIdx)[1]) {
|
||||
std::vector<FakeQuantizeDequantization> dequantizationToPropagate;
|
||||
fillDequantization(parent, dequantizationByFakeQuantize, dequantizationToPropagate);
|
||||
|
||||
// add folded dequantization operations to dequantization colection
|
||||
dequantization.push_back(getFoldedDequantization(parent, dequantizationToPropagate[0], sourceOutputIdx));
|
||||
} else {
|
||||
fillDequantization(parent, dequantizationByFakeQuantize, dequantization);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a single dequantization for `concat` out of the dequantizations of its inputs.
//
// concat         - Concat node the resulting operations are chained after.
// dequantization - per-input dequantizations, passed to fillDequantizationNodes.
//
// Returns a FakeQuantizeDequantization whose data is `concat`; the convert, subtract and multiply
// members are set only when fillDequantizationNodes produced nodes of the corresponding kind, and
// the subtract / multiply constants come from concatenateDeqNodes.
FakeQuantizeDequantization ConcatMultiChannelsTransformation::getConcatenatedDequantization(
    const std::shared_ptr<ngraph::opset1::Concat> concat,
    const std::vector<FakeQuantizeDequantization>& dequantization) const {
    NodeVector convertNodes;
    NodeVector subtractNodes;
    NodeVector multiplyNodes;

    // forming nodes for concatenation
    fillDequantizationNodes(dequantization, concat, convertNodes, subtractNodes, multiplyNodes);

    // results; each one stays null when the corresponding stage is absent
    std::shared_ptr<DequantizationConvert> convert;
    std::shared_ptr<DequantizationSubtract> subtract;
    std::shared_ptr<ngraph::opset1::Constant> subConst;
    std::shared_ptr<DequantizationMultiply> multiply;
    std::shared_ptr<ngraph::opset1::Constant> mulConst;

    // last node of the chain built so far
    std::shared_ptr<Node> chainTail = concat;

    if (!convertNodes.empty()) {
        convert = as_type_ptr<DequantizationConvert>(dequantization[0].convert->clone_with_new_inputs({ chainTail }));
        chainTail = convert;
    }

    if (!subtractNodes.empty()) {
        subConst = as_type_ptr<ngraph::opset1::Constant>(concatenateDeqNodes(subtractNodes));
        subtract = std::make_shared<DequantizationSubtract>(chainTail, subConst);
        chainTail = subtract;
    }

    if (!multiplyNodes.empty()) {
        mulConst = as_type_ptr<ngraph::opset1::Constant>(concatenateDeqNodes(multiplyNodes));
        multiply = std::make_shared<DequantizationMultiply>(chainTail, mulConst);
    }

    return FakeQuantizeDequantization(concat, convert, subtract, nullptr, subConst, multiply, mulConst);
}
|
||||
|
||||
// Rebuilds `dequantization` behind `operation`, with the subtract / multiply constants folded
// through the operation for the given output.
//
// operation       - node the dequantization is moved behind.
// dequantization  - dequantization to fold; absent stages stay absent in the result.
// sourceOutputIdx - index of the `operation` output the result is built for.
//
// Returns a FakeQuantizeDequantization whose data is `operation->output(sourceOutputIdx)`.
//
// NOTE(review): the Convert is attached to the selected output (`data`), while a Subtract or
// Multiply created without a preceding Convert is attached to `operation` itself rather than to
// `data` - confirm this is intended for operations with several outputs.
FakeQuantizeDequantization ConcatMultiChannelsTransformation::getFoldedDequantization(
    const std::shared_ptr<ngraph::Node> operation,
    const FakeQuantizeDequantization& dequantization,
    const size_t sourceOutputIdx) {
    Output<Node> data = operation->output(sourceOutputIdx);

    std::shared_ptr<Node> parent = operation;
    std::shared_ptr<DequantizationConvert> convert;
    if (dequantization.convert) {
        convert = as_type_ptr<DequantizationConvert>(dequantization.convert->clone_with_new_inputs({ data }));
        parent = convert;
    }

    std::shared_ptr<DequantizationSubtract> subtract;
    std::shared_ptr<ngraph::opset1::Constant> subConst;
    if (dequantization.subtract) {
        subConst = NetworkHelper::foldDequantizationConstant(dequantization.subtractConstant, operation, sourceOutputIdx);
        subtract = std::make_shared<DequantizationSubtract>(parent, subConst);
        parent = subtract;
    }

    std::shared_ptr<DequantizationMultiply> multiply;
    std::shared_ptr<ngraph::opset1::Constant> mulConst;
    if (dequantization.multiply) {
        mulConst = NetworkHelper::foldDequantizationConstant(dequantization.multiplyConstant, operation, sourceOutputIdx);
        multiply = std::make_shared<DequantizationMultiply>(parent, mulConst);
    }

    return FakeQuantizeDequantization(data, convert, subtract, nullptr, subConst, multiply, mulConst);
}
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -11,6 +11,7 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include "low_precision/common/ie_lpt_exception.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
@ -18,11 +19,24 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
void ConvertTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
|
||||
addSingleNodePattern<opset1::Convert>(pass, context);
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConvertTransformation, "ConvertTransformation", 0);
|
||||
|
||||
// Creates a matcher named "ConvertTransformation" for opset1::Convert nodes and registers it with
// a callback that runs transform() on the match, unless transformation_callback returns true for
// the matched root node (in which case the callback reports false).
ConvertTransformation::ConvertTransformation(const Params& params) : LayerTransformation(params) {
    auto convertPattern = pattern::wrap_type<opset1::Convert>();

    ngraph::graph_rewrite_callback onMatch = [this](pattern::Matcher& match) {
        auto root = match.get_match_root();
        if (!transformation_callback(root)) {
            return transform(*context, match);
        }
        return false;
    };

    auto convertMatcher = std::make_shared<ngraph::pattern::Matcher>(convertPattern, "ConvertTransformation");
    this->register_matcher(convertMatcher, onMatch);
}
|
||||
|
||||
bool ConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
|
||||
bool ConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
|
||||
std::shared_ptr<opset1::Convert> convert = as_type_ptr<opset1::Convert>(m.get_match_root());
|
||||
if (!convert) {
|
||||
return false;
|
||||
|
@ -10,6 +10,8 @@
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/pattern/op/or.hpp>
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
@ -17,28 +19,39 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConvolutionTransformation, "ConvolutionTransformation", 0);
|
||||
|
||||
// Creates a matcher named "ConvolutionTransformation" for an opset1::Convolution whose first input
// is an opset1::Multiply and whose second input is either an opset1::Multiply or an
// opset1::FakeQuantize. The registered callback runs transform() on the match, unless
// transformation_callback returns true for the matched root node (then it reports false).
ConvolutionTransformation::ConvolutionTransformation(const Params& params) : WeightableLayerTransformation(params) {
    // pattern nodes are created in the same order as they appear in the convolution inputs
    auto activationsPattern = ngraph::pattern::wrap_type<opset1::Multiply>();
    auto weightsMultiplyPattern = pattern::wrap_type<opset1::Multiply>();
    auto weightsFakeQuantizePattern = pattern::wrap_type<opset1::FakeQuantize>();
    auto weightsPattern = std::make_shared<pattern::op::Or>(OutputVector {
        weightsMultiplyPattern,
        weightsFakeQuantizePattern
    });

    auto convolutionPattern = ngraph::pattern::wrap_type<opset1::Convolution>({
        activationsPattern,
        weightsPattern
    });

    ngraph::graph_rewrite_callback onMatch = [this](pattern::Matcher& match) {
        auto root = match.get_match_root();
        if (!transformation_callback(root)) {
            return transform(*context, match);
        }
        return false;
    };

    auto convolutionMatcher = std::make_shared<ngraph::pattern::Matcher>(convolutionPattern, "ConvolutionTransformation");
    this->register_matcher(convolutionMatcher, onMatch);
}
|
||||
|
||||
void ConvolutionTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
|
||||
addPattern(
|
||||
pass,
|
||||
context,
|
||||
make_op_pattern<opset1::Convolution>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>() }));
|
||||
|
||||
addPattern(
|
||||
pass,
|
||||
context,
|
||||
make_op_pattern<opset1::Convolution>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>() }));
|
||||
bool ConvolutionTransformation::isQuantized(const std::shared_ptr<const Node>& layer) const noexcept {
|
||||
return ConvolutionTransformation::isQuantizedStatic(layer);
|
||||
}
|
||||
|
||||
bool ConvolutionTransformation::isQuantized(std::shared_ptr<Node> layer) const noexcept {
|
||||
return WeightableLayerTransformation::isQuantized(layer, false);
|
||||
bool ConvolutionTransformation::isQuantizedStatic(const std::shared_ptr<const Node>& layer) noexcept {
|
||||
return WeightableLayerTransformation::isQuantizedStatic(layer, false);
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool ConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const {
|
||||
bool ConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) {
|
||||
auto convolution = m.get_match_root();
|
||||
|
||||
if (!canConvolutionBeTransformed(context, convolution)) {
|
||||
@ -150,7 +163,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
||||
reducedConstant->cast_vector<float>()[0]);
|
||||
}
|
||||
|
||||
const auto copyNode = convolution->copy_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) });
|
||||
const auto copyNode = convolution->clone_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) });
|
||||
auto conv = as_type_ptr<opset1::Convolution>(copyNode);
|
||||
std::shared_ptr<Node> relaxedNewConvolution;
|
||||
if (conv) {
|
||||
@ -164,6 +177,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
||||
std::vector<element::Type>{deqPrecision, deqPrecision},
|
||||
std::vector<element::Type>{deqPrecision});
|
||||
}
|
||||
NetworkHelper::copyInfo(convolution, relaxedNewConvolution);
|
||||
|
||||
std::shared_ptr<ngraph::opset1::Multiply> newMultiplyAfter = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
||||
@ -179,12 +193,18 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
||||
convolution->get_input_node_ptr(0)->get_input_source_output(0),
|
||||
convolution->input_value(1)});
|
||||
replace_node(convolution, newConvolution);
|
||||
NetworkHelper::copyInfo(convolution, newConvolution);
|
||||
convolution = newConvolution;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
decomposeFakeQuantizeForWeightsPath(convolution);
|
||||
const bool decomposed = decomposeFakeQuantizeForWeightsPath(convolution);
|
||||
assert((updatePrecisions && decomposed) || (!updatePrecisions));
|
||||
if (!updatePrecisions && !decomposed) {
|
||||
// TODO: LPT: issue #58685
|
||||
return false;
|
||||
}
|
||||
|
||||
std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(convolution->input_value(1).get_node_shared_ptr());
|
||||
|
||||
@ -218,13 +238,16 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
||||
reshapeFromWeights->input_value(1) }));
|
||||
}
|
||||
|
||||
auto newConvolution = convolution->clone_with_new_inputs({
|
||||
convolution->input_value(0),
|
||||
reshapeFromWeights != nullptr ?
|
||||
reshapeFromWeights :
|
||||
multiplyFromWeights->input_value(0)
|
||||
});
|
||||
NetworkHelper::copyInfo(convolution, newConvolution);
|
||||
|
||||
auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
|
||||
convolution->copy_with_new_inputs({
|
||||
convolution->input_value(0),
|
||||
reshapeFromWeights != nullptr ?
|
||||
reshapeFromWeights :
|
||||
multiplyFromWeights->input_value(0)
|
||||
}),
|
||||
newConvolution,
|
||||
foldConvert(
|
||||
fold_reshape<opset1::Reshape>(
|
||||
multiplyFromWeights->input_value(1),
|
||||
@ -270,6 +293,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
||||
convolution->get_input_node_ptr(1)->input_value(0) :
|
||||
childNode->copy_with_new_inputs({convertFromWeights->input_value(0), childNode->input_value(1)})});
|
||||
replace_node(convolution, newConvolution);
|
||||
NetworkHelper::copyInfo(convolution, newConvolution);
|
||||
convolution = newConvolution;
|
||||
}
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user