[LPT] Legacy restrictions removal: dequantization operations (#6974)
* [LPT] Legacy restrictions removal: dequantization operations * [LPT] not used tests removal
This commit is contained in:
parent
2b1637f28d
commit
b7deb11d6a
@ -18,7 +18,6 @@
|
||||
#include "ie_itt.hpp"
|
||||
#include "ngraph/opsets/opset6.hpp"
|
||||
#include "ngraph/variant.hpp"
|
||||
#include "transformations/rt_info/dequantization_attribute.hpp"
|
||||
#include "transformations/rt_info/fused_names_attribute.hpp"
|
||||
#include "transformations/rt_info/primitives_priority_attribute.hpp"
|
||||
#include "transformations/serialize.hpp"
|
||||
@ -121,9 +120,6 @@ std::string NetworkCompilationContext::computeHash(const CNNNetwork& network,
|
||||
} else if (auto intData =
|
||||
std::dynamic_pointer_cast<ngraph::VariantWrapper<std::int64_t>>(rtMapData.second)) {
|
||||
seed = hash_combine(seed, intData->get());
|
||||
} else if (auto deq = std::dynamic_pointer_cast<ngraph::VariantWrapper<ngraph::DequantizationAttr>>(
|
||||
rtMapData.second)) {
|
||||
seed = hash_combine(seed, deq->get().getDequantizationAttr());
|
||||
} else if (auto fNames =
|
||||
std::dynamic_pointer_cast<ngraph::VariantWrapper<ngraph::FusedNames>>(rtMapData.second)) {
|
||||
seed = hash_combine(seed, fNames->get().getNames());
|
||||
|
@ -145,16 +145,13 @@ ngraph::pass::ConvertMulAddToScaleShiftOrPower::ConvertMulAddToScaleShiftOrPower
|
||||
const auto output_shape = add_node->get_output_partial_shape(0);
|
||||
const auto output_shape_rank = output_shape.rank().get_length();
|
||||
|
||||
bool is_dequantization =
|
||||
(add_node->get_rt_info().count("DEQUANTIZATION") != 0 || mul_node->get_rt_info().count("DEQUANTIZATION") != 0);
|
||||
|
||||
if (res1 == CONVERSION_RESULT::NONE || res2 == CONVERSION_RESULT::NONE ||
|
||||
((res1 == CONVERSION_RESULT::SCALE_SHIFT || res2 == CONVERSION_RESULT::SCALE_SHIFT) && !is_dequantization && output_shape_rank < 4)) {
|
||||
((res1 == CONVERSION_RESULT::SCALE_SHIFT || res2 == CONVERSION_RESULT::SCALE_SHIFT) && output_shape_rank < 4)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO: if the scale and shift constants have equal values, the best way is to convert them to Power
|
||||
if (res1 == CONVERSION_RESULT::SCALE_SHIFT || res2 == CONVERSION_RESULT::SCALE_SHIFT || is_dequantization) {
|
||||
if (res1 == CONVERSION_RESULT::SCALE_SHIFT || res2 == CONVERSION_RESULT::SCALE_SHIFT) {
|
||||
NodeVector new_ops;
|
||||
|
||||
auto weights_in = ngraph::op::util::normalize_constant(const_weights_node, output_shape);
|
||||
@ -162,23 +159,11 @@ ngraph::pass::ConvertMulAddToScaleShiftOrPower::ConvertMulAddToScaleShiftOrPower
|
||||
new_ops.push_back(weights_in);
|
||||
new_ops.push_back(biases_in);
|
||||
|
||||
if (is_dequantization) {
|
||||
const Shape data_shape = data_node.get_shape();
|
||||
Shape broadcasted_shape = std::vector<size_t>(data_shape.size(), 1ul);
|
||||
broadcasted_shape[1] = data_shape[1];
|
||||
|
||||
weights_in = ngraph::op::util::broadcastTo(weights_in, broadcasted_shape);
|
||||
new_ops.push_back(weights_in);
|
||||
|
||||
biases_in = ngraph::op::util::broadcastTo(biases_in, broadcasted_shape);
|
||||
new_ops.push_back(biases_in);
|
||||
}
|
||||
|
||||
if (res1 == CONVERSION_RESULT::POWER && !is_dequantization) {
|
||||
if (res1 == CONVERSION_RESULT::POWER) {
|
||||
weights_in = ngraph::op::util::broadcastTo(weights_in, biases_in->get_shape());
|
||||
new_ops.push_back(weights_in);
|
||||
}
|
||||
if (res2 == CONVERSION_RESULT::POWER && !is_dequantization) {
|
||||
if (res2 == CONVERSION_RESULT::POWER) {
|
||||
biases_in = ngraph::op::util::broadcastTo(biases_in, weights_in->get_shape());
|
||||
new_ops.push_back(biases_in);
|
||||
}
|
||||
|
@ -125,70 +125,14 @@ ngraph::matcher_pass_callback get_callback() {
|
||||
|
||||
auto res = check_constant(const_node, data_node.get_partial_shape());
|
||||
|
||||
auto checkElementwise = [](const std::shared_ptr<ngraph::Node>& elementwise) -> bool {
|
||||
const ngraph::PartialShape partialShape = elementwise->get_input_partial_shape(0);
|
||||
if (partialShape.is_dynamic()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::opset1::Constant> constant = ngraph::as_type_ptr<ngraph::opset1::Constant>(elementwise->get_input_node_shared_ptr(1));
|
||||
if (constant == nullptr) {
|
||||
constant = ngraph::as_type_ptr<ngraph::opset1::Constant>(elementwise->get_input_node_shared_ptr(0));
|
||||
}
|
||||
if (constant == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const ngraph::Shape constShape = constant->get_output_shape(0);
|
||||
const ngraph::Shape shape = partialShape.to_shape();
|
||||
|
||||
if (constShape.size() == 1ul && constShape[0] != 1 && constShape[0] != shape[1]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((constShape.size() > 5ul)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((constShape.size() <= 1ul) || (std::all_of(constShape.begin(), constShape.end(), [](const size_t value) { return value == 1ul; }))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (constShape.size() == shape.size()) {
|
||||
if ((constShape[0] != 1ul) || (constShape[1] != shape[1])) {
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 2ul; i < constShape.size(); ++i) {
|
||||
if (constShape[i] != 1ul) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if (constShape.size() == (shape.size() - 1)) {
|
||||
if (constShape[0] != shape[1]) {
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 1ul; i < constShape.size(); ++i) {
|
||||
if (constShape[i] != 1ul) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
bool is_dequantization = (lin_op->get_rt_info().count("DEQUANTIZATION") != 0) && checkElementwise(lin_op);
|
||||
|
||||
if (!is_dequantization && (res == CONVERSION_RESULT::NONE || (res == CONVERSION_RESULT::SCALE_SHIFT && output_shape_rank < 4))) {
|
||||
if (res == CONVERSION_RESULT::NONE || (res == CONVERSION_RESULT::SCALE_SHIFT && output_shape_rank < 4)) {
|
||||
return convert_to_eltwise<T>(lin_op,
|
||||
lin_op->input(0).get_source_output(),
|
||||
lin_op->input(1).get_source_output());
|
||||
}
|
||||
|
||||
// TODO: if all values in Constant are equal the best way is to convert this Eltwise to Power
|
||||
if (res == CONVERSION_RESULT::SCALE_SHIFT || is_dequantization) {
|
||||
if (res == CONVERSION_RESULT::SCALE_SHIFT) {
|
||||
auto weights_et = const_node->get_element_type();
|
||||
auto weights_shape = const_node->get_shape();
|
||||
|
||||
@ -198,14 +142,6 @@ ngraph::matcher_pass_callback get_callback() {
|
||||
auto weights = ngraph::opset1::Constant::create(weights_et, weights_shape, {1});
|
||||
auto weights_in = ngraph::op::util::normalize_constant(weights, output_shape);
|
||||
auto biases_in = ngraph::op::util::normalize_constant(const_node, output_shape);
|
||||
if (is_dequantization) {
|
||||
const ngraph::Shape data_shape = data_node.get_shape();
|
||||
ngraph::Shape broadcasted_shape = std::vector<size_t>(data_shape.size(), 1ul);
|
||||
broadcasted_shape[1] = data_shape[1];
|
||||
|
||||
weights_in = ngraph::op::util::broadcastTo(weights_in, broadcasted_shape);
|
||||
biases_in = ngraph::op::util::broadcastTo(biases_in, broadcasted_shape);
|
||||
}
|
||||
scaleshift = std::make_shared<ngraph::op::ScaleShiftIE>(data_node, weights_in, biases_in);
|
||||
} else if (std::is_same<T, ngraph::opset1::Subtract>()) {
|
||||
std::shared_ptr<ngraph::Node> new_const_node = std::make_shared<ngraph::opset1::Multiply>(
|
||||
@ -215,27 +151,11 @@ ngraph::matcher_pass_callback get_callback() {
|
||||
auto weights = ngraph::opset1::Constant::create(weights_et, weights_shape, {1});
|
||||
auto weights_in = ngraph::op::util::normalize_constant(weights, output_shape);
|
||||
auto biases_in = new_const_node;
|
||||
if (is_dequantization) {
|
||||
const ngraph::Shape data_shape = data_node.get_shape();
|
||||
ngraph::Shape broadcasted_shape = std::vector<size_t>(data_shape.size(), 1ul);
|
||||
broadcasted_shape[1] = data_shape[1];
|
||||
|
||||
weights_in = ngraph::op::util::broadcastTo(weights_in, broadcasted_shape);
|
||||
biases_in = ngraph::op::util::broadcastTo(biases_in, broadcasted_shape);
|
||||
}
|
||||
scaleshift = std::make_shared<ngraph::op::ScaleShiftIE>(data_node, weights_in, biases_in);
|
||||
} else if (std::is_same<T, ngraph::opset1::Multiply>()) {
|
||||
auto bias = ngraph::opset1::Constant::create(weights_et, weights_shape, {0});
|
||||
auto weights_in = ngraph::op::util::normalize_constant(const_node, output_shape);
|
||||
auto biases_in = ngraph::op::util::normalize_constant(bias, output_shape);
|
||||
if (is_dequantization) {
|
||||
const ngraph::Shape data_shape = data_node.get_shape();
|
||||
ngraph::Shape broadcasted_shape = std::vector<size_t>(data_shape.size(), 1ul);
|
||||
broadcasted_shape[1] = data_shape[1];
|
||||
|
||||
weights_in = ngraph::op::util::broadcastTo(weights_in, broadcasted_shape);
|
||||
biases_in = ngraph::op::util::broadcastTo(biases_in, broadcasted_shape);
|
||||
}
|
||||
scaleshift = std::make_shared<ngraph::op::ScaleShiftIE>(data_node, weights_in, biases_in);
|
||||
} else {
|
||||
return false;
|
||||
|
@ -1,138 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/check.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
|
||||
#include "low_precision/lpt_visibility.hpp"
|
||||
#include "transformations/rt_info/dequantization_attribute.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
// template<typename BaseOp2>
|
||||
// class LP_TRANSFORMATIONS_API DequantizationOp : public BaseOp2 {
|
||||
// public:
|
||||
// template <typename ... Args>
|
||||
// DequantizationOp(Args&&... args) : BaseOp2(std::forward<Args>(args)...) {
|
||||
// init();
|
||||
// }
|
||||
//
|
||||
// std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override {
|
||||
// std::shared_ptr<Node> cloned = BaseOp2::clone_with_new_inputs(inputs);
|
||||
// auto& rtInfo = cloned->get_rt_info();
|
||||
// rtInfo = get_rt_info();
|
||||
//
|
||||
// return cloned;
|
||||
// }
|
||||
//
|
||||
// protected:
|
||||
// void init() {
|
||||
// auto& rtInfo = get_rt_info();
|
||||
// rtInfo["DEQUANTIZATION"] = std::make_shared<ngraph::VariantWrapper<std::string>>("");
|
||||
// }
|
||||
// };
|
||||
//
|
||||
// using DequantizationConvert = DequantizationOp<ngraph::opset1::Convert>;
|
||||
// using DequantizationSubtract = DequantizationOp<ngraph::opset1::Subtract>;
|
||||
// using DequantizationMultiply = DequantizationOp<ngraph::opset1::Multiply>;
|
||||
|
||||
namespace {
|
||||
void initRuntimeInfo(ngraph::Node& operation) {
|
||||
auto& rtInfo = operation.get_rt_info();
|
||||
rtInfo["DEQUANTIZATION"] = std::make_shared<VariantWrapper<DequantizationAttr>>(DequantizationAttr());
|
||||
}
|
||||
|
||||
// #include <ngraph/rt_info.hpp>
|
||||
// ngraph::copy_runtime_info(from, to);
|
||||
void copyRuntimeInfo(const ngraph::Node& from, ngraph::Node& to) {
|
||||
const auto& rtInfoFrom = from.get_rt_info();
|
||||
auto& rtInfoTo = to.get_rt_info();
|
||||
rtInfoTo = rtInfoFrom;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// opset1::Convert that calls initRuntimeInfo on itself when constructed.
class LP_TRANSFORMATIONS_API DequantizationConvert : public ngraph::opset1::Convert {
public:
    // Forwards both arguments to opset1::Convert, then initializes the runtime info.
    DequantizationConvert(const ngraph::Output<Node>& arg, const ngraph::element::Type& destination_type)
        : ngraph::opset1::Convert(arg, destination_type) {
        initRuntimeInfo(*this);
    }

    // Clones through opset1::Convert and passes the clone to copyRuntimeInfo together
    // with this node before returning it.
    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override {
        std::shared_ptr<Node> copy = ngraph::opset1::Convert::clone_with_new_inputs(inputs);
        copyRuntimeInfo(*this, *copy);
        return copy;
    }
};
|
||||
|
||||
// opset1::Subtract that calls initRuntimeInfo on itself when constructed.
class LP_TRANSFORMATIONS_API DequantizationSubtract : public ngraph::opset1::Subtract {
public:
    // Forwards the operands and broadcast spec (NUMPY by default) to opset1::Subtract,
    // then initializes the runtime info.
    DequantizationSubtract(
        const ngraph::Output<Node>& arg0,
        const ngraph::Output<Node>& arg1,
        const ngraph::op::AutoBroadcastSpec& auto_broadcast = ngraph::op::AutoBroadcastSpec(ngraph::op::AutoBroadcastType::NUMPY))
        : ngraph::opset1::Subtract(arg0, arg1, auto_broadcast) {
        initRuntimeInfo(*this);
    }

    // Clones through opset1::Subtract and passes the clone to copyRuntimeInfo together
    // with this node before returning it.
    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override {
        std::shared_ptr<Node> copy = ngraph::opset1::Subtract::clone_with_new_inputs(inputs);
        copyRuntimeInfo(*this, *copy);
        return copy;
    }
};
|
||||
|
||||
// opset1::Multiply that calls initRuntimeInfo on itself when constructed.
class LP_TRANSFORMATIONS_API DequantizationMultiply : public ngraph::opset1::Multiply {
public:
    // Forwards the operands and broadcast spec (NUMPY by default) to opset1::Multiply,
    // then initializes the runtime info.
    DequantizationMultiply(
        const Output<Node>& arg0,
        const Output<Node>& arg1,
        const ngraph::op::AutoBroadcastSpec& auto_broadcast = ngraph::op::AutoBroadcastSpec(ngraph::op::AutoBroadcastType::NUMPY))
        : ngraph::opset1::Multiply(arg0, arg1, auto_broadcast) {
        initRuntimeInfo(*this);
    }

    // Copy-constructs the base from an existing opset1::Multiply, then initializes the
    // runtime info.
    DequantizationMultiply(const ngraph::opset1::Multiply& multiply)
        : ngraph::opset1::Multiply(multiply) {
        initRuntimeInfo(*this);
    }

    // Clones through opset1::Multiply and passes the clone to copyRuntimeInfo together
    // with this node before returning it.
    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override {
        std::shared_ptr<Node> copy = ngraph::opset1::Multiply::clone_with_new_inputs(inputs);
        copyRuntimeInfo(*this, *copy);
        return copy;
    }
};
|
||||
|
||||
// opset1::Add that calls initRuntimeInfo on itself when constructed.
class LP_TRANSFORMATIONS_API DequantizationAdd : public ngraph::opset1::Add {
public:
    // Forwards the operands and broadcast spec (NUMPY by default) to opset1::Add,
    // then initializes the runtime info.
    DequantizationAdd(
        const ngraph::Output<Node>& arg0,
        const ngraph::Output<Node>& arg1,
        const ngraph::op::AutoBroadcastSpec& auto_broadcast = ngraph::op::AutoBroadcastSpec(ngraph::op::AutoBroadcastType::NUMPY))
        : ngraph::opset1::Add(arg0, arg1, auto_broadcast) {
        initRuntimeInfo(*this);
    }

    // Clones through opset1::Add and passes the clone to copyRuntimeInfo together
    // with this node before returning it.
    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override {
        std::shared_ptr<Node> copy = ngraph::opset1::Add::clone_with_new_inputs(inputs);
        copyRuntimeInfo(*this, *copy);
        return copy;
    }
};
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -86,8 +86,6 @@ public:
|
||||
|
||||
static void copyInfo(const std::shared_ptr<Node>& source, const std::shared_ptr<Node>& target);
|
||||
|
||||
static void cleanRunTimeInfo(const std::shared_ptr<Node>& layer);
|
||||
|
||||
static bool isScalarLike(std::shared_ptr<opset1::Constant> constant);
|
||||
|
||||
static bool isZero(std::shared_ptr<opset1::Constant> constant);
|
||||
|
@ -1,26 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
#include "low_precision/eltwise_base_transformation.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
// Low-precision layer transformation declared on top of LayerTransformation.
// NOTE(review): judging by the name it presumably rewrites a Subtract -> Multiply pair
// into Multiply -> Add; the implementation is not visible here, confirm against the .cpp.
class LP_TRANSFORMATIONS_API SubtractMultiplyToMultiplyAddTransformation : public LayerTransformation {
|
||||
public:
|
||||
// Declares the nGraph RTTI members for this pass.
NGRAPH_RTTI_DECLARATION;
|
||||
// Constructs the transformation; `params` defaults to a default-constructed Params.
SubtractMultiplyToMultiplyAddTransformation(const Params& params = Params());
|
||||
// Override of the LayerTransformation hook that receives the pattern matcher `m`.
bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
|
||||
// Override that reports whether `layer` can be handled by this transformation.
bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
|
||||
// Override that reports whether precision is preserved for `layer`; declared noexcept.
bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
|
||||
};
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -14,7 +14,6 @@
|
||||
#include "ngraph_ops/type_relaxed.hpp"
|
||||
|
||||
#include "low_precision/common/ie_lpt_exception.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
@ -54,7 +53,7 @@ std::shared_ptr<opset1::Subtract> replaceToSubtract(const std::shared_ptr<Node>&
|
||||
auto constant = fold<opset1::Negative>(add->input_value(constBranchIndex));
|
||||
auto constOutput = constant->output(0);
|
||||
|
||||
const auto subtract = std::make_shared<op::TypeRelaxed<DequantizationSubtract>>(
|
||||
const auto subtract = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ op->get_output_element_type(0) },
|
||||
ngraph::op::TemporaryReplaceOutputType(add->input_value(dataBranchIndex), element::f32).get(),
|
||||
@ -80,7 +79,7 @@ std::shared_ptr<opset1::Subtract> fuseWithSubtract(const std::shared_ptr<Node>&
|
||||
add->get_input_node_shared_ptr(0)->input_value(1),
|
||||
add->input_value(1));
|
||||
|
||||
const auto newSubtract = std::make_shared<op::TypeRelaxed<DequantizationSubtract>>(
|
||||
const auto newSubtract = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ op->get_output_element_type(0) },
|
||||
ngraph::op::TemporaryReplaceOutputType(add->get_input_node_shared_ptr(0)->input_value(0), element::f32).get(),
|
||||
@ -201,10 +200,10 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter
|
||||
// newMultiply
|
||||
|
||||
inputs[emptyPathIndex] = dequantizationEmptyPath.data;
|
||||
inputs[fullPathIndex] = std::make_shared<DequantizationMultiply>(
|
||||
inputs[fullPathIndex] = std::make_shared<opset1::Multiply>(
|
||||
newSubtractFullPathValues == nullptr ?
|
||||
fullPathInput :
|
||||
std::make_shared<DequantizationSubtract>(
|
||||
std::make_shared<opset1::Subtract>(
|
||||
// precision on the branch with dequantization operations can differ from the dequantization precision,
|
||||
// for example: FP16 model with FP32 dequantization
|
||||
fullPathInput.get_element_type() != newSubtractFullPathValues->get_element_type() ?
|
||||
@ -217,7 +216,7 @@ bool AddTransformation::transform(TransformationContext& context, ngraph::patter
|
||||
std::vector<element::Type>{element::f32, element::f32}, std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(inputs[0], element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(inputs[1], element::f32).get());
|
||||
newMultiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
newMultiply = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||
std::vector<element::Type>{element::f32, element::f32}, std::vector<element::Type>{ add->get_output_element_type(0) },
|
||||
ngraph::op::TemporaryReplaceOutputType(newAddOrSubtract, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(multiplyEmptyPathValues, element::f32).get());
|
||||
|
@ -14,7 +14,6 @@
|
||||
|
||||
#include "low_precision/common/fake_quantize_dequantization.hpp"
|
||||
#include "low_precision/common/ie_lpt_exception.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
@ -144,7 +143,7 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
|
||||
|
||||
// concatenation axis is 1
|
||||
if (!subtractNodes.empty()) {
|
||||
const auto subtract = std::make_shared<DequantizationSubtract>(
|
||||
const auto subtract = std::make_shared<opset1::Subtract>(
|
||||
lastDequantization,
|
||||
NetworkHelper::toScalarIfPossible(subtractNodes.size() == 1ul ?
|
||||
subtractNodes[0] :
|
||||
@ -155,8 +154,8 @@ bool ConcatTransformation::transform(TransformationContext& context, ngraph::pat
|
||||
}
|
||||
|
||||
if (!multiplyNodes.empty()) {
|
||||
const auto multiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
DequantizationMultiply(
|
||||
const auto multiply = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||
opset1::Multiply(
|
||||
lastDequantization,
|
||||
NetworkHelper::toScalarIfPossible(multiplyNodes.size() == 1ul ?
|
||||
multiplyNodes[0] :
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/pattern/op/or.hpp>
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
@ -84,9 +83,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
||||
{
|
||||
std::shared_ptr<opset1::Subtract> subtract;
|
||||
if (dequantization.subtract != nullptr) {
|
||||
NetworkHelper::cleanRunTimeInfo(dequantization.subtract->shared_from_this());
|
||||
auto optimizedSubtract = NetworkHelper::optimizeSubtract(dequantization.subtract);
|
||||
|
||||
if (optimizedSubtract == nullptr) {
|
||||
optimizedSubtract = dequantization.subtract;
|
||||
}
|
||||
@ -174,7 +171,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
||||
}
|
||||
NetworkHelper::copyInfo(convolution, relaxedNewConvolution);
|
||||
|
||||
std::shared_ptr<ngraph::opset1::Multiply> newMultiplyAfter = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
std::shared_ptr<ngraph::opset1::Multiply> newMultiplyAfter = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
||||
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
|
||||
ngraph::op::TemporaryReplaceOutputType(relaxedNewConvolution, deqPrecision).get(),
|
||||
@ -244,7 +241,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
|
||||
});
|
||||
NetworkHelper::copyInfo(convolution, newConvolution);
|
||||
|
||||
auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
|
||||
auto newMultiplyAfter = std::make_shared<opset1::Multiply>(
|
||||
newConvolution,
|
||||
foldConvert(
|
||||
fold_reshape<opset1::Reshape>(
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/pattern/op/or.hpp>
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
@ -100,7 +99,6 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
||||
FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(convolutionBackpropData);
|
||||
{
|
||||
if (dequantization.subtract != nullptr) {
|
||||
NetworkHelper::cleanRunTimeInfo(dequantization.subtract->shared_from_this());
|
||||
NetworkHelper::optimizeSubtract(dequantization.subtract);
|
||||
}
|
||||
|
||||
@ -117,7 +115,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
||||
std::vector<element::Type>{deqPrecision, deqPrecision},
|
||||
std::vector<element::Type>{deqPrecision});
|
||||
|
||||
const auto newMultiplyAfter = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
const auto newMultiplyAfter = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
||||
std::vector<element::Type>{ dequantization.multiply->get_output_element_type(0) },
|
||||
ngraph::op::TemporaryReplaceOutputType(relaxedConvolutionBackpropData, deqPrecision).get(),
|
||||
@ -155,7 +153,7 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con
|
||||
|
||||
auto inputs = convolutionBackpropData->input_values();
|
||||
inputs[1] = multiplyFromWeights->input_value(0);
|
||||
auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
|
||||
auto newMultiplyAfter = std::make_shared<opset1::Multiply>(
|
||||
convolutionBackpropData->copy_with_new_inputs(inputs),
|
||||
foldConvert(
|
||||
fold_reshape<opset1::Reshape>(
|
||||
|
@ -41,12 +41,6 @@ bool EltwiseBaseTransformation::canBeTransformed(const TransformationContext& co
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((ov::as_type_ptr<ngraph::opset1::Constant>(operation->get_input_node_shared_ptr(0)) ||
|
||||
ov::as_type_ptr<ngraph::opset1::Constant>(operation->get_input_node_shared_ptr(1))) &&
|
||||
!FakeQuantizeDequantization::checkElementwise(operation)) {
|
||||
NetworkHelper::cleanRunTimeInfo(operation);
|
||||
}
|
||||
|
||||
FakeQuantizeDequantization dequantization1 = pass::low_precision::NetworkHelper::getDequantization(operation, 0ul);
|
||||
FakeQuantizeDequantization dequantization2 = pass::low_precision::NetworkHelper::getDequantization(operation, 1ul);
|
||||
if ((dequantization1.empty() || ((dequantization1.multiply != nullptr) && !FakeQuantizeDequantization::checkElementwise(dequantization1.multiply))) &&
|
||||
|
@ -204,7 +204,6 @@ std::shared_ptr<opset1::FakeQuantize> FakeQuantizeTransformation::fuseElementwis
|
||||
replace_node(fakeQuantize, newFakeQuantize);
|
||||
ngraph::copy_runtime_info({ fakeQuantize, eltwise }, newFakeQuantize);
|
||||
newFakeQuantize->set_friendly_name(fakeQuantize->get_friendly_name());
|
||||
NetworkHelper::cleanRunTimeInfo(newFakeQuantize);
|
||||
return newFakeQuantize;
|
||||
}
|
||||
|
||||
|
@ -75,7 +75,6 @@
|
||||
#include "low_precision/fuse_subtract_to_fake_quantize.hpp"
|
||||
#include "low_precision/fuse_multiply_to_fake_quantize.hpp"
|
||||
#include "low_precision/multiply_to_group_convolution.hpp"
|
||||
#include "low_precision/subtract_multiply_to_multiply_add.hpp"
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::LowPrecision, "LowPrecision", 0);
|
||||
|
||||
@ -245,7 +244,6 @@ bool ngraph::pass::low_precision::LowPrecision::run_on_function(std::shared_ptr<
|
||||
cleanup->add_matcher<ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation>(
|
||||
params,
|
||||
OperationPrecisionRestriction::getPrecisionsByOperationType<opset1::GroupConvolution>(precisionRestrictions));
|
||||
manager.register_pass<ngraph::pass::low_precision::SubtractMultiplyToMultiplyAddTransformation>(params);
|
||||
manager.register_pass<ngraph::pass::low_precision::FoldFakeQuantizeTransformation>(params);
|
||||
manager.register_pass<ngraph::pass::ConstantFolding>();
|
||||
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace ngraph::pass;
|
||||
@ -114,7 +113,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
||||
newMatMul->get_transpose_a(),
|
||||
newMatMul->get_transpose_b()));
|
||||
|
||||
const auto newSubtract = std::make_shared<DequantizationSubtract>(newMatMul, newSubConst);
|
||||
const auto newSubtract = std::make_shared<opset1::Subtract>(newMatMul, newSubConst);
|
||||
newSubtract->set_friendly_name(newMatMul->get_friendly_name() + "/DequantizationSubtract");
|
||||
copy_runtime_info({ newSubtract, matMul }, newSubtract);
|
||||
|
||||
@ -160,7 +159,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
|
||||
mulConst1,
|
||||
foldConvert(mulConst2, element::f32)));
|
||||
|
||||
const auto newMultiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
const auto newMultiply = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||
std::vector<element::Type>{ deqPrecision, deqPrecision },
|
||||
std::vector<element::Type>{ dequantization1.multiply->get_output_element_type(0) },
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, deqPrecision).get(),
|
||||
|
@ -15,7 +15,6 @@
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
|
||||
#include "low_precision/common/ie_lpt_exception.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
@ -95,10 +94,6 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p
|
||||
|
||||
NetworkHelper::copyInfo(multiplyParent.get_node_shared_ptr(), newMultiply);
|
||||
NetworkHelper::copyInfo(multiply, newMultiply);
|
||||
|
||||
if (!FakeQuantizeDequantization::checkElementwise(newMultiply)) {
|
||||
NetworkHelper::cleanRunTimeInfo(newMultiply);
|
||||
}
|
||||
} else {
|
||||
const int emptyPathIndex = fullPathIndex == 0 ? 1 : 0;
|
||||
|
||||
@ -135,7 +130,7 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p
|
||||
std::shared_ptr<Node> newMultiplyValuesFullPath = fold<opset1::Multiply>(multiplyValuesEmptyPath, multiplyValuesFullPath);
|
||||
OutputVector inputs{ {}, {} };
|
||||
inputs[emptyPathIndex] = dequantizationEmptyPath.data;
|
||||
inputs[fullPathIndex] = std::make_shared<DequantizationMultiply>(
|
||||
inputs[fullPathIndex] = std::make_shared<opset1::Multiply>(
|
||||
dequantizationFullPath.subtract == nullptr ?
|
||||
(dequantizationFullPath.convert == nullptr ?
|
||||
dequantizationFullPath.data : dequantizationFullPath.convert) :
|
||||
|
@ -16,7 +16,6 @@
|
||||
#include "ngraph/type/element_type.hpp"
|
||||
#include "ngraph/type/element_type_traits.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
#include "ngraph/opsets/opset6.hpp"
|
||||
|
||||
@ -155,8 +154,8 @@ bool MVNTransformation::transform(TransformationContext &context, ngraph::patter
|
||||
NetworkHelper::setOutDataPrecisionForTypeRelaxed(newMVN, deqPrecision);
|
||||
NetworkHelper::copyInfo(mvn, newMVN);
|
||||
|
||||
auto newMultiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
DequantizationMultiply(newMVN, newScalesConst),
|
||||
auto newMultiply = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||
opset1::Multiply(newMVN, newScalesConst),
|
||||
mvn->get_output_element_type(0));
|
||||
ngraph::copy_runtime_info({ mvn, newMultiply }, newMultiply);
|
||||
|
||||
|
@ -18,7 +18,6 @@
|
||||
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include "low_precision/common/ie_lpt_exception.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
#include "low_precision/layer_transformation.hpp"
|
||||
#include "low_precision/rt_info/precision_preserved_attribute.hpp"
|
||||
#include "low_precision/rt_info/intervals_alignment_attribute.hpp"
|
||||
@ -290,7 +289,7 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
|
||||
ngraph::op::TemporaryReplaceOutputType(inputs[1], element::f32).get());
|
||||
copyInfo(addAfterMultiply, newAdd);
|
||||
|
||||
auto newMultiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
auto newMultiply = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ multiply->get_output_element_type(0) },
|
||||
ngraph::op::TemporaryReplaceOutputType(newAdd, element::f32).get(),
|
||||
@ -354,14 +353,6 @@ void NetworkHelper::copyInfo(const std::shared_ptr<Node>& source, const std::sha
|
||||
copyInfo(std::vector<std::shared_ptr<Node>>{ source }, std::vector<std::shared_ptr<Node>>{ target });
|
||||
}
|
||||
|
||||
void NetworkHelper::cleanRunTimeInfo(const std::shared_ptr<Node>& layer) {
|
||||
auto& rt_info = layer->get_rt_info();
|
||||
auto attributeIter = rt_info.find("DEQUANTIZATION");
|
||||
if (rt_info.find("DEQUANTIZATION") != rt_info.end()) {
|
||||
rt_info.erase(attributeIter);
|
||||
}
|
||||
}
|
||||
|
||||
bool NetworkHelper::isScalarLike(std::shared_ptr<opset1::Constant> constant) {
|
||||
// ticket #48857
|
||||
// return constant->get_all_data_elements_bitwise_identical();
|
||||
@ -1068,12 +1059,12 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
|
||||
THROW_IE_LPT_EXCEPTION(*newFQ) << "unexpected operation type";
|
||||
}
|
||||
|
||||
convert2 = std::make_shared<DequantizationConvert>(convert, element::f32);
|
||||
convert2 = std::make_shared<opset1::Convert>(convert, element::f32);
|
||||
convert2->set_friendly_name(convert->get_friendly_name() + "/DequantizationConvert");
|
||||
ngraph::copy_runtime_info({ newFQ, convert2 }, convert2);
|
||||
} else {
|
||||
if (newFQ->get_output_element_type(0) != element::f32) {
|
||||
convert2 = std::make_shared<DequantizationConvert>(newFQ, element::f32);
|
||||
convert2 = std::make_shared<opset1::Convert>(newFQ, element::f32);
|
||||
convert2->set_friendly_name(newFQ->get_friendly_name() + "/DequantizationConvert");
|
||||
ngraph::copy_runtime_info({ newFQ, convert2 }, convert2);
|
||||
}
|
||||
@ -1082,14 +1073,14 @@ std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> NetworkHelper::decompos
|
||||
// TODO: why type relaxed?
|
||||
const std::shared_ptr<ngraph::Node> sub = shift == nullptr ?
|
||||
nullptr :
|
||||
std::make_shared<ngraph::op::TypeRelaxed<DequantizationSubtract>>(convert2 == nullptr ? newFQ : convert2, shift);
|
||||
std::make_shared<ngraph::op::TypeRelaxed<opset1::Subtract>>(convert2 == nullptr ? newFQ : convert2, shift);
|
||||
if (sub != nullptr) {
|
||||
sub->set_friendly_name(newFQ->get_friendly_name() + "/DequantizationSubtract");
|
||||
ngraph::copy_runtime_info({ newFQ, sub }, sub);
|
||||
}
|
||||
|
||||
const auto dequantize =
|
||||
std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||
std::vector<element::Type>{ element::f32, element::f32 },
|
||||
std::vector<element::Type>{ fq->get_output_element_type(0) },
|
||||
ngraph::op::TemporaryReplaceOutputType(sub == nullptr ? (convert2 == nullptr ? newFQ : convert2) : sub, element::f32).get(),
|
||||
@ -1143,20 +1134,20 @@ FakeQuantizeDequantization NetworkHelper::makeDequantization(
|
||||
}
|
||||
std::shared_ptr<ngraph::Node> parent = input;
|
||||
|
||||
std::shared_ptr<DequantizationConvert> convert;
|
||||
std::shared_ptr<opset1::Convert> convert;
|
||||
if (precision == deqPrecision) {
|
||||
convert = nullptr;
|
||||
} else {
|
||||
convert = std::make_shared<DequantizationConvert>(
|
||||
convert = std::make_shared<opset1::Convert>(
|
||||
parent,
|
||||
deqPrecision);
|
||||
parent = convert;
|
||||
}
|
||||
|
||||
std::shared_ptr<DequantizationSubtract> subtract;
|
||||
std::shared_ptr<opset1::Subtract> subtract;
|
||||
std::shared_ptr<opset1::Constant> subtractConstant;
|
||||
if (std::abs(dequantizationSub) > 1e-6) {
|
||||
subtract = std::make_shared<ngraph::op::TypeRelaxed<DequantizationSubtract>>(
|
||||
subtract = std::make_shared<ngraph::op::TypeRelaxed<opset1::Subtract>>(
|
||||
parent,
|
||||
std::make_shared<ngraph::opset1::Constant>(deqPrecision, ngraph::Shape({}), std::vector<float>({ dequantizationSub })));
|
||||
subtract->set_output_type(0, deqPrecision, subtract->get_output_partial_shape(0));
|
||||
@ -1165,8 +1156,8 @@ FakeQuantizeDequantization NetworkHelper::makeDequantization(
|
||||
|
||||
// mandatory
|
||||
auto multiplyConstant = std::make_shared<ngraph::opset1::Constant>(deqPrecision, ngraph::Shape({}), std::vector<float>({ dequantizationMul }));
|
||||
auto multiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
DequantizationMultiply(parent, multiplyConstant),
|
||||
auto multiply = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||
opset1::Multiply(parent, multiplyConstant),
|
||||
originalPrecision);
|
||||
|
||||
return FakeQuantizeDequantization(input, convert, subtract, nullptr, subtractConstant, multiply, multiplyConstant);
|
||||
@ -1220,7 +1211,7 @@ FakeQuantizeDequantization NetworkHelper::createDequantizationFromFakeQuantize(
|
||||
|
||||
std::shared_ptr<ngraph::opset1::Convert> convert;
|
||||
if (updatePrecision || (parent->output(0).get_element_type() != deqPrecision)) {
|
||||
convert = std::make_shared<DequantizationConvert>(parent, deqPrecision);
|
||||
convert = std::make_shared<opset1::Convert>(parent, deqPrecision);
|
||||
parent = convert;
|
||||
} else {
|
||||
convert = nullptr;
|
||||
@ -1228,13 +1219,13 @@ FakeQuantizeDequantization NetworkHelper::createDequantizationFromFakeQuantize(
|
||||
|
||||
std::shared_ptr<ngraph::opset1::Subtract> subtract;
|
||||
if (shift != nullptr) {
|
||||
subtract = make_shared<ngraph::op::TypeRelaxed<DequantizationSubtract>>(parent, shift);
|
||||
subtract = make_shared<ngraph::op::TypeRelaxed<opset1::Subtract>>(parent, shift);
|
||||
subtract->set_output_type(0, deqPrecision, subtract->get_output_partial_shape(0));
|
||||
parent = subtract;
|
||||
} else {
|
||||
subtract = nullptr;
|
||||
}
|
||||
const std::shared_ptr<ngraph::opset1::Multiply> multiply = std::make_shared<DequantizationMultiply>(parent, scale);
|
||||
const std::shared_ptr<ngraph::opset1::Multiply> multiply = std::make_shared<opset1::Multiply>(parent, scale);
|
||||
multiply->set_output_type(0, fq->get_output_element_type(0), multiply->get_output_partial_shape(0));
|
||||
|
||||
return FakeQuantizeDequantization(fq, convert, subtract, nullptr, shift, multiply, scale);
|
||||
@ -1591,7 +1582,7 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
|
||||
const auto convertOutputPrecision = dequantization.convert != nullptr ?
|
||||
dequantization.convert->get_output_element_type(0) :
|
||||
deqPrecision;
|
||||
parent = std::make_shared<DequantizationConvert>(parent, convertOutputPrecision);
|
||||
parent = std::make_shared<opset1::Convert>(parent, convertOutputPrecision);
|
||||
ngraph::copy_runtime_info({ newOperation, parent }, parent);
|
||||
}
|
||||
|
||||
@ -1605,7 +1596,7 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
|
||||
dequantization.subtractConstant->get_element_type();
|
||||
}
|
||||
|
||||
parent = std::make_shared<op::TypeRelaxed<DequantizationSubtract>>(
|
||||
parent = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(
|
||||
std::vector<element::Type>{element::f32, element::f32}, std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(
|
||||
@ -1614,7 +1605,7 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
|
||||
foldConvert(dequantization.subtractConstant, parentPrecision), element::f32).get());
|
||||
ngraph::copy_runtime_info({ newOperation, parent }, parent);
|
||||
} else {
|
||||
parent = std::make_shared<DequantizationSubtract>(parent, dequantization.subtractConvert);
|
||||
parent = std::make_shared<opset1::Subtract>(parent, dequantization.subtractConvert);
|
||||
ngraph::copy_runtime_info({ newOperation, parent }, parent);
|
||||
}
|
||||
}
|
||||
@ -1628,8 +1619,8 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
|
||||
", multiply dequantization constant " << multiplyConstant->get_friendly_name() << ":" << multiplyConstant->get_element_type();
|
||||
}
|
||||
|
||||
parent = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
DequantizationMultiply(parent,
|
||||
parent = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||
opset1::Multiply(parent,
|
||||
multiplyConstant->output(0).get_element_type() == parentPrecision ?
|
||||
multiplyConstant :
|
||||
foldConvert(multiplyConstant->output(0), parentPrecision)),
|
||||
@ -1639,7 +1630,6 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
|
||||
replace_node(operation, parent);
|
||||
|
||||
if ((!moveSubtract) && (dequantization.convert != nullptr) && (dequantization.subtract != nullptr)) {
|
||||
NetworkHelper::cleanRunTimeInfo(dequantization.subtract);
|
||||
// issue #43088
|
||||
// NetworkHelper::optimizeElementwise(dequantization.subtract);
|
||||
}
|
||||
|
@ -14,7 +14,6 @@
|
||||
#include "ngraph/type/element_type.hpp"
|
||||
#include "ngraph/type/element_type_traits.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace ngraph::pass;
|
||||
@ -135,7 +134,7 @@ bool NormalizeL2Transformation::transform(TransformationContext &context, ngraph
|
||||
normalize->get_eps_mode());
|
||||
NetworkHelper::copyInfo(normalize, newNormalize);
|
||||
|
||||
auto newMultiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
|
||||
auto newMultiply = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
|
||||
std::vector<ngraph::element::Type>{ element::f32, element::f32 },
|
||||
std::vector<ngraph::element::Type>{normalize->get_output_element_type(0)},
|
||||
ngraph::op::TemporaryReplaceOutputType(newNormalize, element::f32).get(),
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
@ -91,12 +90,12 @@ bool SplitTransformation::transform(TransformationContext& context, ngraph::patt
|
||||
}
|
||||
|
||||
if (dequantization.subtract) {
|
||||
const auto subtract = std::make_shared<DequantizationSubtract>(parent, splitedSub[i]);
|
||||
const auto subtract = std::make_shared<opset1::Subtract>(parent, splitedSub[i]);
|
||||
copy_runtime_info({ newSplit, subtract }, subtract);
|
||||
parent = subtract;
|
||||
}
|
||||
|
||||
const auto multiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(parent, splitedMul[i]);
|
||||
const auto multiply = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(parent, splitedMul[i]);
|
||||
NetworkHelper::setOutDataPrecisionForTypeRelaxed(multiply, dequantization.multiply->get_output_element_type(0));
|
||||
copy_runtime_info({ newSplit, multiply }, multiply);
|
||||
|
||||
|
@ -1,176 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision/subtract_multiply_to_multiply_add.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include "low_precision/common/ie_lpt_exception.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace pass {
|
||||
namespace low_precision {
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::SubtractMultiplyToMultiplyAddTransformation, "SubtractMultiplyToMultiplyAddTransformation", 0);
|
||||
|
||||
// Registers a matcher rooted at any opset1::Multiply. The callback declines the match
// (returns false) when transformation_callback() returns true for the root node,
// otherwise it forwards to transform().
SubtractMultiplyToMultiplyAddTransformation::SubtractMultiplyToMultiplyAddTransformation(const Params& params) : LayerTransformation(params) {
    auto multiplyPattern = pattern::wrap_type<opset1::Multiply>();

    ngraph::graph_rewrite_callback onMatch = [this](pattern::Matcher& matched) {
        auto root = matched.get_match_root();
        return transformation_callback(root) ? false : transform(*context, matched);
    };

    auto registeredMatcher = std::make_shared<ngraph::pattern::Matcher>(
        multiplyPattern,
        "SubtractMultiplyToMultiplyAddTransformation");
    this->register_matcher(registeredMatcher, onMatch);
}
|
||||
|
||||
// Walks upwards from `node` and collects the Convert -> Subtract -> Multiply chain that ends at it.
// Multiply and Subtract are recognised only when their input 1 is a Constant; each recognised
// operation moves the data cursor to that operation's input 0. Parts that are not found stay nullptr.
FakeQuantizeDequantization get(const std::shared_ptr<Node> node) {
    Output<Node> dataNode = node;

    std::shared_ptr<ngraph::opset1::Multiply> multiply;
    if (ov::is_type<opset1::Constant>(dataNode.get_node_shared_ptr()->get_input_node_shared_ptr(1))) {
        multiply = ov::as_type_ptr<ngraph::opset1::Multiply>(dataNode.get_node_shared_ptr());
    }
    std::shared_ptr<opset1::Constant> multiplyConstant;
    if (multiply != nullptr) {
        FakeQuantizeDequantization::fillDequantizationParams(multiply, multiplyConstant);
        dataNode = multiply->get_input_source_output(0);
    }

    std::shared_ptr<opset1::Subtract> subtract;
    if ((dataNode.get_node_shared_ptr()->get_input_size() > 1ul) &&
        ov::is_type<opset1::Constant>(dataNode.get_node_shared_ptr()->get_input_node_ptr(1))) {
        subtract = ov::as_type_ptr<opset1::Subtract>(dataNode.get_node_shared_ptr());
    }
    std::shared_ptr<opset1::Convert> subtractConvert;
    std::shared_ptr<opset1::Constant> subtractConstant;
    if (subtract != nullptr) {
        FakeQuantizeDequantization::fillDequantizationParams(subtract, subtractConvert, subtractConstant);
        dataNode = subtract->get_input_source_output(0);
    }

    const std::shared_ptr<opset1::Convert> convert = ov::as_type_ptr<opset1::Convert>(dataNode.get_node_shared_ptr());
    if (convert != nullptr) {
        dataNode = convert->get_input_source_output(0);
    }

    return FakeQuantizeDequantization(dataNode, convert, subtract, subtractConvert, subtractConstant, multiply, multiplyConstant);
}
|
||||
|
||||
// Rewrites the matched dequantization `(x - sub) * mul` into `x * mul + (-sub * mul)`:
// a new DequantizationMultiply is built on the dequantization data input and, when a Subtract
// is present, a DequantizationAdd with the folded constant is appended. Without a Subtract only
// the output precision of the new Multiply is set.
//
// Returns false when the pattern cannot be transformed, true after the graph was rewritten.
bool SubtractMultiplyToMultiplyAddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
    auto multiply = m.get_match_root();
    if (!canBeTransformed(context, multiply)) {
        return false;
    }

    multiply = NetworkHelper::separateInStandaloneBranch(multiply);
    FakeQuantizeDequantization dequantization = get(multiply);

    // Validate before the first dereference of dequantization.multiply: previously the
    // precisions were read from it ahead of the empty() check.
    if (dequantization.empty() || (dequantization.multiply == nullptr)) {
        return false;
    }

    const element::Type precisionAfterDequantization = dequantization.multiply->get_output_element_type(0);

    auto lastNew = dequantization.data;
    std::shared_ptr<Node> lastPrevious = dequantization.multiply;

    {
        const std::shared_ptr<Node> multiplyConstant = dequantization.multiply->get_input_node_shared_ptr(1);

        lastNew = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
            std::vector<element::Type>{element::f32, element::f32},
            std::vector<element::Type>{deqPrecision},
            ngraph::op::TemporaryReplaceOutputType(lastNew, element::f32).get(),
            ngraph::op::TemporaryReplaceOutputType(multiplyConstant, element::f32).get());

        auto lastNewPtr = lastNew.get_node_shared_ptr();
        NetworkHelper::copyInfo(dequantization.multiply, lastNewPtr);
    }

    if (dequantization.subtract != nullptr) {
        std::shared_ptr<Node> originalSubtractConstant = dequantization.subtract->get_input_node_shared_ptr(1);

        // New additive constant: (sub * -1) * mul, folded in deqPrecision.
        std::shared_ptr<Node> subtractConstant = fold<opset1::Multiply>(
            fold<opset1::Multiply>(
                foldConvert(originalSubtractConstant, deqPrecision),
                std::make_shared<opset1::Constant>(deqPrecision, Shape{}, std::vector<float>{ -1.f })),
            foldConvert(dequantization.multiply->get_input_node_shared_ptr(1), deqPrecision));

        if (ov::is_type<opset1::Constant>(subtractConstant)) {
            std::shared_ptr<opset1::Constant> constant = ov::as_type_ptr<opset1::Constant>(subtractConstant);
            if (NetworkHelper::isScalarLike(constant)) {
                subtractConstant = NetworkHelper::toScalar(constant);
            }
        }

        lastPrevious = lastNew.get_node_shared_ptr();
        lastNew = std::make_shared<op::TypeRelaxed<DequantizationAdd>>(
            std::vector<element::Type>{element::f32, element::f32},
            std::vector<element::Type>{precisionAfterDequantization},
            ngraph::op::TemporaryReplaceOutputType(lastNew, element::f32).get(),
            ngraph::op::TemporaryReplaceOutputType(subtractConstant, element::f32).get());

        auto lastNewPtr = lastNew.get_node_shared_ptr();
        NetworkHelper::copyInfo(dequantization.subtract, lastNewPtr);
    } else {
        NetworkHelper::setOutDataPrecision(ov::as_type_ptr<opset1::Multiply>(lastNew.get_node_shared_ptr()), precisionAfterDequantization);
    }

    // The original Multiply is the last node of the old chain and is the one replaced.
    const std::shared_ptr<Node> lastOriginal = dequantization.multiply;
    const std::shared_ptr<Node> lastNewPtr = lastNew.get_node_shared_ptr();
    replace_node(lastOriginal, lastNewPtr);

    updateOutput(context, lastNewPtr, lastPrevious);
    return true;
}
|
||||
|
||||
bool SubtractMultiplyToMultiplyAddTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const {
|
||||
FakeQuantizeDequantization dequantization = get(op);
|
||||
if (dequantization.empty() || (dequantization.multiply == nullptr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (((dequantization.subtract == nullptr) || (!dequantization.subtract->get_rt_info().count("DEQUANTIZATION"))) &&
|
||||
(!dequantization.multiply->get_rt_info().count("DEQUANTIZATION"))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return
|
||||
((dequantization.subtract == nullptr) || FakeQuantizeDequantization::checkElementwise(dequantization.subtract)) &&
|
||||
FakeQuantizeDequantization::checkElementwise(dequantization.multiply);
|
||||
}
|
||||
|
||||
// Always reports false: the answer does not depend on `layer`.
bool SubtractMultiplyToMultiplyAddTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
    return false;
}
|
||||
|
||||
} // namespace low_precision
|
||||
} // namespace pass
|
||||
} // namespace ngraph
|
@ -375,19 +375,6 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
|
||||
std::string errMsg;
|
||||
return MKLDNNFakeQuantizeNode::isSupportedOperation(node, errMsg);
|
||||
});
|
||||
postLPTPassManager.get_pass_config()->set_callback<ngraph::pass::AddMultiplyFusion>([](const_node_ptr &node) -> bool {
|
||||
if (auto mul_op = std::dynamic_pointer_cast<const ngraph::opset1::Multiply>(node)) {
|
||||
auto add_op = std::dynamic_pointer_cast<const ngraph::opset1::Add>(mul_op->get_input_node_shared_ptr(0));
|
||||
auto constant = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(mul_op->get_input_node_shared_ptr(1));
|
||||
bool is_dequantization = mul_op->get_rt_info().count("DEQUANTIZATION") != 0;
|
||||
if (add_op && constant && is_dequantization) {
|
||||
return ngraph::is_type<ngraph::opset1::Convolution>(add_op->get_input_node_shared_ptr(0)) ||
|
||||
ngraph::is_type<ngraph::opset1::GroupConvolution>(add_op->get_input_node_shared_ptr(0)) ||
|
||||
ngraph::is_type<ngraph::opset1::MatMul>(add_op->get_input_node_shared_ptr(0));
|
||||
}
|
||||
}
|
||||
return false;
|
||||
});
|
||||
postLPTPassManager.get_pass_config()->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
|
||||
// UnrollTI transformation is disabled by default, is turned on by LowLatency transformation
|
||||
return node->get_rt_info().count("UNROLL_TI") == 0;
|
||||
|
@ -1,78 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
/**
|
||||
* @brief Defines dequantization attribute
|
||||
* @file dequantization_attribute.hpp
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <set>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
#include <transformations_visibility.hpp>
|
||||
|
||||
|
||||
namespace ngraph {
|
||||
|
||||
/**
|
||||
* @ingroup ie_runtime_attr_api
|
||||
* @brief Dequantization class represents runtime info attribute that indicates
|
||||
* whether the operation is dequantization
|
||||
*/
|
||||
class TRANSFORMATIONS_API DequantizationAttr {
|
||||
private:
|
||||
std::string dequantization_attribute;
|
||||
|
||||
public:
|
||||
/**
|
||||
* A default constructor
|
||||
*/
|
||||
DequantizationAttr() = default;
|
||||
|
||||
/**
|
||||
* @brief Constructs a new object consisting of a single name *
|
||||
* @param[in] name The name
|
||||
*/
|
||||
explicit DequantizationAttr(const std::string& name) : dequantization_attribute(name) {}
|
||||
|
||||
/**
|
||||
* @brief return string with dequantization value
|
||||
*/
|
||||
std::string getDequantizationAttr() const;
|
||||
};
|
||||
/**
|
||||
* @ingroup ie_runtime_attr_api
|
||||
* @brief getDequantization return string with dequantization value
|
||||
* @param[in] node The node will be used to get Dequantization attribute
|
||||
*/
|
||||
TRANSFORMATIONS_API std::string getDequantization(const std::shared_ptr<ngraph::Node>& node);
|
||||
|
||||
} // namespace ngraph
|
||||
|
||||
namespace ov {
|
||||
|
||||
extern template class TRANSFORMATIONS_API VariantImpl<ngraph::DequantizationAttr>;
|
||||
|
||||
template<>
|
||||
class TRANSFORMATIONS_API VariantWrapper<ngraph::DequantizationAttr> : public VariantImpl<ngraph::DequantizationAttr> {
|
||||
public:
|
||||
static constexpr VariantTypeInfo type_info{"DEQUANTIZATION", 0};
|
||||
|
||||
const VariantTypeInfo &get_type_info() const override {
|
||||
return type_info;
|
||||
}
|
||||
|
||||
VariantWrapper(const value_type &value) : VariantImpl<value_type>(value) {}
|
||||
|
||||
std::shared_ptr<ngraph::Variant> merge(const ngraph::NodeVector & nodes) override;
|
||||
|
||||
std::shared_ptr<ngraph::Variant> init(const std::shared_ptr<ngraph::Node> & node) override;
|
||||
};
|
||||
|
||||
} // namespace ov
|
@ -13,7 +13,6 @@
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
#include "transformations/rt_info/dequantization_attribute.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
|
||||
|
@ -1,57 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <assert.h>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <iterator>
|
||||
#include <ostream>
|
||||
|
||||
#include <ngraph/node.hpp>
|
||||
#include <ngraph/variant.hpp>
|
||||
|
||||
#include "transformations/rt_info/dequantization_attribute.hpp"
|
||||
|
||||
using namespace ov;
|
||||
using namespace ngraph;
|
||||
|
||||
std::string DequantizationAttr::getDequantizationAttr() const {
|
||||
return dequantization_attribute;
|
||||
}
|
||||
|
||||
// Returns the dequantization string stored in the runtime info of `node`.
// An empty string is returned when the attribute is absent, and also when the entry
// registered under that name is not a VariantWrapper<DequantizationAttr> (previously the
// failed cast was dereferenced). The map is searched once instead of count() + at().
std::string ngraph::getDequantization(const std::shared_ptr<ngraph::Node>& node) {
    using getDequantizationWrapper = VariantWrapper<DequantizationAttr>;

    const auto& rtInfo = node->get_rt_info();
    const auto attrIt = rtInfo.find(getDequantizationWrapper::type_info.name);
    if (attrIt == rtInfo.end()) return "";

    const auto wrapper = ov::as_type_ptr<getDequantizationWrapper>(attrIt->second);
    if (wrapper == nullptr) return "";

    return wrapper->get().getDequantizationAttr();
}
|
||||
|
||||
template class ov::VariantImpl<DequantizationAttr>;
|
||||
|
||||
constexpr VariantTypeInfo VariantWrapper<DequantizationAttr>::type_info;
|
||||
|
||||
// Collects the non-empty dequantization strings of `nodes` into an ordered set and wraps the
// first element of that set (or an empty string when nothing was collected) in a new attribute.
std::shared_ptr<ngraph::Variant> VariantWrapper<DequantizationAttr>::merge(const ngraph::NodeVector & nodes) {
    std::set<std::string> collected;
    for (const auto& node : nodes) {
        const std::string value = getDequantization(node);
        if (value.empty()) {
            continue;
        }
        collected.insert(value);
    }

    const std::string merged = collected.empty() ? std::string("") : *collected.begin();
    return std::make_shared<VariantWrapper<DequantizationAttr>>(DequantizationAttr(merged));
}
|
||||
|
||||
// Creates the attribute for `node`, using the node's friendly name as the stored value.
std::shared_ptr<ngraph::Variant> VariantWrapper<DequantizationAttr>::init(const std::shared_ptr<ngraph::Node> & node) {
    const DequantizationAttr attribute(node->get_friendly_name());
    return std::make_shared<VariantWrapper<DequantizationAttr>>(attribute);
}
|
@ -1,117 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "layer_transformation.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <memory>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
#include <legacy/transformations/convert_opset1_to_legacy/convert_mul_or_add_finally.hpp>
|
||||
#include <ngraph/pass/constant_folding.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
|
||||
#include "lpt_ngraph_functions/convert_mul_or_add_finally_with_dequantization_function.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ngraph::pass;
|
||||
|
||||
namespace {
|
||||
|
||||
// Prints the values as "{ v0, v1, ... }"; an empty vector is printed as "{  }".
inline std::ostream& operator<<(std::ostream& os, const std::vector<float>& values) {
    os << "{ ";
    const char* separator = "";
    for (const float value : values) {
        os << separator << value;
        separator = ", ";
    }
    os << " }";
    return os;
}
|
||||
|
||||
class ConvertMulOrAddFinallyTransformationWithDequantizationTestValues {
|
||||
public:
|
||||
std::vector<float> multiplyConstValues;
|
||||
ngraph::Shape inputShape;
|
||||
ngraph::element::Type inputPrecision;
|
||||
TestTransformationParams params;
|
||||
};
|
||||
|
||||
using TestValuesType = ConvertMulOrAddFinallyTransformationWithDequantizationTestValues;
|
||||
|
||||
// Parameterized fixture: runs ConvertMulOrAddFinally + ConstantFolding on the original
// function and prepares the reference function built from the same parameters.
class ConvertMulOrAddFinallyTransformationWithDequantization : public LayerTransformation, public testing::WithParamInterface<TestValuesType> {
public:
    void SetUp() override {
        using namespace ngraph::builder::subgraph;
        const auto& values = GetParam();

        actualFunction = ConvertMulOrAddWithDequantizationFunction::getOriginal(
            values.inputShape,
            values.inputPrecision,
            values.multiplyConstValues);

        ngraph::pass::Manager passManager;
        passManager.register_pass<ngraph::pass::ConvertMulOrAddFinally>();
        passManager.register_pass<ngraph::pass::ConstantFolding>();
        passManager.run_passes(actualFunction);

        referenceFunction = ConvertMulOrAddWithDequantizationFunction::getReference(
            values.inputShape,
            values.inputPrecision,
            values.multiplyConstValues);
    }

    // Test name: "<name built by getTestCaseNameByParams>_<multiply constant values>".
    static std::string getTestCaseName(testing::TestParamInfo<ConvertMulOrAddFinallyTransformationWithDequantizationTestValues> obj) {
        const auto& values = obj.param;
        std::ostringstream name;
        name << LayerTransformation::getTestCaseNameByParams(values.inputPrecision, values.inputShape, values.params);
        name << "_" << values.multiplyConstValues;
        return name.str();
    }
};
|
||||
|
||||
// Compares the transformed function with the reference; on mismatch the message
// returned by compare_functions() is attached to the failure.
TEST_P(ConvertMulOrAddFinallyTransformationWithDequantization, CompareFunctions) {
    actualFunction->validate_nodes_and_infer_types();
    const auto comparison = compare_functions(referenceFunction, actualFunction, true, true, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
std::vector<ConvertMulOrAddFinallyTransformationWithDequantizationTestValues> testValues = {
|
||||
{
|
||||
{ -1.0 },
|
||||
{ 1, 1000 },
|
||||
ngraph::element::f32,
|
||||
LayerTransformation::createParamsU8I8()
|
||||
},
|
||||
{
|
||||
{ 128.0 },
|
||||
{ 1, 10 },
|
||||
ngraph::element::f32,
|
||||
LayerTransformation::createParamsU8I8()
|
||||
},
|
||||
{
|
||||
{ -64.5 },
|
||||
{ 1, 10 },
|
||||
ngraph::element::i8,
|
||||
LayerTransformation::createParamsU8I8()
|
||||
},
|
||||
{
|
||||
{ 1.2 },
|
||||
{ 1, 100 },
|
||||
ngraph::element::u8,
|
||||
LayerTransformation::createParamsI8I8()
|
||||
}
|
||||
};
|
||||
|
||||
// Instantiates the fixture for every entry of testValues under the smoke_LPT prefix.
INSTANTIATE_TEST_SUITE_P(smoke_LPT,
                         ConvertMulOrAddFinallyTransformationWithDequantization,
                         ::testing::ValuesIn(testValues),
                         ConvertMulOrAddFinallyTransformationWithDequantization::getTestCaseName);
|
||||
} // namespace
|
@ -471,7 +471,7 @@ const std::vector<GroupConvolutionTestValues> testValuesGroupConv = {
|
||||
1,
|
||||
ngraph::element::i8,
|
||||
false,
|
||||
{"DEQUANTIZATION", "DISABLED_CONSTANT_FOLDING"}
|
||||
{"DISABLED_CONSTANT_FOLDING"}
|
||||
},
|
||||
{}
|
||||
},
|
||||
|
@ -139,10 +139,6 @@ ngraph::builder::subgraph::DequantizationOperations LayerTransformation::toDequa
|
||||
|
||||
ngraph::builder::subgraph::DequantizationOperations::Multiply multiply;
|
||||
{
|
||||
const bool addDequantizationAttribute = dequantization.multiply != nullptr ?
|
||||
dequantization.multiply->get_rt_info().count("DEQUANTIZATION") != 0 :
|
||||
true;
|
||||
|
||||
const size_t constantIndex = dequantization.multiplyConstant && dequantization.multiply ?
|
||||
ngraph::pass::low_precision::NetworkHelper::getChildInputIndex(dequantization.multiplyConstant, dequantization.multiply) :
|
||||
0ul;
|
||||
@ -152,7 +148,7 @@ ngraph::builder::subgraph::DequantizationOperations LayerTransformation::toDequa
|
||||
dequantization.multiplyConstant->cast_vector<float>(),
|
||||
dequantization.multiplyConstant->output(0).get_element_type(),
|
||||
dequantization.multiplyConstant->output(0).get_shape(),
|
||||
addDequantizationAttribute,
|
||||
false,
|
||||
constantIndex) :
|
||||
ngraph::builder::subgraph::DequantizationOperations::Multiply();
|
||||
}
|
||||
|
@ -8,7 +8,6 @@
|
||||
#include "low_precision/convolution.hpp"
|
||||
#include "low_precision/mat_mul.hpp"
|
||||
#include "low_precision/fuse_convert.hpp"
|
||||
#include "low_precision/subtract_multiply_to_multiply_add.hpp"
|
||||
|
||||
using namespace ::testing;
|
||||
using namespace ngraph::pass::low_precision;
|
||||
|
@ -1,147 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "layer_transformation.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <memory>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
#include <legacy/transformations/convert_opset1_to_legacy/convert_mul_or_add_finally.hpp>
|
||||
#include <legacy/transformations/convert_opset1_to_legacy/convert_mul_add_to_scaleshift_or_power.hpp>
|
||||
#include <ngraph/pass/constant_folding.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
|
||||
#include "lpt_ngraph_functions/common/add.hpp"
|
||||
#include "lpt_ngraph_functions/mul_add_to_scaleshift_or_power_function.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ngraph;
|
||||
using namespace ngraph::pass;
|
||||
|
||||
namespace {
|
||||
|
||||
class MulAddToScaleshiftOrPowerParams {
|
||||
public:
|
||||
TestTransformationParams params;
|
||||
ngraph::builder::subgraph::DequantizationOperations::Multiply mulValues;
|
||||
ngraph::builder::subgraph::Add addValues;
|
||||
ngraph::element::Type precisionAfterOperation;
|
||||
};
|
||||
|
||||
typedef std::tuple <
|
||||
ngraph::element::Type,
|
||||
bool,
|
||||
ngraph::Shape,
|
||||
MulAddToScaleshiftOrPowerParams
|
||||
> MulAddToScaleshiftOrPowerTestValues;
|
||||
|
||||
// Streams the vector as "{ a, b, c }"; nothing is written between the braces for an empty vector.
inline std::ostream& operator<<(std::ostream& os, const std::vector<float>& values) {
    os << "{ ";
    for (std::size_t index = 0; index < values.size(); ++index) {
        if (index != 0) {
            os << ", ";
        }
        os << values[index];
    }
    os << " }";
    return os;
}
|
||||
|
||||
|
||||
class MulAddToScaleshiftOrPowerTransformation :
|
||||
public LayerTransformation,
|
||||
public testing::WithParamInterface<MulAddToScaleshiftOrPowerTestValues> {
|
||||
public:
|
||||
void SetUp() override {
|
||||
const auto inputPrecision = std::get<0>(GetParam());
|
||||
const auto isDequantization = std::get<1>(GetParam());
|
||||
const auto inputShape = std::get<2>(GetParam());
|
||||
const auto testValues = std::get<3>(GetParam());
|
||||
|
||||
actualFunction = ngraph::builder::subgraph::MulAddToScaleshiftOrPowerFunction::getOriginal(
|
||||
inputPrecision,
|
||||
inputShape,
|
||||
isDequantization,
|
||||
testValues.mulValues,
|
||||
testValues.addValues);
|
||||
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::ConvertMulAddToScaleShiftOrPower>();
|
||||
manager.register_pass<ngraph::pass::ConstantFolding>();
|
||||
manager.run_passes(actualFunction);
|
||||
|
||||
referenceFunction = ngraph::builder::subgraph::MulAddToScaleshiftOrPowerFunction::getReference(
|
||||
inputPrecision,
|
||||
inputShape,
|
||||
isDequantization,
|
||||
testValues.mulValues,
|
||||
testValues.addValues,
|
||||
testValues.precisionAfterOperation);
|
||||
}
|
||||
|
||||
static std::string getTestCaseName(testing::TestParamInfo<MulAddToScaleshiftOrPowerTestValues> obj) {
|
||||
const auto inputPrecision = std::get<0>(obj.param);
|
||||
const auto isDequantization = std::get<1>(obj.param);
|
||||
const auto inputShape = std::get<2>(obj.param);
|
||||
const auto testValues = std::get<3>(obj.param);
|
||||
|
||||
std::ostringstream result;
|
||||
result << toString(testValues.params) << "_" << inputPrecision << "_" << inputShape << "_"
|
||||
<< testValues.mulValues.values << "_" << testValues.addValues.values << (isDequantization ? "_ScaleShift_" : "_Power_")
|
||||
<< testValues.precisionAfterOperation;
|
||||
return result.str();
|
||||
}
|
||||
};
|
||||
|
||||
// Re-validates the transformed function and compares it with the reference.
TEST_P(MulAddToScaleshiftOrPowerTransformation, CompareFunctions) {
    actualFunction->validate_nodes_and_infer_types();

    // compare_functions yields a pair: success flag and a diagnostic message.
    const auto comparison = compare_functions(referenceFunction, actualFunction, true, true, true);
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
const std::vector<ngraph::element::Type> precision = {
|
||||
ngraph::element::i32,
|
||||
ngraph::element::f32,
|
||||
ngraph::element::u8,
|
||||
ngraph::element::i8,
|
||||
};
|
||||
|
||||
const std::vector<bool> isDequantization = { false, true };
|
||||
|
||||
const std::vector<ngraph::Shape> inputShape = {
|
||||
{ 1, 3, 9, 9 },
|
||||
{ 4, 3, 9, 9 }
|
||||
};
|
||||
|
||||
const std::vector<MulAddToScaleshiftOrPowerParams> testValues = {
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{ 0.1f },
|
||||
{ 128.f },
|
||||
ngraph::element::f32
|
||||
},
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{ 0.1f },
|
||||
{ -128.f },
|
||||
ngraph::element::f32
|
||||
}
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_LPT,
|
||||
MulAddToScaleshiftOrPowerTransformation,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(precision),
|
||||
::testing::ValuesIn(isDequantization),
|
||||
::testing::ValuesIn(inputShape),
|
||||
::testing::ValuesIn(testValues)),
|
||||
MulAddToScaleshiftOrPowerTransformation::getTestCaseName);
|
||||
} // namespace
|
@ -1,356 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "layer_transformation.hpp"
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <memory>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <transformations/utils/utils.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
#include "low_precision/subtract_multiply_to_multiply_add.hpp"
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
#include "simple_low_precision_transformer.hpp"
|
||||
#include "lpt_ngraph_functions/subtract_multiply_to_multiply_add_function.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ngraph;
|
||||
using namespace ngraph::pass;
|
||||
using namespace ngraph::builder::subgraph;
|
||||
|
||||
namespace {
|
||||
|
||||
class SubtractMultiplyToMultiplyAddTransformationTestValues {
|
||||
public:
|
||||
class Actual {
|
||||
public:
|
||||
ngraph::element::Type precisionBefore;
|
||||
DequantizationOperations dequantization;
|
||||
ngraph::element::Type precisionAfter;
|
||||
};
|
||||
class Expected {
|
||||
public:
|
||||
ngraph::element::Type precisionBefore;
|
||||
DequantizationOperations dequantization;
|
||||
ngraph::element::Type precisionAfter;
|
||||
Multiply multiply;
|
||||
Add add;
|
||||
};
|
||||
TestTransformationParams params;
|
||||
Actual actual;
|
||||
Expected expected;
|
||||
};
|
||||
|
||||
typedef std::tuple<
|
||||
ngraph::PartialShape,
|
||||
SubtractMultiplyToMultiplyAddTransformationTestValues> SubtractMultiplyToMultiplyAddTransformationParams;
|
||||
|
||||
class SubtractMultiplyToMultiplyAddTransformation :
|
||||
public LayerTransformation,
|
||||
public testing::WithParamInterface<SubtractMultiplyToMultiplyAddTransformationParams> {
|
||||
public:
|
||||
void SetUp() override {
|
||||
const ngraph::PartialShape inputShape = std::get<0>(GetParam());
|
||||
SubtractMultiplyToMultiplyAddTransformationTestValues testValues = std::get<1>(GetParam());
|
||||
|
||||
actualFunction = SubtractMultiplyToMultiplyAddFunction::getOriginal(
|
||||
inputShape,
|
||||
testValues.actual.precisionBefore,
|
||||
testValues.actual.dequantization,
|
||||
testValues.actual.precisionAfter);
|
||||
|
||||
SimpleLowPrecisionTransformer transform;
|
||||
transform.add<low_precision::SubtractMultiplyToMultiplyAddTransformation, ngraph::opset1::Multiply>(testValues.params);
|
||||
transform.transform(actualFunction);
|
||||
|
||||
referenceFunction = SubtractMultiplyToMultiplyAddFunction::getReference(
|
||||
inputShape,
|
||||
testValues.expected.precisionBefore,
|
||||
testValues.expected.dequantization,
|
||||
testValues.expected.precisionAfter,
|
||||
testValues.expected.multiply,
|
||||
testValues.expected.add);
|
||||
}
|
||||
|
||||
static std::string getTestCaseName(testing::TestParamInfo<SubtractMultiplyToMultiplyAddTransformationParams> obj) {
|
||||
const ngraph::PartialShape inputShape = std::get<0>(obj.param);
|
||||
SubtractMultiplyToMultiplyAddTransformationTestValues testValues = std::get<1>(obj.param);
|
||||
|
||||
std::ostringstream result;
|
||||
result <<
|
||||
inputShape << "_" <<
|
||||
testValues.actual.precisionBefore << "_" <<
|
||||
testValues.actual.dequantization << "_" <<
|
||||
testValues.actual.precisionAfter << "_" <<
|
||||
testValues.expected.precisionBefore << "_" <<
|
||||
testValues.expected.dequantization << "_" <<
|
||||
testValues.expected.precisionAfter << "_" <<
|
||||
testValues.expected.multiply.values << "_" <<
|
||||
testValues.expected.multiply.constantShape << "_" <<
|
||||
testValues.expected.multiply.outPrecision << "_" <<
|
||||
testValues.expected.add.values << "_" <<
|
||||
testValues.expected.add.constantShape << "_" <<
|
||||
testValues.expected.add.outPrecision;
|
||||
return result.str();
|
||||
}
|
||||
};
|
||||
|
||||
// Re-validates the transformed function, checks that node names stay unique,
// and compares the result with the reference function.
TEST_P(SubtractMultiplyToMultiplyAddTransformation, CompareFunctions) {
    actualFunction->validate_nodes_and_infer_types();

    // compare_functions yields a pair: success flag and a diagnostic message.
    const auto comparison = compare_functions(referenceFunction, actualFunction, true, true);

    // issue #63030
    ASSERT_TRUE(LayerTransformation::allNamesAreUnique(actualFunction)) << "Not all names are unique";
    ASSERT_TRUE(comparison.first) << comparison.second;
}
|
||||
|
||||
namespace testValues1 {
|
||||
const std::vector<ngraph::PartialShape> inputShapes = {
|
||||
{1, 3, 299, 299},
|
||||
{Dimension::dynamic(), 3, Dimension::dynamic(), Dimension::dynamic()}
|
||||
};
|
||||
|
||||
const std::vector<SubtractMultiplyToMultiplyAddTransformationTestValues> testValues = {
|
||||
// Multiply {} -> Multiply + Subtract {1x3x1x1}
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{{}, {}, {0.1f}},
|
||||
ngraph::element::f32,
|
||||
},
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{{0.1f}, {ngraph::element::f32}},
|
||||
{}
|
||||
},
|
||||
},
|
||||
// Multiply {} -> Multiply + Subtract {1x3x1x1}
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{{}, {}, {{0.1f, 0.2f, 0.3f}}},
|
||||
ngraph::element::f32,
|
||||
},
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{{0.1f, 0.2f, 0.3f}, {ngraph::element::f32}},
|
||||
{}
|
||||
},
|
||||
},
|
||||
// FP32 Subtract + Multiply {} -> Multiply + Subtract {1x3x1x1}
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{{ngraph::element::f32}, {{128.f, 128.f / 2.f, 128.f / 4.f}}, {0.1f}},
|
||||
ngraph::element::f32,
|
||||
},
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{{0.1f}, {ngraph::element::f32}},
|
||||
{{-12.8f, -12.8f / 2.f, -12.8f / 4.f}, {ngraph::element::f32}}
|
||||
},
|
||||
},
|
||||
// FP32 Subtract + Multiply {} -> Multiply + Subtract {1x3x1x1}
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{{ngraph::element::f32}, {128}, {0.1f}},
|
||||
ngraph::element::f32,
|
||||
},
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{{0.1f}, {ngraph::element::f32}},
|
||||
{{-12.8f}, {ngraph::element::f32}}
|
||||
},
|
||||
},
|
||||
// U8 Multiply {} -> Multiply + Subtract {1x3x1x1}
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {}, {0.1f}},
|
||||
ngraph::element::f32,
|
||||
},
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{},
|
||||
ngraph::element::u8,
|
||||
{{0.1f}, {ngraph::element::f32}},
|
||||
{}
|
||||
},
|
||||
},
|
||||
// U8 Subtract + Multiply {} -> Multiply + Subtract {1x3x1x1}
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {128}, {0.1f}},
|
||||
ngraph::element::f32,
|
||||
},
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{},
|
||||
ngraph::element::u8,
|
||||
{{0.1f}, {ngraph::element::f32}},
|
||||
{{-12.8f}, {ngraph::element::f32}}
|
||||
},
|
||||
},
|
||||
// empty
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {128}, {}},
|
||||
ngraph::element::f32,
|
||||
},
|
||||
{
|
||||
ngraph::element::u8,
|
||||
{{ngraph::element::f32}, {128}, {}},
|
||||
ngraph::element::u8,
|
||||
{},
|
||||
{}
|
||||
},
|
||||
},
|
||||
// empty
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
},
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{},
|
||||
{}
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_LPT,
|
||||
SubtractMultiplyToMultiplyAddTransformation,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(inputShapes),
|
||||
::testing::ValuesIn(testValues)),
|
||||
SubtractMultiplyToMultiplyAddTransformation::getTestCaseName);
|
||||
} // namespace testValues1
|
||||
|
||||
namespace testValues2 {
|
||||
const std::vector<ngraph::PartialShape> inputShapes = {
|
||||
{2, 5, 2, 2}
|
||||
};
|
||||
|
||||
const std::vector<SubtractMultiplyToMultiplyAddTransformationTestValues> testValues = {
|
||||
// FP32 Multiply {5x1x1} -> Multiply + Subtract {1x5x1x1}
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{{}, {}, {{0.1f, 0.2f, 0.3f, 0.4f, 0.5f}, ngraph::element::f32, {5, 1, 1}}},
|
||||
ngraph::element::f32,
|
||||
},
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{{0.1f, 0.2f, 0.3f, 0.4f, 0.5f}, {ngraph::element::f32}, {5, 1, 1}},
|
||||
{}
|
||||
},
|
||||
},
|
||||
// FP32 Multiply {5x1x2}
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{{}, {}, {{0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f}, ngraph::element::f32, {5, 1, 2}}},
|
||||
ngraph::element::f32,
|
||||
},
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{{}, {}, {{0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f}, ngraph::element::f32, {5, 1, 2}}},
|
||||
ngraph::element::f32,
|
||||
{},
|
||||
{}
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_LPT,
|
||||
SubtractMultiplyToMultiplyAddTransformation,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(inputShapes),
|
||||
::testing::ValuesIn(testValues)),
|
||||
SubtractMultiplyToMultiplyAddTransformation::getTestCaseName);
|
||||
} // namespace testValues2
|
||||
|
||||
namespace testValues3 {
|
||||
const std::vector<ngraph::PartialShape> inputShapesWithDynamicChannels = {
|
||||
{Dimension::dynamic(), Dimension::dynamic(), Dimension::dynamic(), Dimension::dynamic()},
|
||||
PartialShape::dynamic()
|
||||
};
|
||||
|
||||
const std::vector<SubtractMultiplyToMultiplyAddTransformationTestValues> testValues = {
|
||||
// Multiply {} -> Multiply + Subtract {1x3x1x1}
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{{}, {}, {0.1f}},
|
||||
ngraph::element::f32,
|
||||
},
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{},
|
||||
ngraph::element::f32,
|
||||
{{0.1f}, {ngraph::element::f32}},
|
||||
{}
|
||||
},
|
||||
},
|
||||
// Multiply {} -> Multiply + Subtract {1x3x1x1}
|
||||
{
|
||||
LayerTransformation::createParamsU8I8(),
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{{}, {}, {{0.1f, 0.2f, 0.3f}}},
|
||||
ngraph::element::f32,
|
||||
},
|
||||
{
|
||||
ngraph::element::f32,
|
||||
{{}, {}, {{0.1f, 0.2f, 0.3f}}},
|
||||
ngraph::element::f32,
|
||||
{},
|
||||
{}
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
smoke_LPT,
|
||||
SubtractMultiplyToMultiplyAddTransformation,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(inputShapesWithDynamicChannels),
|
||||
::testing::ValuesIn(testValues)),
|
||||
SubtractMultiplyToMultiplyAddTransformation::getTestCaseName);
|
||||
} // namespace testValues3
|
||||
} // namespace
|
@ -39,7 +39,6 @@
|
||||
#include "low_precision/fuse_subtract_to_fake_quantize.hpp"
|
||||
#include "low_precision/fuse_multiply_to_fake_quantize.hpp"
|
||||
#include "low_precision/multiply_to_group_convolution.hpp"
|
||||
#include "low_precision/subtract_multiply_to_multiply_add.hpp"
|
||||
|
||||
#include "lpt_ngraph_functions/transformations_after_split_function.hpp"
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
@ -149,10 +148,6 @@ void getTransformerWithTransformationByName(
|
||||
transformer.add<MultiplyToGroupConvolutionTransformation, ngraph::opset1::Multiply>(params);
|
||||
return;
|
||||
}
|
||||
if (name == "SubtractMultiplyToMultiplyAddTransformation") {
|
||||
transformer.add<SubtractMultiplyToMultiplyAddTransformation, ngraph::opset1::Multiply>(params);
|
||||
return;
|
||||
}
|
||||
throw std::runtime_error("unexpected transformation name");
|
||||
}
|
||||
|
||||
@ -210,8 +205,7 @@ const std::vector<std::string> transformationNames = {
|
||||
"FuseConvertTransformation",
|
||||
"FuseSubtractToFakeQuantizeTransformation",
|
||||
"FuseMultiplyToFakeQuantizeTransformation",
|
||||
"MultiplyToGroupConvolutionTransformation",
|
||||
"SubtractMultiplyToMultiplyAddTransformation",
|
||||
"MultiplyToGroupConvolutionTransformation"
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
|
@ -70,15 +70,9 @@ public:
|
||||
ngraph::Output<ngraph::Node> last = input;
|
||||
if (!mul_const.skip) {
|
||||
last = std::make_shared<ngraph::opset1::Multiply>(last, create_constant(mul_const.shape, mul_const.value));
|
||||
if (is_dequantization) {
|
||||
ngraph::builder::subgraph::addDequantizationAttribute(last.get_node_shared_ptr());
|
||||
}
|
||||
}
|
||||
if (!add_const.skip) {
|
||||
last = std::make_shared<ngraph::opset1::Add>(last, create_constant(add_const.shape, add_const.value));
|
||||
if (is_dequantization) {
|
||||
ngraph::builder::subgraph::addDequantizationAttribute(last.get_node_shared_ptr());
|
||||
}
|
||||
}
|
||||
last = std::make_shared<ngraph::opset1::Relu>(last);
|
||||
return std::make_shared<ngraph::Function>(ngraph::NodeVector{last.get_node_shared_ptr()}, ngraph::ParameterVector{input});
|
||||
|
@ -5,7 +5,6 @@
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
#include "test_utils/cpu_test_utils.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
using namespace ngraph;
|
||||
using FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc;
|
||||
@ -58,10 +57,10 @@ protected:
|
||||
const auto param = std::make_shared<opset6::Parameter>(inputPrecision, inputShape);
|
||||
Shape constShape = Shape(inputShape.size(), 1);
|
||||
constShape[1] = scaleShift.second.size();
|
||||
const auto subtract = std::make_shared<pass::low_precision::DequantizationSubtract>(
|
||||
const auto subtract = std::make_shared<opset1::Subtract>(
|
||||
param,
|
||||
std::make_shared<opset6::Constant>(inputPrecision, constShape, scaleShift.second));
|
||||
const auto multiply = std::make_shared<pass::low_precision::DequantizationMultiply>(
|
||||
const auto multiply = std::make_shared<opset1::Multiply>(
|
||||
param,
|
||||
std::make_shared<opset6::Constant>(inputPrecision, constShape, scaleShift.first));
|
||||
Shape inConstShape = Shape(inputShape.size(), 1);
|
||||
|
@ -10,7 +10,6 @@
|
||||
#include <ngraph/op/constant.hpp>
|
||||
#include "ngraph_ops/type_relaxed.hpp"
|
||||
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
#include "low_precision/rt_info/intervals_alignment_attribute.hpp"
|
||||
#include "low_precision/rt_info/quantization_alignment_attribute.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
@ -95,8 +94,6 @@ std::shared_ptr<ngraph::opset1::FakeQuantize> makeFakeQuantizeTypeRelaxed(
|
||||
const ngraph::element::Type constantPrecision,
|
||||
const FakeQuantizeOnDataWithConstant& fqOnData);
|
||||
|
||||
std::shared_ptr<Node> addDequantizationAttribute(const std::shared_ptr<Node>& op);
|
||||
|
||||
template <typename ... Args>
|
||||
void addAttribute(std::vector<std::shared_ptr<ngraph::Node>> nodes, Args&& ... args) {
|
||||
const auto attribute = std::make_shared<ngraph::VariantWrapper<QuantizationAlignmentAttributePtr>>(
|
||||
|
@ -16,7 +16,7 @@ public:
|
||||
class Convert {
|
||||
public:
|
||||
Convert();
|
||||
Convert(const ngraph::element::Type outPrecision, const bool addDeqAttr = true);
|
||||
Convert(const ngraph::element::Type outPrecision, const bool toRemove = true);
|
||||
bool empty() const noexcept;
|
||||
bool equal(const DequantizationOperations::Convert& value) const noexcept;
|
||||
bool operator==(const Convert& value) const noexcept {
|
||||
@ -32,14 +32,14 @@ public:
|
||||
class Subtract {
|
||||
public:
|
||||
Subtract();
|
||||
Subtract(const float value, const bool addDeqAttr = true);
|
||||
Subtract(const std::vector<float>& values, const bool addDeqAttr = true);
|
||||
Subtract(const std::vector<float>& values, const ngraph::element::Type outPrecision, const bool addDeqAttr = true);
|
||||
Subtract(const float value, const bool toRemove = true);
|
||||
Subtract(const std::vector<float>& values);
|
||||
Subtract(const std::vector<float>& values, const ngraph::element::Type outPrecision);
|
||||
Subtract(
|
||||
const std::vector<float>& values,
|
||||
const ngraph::element::Type outPrecision,
|
||||
const ngraph::Shape& constantShape,
|
||||
const bool addDequantizationAttribute = true,
|
||||
const bool toRemove = false,
|
||||
const size_t constantIndex = 1ul,
|
||||
const ngraph::element::Type constantPrecision = ngraph::element::undefined,
|
||||
const bool addConvert = false,
|
||||
@ -59,7 +59,6 @@ public:
|
||||
ngraph::element::Type outPrecision = ngraph::element::undefined;
|
||||
ngraph::Shape constantShape;
|
||||
bool constantShapeIsDefined = false;
|
||||
bool addDequantizationAttribute = true;
|
||||
size_t constantIndex = 1ul;
|
||||
ngraph::element::Type constantPrecision = ngraph::element::undefined;
|
||||
bool addConvert = false;
|
||||
@ -80,7 +79,7 @@ public:
|
||||
const std::vector<float>& values,
|
||||
const ngraph::element::Type outPrecision,
|
||||
const ngraph::Shape& constantShape,
|
||||
const bool addDequantizationAttribute = true,
|
||||
const bool toRemove = false,
|
||||
const size_t constantIndex = 1ul,
|
||||
const ngraph::element::Type constantPrecision = ngraph::element::undefined);
|
||||
bool empty() const noexcept;
|
||||
@ -94,7 +93,6 @@ public:
|
||||
ngraph::element::Type outPrecision = ngraph::element::undefined;
|
||||
ngraph::Shape constantShape;
|
||||
bool constantShapeIsDefined = false;
|
||||
bool addDequantizationAttribute = true;
|
||||
size_t constantIndex = 1ul;
|
||||
ngraph::element::Type constantPrecision = ngraph::element::undefined;
|
||||
|
||||
@ -128,7 +126,6 @@ inline std::ostream& operator<<(std::ostream& out, const DequantizationOperation
|
||||
subtract.outPrecision << "_" <<
|
||||
subtract.constantShape << "_" <<
|
||||
subtract.constantShapeIsDefined << "_" <<
|
||||
subtract.addDequantizationAttribute << "_" <<
|
||||
subtract.constantIndex << "_" <<
|
||||
subtract.constantPrecision << "_" <<
|
||||
subtract.addConvert;
|
||||
@ -140,7 +137,6 @@ inline std::ostream& operator<<(std::ostream& out, const DequantizationOperation
|
||||
multiply.outPrecision << "_" <<
|
||||
multiply.constantShape << "_" <<
|
||||
multiply.constantShapeIsDefined << "_" <<
|
||||
multiply.addDequantizationAttribute << "_" <<
|
||||
multiply.constantIndex << "_" <<
|
||||
multiply.constantPrecision;
|
||||
}
|
||||
|
@ -300,7 +300,7 @@ std::shared_ptr<ngraph::Function> AddFunction::getReference(
|
||||
std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(dequantizationOp1, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(dequantizationOp2, element::f32).get())) :
|
||||
std::make_shared<ngraph::op::TypeRelaxed<DequantizationSubtract>>(
|
||||
std::make_shared<ngraph::op::TypeRelaxed<opset1::Subtract>>(
|
||||
std::vector<element::Type>{ element::f32, element::f32 },
|
||||
std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(dequantizationOp1, element::f32).get(),
|
||||
|
@ -16,7 +16,7 @@ namespace ngraph {
|
||||
namespace builder {
|
||||
namespace subgraph {
|
||||
|
||||
using namespace ngraph::pass::low_precision;
|
||||
using namespace ngraph::pass::low_precision;
|
||||
|
||||
std::shared_ptr<Node> makeDequantization(
|
||||
const Output<Node>& data,
|
||||
@ -24,9 +24,7 @@ std::shared_ptr<Node> makeDequantization(
|
||||
Output<Node> parent = data;
|
||||
|
||||
if (!dequantizationOperations.convert.empty()) {
|
||||
std::shared_ptr<ngraph::opset1::Convert> convert = dequantizationOperations.convert.addDequantizationAttribute ?
|
||||
std::make_shared<ngraph::pass::low_precision::DequantizationConvert>(data, dequantizationOperations.convert.outPrecision) :
|
||||
std::make_shared<ngraph::opset1::Convert>(data, dequantizationOperations.convert.outPrecision);
|
||||
auto convert = std::make_shared<opset1::Convert>(data, dequantizationOperations.convert.outPrecision);
|
||||
NetworkHelper::copyInfo({ data.get_node_shared_ptr(), convert }, convert);
|
||||
convert->set_friendly_name(data.get_node_shared_ptr()->get_friendly_name() + "/DequantizationConvert");
|
||||
parent = convert;
|
||||
@ -80,53 +78,28 @@ std::shared_ptr<Node> makeDequantization(
|
||||
(((dequantizationOperations.subtract.constantPrecision == element::undefined) ||
|
||||
(dequantizationOperations.subtract.constantPrecision == parent.get_element_type())) ||
|
||||
dequantizationOperations.subtract.addConvert)) {
|
||||
if (dequantizationOperations.subtract.constantIndex == 1ul) {
|
||||
subtract = dequantizationOperations.subtract.addDequantizationAttribute ?
|
||||
std::make_shared<ngraph::pass::low_precision::DequantizationSubtract>(parent, subtractConst) :
|
||||
std::make_shared<ngraph::opset1::Subtract>(parent, subtractConst);
|
||||
} else {
|
||||
subtract = dequantizationOperations.subtract.addDequantizationAttribute ?
|
||||
std::make_shared<ngraph::pass::low_precision::DequantizationSubtract>(subtractConst, parent) :
|
||||
std::make_shared<ngraph::opset1::Subtract>(subtractConst, parent);
|
||||
}
|
||||
subtract = dequantizationOperations.subtract.constantIndex == 1ul ?
|
||||
std::make_shared<opset1::Subtract>(parent, subtractConst) :
|
||||
subtract = std::make_shared<opset1::Subtract>(subtractConst, parent);
|
||||
} else {
|
||||
// TODO: use templates
|
||||
if (dequantizationOperations.subtract.addDequantizationAttribute) {
|
||||
if (dequantizationOperations.subtract.constantIndex == 1ul) {
|
||||
subtract = std::make_shared<op::TypeRelaxed<ngraph::pass::low_precision::DequantizationSubtract>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(subtractConst, element::f32).get());
|
||||
} else {
|
||||
subtract = std::make_shared<op::TypeRelaxed<ngraph::pass::low_precision::DequantizationSubtract>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(subtractConst, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get());
|
||||
}
|
||||
if (dequantizationOperations.subtract.constantIndex == 1ul) {
|
||||
subtract = std::make_shared<op::TypeRelaxed<ngraph::opset1::Subtract>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(subtractConst, element::f32).get());
|
||||
} else {
|
||||
if (dequantizationOperations.subtract.constantIndex == 1ul) {
|
||||
subtract = std::make_shared<op::TypeRelaxed<ngraph::opset1::Subtract>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(subtractConst, element::f32).get());
|
||||
} else {
|
||||
subtract = std::make_shared<op::TypeRelaxed<ngraph::opset1::Subtract>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(subtractConst, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get());
|
||||
}
|
||||
subtract = std::make_shared<op::TypeRelaxed<ngraph::opset1::Subtract>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(subtractConst, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get());
|
||||
}
|
||||
|
||||
subtract->set_friendly_name(data.get_node_shared_ptr()->get_friendly_name() + "/DequantizationSubtract");
|
||||
ngraph::pass::low_precision::NetworkHelper::setOutDataPrecision(subtract, dequantizationOperations.subtract.outPrecision);
|
||||
}
|
||||
if (!dequantizationOperations.subtract.addDequantizationAttribute) {
|
||||
ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(subtract);
|
||||
}
|
||||
|
||||
NetworkHelper::copyInfo({ data.get_node_shared_ptr(), subtract }, subtract);
|
||||
|
||||
if (!dequantizationOperations.subtract.attributes.empty()) {
|
||||
@ -179,15 +152,9 @@ std::shared_ptr<Node> makeMultiply(const Output<Node>& parent, const Dequantizat
|
||||
shape,
|
||||
values);
|
||||
|
||||
if (multiply.addDequantizationAttribute) {
|
||||
newMultiply = multiply.constantIndex == 1ul ?
|
||||
std::make_shared<ngraph::pass::low_precision::DequantizationMultiply>(parent, constant) :
|
||||
std::make_shared<ngraph::pass::low_precision::DequantizationMultiply>(constant, parent);
|
||||
} else {
|
||||
newMultiply = multiply.constantIndex == 1ul ?
|
||||
std::make_shared<ngraph::opset1::Multiply>(parent, constant) :
|
||||
std::make_shared<ngraph::opset1::Multiply>(constant, parent);
|
||||
}
|
||||
newMultiply = multiply.constantIndex == 1ul ?
|
||||
std::make_shared<ngraph::opset1::Multiply>(parent, constant) :
|
||||
std::make_shared<ngraph::opset1::Multiply>(constant, parent);
|
||||
} else {
|
||||
const std::shared_ptr<ngraph::opset1::Constant> constant = std::make_shared<ngraph::opset1::Constant>(
|
||||
multiply.constantPrecision != element::undefined ?
|
||||
@ -197,31 +164,17 @@ std::shared_ptr<Node> makeMultiply(const Output<Node>& parent, const Dequantizat
|
||||
values);
|
||||
|
||||
// TODO: use templates
|
||||
if (multiply.addDequantizationAttribute) {
|
||||
newMultiply = multiply.constantIndex == 1ul ?
|
||||
std::make_shared<op::TypeRelaxed<ngraph::pass::low_precision::DequantizationMultiply>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ multiply.outPrecision },
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(constant, element::f32).get()) :
|
||||
std::make_shared<op::TypeRelaxed<ngraph::pass::low_precision::DequantizationMultiply>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ multiply.outPrecision },
|
||||
ngraph::op::TemporaryReplaceOutputType(constant, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get());
|
||||
} else {
|
||||
newMultiply = multiply.constantIndex == 1ul ?
|
||||
std::make_shared<op::TypeRelaxed<ngraph::opset1::Multiply>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ multiply.outPrecision },
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(constant, element::f32).get()) :
|
||||
std::make_shared<op::TypeRelaxed<ngraph::opset1::Multiply>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ multiply.outPrecision },
|
||||
ngraph::op::TemporaryReplaceOutputType(constant, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get());
|
||||
}
|
||||
newMultiply = multiply.constantIndex == 1ul ?
|
||||
std::make_shared<op::TypeRelaxed<ngraph::opset1::Multiply>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ multiply.outPrecision },
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(constant, element::f32).get()) :
|
||||
std::make_shared<op::TypeRelaxed<ngraph::opset1::Multiply>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ multiply.outPrecision },
|
||||
ngraph::op::TemporaryReplaceOutputType(constant, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get());
|
||||
}
|
||||
|
||||
return newMultiply;
|
||||
@ -345,12 +298,6 @@ std::shared_ptr<ngraph::opset1::FakeQuantize> makeFakeQuantizeTypeRelaxed(
|
||||
fqOnData.outputPrecision == ngraph::element::undefined ? constantPrecision : fqOnData.outputPrecision);
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> addDequantizationAttribute(const std::shared_ptr<Node>& op) {
|
||||
auto& rtInfo = op->get_rt_info();
|
||||
rtInfo["DEQUANTIZATION"] = std::make_shared<VariantWrapper<DequantizationAttr>>(DequantizationAttr());
|
||||
return op;
|
||||
}
|
||||
|
||||
void addAttributes(std::vector<std::shared_ptr<ngraph::Node>> nodes, std::vector<std::shared_ptr<Variant>> attributes) {
|
||||
for (const auto& node : nodes) {
|
||||
for (const auto& attribute : attributes) {
|
||||
|
@ -14,10 +14,9 @@ DequantizationOperations::Convert::Convert() :
|
||||
outPrecision(ngraph::element::undefined)
|
||||
{}
|
||||
|
||||
DequantizationOperations::Convert::Convert(const ngraph::element::Type outPrecision, const bool addDeqAttr) :
|
||||
DequantizationOperations::Convert::Convert(const ngraph::element::Type outPrecision, const bool toRemove) :
|
||||
isEmpty(false),
|
||||
outPrecision(outPrecision),
|
||||
addDequantizationAttribute(addDeqAttr)
|
||||
outPrecision(outPrecision)
|
||||
{}
|
||||
|
||||
bool DequantizationOperations::Convert::empty() const noexcept {
|
||||
@ -34,37 +33,34 @@ DequantizationOperations::Subtract::Subtract() :
|
||||
constantShapeIsDefined(false)
|
||||
{}
|
||||
|
||||
DequantizationOperations::Subtract::Subtract(const float value, const bool addDeqAttr) :
|
||||
DequantizationOperations::Subtract::Subtract(const float value, const bool toRemove) :
|
||||
isEmpty(false),
|
||||
values({ value }),
|
||||
outPrecision(ngraph::element::undefined),
|
||||
constantShapeIsDefined(false),
|
||||
addDequantizationAttribute(addDeqAttr) {
|
||||
constantShapeIsDefined(false) {
|
||||
}
|
||||
|
||||
DequantizationOperations::Subtract::Subtract(const std::vector<float>& values, const bool addDeqAttr) :
|
||||
DequantizationOperations::Subtract::Subtract(const std::vector<float>& values) :
|
||||
isEmpty(values.empty()),
|
||||
values(values),
|
||||
outPrecision(ngraph::element::undefined),
|
||||
constantShapeIsDefined(false),
|
||||
addDequantizationAttribute(addDeqAttr) {
|
||||
constantShapeIsDefined(false) {
|
||||
}
|
||||
|
||||
DequantizationOperations::Subtract::Subtract(const std::vector<float>& values,
|
||||
const ngraph::element::Type outPrecision,
|
||||
const bool addDeqAttr) :
|
||||
DequantizationOperations::Subtract::Subtract(
|
||||
const std::vector<float>& values,
|
||||
const ngraph::element::Type outPrecision) :
|
||||
isEmpty(false),
|
||||
values(values),
|
||||
outPrecision(outPrecision),
|
||||
constantShapeIsDefined(false),
|
||||
addDequantizationAttribute(addDeqAttr) {
|
||||
constantShapeIsDefined(false) {
|
||||
}
|
||||
|
||||
DequantizationOperations::Subtract::Subtract(
|
||||
const std::vector<float>& values,
|
||||
const ngraph::element::Type outPrecision,
|
||||
const ngraph::Shape& constantShape,
|
||||
const bool addDequantizationAttribute,
|
||||
const bool toRemove,
|
||||
const size_t constantIndex,
|
||||
const ngraph::element::Type constantPrecision,
|
||||
const bool addConvert,
|
||||
@ -75,7 +71,6 @@ DequantizationOperations::Subtract::Subtract(
|
||||
outPrecision(outPrecision),
|
||||
constantShape(constantShape),
|
||||
constantShapeIsDefined(true),
|
||||
addDequantizationAttribute(addDequantizationAttribute),
|
||||
constantIndex(constantIndex),
|
||||
constantPrecision(constantPrecision),
|
||||
addConvert(addConvert),
|
||||
@ -93,7 +88,6 @@ bool DequantizationOperations::Subtract::equal(const DequantizationOperations::S
|
||||
(outPrecision == value.outPrecision) &&
|
||||
(constantShape == value.constantShape) &&
|
||||
(constantShapeIsDefined == value.constantShapeIsDefined) &&
|
||||
(addDequantizationAttribute == value.addDequantizationAttribute) &&
|
||||
(constantIndex == value.constantIndex);
|
||||
}
|
||||
|
||||
@ -133,14 +127,13 @@ DequantizationOperations::Multiply::Multiply(
|
||||
const std::vector<float>& values,
|
||||
const ngraph::element::Type outPrecision,
|
||||
const ngraph::Shape& constantShape,
|
||||
const bool addDequantizationAttribute,
|
||||
const bool toRemove,
|
||||
const size_t constantIndex,
|
||||
ngraph::element::Type constantPrecision) :
|
||||
isEmpty(false),
|
||||
values(values),
|
||||
outPrecision(outPrecision),
|
||||
constantShape(constantShape),
|
||||
addDequantizationAttribute(addDequantizationAttribute),
|
||||
constantIndex(constantIndex),
|
||||
constantPrecision(constantPrecision),
|
||||
constantShapeIsDefined(true) {
|
||||
@ -155,7 +148,6 @@ bool DequantizationOperations::Multiply::equal(const DequantizationOperations::M
|
||||
(values == value.values) &&
|
||||
(outPrecision == value.outPrecision) &&
|
||||
(constantShape == value.constantShape) &&
|
||||
(addDequantizationAttribute == value.addDequantizationAttribute) &&
|
||||
(constantIndex == value.constantIndex) &&
|
||||
(constantPrecision == value.constantPrecision) &&
|
||||
(constantShapeIsDefined == value.constantShapeIsDefined);
|
||||
|
@ -14,7 +14,6 @@
|
||||
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include <legacy/ngraph_ops/scaleshift.hpp>
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace builder {
|
||||
@ -36,8 +35,9 @@ std::shared_ptr<ngraph::Function> ConvertMulOrAddWithDequantizationFunction::get
|
||||
std::vector<element::Type>{});
|
||||
|
||||
|
||||
const auto multiply = std::make_shared<ngraph::pass::low_precision::DequantizationMultiply>(relu,
|
||||
std::make_shared<opset1::Constant>(element::f32, inputShape, multiplyConst));
|
||||
const auto multiply = std::make_shared<opset1::Multiply>(
|
||||
relu,
|
||||
std::make_shared<opset1::Constant>(element::f32, inputShape, multiplyConst));
|
||||
|
||||
multiply->set_friendly_name("output");
|
||||
|
||||
@ -62,7 +62,6 @@ std::shared_ptr<ngraph::Function> ConvertMulOrAddWithDequantizationFunction::get
|
||||
const auto weights = std::make_shared<opset1::Constant>(element::f32, inputShape, multiplyConst);
|
||||
const auto bias = std::make_shared<opset1::Constant>(element::f32, inputShape, 0.0);
|
||||
std::shared_ptr<Node> scaleShift = std::make_shared<ngraph::op::ScaleShiftIE>(relu, weights, bias);
|
||||
addDequantizationAttribute(scaleShift);
|
||||
|
||||
scaleShift->set_friendly_name("output");
|
||||
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
|
||||
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
|
||||
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
using namespace ngraph::pass::low_precision;
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
|
||||
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
|
||||
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
using namespace ngraph::pass::low_precision;
|
||||
@ -106,7 +105,7 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginalWithIncorrectW
|
||||
fakeQuantizeOnWeights.inputLowValues, fakeQuantizeOnWeights.inputHighValues,
|
||||
fakeQuantizeOnWeights.outputLowValues, fakeQuantizeOnWeights.outputHighValues);
|
||||
|
||||
const auto subtract = isCorrect ? nullptr : std::make_shared<DequantizationSubtract>(fqOnWeights,
|
||||
const auto subtract = isCorrect ? nullptr : std::make_shared<opset1::Subtract>(fqOnWeights,
|
||||
std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, Shape{1, 1, 1, 1}, 3.0f));
|
||||
|
||||
const auto convolution = std::make_shared<ngraph::opset1::Convolution>(
|
||||
@ -148,7 +147,7 @@ std::shared_ptr<ngraph::Function> ConvolutionFunction::getOriginalWithIncorrectW
|
||||
fakeQuantizeOnWeights.inputLowValues, fakeQuantizeOnWeights.inputHighValues,
|
||||
fakeQuantizeOnWeights.outputLowValues, fakeQuantizeOnWeights.outputHighValues);
|
||||
|
||||
const auto subtract = isCorrect ? nullptr : std::make_shared<DequantizationSubtract>(fqOnWeights,
|
||||
const auto subtract = isCorrect ? nullptr : std::make_shared<opset1::Subtract>(fqOnWeights,
|
||||
std::make_shared<ngraph::opset1::Constant>(precision, Shape{ 1, 1, 1, 1 }, 3.0f));
|
||||
|
||||
const auto convolution = std::make_shared<ngraph::opset1::Convolution>(
|
||||
|
@ -26,7 +26,7 @@ std::shared_ptr<ngraph::Function> ElementwiseWithMultiParentDequantizationFuncti
|
||||
const auto input1_1 = std::make_shared<ngraph::opset1::Parameter>(precision1, inputShape);
|
||||
const auto input1_2 = std::make_shared<ngraph::opset1::Parameter>(precision1, ngraph::Shape({ inputShape[0], inputShape[1], 1, 1 }));
|
||||
const std::shared_ptr<ngraph::Node> multiply1 = std::make_shared<ngraph::op::TypeRelaxed<ngraph::opset1::Multiply>>(
|
||||
DequantizationMultiply(
|
||||
opset1::Multiply(
|
||||
ngraph::op::TemporaryReplaceOutputType(input1_1, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(input1_2, element::f32).get()),
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
@ -37,7 +37,7 @@ std::shared_ptr<ngraph::Function> ElementwiseWithMultiParentDequantizationFuncti
|
||||
const auto input2_1 = std::make_shared<ngraph::opset1::Parameter>(precision1, inputShape);
|
||||
const auto input2_2 = std::make_shared<ngraph::opset1::Parameter>(precision1, ngraph::Shape({ inputShape[0], inputShape[1], 1, 1 }));
|
||||
const std::shared_ptr<ngraph::Node> multiply2 = std::make_shared<ngraph::op::TypeRelaxed<ngraph::opset1::Multiply>>(
|
||||
DequantizationMultiply(
|
||||
opset1::Multiply(
|
||||
ngraph::op::TemporaryReplaceOutputType(input2_1, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(input2_2, element::f32).get()),
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include "ngraph_ops/type_relaxed.hpp"
|
||||
#include "ngraph_functions/subgraph_builders.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
|
||||
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
|
||||
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
using namespace ngraph::opset1;
|
||||
using namespace ngraph::pass::low_precision;
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include "ngraph_ops/type_relaxed.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
#include <legacy/ngraph_ops/power.hpp>
|
||||
#include <legacy/ngraph_ops/scaleshift.hpp>
|
||||
@ -28,20 +27,15 @@ namespace subgraph {
|
||||
const auto input = std::make_shared<ngraph::op::v0::Parameter>(precision, inputShape);
|
||||
|
||||
const auto mulConst = ngraph::op::Constant::create(ngraph::element::f32, mulValues.constantShape, mulValues.values);
|
||||
const auto mul = std::make_shared<ngraph::op::TypeRelaxed<ngraph::pass::low_precision::DequantizationMultiply>>(
|
||||
const auto mul = std::make_shared<ngraph::op::TypeRelaxed<opset1::Multiply>>(
|
||||
std::vector<element::Type>{element::f32, element::f32}, std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(input, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(mulConst, element::f32).get());
|
||||
|
||||
const auto addConst = ngraph::op::Constant::create(ngraph::element::f32, addValues.constantShape, addValues.values);
|
||||
const auto add = std::make_shared<ngraph::pass::low_precision::DequantizationAdd>(mul, addConst);
|
||||
const auto add = std::make_shared<opset1::Add>(mul, addConst);
|
||||
add->set_friendly_name("add");
|
||||
|
||||
if (!isDequantization) {
|
||||
ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(mul);
|
||||
ngraph::pass::low_precision::NetworkHelper::cleanRunTimeInfo(add);
|
||||
}
|
||||
|
||||
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(add) };
|
||||
return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MulAddToScaleshiftOrPowerFunction");
|
||||
}
|
||||
@ -62,7 +56,6 @@ namespace subgraph {
|
||||
std::shared_ptr<ngraph::Node> lastNode;
|
||||
if (isDequantization) {
|
||||
std::shared_ptr<Node> scaleshift = std::make_shared<ngraph::op::ScaleShiftIE>(input, weights, biases, precisionAfterOperation);
|
||||
addDequantizationAttribute(scaleshift);
|
||||
scaleshift->set_friendly_name("add");
|
||||
lastNode = scaleshift;
|
||||
} else {
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph_ops/type_relaxed.hpp>
|
||||
#include "ngraph_functions/subgraph_builders.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
|
||||
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||
@ -63,13 +62,9 @@ std::shared_ptr<ngraph::Function> MultiplyFunction::get(
|
||||
const BranchNodes branchNodes1 = getBranch(actualValues.branch1);
|
||||
const BranchNodes branchNodes2 = getBranch(actualValues.branch2);
|
||||
|
||||
auto multiplyOriginal = actualValues.isDequantization ?
|
||||
DequantizationMultiply(
|
||||
ngraph::op::TemporaryReplaceOutputType(branchNodes1.dequantization, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(branchNodes2.dequantization, element::f32).get()) :
|
||||
ngraph::opset1::Multiply(
|
||||
ngraph::op::TemporaryReplaceOutputType(branchNodes1.dequantization, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(branchNodes2.dequantization, element::f32).get());
|
||||
auto multiplyOriginal = opset1::Multiply(
|
||||
ngraph::op::TemporaryReplaceOutputType(branchNodes1.dequantization, element::f32).get(),
|
||||
ngraph::op::TemporaryReplaceOutputType(branchNodes2.dequantization, element::f32).get());
|
||||
|
||||
const std::shared_ptr<ngraph::Node> multiply = std::make_shared<ngraph::op::TypeRelaxed<ngraph::opset1::Multiply>>(
|
||||
multiplyOriginal,
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include <ngraph_ops/type_relaxed.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include "ngraph_functions/subgraph_builders.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
|
@ -238,7 +238,7 @@ std::shared_ptr<ngraph::Function> PrecisionPropagationFunction::getReferenceWith
|
||||
shape,
|
||||
std::vector<float>(ngraph::shape_size(shape), 128.f));
|
||||
|
||||
auto subtract = std::make_shared<op::TypeRelaxed<ngraph::pass::low_precision::DequantizationSubtract>>(
|
||||
auto subtract = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(
|
||||
std::vector<element::Type>{element::f32, element::f32},
|
||||
std::vector<element::Type>{ element::f32 },
|
||||
ngraph::op::TemporaryReplaceOutputType(result2, element::f32).get(),
|
||||
|
@ -14,7 +14,6 @@
|
||||
#include "ngraph_functions/subgraph_builders.hpp"
|
||||
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||
#include "low_precision/network_helper.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
namespace ngraph {
|
||||
namespace builder {
|
||||
|
@ -7,7 +7,6 @@
|
||||
#include <ngraph/ngraph.hpp>
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include "lpt_ngraph_functions/common/builders.hpp"
|
||||
#include "low_precision/common/dequantization_op.hpp"
|
||||
|
||||
using namespace ngraph::pass::low_precision;
|
||||
|
||||
@ -66,11 +65,11 @@ std::shared_ptr<ngraph::Function> SubtractMultiplyToMultiplyAddFunction::getRefe
|
||||
std::shared_ptr<Node> parent = dequantizationOp;
|
||||
|
||||
if (!multiply.empty()) {
|
||||
parent = makeElementwise<DequantizationMultiply>(parent, multiply);
|
||||
parent = makeElementwise<opset1::Multiply>(parent, multiply);
|
||||
}
|
||||
|
||||
if (!add.empty()) {
|
||||
parent = makeElementwise<DequantizationAdd>(parent, add);
|
||||
parent = makeElementwise<opset1::Add>(parent, add);
|
||||
}
|
||||
parent->set_friendly_name("output");
|
||||
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include "ngraph/ops.hpp"
|
||||
#include "ngraph/variant.hpp"
|
||||
#include "ngraph/opsets/opset6.hpp"
|
||||
#include "transformations/rt_info/dequantization_attribute.hpp"
|
||||
#include "transformations/rt_info/fused_names_attribute.hpp"
|
||||
#include "transformations/rt_info/primitives_priority_attribute.hpp"
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
@ -219,18 +218,6 @@ TEST(NetworkContext_CNNNetwork, HashWithPrimitivesPriority) {
|
||||
NetworkCompilationContext::computeHash(net3, {}));
|
||||
}
|
||||
|
||||
TEST(NetworkContext_CNNNetwork, HashWithDequantization) {
|
||||
auto setDeqEmpty = [&](Node::RTMap& rtInfo) {
|
||||
rtInfo[VariantWrapper<DequantizationAttr>::type_info.name] =
|
||||
std::make_shared<VariantWrapper<DequantizationAttr>>(DequantizationAttr());
|
||||
};
|
||||
auto setDeq = [&](Node::RTMap& rtInfo, const std::string& name) {
|
||||
rtInfo[VariantWrapper<DequantizationAttr>::type_info.name] =
|
||||
std::make_shared<VariantWrapper<DequantizationAttr>>(DequantizationAttr(name));
|
||||
};
|
||||
checkCustomRt(setDeqEmpty, setDeq);
|
||||
}
|
||||
|
||||
TEST(NetworkContext_CNNNetwork, HashWithFusedNames) {
|
||||
auto setFusedEmpty = [&](Node::RTMap& rtInfo) {
|
||||
rtInfo[VariantWrapper<FusedNames>::type_info.name] =
|
||||
|
Loading…
Reference in New Issue
Block a user