[LPT] MoveFakeQuantize Q/DQ (#7430)

* Q/DQ + multichannel support

backup

fix interval

mfk_function.cpp

WIP moveDequantizationBefore

add moveDequantizationBefore function

add cpu and gpu tests

attribute cmp false

attribute cmp false

rm temp line

mkl-dnn update

concat with multichannels for move_fake_quantize_function, bad runtime info for q/dq

rm extra qualification

fix runtime info for q/dq

add support of multichannel FakeQuantize, bad test for it

working tests for multi-channel FQ

rm workaround

cpplint fix

cpplint fix

VariadicSplit doesn't work

ieFuncTests work

cpuFuncTests work

Fix benchmark_app build (#7577)

[GPU] Added onednn dependency. (#6564)

cpp lint

cpplint

fix get_shape

fix fq constants

cpp lint

some fixes in mfk.cpp

resolve conversations, add split_nodes function

add new tests for multi-channels, rename NetworkHelper::split_consts_before_concat()

fix get fq constants

* add new multi-channels test and use constant_fold to split constants

* remove extra spaces

fix namespace termination

fix namespace termination
Nikita Demashov 2022-01-11 22:49:16 +03:00 committed by GitHub
parent e9be93aec5
commit dce2aa2c0e
13 changed files with 824 additions and 279 deletions
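
A note on the terminology used in the changes below: "Q/DQ" is the explicit quantize/dequantize pattern, a FakeQuantize followed by a Convert to an integer type and a dequantizing Subtract/Multiply. This commit teaches MoveFakeQuantize to push that whole pattern, including per-channel (multichannel) intervals, from after a Concat onto each Concat input. The snippet below is a minimal, self-contained sketch of the arithmetic only (plain C++, no ngraph; the function name is illustrative, and the 0.01 scale mirrors the test data in this commit):

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// FakeQuantize with 256 levels on [in_lo, in_hi] -> [out_lo, out_hi].
float fake_quantize(float x, float in_lo, float in_hi, float out_lo, float out_hi, int levels = 256) {
    x = std::min(std::max(x, in_lo), in_hi);
    const float step = (in_hi - in_lo) / (levels - 1);
    const float q = std::round((x - in_lo) / step);
    return q * (out_hi - out_lo) / (levels - 1) + out_lo;
}

int main() {
    // Q/DQ form used in the tests: FQ to [0, 255], Convert to u8, then Multiply by 0.01.
    const float x = 1.234f;
    const float fq = fake_quantize(x, 0.f, 2.55f, 0.f, 255.f);   // quantized value in [0, 255]
    const uint8_t q = static_cast<uint8_t>(fq);                   // Convert -> u8
    const float dq = 0.01f * static_cast<float>(q);               // dequantizing Multiply
    std::cout << x << " -> q=" << int(q) << " -> dq=" << dq << "\n";  // dq is close to x
}

The round trip keeps the dequantized value close to the original input, which is the property the transformation has to preserve while it reshuffles the graph.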

View File

@@ -333,6 +333,13 @@ protected:
         const bool updatePrecision,
         const bool moveSubtract = true) const;
+    std::shared_ptr<ngraph::Node> moveDequantizationBefore(
+        TransformationContext& context,
+        const std::shared_ptr<ngraph::Node>& operation,
+        const FakeQuantizeDequantization& dequantization,
+        const bool updatePrecision,
+        const bool moveSubtract = true) const;
     void updateOutput(
         TransformationContext &context,
         std::shared_ptr<ngraph::Node> lastNode,

View File

@@ -17,6 +17,7 @@ public:
     NGRAPH_RTTI_DECLARATION;
     MoveFakeQuantize(const Params& params = Params());
     bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
+    bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const override;
     bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
 };

View File

@@ -173,6 +173,16 @@ public:
         const bool updatePrecision,
         const bool moveSubtract);
+    static InsertDequantizationResult moveDequantizationBefore(
+        const std::shared_ptr<ngraph::Node>& operation,
+        const FakeQuantizeDequantization& dequantization,
+        const bool updatePrecision,
+        const bool moveSubtract);
+    static std::vector<std::vector<std::shared_ptr<ngraph::opset1::Constant>>> split_consts_before_concat(
+        const std::shared_ptr<ov::Node> concat,
+        const std::vector<std::shared_ptr<opset1::Constant>> currConstants);
     static bool checkConstantValuePrecision(const element::Type expectedPrecision, const std::shared_ptr<Node>& constant);
     static size_t getChildInputIndex(const std::shared_ptr<ngraph::Node>& parent, const std::shared_ptr<ngraph::Node>& child);

View File

@@ -387,6 +387,17 @@ std::shared_ptr<ngraph::Node> LayerTransformation::moveDequantizationAfter(
     return result.newOperation;
 }
+std::shared_ptr<ngraph::Node> LayerTransformation::moveDequantizationBefore(
+    TransformationContext& context,
+    const std::shared_ptr<ngraph::Node>& operation,
+    const FakeQuantizeDequantization& dequantization,
+    const bool updatePrecision,
+    const bool moveSubtract) const {
+    const auto result = ngraph::pass::low_precision::NetworkHelper::moveDequantizationBefore(operation, dequantization, updatePrecision, moveSubtract);
+    updateOutput(context, result.newOperation, result.lastDequantization);
+    return result.newOperation;
+}
 void LayerTransformation::updateOutput(
     TransformationContext &context,
     std::shared_ptr<ngraph::Node> lastNode,
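
moveDequantizationBefore, declared earlier and implemented here and in NetworkHelper below, takes a dequantization that sits after an operation (a Concat in this transformation) and re-attaches it to each of the operation's inputs, splitting per-channel constants along the concatenation axis. The identity it relies on can be checked with a short plain-C++ sketch (helper names are illustrative, one value per channel for brevity):

#include <cassert>
#include <vector>

// Concatenate two per-channel blocks (one value per channel for brevity).
std::vector<float> concat(const std::vector<float>& a, const std::vector<float>& b) {
    std::vector<float> out(a);
    out.insert(out.end(), b.begin(), b.end());
    return out;
}

// Element-wise multiply by per-channel scales.
std::vector<float> mul(const std::vector<float>& x, const std::vector<float>& s) {
    std::vector<float> out(x.size());
    for (size_t i = 0; i < x.size(); ++i) out[i] = x[i] * s[i];
    return out;
}

int main() {
    const std::vector<float> a = {10.f, 20.f};                              // input 1: 2 channels
    const std::vector<float> b = {30.f, 40.f, 50.f};                        // input 2: 3 channels
    const std::vector<float> scales = {0.01f, 0.02f, 0.03f, 0.04f, 0.05f};  // per-channel Multiply

    // Dequantization applied after the concat ...
    const auto after = mul(concat(a, b), scales);
    // ... equals dequantization moved before the concat, with the scales split per input.
    const std::vector<float> sa(scales.begin(), scales.begin() + 2);
    const std::vector<float> sb(scales.begin() + 2, scales.end());
    const auto before = concat(mul(a, sa), mul(b, sb));

    assert(after == before);
}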

View File

@@ -8,7 +8,7 @@
 #include <ngraph/opsets/opset1.hpp>
 #include <memory>
-#include <ngraph/ngraph.hpp>
+#include <ngraph/node.hpp>
 #include <ngraph/opsets/opset1.hpp>
 #include <ngraph/pattern/op/or.hpp>
@@ -39,26 +39,12 @@ MoveFakeQuantize::MoveFakeQuantize(const Params& params) : LayerTransformation(p
         output_low,
         output_high });
-    ngraph::graph_rewrite_callback callback = [=](pattern::Matcher& m) {
+    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
         auto op = m.get_match_root();
         if (transformation_callback(op)) {
             return false;
         }
-        // workaround: only per-tensor quantization is allowed
-        const auto& pattern_map = m.get_pattern_value_map();
-        const auto is_scalar = [&](const std::shared_ptr<ngraph::Node>& wrapped_constant) {
-            return NetworkHelper::isScalarLike(
-                as_type_ptr<opset1::Constant>(pattern_map.at(wrapped_constant).get_node_shared_ptr()));
-        };
-        if (!is_scalar(input_low) ||
-            !is_scalar(input_high) ||
-            !is_scalar(output_low) ||
-            !is_scalar(output_high)) {
-            return false;
-        }
         return transform(*context, m);
     };
@@ -70,49 +56,111 @@ MoveFakeQuantize::MoveFakeQuantize(const Params& params) : LayerTransformation(p
 bool MoveFakeQuantize::transform(TransformationContext& context, ngraph::pattern::Matcher& m) {
     auto fq = m.get_match_root();
+    if (!canBeTransformed(context, fq)) {
+        return false;
+    }
     auto operation = fq->get_input_node_shared_ptr(0);
     std::shared_ptr<ngraph::Node> concat;
-    bool only_concat = true;
-    std::string fq_original_name = fq->get_friendly_name(), operation_original_name;
+    bool without_operation = true;
+    std::string fq_original_name = fq->get_friendly_name(),
+        operation_original_name,
+        convert_q_original_name;
     if (is_type<opset1::Concat>(operation)) {
         concat = operation;
     } else {
         operation_original_name = operation->get_friendly_name();
         concat = operation->get_input_node_shared_ptr(0);
-        only_concat = false;
+        without_operation = false;
     }
     if (!ConcatTransformation::isQuantizedStatic(concat)) {
         return false;
     }
-    std::vector<std::shared_ptr<ngraph::Node>> fqs;
-    size_t input_size = concat->get_input_size();
-    for (size_t i{ 0 }; i < input_size; ++i) {
+    auto convert_q = (*fq->output(0).get_target_inputs().begin()).get_node()->shared_from_this();
+    bool q_dq = is_type<opset1::Convert>(convert_q);
+    std::vector<std::shared_ptr<opset1::Constant>> currConstants(4);
+    bool multi_chanels = false;
+    const auto number_of_concat_inputs = concat->get_input_size();
+    const auto concatNode = as_type_ptr<opset1::Concat>(concat);
+    const auto concat_axis = concatNode->get_concatenation_axis();
+    for (size_t i = 0; i < 4; i++) {
+        currConstants[i] = as_type_ptr<opset1::Constant>(fq->get_input_node_shared_ptr(i + 1));
+        if (!multi_chanels && currConstants[i]->get_shape().size() > 1 && currConstants[i]->get_shape()[concat_axis] != 1) {
+            multi_chanels = true;
+        }
+    }
+    std::vector<std::vector<std::shared_ptr<ngraph::opset1::Constant>>> newConstants;
+    if (multi_chanels) {
+        newConstants = NetworkHelper::split_consts_before_concat(concat, currConstants);
+    }
+    std::vector<std::shared_ptr<ngraph::Node>> newNodes;
+    for (size_t i{ 0 }; i < number_of_concat_inputs; ++i) {
         std::shared_ptr<ngraph::Node> fq_input;
-        if (only_concat) {
+        if (without_operation) {
             fq_input = concat->get_input_node_shared_ptr(i);
         } else {
             auto input = concat->get_input_node_shared_ptr(i);
             fq_input = operation->clone_with_new_inputs({ input });
             fq_input->set_friendly_name(operation_original_name + "_" + std::to_string(i + 1));
         }
-        auto newFq = fq->clone_with_new_inputs({ fq_input,
-            fq->get_input_node_shared_ptr(1)->clone_with_new_inputs({}),
-            fq->get_input_node_shared_ptr(2)->clone_with_new_inputs({}),
-            fq->get_input_node_shared_ptr(3)->clone_with_new_inputs({}),
-            fq->get_input_node_shared_ptr(4)->clone_with_new_inputs({}) });
+        std::shared_ptr<ngraph::Node> newFq;
+        if (multi_chanels) {
+            newFq = fq->clone_with_new_inputs({ fq_input,
+                newConstants[0][newConstants[0].size() == 1 ? 0 : i],
+                newConstants[1][newConstants[1].size() == 1 ? 0 : i],
+                newConstants[2][newConstants[2].size() == 1 ? 0 : i],
+                newConstants[3][newConstants[3].size() == 1 ? 0 : i] });
+        } else {
+            newFq = fq->clone_with_new_inputs({ fq_input,
+                fq->get_input_node_ptr(1)->clone_with_new_inputs({}),
+                fq->get_input_node_ptr(2)->clone_with_new_inputs({}),
+                fq->get_input_node_ptr(3)->clone_with_new_inputs({}),
+                fq->get_input_node_ptr(4)->clone_with_new_inputs({}) });
+        }
+        ngraph::copy_runtime_info(fq, newFq);
         newFq->set_friendly_name(fq_original_name + "_" + std::to_string(i + 1));
-        fqs.push_back(newFq);
+        if (q_dq) {
+            auto newConvert_q = convert_q->clone_with_new_inputs({ newFq });
+            ngraph::copy_runtime_info(convert_q, newConvert_q);
+            newConvert_q->set_friendly_name(convert_q->get_friendly_name() + "_" + std::to_string(i + 1));
+            newNodes.push_back(newConvert_q);
+        } else {
+            newNodes.push_back(newFq);
+        }
     }
-    ngraph::copy_runtime_info(fq, fqs);
-    auto newConcat = concat->clone_with_new_inputs(ngraph::OutputVector(fqs.begin(), fqs.end()));
+    auto newConcat = concat->clone_with_new_inputs(ngraph::OutputVector(newNodes.begin(), newNodes.end()));
     newConcat->set_friendly_name(concat->get_friendly_name());
-    replace_node(fq, newConcat);
     NetworkHelper::copyInfo(concat, newConcat);
+    if (q_dq) {
+        auto dq = NetworkHelper::getDequantizationBelow(convert_q);
+        moveDequantizationBefore(context, newConcat, dq, false);
+        return true;
+    }
+    replace_node(fq, newConcat);
     updateOutput(context, newConcat, fq);
     return true;
 }
-bool MoveFakeQuantize::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
+bool MoveFakeQuantize::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> layer) const {
+    auto operation = layer->get_input_node_shared_ptr(0);
+    std::shared_ptr<ngraph::Node> concat;
+    if (is_type<opset1::Concat>(operation)) {
+        concat = operation;
+    } else {
+        concat = operation->get_input_node_shared_ptr(0);
+    }
+    if (!ConcatTransformation::isQuantizedStatic(concat)) {
+        return false;
+    }
+    auto convert_q = (*layer->output(0).get_target_inputs().begin()).get_node()->shared_from_this();
+    bool q_dq = is_type<opset1::Convert>(convert_q);
+    if (q_dq && (convert_q->get_output_size() != 1 || layer->get_output_size() != 1)) {
+        return false;
+    }
+    return true;
+}
+bool MoveFakeQuantize::isPrecisionPreserved(std::shared_ptr<Node>) const noexcept {
     return true;
 }
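
The reworked transform() clones the FakeQuantize onto every Concat input and, when the intervals are per-channel along the concatenation axis, hands each clone the matching slice of the original limits prepared by NetworkHelper::split_consts_before_concat; with Q/DQ, the Convert is cloned per branch as well and the dequantization is moved by moveDequantizationBefore. A plain-C++ sketch of why the per-channel split is value-preserving (illustrative helper names, one value per channel):

#include <cassert>
#include <cmath>
#include <vector>

// Per-channel FakeQuantize (one value per channel for brevity).
std::vector<float> fq(const std::vector<float>& x,
                      const std::vector<float>& in_lo, const std::vector<float>& in_hi,
                      const std::vector<float>& out_lo, const std::vector<float>& out_hi,
                      int levels = 256) {
    std::vector<float> y(x.size());
    for (size_t c = 0; c < x.size(); ++c) {
        const float v = std::fmin(std::fmax(x[c], in_lo[c]), in_hi[c]);
        const float q = std::round((v - in_lo[c]) / (in_hi[c] - in_lo[c]) * (levels - 1));
        y[c] = q / (levels - 1) * (out_hi[c] - out_lo[c]) + out_lo[c];
    }
    return y;
}

template <class T>
std::vector<T> slice(const std::vector<T>& v, size_t from, size_t to) {
    return std::vector<T>(v.begin() + from, v.begin() + to);
}

int main() {
    // Two concat branches with 1 and 2 channels; per-channel limits cover all 3 channels.
    const std::vector<float> x1 = {0.5f}, x2 = {1.0f, 2.0f};
    const std::vector<float> il = {0.f, 0.f, 0.f}, ih = {2.55f, 1.275f, 2.55f};
    const std::vector<float> ol = {0.f, 0.f, 0.f}, oh = {255.f, 255.f, 255.f};

    // FakeQuantize applied after the concat ...
    std::vector<float> cat(x1);
    cat.insert(cat.end(), x2.begin(), x2.end());
    const auto after = fq(cat, il, ih, ol, oh);

    // ... equals per-branch FakeQuantize with the limits split along the concat axis.
    auto b1 = fq(x1, slice(il, 0, 1), slice(ih, 0, 1), slice(ol, 0, 1), slice(oh, 0, 1));
    auto b2 = fq(x2, slice(il, 1, 3), slice(ih, 1, 3), slice(ol, 1, 3), slice(oh, 1, 3));
    b1.insert(b1.end(), b2.begin(), b2.end());
    assert(after == b1);
}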

View File

@@ -1662,6 +1662,144 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter
return InsertDequantizationResult(newOperation, parent);
}
NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationBefore(
const std::shared_ptr<ngraph::Node>& operation,
const FakeQuantizeDequantization& dequantization,
const bool updatePrecision,
const bool moveSubtract) {
assert(
(NetworkHelper::getDequantizationBelow(operation).subtractConstant == nullptr) ||
(NetworkHelper::getDequantizationBelow(operation).subtractConstant.get() == dequantization.subtractConstant.get()));
assert(
(NetworkHelper::getDequantizationBelow(operation).multiplyConstant == nullptr) ||
(NetworkHelper::getDequantizationBelow(operation).multiplyConstant.get() == dequantization.multiplyConstant.get()));
std::vector<std::vector<std::shared_ptr<ngraph::opset1::Constant>>> multiplyConstants, subtractConstants;
if (is_type<opset1::Concat>(operation)) {
const auto concatNode = as_type_ptr<opset1::Concat>(operation);
auto axis = concatNode->get_concatenation_axis();
if (dequantization.multiply && dequantization.multiplyConstant->get_shape().size() > 1 && dequantization.multiplyConstant->get_shape()[axis] != 1) {
multiplyConstants = NetworkHelper::split_consts_before_concat(operation, { dequantization.multiplyConstant });
}
if (dequantization.subtract && dequantization.subtractConstant->get_shape().size() > 1 && dequantization.subtractConstant->get_shape()[axis] != 1) {
subtractConstants = NetworkHelper::split_consts_before_concat(operation, { dequantization.subtractConstant });
}
}
std::vector<std::shared_ptr<ngraph::Node>> newNodes;
for (size_t i = 0; i < operation->get_input_size(); ++i) {
auto parent = operation->get_input_node_shared_ptr(i);
const element::Type deqPrecision = dequantization.multiplyConstant->get_element_type();
const bool shouldConvert = (operation->get_output_element_type(0) != deqPrecision);
if (shouldConvert) {
const auto convertOutputPrecision = dequantization.convert != nullptr ?
dequantization.convert->get_output_element_type(0) :
deqPrecision;
parent = std::make_shared<opset1::Convert>(parent, convertOutputPrecision);
parent->set_friendly_name(dequantization.convert->get_friendly_name() + "_" + std::to_string(i + 1));
ngraph::copy_runtime_info(dequantization.convert, parent);
}
if (moveSubtract && (dequantization.subtract != nullptr)) {
if (dequantization.subtractConvert == nullptr) {
const element::Type parentPrecision = parent->get_output_element_type(0);
if (parentPrecision.bitwidth() < dequantization.subtractConstant->get_element_type().bitwidth()) {
THROW_IE_LPT_EXCEPTION(*parent) <<
"unexpected precisions: on data " << parent->get_friendly_name() << ":" << parentPrecision <<
", subtract dequantization constant " << dequantization.subtractConstant->get_friendly_name() << ":" <<
dequantization.subtractConstant->get_element_type();
}
auto subtractConstant = subtractConstants.size() ? subtractConstants[0][i] : dequantization.subtractConstant;
parent = std::make_shared<op::TypeRelaxed<opset1::Subtract>>(
std::vector<element::Type>{element::f32, element::f32}, std::vector<element::Type>{ element::f32 },
ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(),
ngraph::op::TemporaryReplaceOutputType(
subtractConstant->output(0).get_element_type() == parentPrecision ?
subtractConstant :
foldConvert(subtractConstant, parentPrecision), element::f32).get());
parent->set_friendly_name(dequantization.subtract->get_friendly_name() + "_" + std::to_string(i + 1));
} else {
parent = std::make_shared<opset1::Subtract>(parent, dequantization.subtractConvert);
}
ngraph::copy_runtime_info(dequantization.subtract, parent);
}
if (dequantization.multiply != nullptr) {
auto multiplyConstant = multiplyConstants.size() ? multiplyConstants[0][i] : dequantization.multiplyConstant;
const element::Type parentPrecision = parent->get_output_element_type(0);
if (parentPrecision.bitwidth() < multiplyConstant->get_element_type().bitwidth()) {
THROW_IE_LPT_EXCEPTION(*parent) <<
"unexpected precisions: on data " << parent->get_friendly_name() << ":" << parentPrecision <<
", multiply dequantization constant " << multiplyConstant->get_friendly_name() << ":" << multiplyConstant->get_element_type();
}
parent = std::make_shared<op::TypeRelaxed<opset1::Multiply>>(
opset1::Multiply(parent,
multiplyConstant->output(0).get_element_type() == parentPrecision ?
multiplyConstant :
foldConvert(multiplyConstant->output(0), parentPrecision)),
dequantization.multiply->get_output_element_type(0));
ngraph::copy_runtime_info(dequantization.multiply, parent);
parent->set_friendly_name(dequantization.multiply->get_friendly_name() + "_" + std::to_string(i + 1));
}
if ((!moveSubtract) && (dequantization.convert != nullptr) && (dequantization.subtract != nullptr)) {
// issue #43088
// NetworkHelper::optimizeElementwise(dequantization.subtract);
}
newNodes.push_back(parent);
}
auto newOperation = operation->clone_with_new_inputs(ngraph::OutputVector(newNodes.begin(), newNodes.end()));
NetworkHelper::copyInfo(operation, newOperation);
replace_node(dequantization.multiply, newOperation);
auto op = std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(newOperation);
if (op != nullptr) {
if (updatePrecision) {
op->set_overridden_output_type(newOperation->get_input_element_type(0));
} else if (dequantization.multiply) {
op->set_overridden_output_type(dequantization.multiplyConstant->get_element_type());
} else if (dequantization.subtract) {
op->set_overridden_output_type(dequantization.subtractConstant->get_element_type());
}
std::dynamic_pointer_cast<ngraph::Node>(newOperation)->validate_and_infer_types();
}
return InsertDequantizationResult(newOperation, dequantization.multiply);
}
std::vector<std::vector<std::shared_ptr<ngraph::opset1::Constant>>> NetworkHelper::split_consts_before_concat(const std::shared_ptr<ov::Node> concat,
const std::vector<std::shared_ptr<opset1::Constant>> currConstants) {
std::vector<std::vector<std::shared_ptr<ngraph::opset1::Constant>>> newConstants(currConstants.size());
auto number_of_concat_inputs = concat->get_input_size();
const auto concatNode = as_type_ptr<opset1::Concat>(concat);
const auto concat_axis = concatNode->get_concatenation_axis();
std::vector<unsigned int> shape_axis(number_of_concat_inputs);
for (size_t i{ 0 }; i < number_of_concat_inputs; ++i) {
auto shape = concat->get_input_shape(i);
shape_axis[i] = shape[concat_axis];
}
for (size_t i = 0; i < currConstants.size(); ++i) {
std::vector<std::shared_ptr<ngraph::opset1::Constant>> newConstant;
if (currConstants[i]->output(0).get_shape()[concat_axis] == 1) {
newConstant.push_back(currConstants[i]);
newConstants[i] = newConstant;
continue;
}
auto split = std::make_shared<opset1::VariadicSplit>(currConstants[i],
opset1::Constant::create(element::i64, Shape{}, { concat_axis }),
opset1::Constant::create(element::i64, Shape{ number_of_concat_inputs }, shape_axis));
OutputVector outputResults(split->get_output_size());
auto foldResult = split->constant_fold(outputResults, split->input_values());
if (!foldResult) {
// handle potential constant fold issue here
}
for (auto outputResult : outputResults) {
auto constant = as_type_ptr<opset1::Constant>(outputResult.get_node_shared_ptr());
newConstant.push_back(constant);
}
newConstants[i] = newConstant;
}
return newConstants;
}
bool NetworkHelper::checkConstantValuePrecision(const element::Type expectedPrecision, const std::shared_ptr<Node>& constant) {
if (expectedPrecision.is_signed()) {
return true;
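
split_consts_before_concat derives one split length per Concat input from that input's extent along the concatenation axis, then cuts each per-channel constant with VariadicSplit plus constant folding; a constant with extent 1 along the axis is reused as-is for every branch. The shape bookkeeping amounts to the sketch below (plain C++, illustrative helper name, flat per-channel vectors instead of {1, C, 1, 1} constants):

#include <cstddef>
#include <iostream>
#include <vector>

// Split one flat per-channel constant into per-input chunks, mirroring the idea of a
// VariadicSplit along the concat axis (split lengths = each input's extent along that axis).
// A single-value constant is broadcast, i.e. reused for every branch.
std::vector<std::vector<float>> split_for_concat(const std::vector<float>& constant,
                                                 const std::vector<size_t>& lengths) {
    if (constant.size() == 1) {
        return {constant};  // per-tensor constant: one chunk shared by all branches
    }
    std::vector<std::vector<float>> chunks;
    size_t offset = 0;
    for (const size_t len : lengths) {
        chunks.emplace_back(constant.begin() + offset, constant.begin() + offset + len);
        offset += len;
    }
    return chunks;
}

int main() {
    // Concat of inputs with 1, 2 and 3 channels; a 6-channel multiply constant is split 1/2/3.
    const std::vector<size_t> lengths = {1, 2, 3};
    const std::vector<float> scales = {0.01f, 0.02f, 0.03f, 0.04f, 0.05f, 0.06f};
    for (const auto& chunk : split_for_concat(scales, lengths)) {
        for (float s : chunk) std::cout << s << ' ';
        std::cout << '\n';
    }
}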

View File

@ -37,12 +37,10 @@ namespace {
class MoveFakeQuantizeTransformationActualValues { class MoveFakeQuantizeTransformationActualValues {
public: public:
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1; size_t number_of_operations;
ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1; std::vector<ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant> fakeQuantizeBefore;
ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1; ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore;
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2; ngraph::builder::subgraph::DequantizationOperations dequantizationBefore;
ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2;
ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2;
std::string operation; std::string operation;
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter; ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter;
ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter; ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter;
@ -51,12 +49,9 @@ public:
inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationActualValues& values) { inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationActualValues& values) {
return out << "_" << return out << "_" <<
values.fakeQuantizeBefore1 << "_" << values.number_of_operations << "_" <<
values.convertBefore1.outPrecision << "_" << values.convertBefore.outPrecision << "_" <<
values.dequantizationBefore1 << "_" << values.dequantizationBefore << "_" <<
values.fakeQuantizeBefore2 << "_" <<
values.convertBefore2.outPrecision << "_" <<
values.dequantizationBefore2 << "_" <<
values.operation << "_" << values.operation << "_" <<
values.fakeQuantizeAfter << "_" << values.fakeQuantizeAfter << "_" <<
values.convertAfter.outPrecision << "_" << values.convertAfter.outPrecision << "_" <<
@ -65,33 +60,25 @@ inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransfo
class MoveFakeQuantizeTransformationResultValues { class MoveFakeQuantizeTransformationResultValues {
public: public:
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1; size_t number_of_operations;
ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1; std::vector<ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant> fakeQuantizeBefore;
ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1; ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore;
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2; ngraph::builder::subgraph::DequantizationOperations dequantizationBefore;
ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2;
ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2;
std::string operation; std::string operation;
ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter; ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter;
ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter; ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter;
ngraph::builder::subgraph::DequantizationOperations dequantizationAfter; ngraph::builder::subgraph::DequantizationOperations dequantizationAfter;
ngraph::element::Type precisionAfterOperation; ngraph::element::Type precisionAfterOperation;
ngraph::builder::subgraph::DequantizationOperations dequantizationAfterNotFQ;
}; };
inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationResultValues& values) { inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransformationResultValues& values) {
return out << "_" << return out << "_" <<
values.fakeQuantizeBefore1 << "_" << values.convertBefore.outPrecision << "_" <<
values.convertBefore1.outPrecision << "_" << values.dequantizationBefore << "_" <<
values.dequantizationBefore1 << "_" <<
values.fakeQuantizeBefore2 << "_" <<
values.convertBefore2.outPrecision << "_" <<
values.dequantizationBefore2 << "_" <<
values.operation << "_" << values.operation << "_" <<
values.fakeQuantizeAfter << "_" << values.fakeQuantizeAfter << "_" <<
values.convertAfter << "_" << values.convertAfter << "_" <<
values.dequantizationAfter << "_" << values.dequantizationAfter;
values.dequantizationAfterNotFQ;
} }
class MoveFakeQuantizeTransformationTestValues { class MoveFakeQuantizeTransformationTestValues {
@ -126,7 +113,7 @@ inline std::ostream& operator<<(std::ostream& out, const MoveFakeQuantizeTransfo
typedef std::tuple < typedef std::tuple <
ngraph::element::Type, ngraph::element::Type,
ngraph::PartialShape, std::vector<ngraph::PartialShape>,
MoveFakeQuantizeTransformationTestValues MoveFakeQuantizeTransformationTestValues
> MoveFakeQuantizeTransformationParams; > MoveFakeQuantizeTransformationParams;
@ -134,16 +121,13 @@ class MoveFakeQuantizeTransformation : public LayerTransformation, public testin
public: public:
void SetUp() override { void SetUp() override {
const ngraph::element::Type precision = std::get<0>(GetParam()); const ngraph::element::Type precision = std::get<0>(GetParam());
const ngraph::PartialShape shape = std::get<1>(GetParam()); const std::vector<ngraph::PartialShape> shape = std::get<1>(GetParam());
//const auto shape = std::get<1>(GetParam());
MoveFakeQuantizeTransformationTestValues testValues = std::get<2>(GetParam()); MoveFakeQuantizeTransformationTestValues testValues = std::get<2>(GetParam());
// dequantization output precision depends on input precision // dequantization output precision depends on input precision
// to avoid huge amount of tests cases let's define dequantization output precision as input precision // to avoid huge amount of tests cases let's define dequantization output precision as input precision
if (!testValues.actual.dequantizationBefore1.multiply.empty()) { if (!testValues.actual.dequantizationBefore.multiply.empty()) {
testValues.actual.dequantizationBefore1.multiply.outPrecision = precision; testValues.actual.dequantizationBefore.multiply.outPrecision = precision;
}
if (!testValues.actual.dequantizationBefore2.multiply.empty()) {
testValues.actual.dequantizationBefore2.multiply.outPrecision = precision;
} }
IntervalsAlignmentSharedValue::Interval interval{ -1.28f, 2.55f }; IntervalsAlignmentSharedValue::Interval interval{ -1.28f, 2.55f };
@ -151,12 +135,10 @@ public:
actualFunction = ngraph::builder::subgraph::MoveFakeQuantize::get( actualFunction = ngraph::builder::subgraph::MoveFakeQuantize::get(
precision, precision,
shape, shape,
testValues.actual.fakeQuantizeBefore1, testValues.actual.number_of_operations,
testValues.actual.convertBefore1, testValues.actual.fakeQuantizeBefore,
testValues.actual.dequantizationBefore1, testValues.actual.convertBefore,
testValues.actual.fakeQuantizeBefore2, testValues.actual.dequantizationBefore,
testValues.actual.convertBefore2,
testValues.actual.dequantizationBefore2,
testValues.actual.operation, testValues.actual.operation,
testValues.actual.fakeQuantizeAfter, testValues.actual.fakeQuantizeAfter,
testValues.actual.convertAfter, testValues.actual.convertAfter,
@ -167,7 +149,6 @@ public:
QuantizationAlignmentAttribute(false) QuantizationAlignmentAttribute(false)
}, },
ngraph::element::undefined, ngraph::element::undefined,
{},
testValues.axis); testValues.axis);
auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({ auto supportedPrecisionsOnActivation = std::vector<ngraph::pass::low_precision::OperationPrecisionRestriction>({
ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}}) ngraph::pass::low_precision::OperationPrecisionRestriction::create<ngraph::opset1::AvgPool>({{0, testValues.params.precisionsOnActivations}})
@ -183,6 +164,7 @@ public:
ov::pass::Manager manager; ov::pass::Manager manager;
manager.register_pass<ngraph::pass::low_precision::MoveFakeQuantize>(params); manager.register_pass<ngraph::pass::low_precision::MoveFakeQuantize>(params);
manager.run_passes(actualFunction); manager.run_passes(actualFunction);
// dequantization output precision depends on input precision // dequantization output precision depends on input precision
// to avoid huge amount of tests cases let's define dequantization output precision as input precision // to avoid huge amount of tests cases let's define dequantization output precision as input precision
if (!testValues.result.dequantizationAfter.multiply.empty()) { if (!testValues.result.dequantizationAfter.multiply.empty()) {
@ -198,12 +180,10 @@ public:
referenceFunction = ngraph::builder::subgraph::MoveFakeQuantize::get( referenceFunction = ngraph::builder::subgraph::MoveFakeQuantize::get(
precision, precision,
shape, shape,
testValues.result.fakeQuantizeBefore1, testValues.result.number_of_operations,
testValues.result.convertBefore1, testValues.result.fakeQuantizeBefore,
testValues.result.dequantizationBefore1, testValues.result.convertBefore,
testValues.result.fakeQuantizeBefore2, testValues.result.dequantizationBefore,
testValues.result.convertBefore2,
testValues.result.dequantizationBefore2,
testValues.result.operation, testValues.result.operation,
testValues.result.fakeQuantizeAfter, testValues.result.fakeQuantizeAfter,
testValues.result.convertAfter, testValues.result.convertAfter,
@ -214,18 +194,16 @@ public:
QuantizationAlignmentAttribute(false) QuantizationAlignmentAttribute(false)
}, },
testValues.result.precisionAfterOperation, testValues.result.precisionAfterOperation,
{},
testValues.axis); testValues.axis);
} }
static std::string getTestCaseName(testing::TestParamInfo<MoveFakeQuantizeTransformationParams> obj) { static std::string getTestCaseName(testing::TestParamInfo<MoveFakeQuantizeTransformationParams> obj) {
const ngraph::element::Type precision = std::get<0>(obj.param); const ngraph::element::Type precision = std::get<0>(obj.param);
const ngraph::PartialShape shape = std::get<1>(obj.param); const std::vector<ngraph::PartialShape> shape = std::get<1>(obj.param);
const MoveFakeQuantizeTransformationTestValues testValues = std::get<2>(obj.param); const MoveFakeQuantizeTransformationTestValues testValues = std::get<2>(obj.param);
std::ostringstream result; std::ostringstream result;
result << result <<
LayerTransformation::getTestCaseNameByParams(precision, shape, testValues.params) << "_" << LayerTransformation::getTestCaseNameByParams(precision, shape[0], testValues.params) << "_" <<
(testValues.multiChannels ? "multiChannels_" : "notMultiChannels_") << (testValues.multiChannels ? "multiChannels_" : "notMultiChannels_") <<
"axis_" << testValues.axis << "_" << "axis_" << testValues.axis << "_" <<
testValues.actual << "_" << testValues.actual << "_" <<
@ -236,7 +214,7 @@ public:
TEST_P(MoveFakeQuantizeTransformation, CompareFunctions) { TEST_P(MoveFakeQuantizeTransformation, CompareFunctions) {
actualFunction->validate_nodes_and_infer_types(); actualFunction->validate_nodes_and_infer_types();
auto res = compare_functions(referenceFunction, actualFunction, true, true, true, true, true); auto res = compare_functions(referenceFunction, actualFunction, true, true, true, true, false);
ASSERT_TRUE(res.first) << res.second; ASSERT_TRUE(res.first) << res.second;
ASSERT_TRUE(LayerTransformation::allNamesAreUnique(actualFunction)) << "Not all names are unique"; ASSERT_TRUE(LayerTransformation::allNamesAreUnique(actualFunction)) << "Not all names are unique";
@ -252,21 +230,19 @@ const std::vector<ngraph::element::Type> precisions = {
}; };
namespace testValues1 { namespace testValues1 {
const std::vector<ngraph::PartialShape> shapes = { const std::vector<std::vector<ngraph::PartialShape>> shapes = {
{ 1, 3, 9, 9 }, {{ 1, 3, 9, 9 }},
{ 4, 3, 9, 9 }, {{ 4, 3, 9, 9 }},
{ Dimension::dynamic(), 3, Dimension::dynamic(), Dimension::dynamic() } {{ Dimension::dynamic(), 3, Dimension::dynamic(), Dimension::dynamic() }}
}; };
const std::vector<MoveFakeQuantizeTransformationTestValues> testValues = { const std::vector<MoveFakeQuantizeTransformationTestValues> testValues = {
// U8: concat // without operation
{ {
LayerTransformation::createParamsU8I8(), LayerTransformation::createParamsU8I8(),
false, false,
1, 1,
{ {
{}, 2,
{},
{},
{}, {},
{}, {},
{}, {},
@ -276,28 +252,23 @@ const std::vector<MoveFakeQuantizeTransformationTestValues> testValues = {
{} {}
}, },
{ {
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, 2,
{}, {{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}},
{},
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
{}, {},
{}, {},
"", "",
{}, {},
{}, {},
{}, {},
}, }
false,
false
}, },
// with ReLU
{ {
LayerTransformation::createParamsU8I8(), LayerTransformation::createParamsU8I8(),
false, false,
1, 1,
{ {
{}, 2,
{},
{},
{}, {},
{}, {},
{}, {},
@ -307,28 +278,23 @@ const std::vector<MoveFakeQuantizeTransformationTestValues> testValues = {
{} {}
}, },
{ {
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, 2,
{}, {{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}},
{},
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
{}, {},
{}, {},
"relu", "relu",
{}, {},
{}, {},
{}, {},
}, }
false,
false
}, },
// negative test
{ {
LayerTransformation::createParamsU8I8(), LayerTransformation::createParamsU8I8(),
false, false,
0, 0,
{ {
{}, 2,
{},
{},
{}, {},
{}, {},
{}, {},
@ -338,9 +304,7 @@ const std::vector<MoveFakeQuantizeTransformationTestValues> testValues = {
{} {}
}, },
{ {
{}, 2,
{},
{},
{}, {},
{}, {},
{}, {},
@ -348,9 +312,109 @@ const std::vector<MoveFakeQuantizeTransformationTestValues> testValues = {
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
{}, {},
{} {}
}, }
},
// Q/DQ
{
LayerTransformation::createParamsU8I8(),
false, false,
false 1,
{
2,
{},
{},
{},
"",
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
{ ngraph::element::u8 },
{
{ element::f32 },
{},
{ 0.01f }
},
},
{
2,
{{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}},
{ ngraph::element::u8 },
{
{ element::f32 },
{},
{ 0.01f }
},
"",
{},
{},
{},
}
},
// Q/DQ with ReLU
{
LayerTransformation::createParamsU8I8(),
false,
1,
{
2,
{},
{},
{},
"relu",
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
{ ngraph::element::u8 },
{
{ element::f32 },
{},
{ 0.01f }
},
},
{
2,
{{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}},
{ ngraph::element::u8 },
{
{ element::f32 },
{},
{ 0.01f }
},
"relu",
{},
{},
{},
}
},
// Q/DQ with subtract
{
LayerTransformation::createParamsU8I8(),
false,
1,
{
2,
{},
{},
{},
"",
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
{ ngraph::element::u8 },
{
{ element::f32 },
{ 0.01f },
{ 0.01f }
},
},
{
2,
{{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}},
{ ngraph::element::u8 },
{
{ element::f32 },
{ 0.01f },
{ 0.01f }
},
"",
{},
{},
{},
}
}, },
}; };
@ -363,4 +427,66 @@ INSTANTIATE_TEST_SUITE_P(
::testing::ValuesIn(testValues)), ::testing::ValuesIn(testValues)),
MoveFakeQuantizeTransformation::getTestCaseName); MoveFakeQuantizeTransformation::getTestCaseName);
} // namespace testValues1 } // namespace testValues1
namespace testValues2 {
const std::vector<ngraph::element::Type> precisions = {
ngraph::element::f32,
ngraph::element::f16
};
const std::vector<std::vector<ngraph::PartialShape>> shapes = {
{{ 1, 1, 224, 224 }, { 1, 2, 224, 224 }},
{{ 4, 1, 9, 9 }, { 4, 2, 9, 9 }}
};
const std::vector<MoveFakeQuantizeTransformationTestValues> testValues = {
// multi-channels
{
LayerTransformation::createParamsU8I8(),
true,
1,
{
2,
{},
{},
{},
"",
{
256ul,
{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}},
{-2.66068696975708f}, {2.6399004459381104f},
{-31.695816040039062f, -35.69844055175781f, -49.126914978027344f},
{277.8320007324219f, 267.07110595703125f, 254.99429321289062f}
},
{},
{}
},
{
2,
{
{256ul,
{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}},
{-2.66068696975708f}, {2.6399004459381104f}, {-31.695816040039062f}, {277.8320007324219f}},
{256ul,
{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 2, 1, 1}, {1, 2, 1, 1}},
{-2.66068696975708f}, {2.6399004459381104f},
{-35.69844055175781f, -49.126914978027344f},
{267.07110595703125f, 254.99429321289062f}}
},
{},
{},
"",
{},
{},
{},
}
},
};
INSTANTIATE_TEST_SUITE_P(
smoke_LPT,
MoveFakeQuantizeTransformation,
::testing::Combine(
::testing::ValuesIn(precisions),
::testing::ValuesIn(shapes),
::testing::ValuesIn(testValues)),
MoveFakeQuantizeTransformation::getTestCaseName);
} // namespace testValues2
} // namespace } // namespace
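
For reference when reading the intervals in these cases: with 256 levels, an input range of [0, 2.55] mapped to the u8 range [0, 255] corresponds to a dequantization scale of (2.55 - 0) / (256 - 1) = 0.01, which is exactly the Multiply constant the Q/DQ cases attach after the Convert. A one-line check in plain C++:

#include <iostream>

int main() {
    const float in_lo = 0.f, in_hi = 2.55f;
    const int levels = 256;
    const float scale = (in_hi - in_lo) / (levels - 1);  // 0.01
    std::cout << "dequantization scale = " << scale << "\n";
}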

View File

@ -9,7 +9,6 @@
using namespace LayerTestsDefinitions; using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = { const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32, ngraph::element::f32,
//ngraph::element::f16 //ngraph::element::f16
@ -19,12 +18,12 @@ const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> tras
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true) LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams().setUpdatePrecisions(true)
}; };
namespace testValues1 {
const std::vector<LayerTestsDefinitions::MoveFakeQuantizeTransformationParam> params = { const std::vector<LayerTestsDefinitions::MoveFakeQuantizeTransformationParam> params = {
// without operation // without operation
{ {
{}, 3,
{},
{},
{}, {},
{}, {},
{}, {},
@ -38,9 +37,7 @@ const std::vector<LayerTestsDefinitions::MoveFakeQuantizeTransformationParam> pa
}, },
// with ReLU operation // with ReLU operation
{ {
{}, 3,
{},
{},
{}, {},
{}, {},
{}, {},
@ -52,27 +49,117 @@ const std::vector<LayerTestsDefinitions::MoveFakeQuantizeTransformationParam> pa
"U8", "U8",
1 1
}, },
// negative axis // Q/DQ
{ {
{}, 3,
{},
{},
{}, {},
{}, {},
{}, {},
"", "",
{256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f}}, { 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} },
{ ngraph::element::u8 },
{
{ ngraph::element::f32 },
{},
{ 0.01f }
},
"Concatenation",
"U8",
1
},
// Q/DQ with ReLU
{
3,
{},
{},
{},
"relu",
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} },
{ ngraph::element::u8 },
{
{ ngraph::element::f32 },
{},
{ 0.01f }
},
"Concatenation",
"U8",
1
},
// multi-channels
{
3,
{},
{},
{},
"relu",
{
256ul,
{{1, 6, 1, 1}, {1, 6, 1, 1}, {1, 6, 1, 1}, {1, 6, 1, 1}},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{2.55f, 2.55f / 2.f, 2.55f / 3.f, 2.55f / 4.f, 2.55f / 5.f, 2.55f / 6.f},
{-128.f, -128.f, -128.f, -128.f, -128.f, -128.f},
{127.f, 127.f, 127.f, 127.f, 127.f, 127.f}
},
{}, {},
{}, {},
"Concatenation", "Concatenation",
"FP32", "I8",
0 1
} },
// Q/DQ with multi-channels multiply
{
3,
{},
{},
{},
"",
{
256ul,
{{1, 6, 1, 1}, {1, 6, 1, 1}, {1, 6, 1, 1}, {1, 6, 1, 1}},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{2.55f, 2.55f / 2.f, 2.55f / 3.f, 2.55f / 4.f, 2.55f / 5.f, 2.55f / 6.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{255.f, 255.f / 2.f, 255.f / 3.f, 255.f / 4.f, 255.f / 5.f, 255.f / 6.f},
},
{ ngraph::element::u8 },
{
{ ngraph::element::f32 },
{},
{ {0.01f, 0.02f, 0.03f, 0.04f, 0.05f, 0.06f}, ngraph::element::f32, {1, 6, 1, 1} },
},
"Concatenation",
"U8",
1
},
// Q/DQ with multi-channels subtract
{
3,
{},
{},
{},
"",
{
256ul,
{{1, 6, 1, 1}, {1, 6, 1, 1}, {1, 6, 1, 1}, {1, 6, 1, 1}},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{2.55f, 2.55f / 2.f, 2.55f / 3.f, 2.55f / 4.f, 2.55f / 5.f, 2.55f / 6.f},
{0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
{255.f, 255.f / 2.f, 255.f / 3.f, 255.f / 4.f, 255.f / 5.f, 255.f / 6.f},
},
{ ngraph::element::u8 },
{
{ ngraph::element::f32 },
{ {-127.f, -127.f / 2.f, -127.f / 3.f, -127.f / 4.f, -127.f / 5.f, -127.f / 6.f}, ngraph::element::f32, {1, 6, 1, 1} },
{ 0.01f },
},
"Concatenation",
"U8",
1
},
}; };
const std::vector<ngraph::Shape> shapes = { const std::vector<std::vector<ngraph::PartialShape>> shapes = {
{ 1, 3, 16, 16 }, {{ 1, 1, 16, 16 }, { 1, 2, 16, 16 }, { 1, 3, 16, 16 }}
{ 4, 3, 16, 16 }
}; };
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation, INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation,
@ -83,4 +170,36 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation,
::testing::ValuesIn(trasformationParamValues), ::testing::ValuesIn(trasformationParamValues),
::testing::ValuesIn(params)), ::testing::ValuesIn(params)),
MoveFakeQuantizeTransformation::getTestCaseName); MoveFakeQuantizeTransformation::getTestCaseName);
} // namespace } // namespace testValues1
namespace testValues2 {
const std::vector<LayerTestsDefinitions::MoveFakeQuantizeTransformationParam> params = {
// negative axis
{
3,
{},
{},
{},
"",
{256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f}},
{},
{},
"Concatenation",
"FP32",
-1
},
};
const std::vector<std::vector<ngraph::PartialShape>> shapes = {
{{ 1, 1, 16, 16 }}
};
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(shapes),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(trasformationParamValues),
::testing::ValuesIn(params)),
MoveFakeQuantizeTransformation::getTestCaseName);
} // namespace testValues2
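
The multi-channel Q/DQ cases above attach Subtract and Multiply constants of shape {1, 6, 1, 1}, one value per channel. Channel-wise the dequantization they describe is (x - subtract[c]) * multiply[c]; the sketch below applies it to a tiny u8 buffer (plain C++, illustrative helpers, not the test infrastructure):

#include <cstdint>
#include <iostream>
#include <vector>

// Per-channel dequantization of a u8 tensor laid out as [N, C, H, W],
// with one Subtract and one Multiply constant per channel (as in the {1, 6, 1, 1} constants).
std::vector<float> dequantize(const std::vector<uint8_t>& data, size_t channels, size_t plane,
                              const std::vector<float>& subtract, const std::vector<float>& multiply) {
    std::vector<float> out(data.size());
    for (size_t c = 0; c < channels; ++c) {
        for (size_t i = 0; i < plane; ++i) {
            const size_t idx = c * plane + i;
            out[idx] = (static_cast<float>(data[idx]) - subtract[c]) * multiply[c];
        }
    }
    return out;
}

int main() {
    const size_t channels = 6, plane = 2;                  // tiny 6-channel example, 2 elements per channel
    std::vector<uint8_t> data(channels * plane, 128);
    const std::vector<float> subtract(channels, -127.f);   // Subtract constants
    const std::vector<float> multiply(channels, 0.01f);    // Multiply constants
    const auto dq = dequantize(data, channels, plane, subtract, multiply);
    std::cout << dq[0] << "\n";                            // (128 - (-127)) * 0.01 = 2.55
}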

View File

@ -15,72 +15,171 @@ const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f16 ngraph::element::f16
}; };
const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = { const std::vector<ngraph::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams(), LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams(),
}; };
const std::vector<LayerTestsDefinitions::MoveFakeQuantizeTransformationParam> params = { const std::vector<LayerTestsDefinitions::MoveFakeQuantizeTransformationParam> params = {
// without operation // without operation
{ {
{}, 2,
{}, {},
{}, {},
{},
"",
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}},
{},
{},
"Concat",
"U8",
1,
},
// with ReLU operation
{
2,
{},
{},
{},
"relu",
{ 256ul, {}, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f }},
{},
{},
"Concat",
"U8",
1
},
// negative axis
{
2,
{},
{},
{},
"",
{256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f}},
{},
{},
"Concat",
"FP32",
0
},
// Q/DQ
{
2,
{},
{},
{},
"",
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} },
{ ngraph::element::u8 },
{
{ ngraph::element::f32 },
{},
{ 0.01f }
},
"Concat",
"U8",
1
},
// Q/DQ with ReLU
{
2,
{},
{},
{},
"relu",
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f} },
{ ngraph::element::u8 },
{
{ ngraph::element::f32 },
{},
{ 0.01f }
},
"Concat",
"U8",
1
},
// multi-channel
{
3,
{},
{},
{},
"relu",
{ 256ul,
{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}},
{-2.66068696975708f}, {2.6399004459381104f},
{-31.695816040039062f, -35.69844055175781f, -49.126914978027344f},
{277.8320007324219f, 267.07110595703125f, 254.99429321289062f}
},
{},
{},
"Concat",
"U8",
1
},
// Q/DQ with multi-channels
{
3,
{},
{},
{},
"",
{
256ul,
{{1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}},
{0.f, 0.f, 0.f},
{2.55f, 2.55f, 2.55f},
{0.f, 0.f, 0.f},
{255.f, 255.f, 255.f}
},
{ ngraph::element::u8 },
{
{ ngraph::element::f32 },
{},
{ {0.01f, 0.01f, 0.01f}, ngraph::element::f32, {1, 3, 1, 1} }
},
"Concat",
"U8",
1
},
// Q/DQ with multi-channels subtract
{
3,
{}, {},
{}, {},
{}, {},
"", "",
{ 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f}}, {
{}, 256ul,
{}, {{1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}},
"Concat", {0.f, 0.f, 0.f},
"U8", {2.55f, 2.55f, 2.55f},
1, {0.f, 0.f, 0.f},
}, {255.f, 255.f, 255.f}
// with ReLU operation },
{ { ngraph::element::u8 },
{}, {
{}, { ngraph::element::f32 },
{}, { {0.01f, 0.01f, 0.01f}, ngraph::element::f32, {1, 3, 1, 1} },
{}, { 0.01f }
{}, },
{},
"relu",
{ 256ul, {}, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f }},
{},
{},
"Concat", "Concat",
"U8", "U8",
1 1
}, },
// negative axis };
{
{},
{},
{},
{},
{},
{},
"",
{256ul, {}, {-1.28f}, {1.27f}, {-1.28f}, {1.27f}},
{},
{},
"Concat",
"FP32",
0
}
};
const std::vector<ngraph::Shape> shapes = { const std::vector<std::vector<ngraph::PartialShape>> shapes = {
{ 1, 3, 16, 16 }, {{ 1, 1, 16, 16 }},
{ 4, 3, 16, 16 } {{ 4, 1, 16, 16 }}
}; };
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation, INSTANTIATE_TEST_SUITE_P(smoke_LPT, MoveFakeQuantizeTransformation,
::testing::Combine( ::testing::Combine(
::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(shapes), ::testing::ValuesIn(shapes),
::testing::Values(CommonTestUtils::DEVICE_GPU), ::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::ValuesIn(trasformationParamValues), ::testing::ValuesIn(trasformationParamValues),
::testing::ValuesIn(params)), ::testing::ValuesIn(params)),
MoveFakeQuantizeTransformation::getTestCaseName); MoveFakeQuantizeTransformation::getTestCaseName);
} // namespace } // namespace

View File

@@ -19,12 +19,10 @@ namespace LayerTestsDefinitions {
 class MoveFakeQuantizeTransformationParam {
 public:
-    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore1;
-    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore1;
-    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore1;
-    ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeBefore2;
-    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore2;
-    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore2;
+    size_t number_of_operations;
+    std::vector<ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant> fakeQuantizeBefore;
+    ngraph::builder::subgraph::DequantizationOperations::Convert convertBefore;
+    ngraph::builder::subgraph::DequantizationOperations dequantizationBefore;
     std::string operation;
     ngraph::builder::subgraph::FakeQuantizeOnDataWithConstant fakeQuantizeAfter;
     ngraph::builder::subgraph::DequantizationOperations::Convert convertAfter;
@@ -36,7 +34,7 @@ public:
 typedef std::tuple <
     ngraph::element::Type,
-    ngraph::Shape,
+    std::vector<ngraph::PartialShape>,
     std::string,
     ngraph::pass::low_precision::LayerTransformation::Params,
     MoveFakeQuantizeTransformationParam

View File

@@ -20,21 +20,21 @@ namespace LayerTestsDefinitions {
 std::string MoveFakeQuantizeTransformation::getTestCaseName(testing::TestParamInfo<MoveFakeQuantizeTransformationParams> obj) {
     ngraph::element::Type netPrecision;
-    ngraph::PartialShape inputShape;
+    std::vector<ngraph::PartialShape> inputShape;
     std::string targetDevice;
     ngraph::pass::low_precision::LayerTransformation::Params params;
     MoveFakeQuantizeTransformationParam param;
     std::tie(netPrecision, inputShape, targetDevice, params, param) = obj.param;
     std::ostringstream result;
-    result << getTestCaseNameByParams(netPrecision, inputShape, targetDevice, params) <<
-        param.operation << param.fakeQuantizeAfter;
+    result << getTestCaseNameByParams(netPrecision, inputShape[0], targetDevice, params) <<
+        param.operation << param.fakeQuantizeAfter << param.dequantizationAfter;
     return result.str();
 }
 void MoveFakeQuantizeTransformation::SetUp() {
     ngraph::element::Type netPrecision;
-    ngraph::PartialShape inputShape;
+    std::vector<ngraph::PartialShape> inputShape;
     ngraph::pass::low_precision::LayerTransformation::Params params;
     MoveFakeQuantizeTransformationParam param;
     std::tie(netPrecision, inputShape, targetDevice, params, param) = this->GetParam();
@@ -42,19 +42,16 @@ void MoveFakeQuantizeTransformation::SetUp() {
     function = ngraph::builder::subgraph::MoveFakeQuantize::get(
         netPrecision,
         inputShape,
-        param.fakeQuantizeBefore1,
-        param.convertBefore1,
-        param.dequantizationBefore1,
-        param.fakeQuantizeBefore2,
-        param.convertBefore2,
-        param.dequantizationBefore2,
+        param.number_of_operations,
+        param.fakeQuantizeBefore,
+        param.convertBefore,
+        param.dequantizationBefore,
         param.operation,
         param.fakeQuantizeAfter,
         param.convertAfter,
         param.dequantizationAfter,
         {},
         {},
-        {},
         param.axis);
 }

View File

@@ -19,20 +19,17 @@ class MoveFakeQuantize {
 public:
     static std::shared_ptr<ngraph::Function> get(
         const ngraph::element::Type inputPrecision,
-        const ngraph::PartialShape& inputShape,
-        const FakeQuantizeOnDataWithConstant& fqOnData1,
-        const DequantizationOperations::Convert& convert1,
-        const DequantizationOperations& dequantization1,
-        const FakeQuantizeOnDataWithConstant& fqOnData2,
-        const DequantizationOperations::Convert& convert2,
-        const DequantizationOperations& dequantization2,
+        const std::vector<ngraph::PartialShape>& inputShape,
+        const size_t number_of_operations,
+        const std::vector<FakeQuantizeOnDataWithConstant>& fqBefore,
+        const DequantizationOperations::Convert& convertBefore,
+        const DequantizationOperations& dequantizationBefore,
         const std::string& operation,
-        const FakeQuantizeOnDataWithConstant& fqOnData3,
-        const DequantizationOperations::Convert& convert3,
-        const DequantizationOperations& dequantization3,
+        const FakeQuantizeOnDataWithConstant& fqOnDataAfter,
+        const DequantizationOperations::Convert& convertAfter,
+        const DequantizationOperations& dequantizationAfter,
         const std::vector<ov::Any>& concatAttributes,
         const ngraph::element::Type precisionAfterOperation,
-        const DequantizationOperations& dequantizationAfter,
         const std::int64_t& axis);
 };

View File

@ -21,83 +21,77 @@ using namespace ngraph::pass;
std::shared_ptr<ngraph::Function> MoveFakeQuantize::get( std::shared_ptr<ngraph::Function> MoveFakeQuantize::get(
const ngraph::element::Type inputPrecision, const ngraph::element::Type inputPrecision,
const ngraph::PartialShape& inputShape, const std::vector<ngraph::PartialShape>& inputShape,
const FakeQuantizeOnDataWithConstant& fqOnData1, const size_t number_of_operations,
const DequantizationOperations::Convert& convert1, const std::vector<FakeQuantizeOnDataWithConstant>& fqOnDataBefore,
const DequantizationOperations& dequantization1, const DequantizationOperations::Convert& convertBefore,
const FakeQuantizeOnDataWithConstant& fqOnData2, const DequantizationOperations& dequantizationBefore,
const DequantizationOperations::Convert& convert2,
const DequantizationOperations& dequantization2,
const std::string& operation, const std::string& operation,
const FakeQuantizeOnDataWithConstant& fqOnData3, const FakeQuantizeOnDataWithConstant& fqOnDataAfter,
const DequantizationOperations::Convert& convert3, const DequantizationOperations::Convert& convertAfter,
const DequantizationOperations& dequantization3, const DequantizationOperations& dequantizationAfter,
const std::vector<ov::Any>& concatAttributes, const std::vector<ov::Any>& concatAttributes,
const ngraph::element::Type precisionAfterOperation, const ngraph::element::Type precisionAfterOperation,
const DequantizationOperations& dequantizationAfter,
const std::int64_t& axis) { const std::int64_t& axis) {
const auto input1 = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape); std::vector <std::shared_ptr<ngraph::opset1::Parameter>> inputs(number_of_operations);
input1->set_friendly_name("input1"); std::vector <std::shared_ptr<ngraph::Node>> parents(number_of_operations);
for (size_t i = 0; i < number_of_operations; i++) {
const auto input2 = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape); auto ind = 0;
input2->set_friendly_name("input2"); if (inputShape.size() != 1) {
std::shared_ptr<Node> parent1 = input1, parent2 = input2; ind = i;
if (!fqOnData1.empty()) {
if (operation == "relu") {
auto relu1 = std::make_shared<ngraph::opset1::Relu>(input1->output(0));
parent1 = makeFakeQuantize(relu1, inputPrecision, fqOnData1);
} else {
parent1 = makeFakeQuantize(input1, inputPrecision, fqOnData1);
} }
parent1->set_friendly_name("concat_fq1"); inputs[i] = std::make_shared<ngraph::opset1::Parameter>(inputPrecision, inputShape[ind]);
if (!convert1.empty()) { inputs[i]->set_friendly_name(std::string("input") + "_" + std::to_string(i + 1));
parent1 = std::make_shared<opset1::Convert>(parent1, convert1.outPrecision); parents[i] = inputs[i];
} }
if (!dequantization1.empty()) { if (!fqOnDataBefore.empty()) {
parent1 = makeDequantization(parent1, dequantization1); for (size_t i = 0; i < number_of_operations; i++) {
size_t ind = i;
if (fqOnDataBefore.size() == 1) {
ind = 0;
}
if (operation == "relu") {
auto relu = std::make_shared<ngraph::opset1::Relu>(parents[i]->output(0));
parents[i] = makeFakeQuantize(relu, inputPrecision, fqOnDataBefore[ind]);
} else {
parents[i] = makeFakeQuantize(parents[i], inputPrecision, fqOnDataBefore[ind]);
}
parents[i]->set_friendly_name(std::string("concat_fq") + "_" + std::to_string(i + 1));
if (!convertBefore.empty()) {
parents[i] = std::make_shared<opset1::Convert>(parents[i], convertBefore.outPrecision);
}
if (!dequantizationBefore.empty()) {
parents[i] = makeDequantization(parents[i], dequantizationBefore);
}
} }
} }
if (!fqOnData2.empty()) { const std::shared_ptr<ngraph::opset1::Concat> concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector(parents.begin(), parents.end()), axis);
if (operation == "relu") {
auto relu2 = std::make_shared<ngraph::opset1::Relu>(input2->output(0));
parent2 = makeFakeQuantize(relu2, inputPrecision, fqOnData2);
} else {
parent2 = makeFakeQuantize(input1, inputPrecision, fqOnData2);
}
parent2->set_friendly_name("concat_fq2");
if (!convert2.empty()) {
parent1 = std::make_shared<opset1::Convert>(parent2, convert2.outPrecision);
}
if (!dequantization1.empty()) {
parent2 = makeDequantization(parent2, dequantization2);
}
}
const std::shared_ptr<ngraph::opset1::Concat> concat = std::make_shared<ngraph::opset1::Concat>(ngraph::OutputVector{ parent1, parent2 }, axis);
concat->set_friendly_name("concat"); concat->set_friendly_name("concat");
std::shared_ptr<ngraph::Node> parent = concat; std::shared_ptr<ngraph::Node> parent = concat;
if (!dequantizationAfter.empty()) {
const auto lastDequantization = makeDequantization(concat, dequantizationAfter);
lastDequantization->set_friendly_name("multiply");
parent = lastDequantization;
}
addAttributes({ parent }, concatAttributes); addAttributes({ parent }, concatAttributes);
if (!fqOnData3.empty()) { if (!fqOnDataAfter.empty()) {
std::shared_ptr<Node> fq; std::shared_ptr<ngraph::Node> fq;
if (operation == "relu") { if (operation == "relu") {
auto relu = std::make_shared<ngraph::opset1::Relu>(concat->output(0)); auto relu = std::make_shared<ngraph::opset1::Relu>(concat->output(0));
fq = makeFakeQuantize(relu, inputPrecision, fqOnData3); fq = makeFakeQuantize(relu, inputPrecision, fqOnDataAfter);
} else { } else {
fq = makeFakeQuantize(concat, inputPrecision, fqOnData3); fq = makeFakeQuantize(concat, inputPrecision, fqOnDataAfter);
} }
fq->set_friendly_name("fakeQuantizeAfter"); fq->set_friendly_name("fakeQuantizeAfter");
parent = fq; parent = fq;
if (!convertAfter.empty()) {
parent = std::make_shared<opset1::Convert>(parent, convertAfter.outPrecision);
}
if (!dequantizationAfter.empty()) {
parent = makeDequantization(parent, dequantizationAfter);
}
} }
parent->set_friendly_name("output"); parent->set_friendly_name("output");
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(parent) }; ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(parent) };
std::shared_ptr<ngraph::Function> function = std::make_shared<ngraph::Function>( std::shared_ptr<ngraph::Function> function = std::make_shared<ngraph::Function>(
results, results,
ngraph::ParameterVector{ input1, input2 }, ngraph::ParameterVector(inputs.begin(), inputs.end()),
"MoveFakeQuantize"); "MoveFakeQuantize");
return function; return function;
} }
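
The rewritten builder assembles an arbitrary number of inputs, an optional ReLU plus FakeQuantize, Convert and dequantization per branch, the Concat, and then the trailing FakeQuantize, Convert and dequantization. For orientation, here is a hedged sketch of the simplest Q/DQ case the tests exercise (two inputs, per-tensor intervals), assuming the ngraph opset1 headers already used throughout this diff; names and shapes are illustrative rather than the builder's actual output:

#include <memory>
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

std::shared_ptr<ngraph::Function> make_concat_qdq_example() {
    using namespace ngraph;
    // Two inputs with different channel counts, concatenated along axis 1.
    const auto input1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 16, 16});
    const auto input2 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 16, 16});
    const auto concat = std::make_shared<opset1::Concat>(OutputVector{input1, input2}, 1);

    // FakeQuantize after the concat: [0, 2.55] -> [0, 255], 256 levels.
    const auto il = opset1::Constant::create(element::f32, Shape{}, {0.f});
    const auto ih = opset1::Constant::create(element::f32, Shape{}, {2.55f});
    const auto ol = opset1::Constant::create(element::f32, Shape{}, {0.f});
    const auto oh = opset1::Constant::create(element::f32, Shape{}, {255.f});
    const auto fq = std::make_shared<opset1::FakeQuantize>(concat, il, ih, ol, oh, 256);

    // Q/DQ tail: quantizing Convert to u8, then dequantization (Convert to f32, Multiply by 0.01).
    const auto quantize = std::make_shared<opset1::Convert>(fq, element::u8);
    const auto to_f32 = std::make_shared<opset1::Convert>(quantize, element::f32);
    const auto scale = opset1::Constant::create(element::f32, Shape{}, {0.01f});
    const auto dequantize = std::make_shared<opset1::Multiply>(to_f32, scale);

    const auto result = std::make_shared<opset1::Result>(dequantize);
    return std::make_shared<Function>(ResultVector{result}, ParameterVector{input1, input2}, "MoveFakeQuantizeExample");
}

MoveFakeQuantize is expected to rewrite such a graph so that the FakeQuantize, Convert and Multiply sit on each Concat input instead of after the Concat.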