[Snippets] Added support of INT8 models (#12395)
parent f7e05ad402, commit f6d6f5629f
@@ -24,6 +24,7 @@ namespace op {
class ConvertSaturation : public ov::op::v0::Convert {
public:
    OPENVINO_OP("ConvertSaturation", "SnippetsOpset", ov::op::v0::Convert);
    BWDCMP_RTTI_DECLARATION;

    ConvertSaturation(const Output<Node>& x, const ov::element::Type& destination_type);
    ConvertSaturation() = default;
@@ -23,6 +23,7 @@ namespace op {
class ConvertTruncation : public ov::op::v0::Convert {
public:
    OPENVINO_OP("ConvertTruncation", "SnippetsOpset", ov::op::v0::Convert);
    BWDCMP_RTTI_DECLARATION;

    ConvertTruncation(const Output<Node>& x, const ov::element::Type& destination_type);
    ConvertTruncation() = default;
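An illustrative sketch (an assumption based on the op names, not part of the diff): ConvertSaturation is expected to clamp out-of-range values to the destination type's limits, while ConvertTruncation keeps the standard Convert-op (truncating) behavior. The helper names below are hypothetical:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int8_t convert_saturation_scalar(float x) {
        // clamp to [-128, 127] before the cast, as a saturating convert would
        return static_cast<int8_t>(std::min(std::max(x, -128.f), 127.f));
    }

    int8_t convert_truncation_scalar(float x) {
        // plain truncating cast via a wide integer, as the standard Convert does
        return static_cast<int8_t>(static_cast<int32_t>(x));
    }

    int main() {
        // 300 saturates to 127; the truncating cast typically wraps to 44 (two's complement)
        std::printf("%d %d\n", convert_saturation_scalar(300.f), convert_truncation_scalar(300.f));
        return 0;
    }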
@@ -88,6 +88,17 @@ public:
        return m_generator;
    }

    size_t get_non_scalar_constants_count() const {
        return m_non_scalar_constants_count;
    }

    bool is_quantized() const {
        return config.m_is_quantized;
    }

    bool has_type_relaxed_ops() const {
        return config.m_has_type_relaxed_ops;
    }

    snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, ngraph::pass::Manager& opt,
                                const void* compile_params = nullptr);
@@ -99,6 +110,7 @@ public:
    // plugin sets generator for a snippet to some specific generator.
    // it's going to be replaced with Jitters table later
    void set_generator(std::shared_ptr<ngraph::snippets::Generator> generator);
    void set_non_scalar_constants_count(const size_t count);

    void print() const;
    void print_statistics(bool verbose);
@@ -111,9 +123,29 @@ public:
private:
    void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes);
    void convert_to_snippet_dialect();
    Shape exec_domain;
    std::shared_ptr<ov::Model> m_body;
    std::shared_ptr<ngraph::snippets::Generator> m_generator;
    // Count of potential non-scalar Constants that will be created after some transformations
    // At the moment it's relevant only for FakeQuantize decomposition
    // NOTE: To avoid overheads in each calculation of this count (for example, in validate_and_infer_types()),
    // we should MANUALLY calculate it where it is needed.
    size_t m_non_scalar_constants_count = 0;
    Shape exec_domain = {};
    std::shared_ptr<ov::Model> m_body = nullptr;
    std::shared_ptr<ngraph::snippets::Generator> m_generator = nullptr;

    // TODO: Change the logic of inserting Converts. This exec element type can be different for plugins
    const ov::element::Type execution_element_type = ov::element::f32;

    // Config to know which transformations should be called.
    // It helps to avoid the overhead of extra transformation calls
    struct {
        // True if Subgraph contains FakeQuantize -> FQ decomposition should be called
        bool m_is_quantized = false;
        // True if we should align element types inside the body
        bool m_is_needed_to_align_precision = false;
        // True if Subgraph contains TypeRelaxed nodes -> for several streams in throughput mode we should copy the body using mutexes
        // because TypeRelaxed::copy_with_new_inputs() isn't a thread-safe method
        bool m_has_type_relaxed_ops = false;
    } config;
};

static inline std::ostream& operator<<(std::ostream& os, const op::Subgraph::BlockedShape& blocked_shape) {
@@ -121,10 +153,6 @@ static inline std::ostream& operator<<(std::ostream& os, const op::Subgraph::Blo
    return os;
}

static inline auto is_scalar_constant(const std::shared_ptr<ngraph::Node>& source_output_node) -> bool {
    return ngraph::is_type<ngraph::opset1::Constant>(source_output_node) && ngraph::shape_size(source_output_node->get_shape()) == 1;
};

static inline auto create_body(std::string name, const ngraph::ResultVector& results, const ngraph::ParameterVector& parameters) ->
    std::shared_ptr<ov::Model> {
    auto body = std::make_shared<ov::Model>(results, parameters, name);
@@ -0,0 +1,46 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/matcher.hpp>

namespace ngraph {
namespace snippets {
namespace pass {

/**
 * @interface AlignElementType
 * @brief Wraps sequences of operations that don't support execution on the original element type with ConvertSaturation
 *        and resets the element type of type relaxed nodes inside the body to align element types between nodes.
 *        Example 1:
 *          - After FQ decomposition there may be Convert[U8/I8]. If after the Convert there are other operations
 *            that don't support U8/I8, a new ConvertSaturation[exec_type] will be inserted after the FQ decomposition
 *            to execute these operations on a supported element type
 *        Example 2:
 *          - Input[I8] -> Unsupported I8 op -> Movement op -> Output[I8]. Two ConvertSaturation ops will be inserted:
 *            * ConvertSaturation[exec_type] before the op that doesn't support I8
 *            * ConvertSaturation[I8] before the Movement op to return the original low precision.
 *        Note: We cannot just remove the original Convert[I8/U8] in Example 1 because we should cover two things:
 *            * allow execution of operations on an element type they support
 *            * keep computations mathematically equivalent to the original function
 *        Thus, for these cases we should have the following pipeline: FP32 -> Convert[I8/U8] -> Convert[FP32] -> FP32
 *        Note: We shouldn't call validate_and_infer_types() after Convert insertions to avoid element type conflicts on inputs of ops
 * @ingroup snippets
 */
class AlignElementType: public ngraph::pass::FunctionPass {
public:
    OPENVINO_RTTI("AlignElementType", "0");
    AlignElementType(const ov::element::Type exec_type = ov::element::f32);
    bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;

    static bool opNeedsAlignElementType(const std::shared_ptr<ov::Node>& n, const ov::element::Type exec_type = ov::element::f32);
private:
    ov::element::Type exec_type;
};

} // namespace pass
} // namespace snippets
} // namespace ngraph
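A minimal usage sketch for this pass (mirroring how subgraph.cpp wires it up later in this commit):

    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::snippets::pass::AlignElementType>(ov::element::f32);
    manager.run_passes(body);  // body is the subgraph's ov::Model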
@@ -0,0 +1,22 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/matcher.hpp>

namespace ngraph {
namespace snippets {
namespace pass {

class CommonOptimizations : public ngraph::pass::MatcherPass {
public:
    NGRAPH_RTTI_DECLARATION;
    CommonOptimizations();
};

} // namespace pass
} // namespace snippets
} // namespace ngraph
@@ -14,7 +14,7 @@ namespace pass {
/**
 * @interface ConvertConstantsToScalars
 * @brief Replace only constants which should be represented as scalars during code generation.
 *        Only single-value (0D) constants are currently supported.
 * @ingroup snippets
 */
class ConvertConstantsToScalars: public ngraph::pass::MatcherPass {
@@ -24,4 +24,4 @@ public:

} // namespace pass
} // namespace snippets
} // namespace ngraph
@@ -0,0 +1,91 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "ngraph/op/fake_quantize.hpp"
#include "ngraph/pass/graph_rewrite.hpp"
#include "ngraph/pass/constant_folding.hpp"
#include "snippets/pass/transform_convert.hpp"
#include "transformations_visibility.hpp"

namespace ngraph {
namespace snippets {
namespace pass {

/**
 * @interface FakeQuantizeDecomposition
 * @ingroup snippets
 * @brief FakeQuantizeDecomposition transformation decomposes a FakeQuantize layer.
 *
 * Expression from the specification:
 *      if x <= min(il, ih):
 *          output = ol
 *      elif x > max(il, ih):
 *          output = oh
 *      else:
 *          output = round((x - il) / (ih - il) * (levels-1)) / (levels-1) * (oh - ol) + ol
 *
 * Expand brackets:
 *      round(x * (levels-1) / (ih - il) - il * (levels-1) / (ih - il)) * (oh - ol) / (levels-1) + ol
 *
 * Marking:
 *   - isc := (levels-1) / (ih - il)
 *   - ish := -il * isc
 *   - osc := (oh - ol) / (levels-1)
 *   - osh := ol
 * Final expression:
 *      round(x * isc + ish) * osc + osh
 *
 * Some optimizations (example for scalars):
 *   1. If the output element type of the FQ is U8 and il = 0, ish = 0, osc = 1, osh = 0, the expression x * isc is enough
 *   2. If the output element type of the FQ is I8 and ish ~= 128, osc = 1, osh ~= -128, il * isc ~= -128, ih * isc ~= 127, the expression x * isc is enough
 *   3. If osc = 1 and osh = 0, there is no dequantization
 *   4. If there is no dequantization and the output element type of the FQ isn't FP32, there is no rounding
 *
 * This transformation doesn't support the following cases:
 *   1. At least one 'range' input is not a Constant
 *   2. At least one 'il' input value is greater than or equal to the corresponding 'ih' input value
 *
 */

class FakeQuantizeDecomposition : public ngraph::pass::MatcherPass {
public:
    FakeQuantizeDecomposition();

    static bool isAllScalarConstant(const std::shared_ptr<const ngraph::Node>& node);
    static bool getScalesAndShifts(const std::shared_ptr<const ngraph::op::v0::FakeQuantize>& fq_node,
                                   std::vector<float>& cl,
                                   std::vector<float>& ch,
                                   std::vector<float>& isc,
                                   std::vector<float>& ish,
                                   std::vector<float>& osc,
                                   std::vector<float>& osh);
    static std::vector<float> calculateScales(const ngraph::element::Type& out_type,
                                              const std::vector<float>& cl,
                                              const std::vector<float>& ch,
                                              const std::vector<float>& isc,
                                              const std::vector<float>& ish,
                                              const std::vector<float>& osc,
                                              const std::vector<float>& osh);
};

/**
 * @interface CommonFakeQuantizeDecomposition
 * @ingroup snippets
 * @brief CommonFakeQuantizeDecomposition pass applies all the transformations needed for
 *        a correct FQ decomposition:
 *        0. Disable the Validate() pass after each transformation
 *        1. FakeQuantize decomposition
 *        2. ConstantFolding
 *        3. Validate
 */
class CommonFakeQuantizeDecomposition: public ngraph::pass::FunctionPass {
public:
    bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
};

} // namespace pass
} // namespace snippets
} // namespace ngraph
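A worked scalar instance of the marking above (illustrative numbers only): levels = 256, il = 0, ih = 25.5, ol = 0, oh = 25.5 gives isc = 255 / 25.5 = 10, ish = -0 * 10 = 0, osc = 25.5 / 255 = 0.1, osh = 0, so fq(x) = round(x * 10) * 0.1, e.g. fq(1.23) = round(12.3) * 0.1 = 1.2. A self-contained sketch of the final expression (std::roundf rounds half away from zero, a simplification of the HALF_TO_EVEN mode the pass actually uses):

    #include <algorithm>
    #include <cmath>

    float fq_scalar(float x, float il, float ih, float ol, float oh, int levels) {
        const float isc = (levels - 1) / (ih - il);
        const float ish = -il * isc;
        const float osc = (oh - ol) / (levels - 1);
        const float osh = ol;
        x = std::min(std::max(x, std::min(il, ih)), std::max(il, ih));  // clamp, as in the pass
        return std::roundf(x * isc + ish) * osc + osh;
    }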
@@ -1,31 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/matcher.hpp>

namespace ngraph {
namespace snippets {
namespace pass {

/**
 * @interface InsertConvertOnInputs
 * @brief Inserts ConvertSaturation ops after Parameters and Scalars to convert the data type of inputs
 *        to the supported execution data type.
 *        Note: ConvertSaturation op isn't covered by the specification of the "Convert" op
 *              This op is used for conversion into and from FP32 after the corresponding Load
 *              and before Store to calculate in FP32 inside the subgraph body in the CPU Plugin
 * @ingroup snippets
 */
class InsertConvertOnInputs: public ngraph::pass::MatcherPass {
public:
    InsertConvertOnInputs(const ov::element::Type exec_type = ov::element::f32);
};


} // namespace pass
} // namespace snippets
} // namespace ngraph
@@ -13,7 +13,7 @@ namespace pass {

/**
 * @interface InsertMoveBroadcast
 * @brief Inserts explicit MoveBroadcast instruction if broadcasting by most warying dimension is needed.
 * @brief Inserts explicit MoveBroadcast instruction if broadcasting by the most varying dimension is needed.
 *        The pass is used to convert the model to a canonical form for code generation
 * @ingroup snippets
 */
@@ -1,31 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/matcher.hpp>

namespace ngraph {
namespace snippets {
namespace pass {

/**
 * @interface ResetTypeRelaxedNodePrecision
 * @brief Reset precision for type relaxed nodes inside body to align precision between nodes.
 *        Should be called after all Convert insertions
 * @ingroup snippets
 */
class ResetTypeRelaxedNodePrecision: public ngraph::pass::FunctionPass {
public:
    OPENVINO_RTTI("ResetTypeRelaxedNodePrecision", "0");
    ResetTypeRelaxedNodePrecision(const ov::element::Type exec_type = ov::element::f32);
    bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
private:
    ov::element::Type exec_type;
};

} // namespace pass
} // namespace snippets
} // namespace ngraph
src/common/snippets/include/snippets/utils.hpp (new file, 28 lines)
@@ -0,0 +1,28 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

/**
 * @brief A file containing public utilities.
 * @file utils.hpp
 */
#pragma once

#include "snippets_isa.hpp"
#include "emitter.hpp"

namespace ngraph {
namespace snippets {
namespace utils {

// Get the non-scalar Constant count that will be created after FakeQuantize decomposition.
// This count is needed to know the exact number of non-scalar Constants during tokenization.
auto get_non_scalar_constant_count_for_fq(const std::shared_ptr<ngraph::opset1::FakeQuantize>& fq) -> size_t;

inline auto is_scalar_constant(const std::shared_ptr<ngraph::Node>& source_output_node) -> bool {
    return ngraph::is_type<ngraph::opset1::Constant>(source_output_node) && ngraph::shape_size(source_output_node->get_shape()) == 1;
}

} // namespace utils
} // namespace snippets
} // namespace ngraph
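Usage sketch for the counting helper (this mirrors the call sites added in collapse_subgraph.cpp and subgraph.cpp later in this commit):

    if (const auto fq = ov::as_type_ptr<ngraph::opset1::FakeQuantize>(node)) {
        // predict how many non-scalar Constants the FQ decomposition would add,
        // without actually running the decomposition
        const size_t count = ngraph::snippets::utils::get_non_scalar_constant_count_for_fq(fq);
    }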
@@ -8,6 +8,9 @@

#include "ngraph/runtime/host_tensor.hpp"


BWDCMP_RTTI_DEFINITION(ngraph::snippets::op::ConvertSaturation);

ngraph::snippets::op::ConvertSaturation::ConvertSaturation(const Output<Node>& x, const ov::element::Type& destination_type)
    : ov::op::v0::Convert({x}, destination_type) {
}
@@ -8,6 +8,9 @@

#include "ngraph/runtime/host_tensor.hpp"


BWDCMP_RTTI_DEFINITION(ngraph::snippets::op::ConvertTruncation);

ngraph::snippets::op::ConvertTruncation::ConvertTruncation(const Output<Node>& x, const ov::element::Type& destination_type)
    : ov::op::v0::Convert({x}, destination_type) {
}
@@ -11,18 +11,19 @@
#include "snippets/pass/insert_movebroadcast.hpp"
#include "snippets/pass/load_movebroadcast_to_broadcastload.hpp"
#include "snippets/pass/assign_registers.hpp"
#include "snippets/pass/convert_constants_to_scalars.hpp"
#include "snippets/pass/convert_constants.hpp"
#include "snippets/pass/convert_power_to_powerstatic.hpp"
#include "snippets/pass/vector_to_scalar.hpp"
#include "snippets/pass/transform_convert_to_truncation.hpp"
#include "snippets/pass/insert_convert_on_inputs.hpp"
#include "snippets/pass/reset_type_relaxed_node_precision.hpp"
#include "snippets/pass/transform_convert.hpp"
#include "snippets/pass/align_element_type.hpp"
#include "snippets/utils.hpp"

#include "transformations/common_optimizations/nop_elimination.hpp"
#include "transformations/utils/utils.hpp"

#include <ngraph/pass/manager.hpp>
#include "ngraph/pass/constant_folding.hpp"
#include "ngraph_ops/type_relaxed.hpp"
#include <openvino/pass/serialize.hpp>

#include <algorithm>
@@ -36,8 +37,20 @@ void snippets::op::Subgraph::set_generator(std::shared_ptr<ngraph::snippets::Gen
    m_generator = generator;
}

void snippets::op::Subgraph::set_non_scalar_constants_count(const size_t count) {
    m_non_scalar_constants_count = count;
}

snippets::op::Subgraph::Subgraph(const OutputVector& args, std::shared_ptr<ov::Model> body)
    : Op(args), m_body(body), m_generator(nullptr) {
    const auto ops = m_body->get_ops();
    for (const auto& op : ops) {
        config.m_is_quantized = config.m_is_quantized || ov::is_type<ov::op::v0::FakeQuantize>(op);
        config.m_has_type_relaxed_ops = config.m_has_type_relaxed_ops || std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(op);
        config.m_is_needed_to_align_precision = config.m_is_needed_to_align_precision || is_quantized() || has_type_relaxed_ops() ||
            snippets::pass::AlignElementType::opNeedsAlignElementType(op, execution_element_type);
    }

    constructor_validate_and_infer_types();
}
@@ -86,7 +99,8 @@ auto snippets::op::Subgraph::wrap_node_as_subgraph(const std::shared_ptr<ov::Nod
    ngraph::OutputVector subgraph_inputs;

    for (const auto& input : node->input_values()) {
        if (is_scalar_constant(input.get_node_shared_ptr())) {
        if ((utils::is_scalar_constant(input.get_node_shared_ptr())) ||
            (ov::is_type<ov::op::v0::FakeQuantize>(node) && ov::is_type<ov::op::v0::Constant>(input.get_node_shared_ptr()))) {
            body_inputs.push_back(input);
        } else {
            auto parameter = std::make_shared<ngraph::opset1::Parameter>(input.get_element_type(), input.get_partial_shape());
@@ -119,6 +133,10 @@ auto snippets::op::Subgraph::wrap_node_as_subgraph(const std::shared_ptr<ov::Nod
    auto body = create_body(node->get_friendly_name(), body_results, body_parameters);
    auto subgraph = build_subgraph(node, subgraph_inputs, body);

    if (auto fq_node = ov::as_type_ptr<ov::op::v0::FakeQuantize>(node)) {
        subgraph->set_non_scalar_constants_count(utils::get_non_scalar_constant_count_for_fq(fq_node));
    }

    for (size_t i = 0; i < body->get_parameters().size(); i++) {
        body->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name());
    }
@@ -251,25 +269,18 @@ Shape snippets::op::Subgraph::canonicalize(const BlockedShapeVector& outputShape

void snippets::op::Subgraph::align_element_types(const BlockedShapeVector& outputShapes,
                                                 const BlockedShapeVector& inputShapes) {
    // TODO: At the moment snippets support execution in only one element type
    const auto execution_element_type = ov::element::f32;

    ngraph::pass::Manager p_manager;
    p_manager.register_pass<snippets::pass::TransformConvertToConvertTruncation>();
    p_manager.run_passes(m_body);

    const auto& body_results = m_body->get_results();
    for (size_t i = 0; i < outputShapes.size(); i++) {
        const auto needed_out_type = std::get<2>(outputShapes[i]);

        // If there is real Convert from graph (ConvertTruncation) before Result
        // If there is a real Convert from the graph (ConvertTruncation) or one from FQ decomposition (ConvertSaturation) before a Result,
        // we should check the destination type and insert ConvertSaturation before it if needed.
        // For example, to return the original element type after Convert insertion on inputs
        std::shared_ptr<ov::Node> first_convert = body_results[i];
        while (ov::is_type<ngraph::snippets::op::ConvertTruncation>(first_convert->get_input_node_ptr(0))) {
        while (ov::is_type<ngraph::op::v0::Convert>(first_convert->get_input_node_ptr(0))) {
            first_convert = first_convert->get_input_node_shared_ptr(0);
        }
        if (auto existing_convert_t = ngraph::as_type_ptr<ngraph::snippets::op::ConvertTruncation>(first_convert)) {
        if (auto existing_convert_t = ngraph::as_type_ptr<ngraph::op::v0::Convert>(first_convert)) {
            const auto original_input_element_type = existing_convert_t->get_input_element_type(0);
            if (original_input_element_type != execution_element_type) {
                const auto convert = std::make_shared<ngraph::snippets::op::ConvertSaturation>(
@@ -283,16 +294,16 @@ void snippets::op::Subgraph::align_element_types(const BlockedShapeVector& outpu
                body_results[i]->get_input_node_shared_ptr(0), needed_out_type);
            body_results[i]->set_argument(0, convert);
        }

    // After Convert insertion we should make the following steps:
    // - insert ConvertSaturation after inputs and scalar to start aligning of exec data type inside body
    // - manually set output element types of type relaxed nodes to align element type inside subgraph body
    // - after Convert insertion on inputs and after scalars we should use ConstantFolding pass to convert
    //   element type of Scalars before inference
    // - eliminate redundant Convert that could have been inserted
    // We should align the element type inside the body using the corresponding pass:
    // - Insert Convert before operations that don't support the original element type for execution
    // - Insert a reverse Convert before operations that support the original element type
    //   but have inputs that don't support it (because a Convert with exec_type will have been inserted before them - first point)
    // Then we should use the ConstantFolding pass to convert the element type of Scalars before inference.
    // At the end, eliminate redundant Converts that could have been inserted
    ngraph::pass::Manager manager;
    manager.register_pass<snippets::pass::InsertConvertOnInputs>(execution_element_type);
    manager.register_pass<snippets::pass::ResetTypeRelaxedNodePrecision>(execution_element_type);
    if (config.m_is_needed_to_align_precision) {
        manager.register_pass<snippets::pass::AlignElementType>(execution_element_type);
    }
    manager.register_pass<ngraph::pass::ConstantFolding>();
    manager.register_pass<ngraph::pass::EliminateConvert>();
    manager.run_passes(m_body);
src/common/snippets/src/pass/align_element_type.cpp (new file, 97 lines)
@@ -0,0 +1,97 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <snippets/itt.hpp>

#include "snippets/snippets_isa.hpp"
#include "snippets/op/convert_saturation.hpp"
#include "snippets/pass/align_element_type.hpp"
#include "snippets/utils.hpp"
#include "ngraph_ops/type_relaxed.hpp"
#include "ngraph/op/util/op_types.hpp"

#include <ngraph/rt_info.hpp>

namespace {

auto is_in_out_op(const std::shared_ptr<ov::Node>& n) -> bool {
    return ov::is_type<ov::op::v0::Parameter>(n)
        || ov::is_type<ov::op::v0::Constant>(n)
        || ov::is_type<ov::op::v0::Result>(n);
}

// At the moment Subgraph supports only Eltwise, Convert and FQ (which is decomposed into Eltwises and a Convert),
// and only the Eltwise ops support execution exclusively in "exec_type". So we can check the op type from the opposite side
auto op_supports_only_exec_type(const std::shared_ptr<ov::Node>& n) -> bool {
    return !ov::is_type<ov::op::v0::Convert>(n);
}

// Check if an executable operation supports only the execution element type f32
// NOTE: An executable op is a node that isn't a Parameter/Constant/Result
auto is_executable_op_only_on_exec_type(const std::shared_ptr<ov::Node>& n) -> bool {
    return op_supports_only_exec_type(n) && !is_in_out_op(n);
}

} // namespace

ngraph::snippets::pass::AlignElementType::AlignElementType(const ov::element::Type exec_type) : exec_type(exec_type) { }

bool ngraph::snippets::pass::AlignElementType::run_on_model(const std::shared_ptr<ov::Model> &m) {
    RUN_ON_FUNCTION_SCOPE(AlignElementType);

    auto insertConvert = [](const std::shared_ptr<ov::Node>& op, const size_t idx, const ov::element::Type& element_type) -> void {
        auto convert = std::make_shared<ngraph::snippets::op::ConvertSaturation>(op->input(idx).get_source_output(), element_type);
        ngraph::copy_runtime_info(op->get_input_node_shared_ptr(idx), convert);
        op->set_argument(idx, convert);
    };

    // NOTE: We don't call validate_and_infer_types() to avoid precision conflicts on inputs
    bool rewritten = false;
    auto ops = m->get_ordered_ops();
    for (auto& op : ops) {
        if (is_in_out_op(op) || ov::is_type<ov::op::v0::Convert>(op)) {
            continue;
        }

        if (op_supports_only_exec_type(op)) {
            for (auto i = 0; i < op->inputs().size(); i++) {
                auto shared_input = op->get_input_node_shared_ptr(i);
                auto existing_convert = ov::as_type_ptr<ov::op::v0::Convert>(shared_input);
                // We should insert a Convert before ops that support only the exec element type, only when:
                //  - the input is a Convert with an unsupported destination type
                //  - the input is an op that supports any element type
                // We can't merge these conditions into a single check that the element type isn't the supported exec type,
                // because we don't call validate_and_infer_types(), so we don't know the new precisions
                if ((existing_convert && existing_convert->get_destination_type() != exec_type) || (!is_executable_op_only_on_exec_type(shared_input))) {
                    insertConvert(op, i, exec_type);
                    rewritten |= true;
                }
            }
            if (auto tr_node = std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(op)) {
                tr_node->set_overridden_output_type(exec_type, 0);
                rewritten |= true;
            }
        } else {  // branch for Movement ops and, in the future, MatMul ops
            for (auto i = 0; i < op->inputs().size(); i++) {
                auto shared_input = op->get_input_node_shared_ptr(i);
                // it's the original element type because we don't use validate_and_infer_types() anywhere
                const auto original_eltype = op->input(i).get_element_type();
                // If the preceding op doesn't support execution on the original element type, we know that
                // a Convert will have been inserted before it to allow execution on the supported element type (first branch of the condition).
                // So we should restore the original element type for operations that can support low precision
                if (is_executable_op_only_on_exec_type(shared_input) && original_eltype != exec_type) {
                    insertConvert(op, i, original_eltype);
                    rewritten |= true;
                }
            }
        }
    }

    return rewritten;
}

bool ngraph::snippets::pass::AlignElementType::opNeedsAlignElementType(const std::shared_ptr<ov::Node>& op, const ov::element::Type exec_type) {
    // At the moment Snippets support only Eltwise/Convert/FQ, which have one output, so we can just call get_element_type()
    return is_executable_op_only_on_exec_type(op) && op->get_element_type() != exec_type;
}
@@ -7,6 +7,7 @@

#include "snippets/pass/collapse_subgraph.hpp"
#include "snippets/op/subgraph.hpp"
#include "snippets/utils.hpp"

#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset5.hpp>
@@ -56,9 +57,19 @@ auto outputs_are_not_broadcastable(const std::shared_ptr<const Node>& node) -> b
    return std::find_if_not(std::begin(outputs), std::end(outputs), check_shapes_broadcastable) != std::end(outputs);
}

auto is_layout_oblivious(const std::shared_ptr<const Node> &n) -> bool {
    OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::is_layout_oblivious")
    auto is_layout_oblivious_binary = [](const std::shared_ptr<const Node> &n) -> bool {
auto is_supported_op(const std::shared_ptr<const Node> &n) -> bool {
    OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::is_supported_op")
    auto is_supported_fq_op = [](const std::shared_ptr<const Node>& n) -> bool {
        // TODO [92179]: Add support of FakeQuantize with non-constant inputs and with the binarization algorithm.
        const auto fq = ov::as_type_ptr<const opset1::FakeQuantize>(n);
        return fq && fq->get_levels() != 2 &&
               is_type<opset1::Constant>(n->get_input_node_shared_ptr(1)) &&
               is_type<opset1::Constant>(n->get_input_node_shared_ptr(2)) &&
               is_type<opset1::Constant>(n->get_input_node_shared_ptr(3)) &&
               is_type<opset1::Constant>(n->get_input_node_shared_ptr(4));
    };

    auto is_supported_binary_eltwise_op = [](const std::shared_ptr<const Node> &n) -> bool {
        return ov::is_type<opset1::Add>(n)
            || ov::is_type<opset1::Divide>(n)
            || ov::is_type<opset1::Equal>(n)
@@ -79,10 +90,11 @@ auto is_layout_oblivious(const std::shared_ptr<const Node> &n) -> bool {
            || ov::is_type<opset1::Power>(n)
            || ov::is_type<opset1::SquaredDifference>(n)
            || ov::is_type<opset1::Subtract>(n)
            || ov::is_type<opset1::Xor>(n);
            || ov::is_type<opset1::Xor>(n)
            || ov::is_type<ngraph::op::v0::Convert>(n);
    };

    auto is_layout_oblivious_unary = [](const std::shared_ptr<const Node> &n) -> bool {
    auto is_supported_unary_eltwise_op = [](const std::shared_ptr<const Node> &n) -> bool {
        return ov::is_type<opset1::Abs>(n)
            || ov::is_type<opset1::Clamp>(n)
            || ov::is_type<opset1::Floor>(n)
@@ -99,10 +111,10 @@ auto is_layout_oblivious(const std::shared_ptr<const Node> &n) -> bool {
            || ov::is_type<opset1::Tanh>(n)
            || ov::is_type<ngraph::op::v0::Gelu>(n)
            || ov::is_type<ngraph::op::v7::Gelu>(n)
            || ov::is_type<ngraph::op::v4::HSwish>(n)
            || ov::is_type<ngraph::op::v0::Convert>(n);
            || ov::is_type<ngraph::op::v4::Swish>(n)
            || ov::is_type<ngraph::op::v4::HSwish>(n);
    };
    return is_layout_oblivious_unary(n) || is_layout_oblivious_binary(n);
    return is_supported_fq_op(n) || is_supported_unary_eltwise_op(n) || is_supported_binary_eltwise_op(n);
}

auto has_supported_in_out(const std::shared_ptr<const Node> &n) -> bool {
@@ -162,7 +174,7 @@ auto update_out_tensor_name(std::shared_ptr<ngraph::snippets::op::Subgraph> &sub
} // namespace

bool AppropriateForSubgraph(const std::shared_ptr<const Node> &node) {
    return is_layout_oblivious(node) && has_supported_in_out(node);
    return is_supported_op(node) && has_supported_in_out(node);
}

void SetSnippetsNodeType(const std::shared_ptr<Node> &node, SnippetsNodeType nodeType) {
@@ -435,7 +447,10 @@ TokenizeSnippets::TokenizeSnippets() {
                // Result op has a single input
                internal_inputs.push_back(source_result->input_value(0));
            } else {
                if (op::is_scalar_constant(input_node)) {
                // We have to explicitly keep FQ Constants so that ConstantFolding can be called after tokenization.
                // After ConstantFolding we will move the remaining non-scalar Constants out of the body using the ConvertConstantsToParameters pass
                if ((utils::is_scalar_constant(input_node)) ||
                    (ov::is_type<ov::op::v0::Constant>(input_node) && ov::is_type<ov::op::v0::FakeQuantize>(node))) {
                    internal_inputs.push_back(input_node->output(0));
                } else {
                    external_inputs.push_back(input_value);
@@ -461,10 +476,23 @@ TokenizeSnippets::TokenizeSnippets() {
            throw ngraph_error("original node outputs size and extracted node outputs size don't match");
        }

        // After some transformations, a different number of Constants may be created for some operations
        // than the actual number of Constants present during tokenization.
        // To avoid an unsupported number of non-scalar Constants in the future (a plugin-specific limitation),
        // we should calculate the potential number of non-scalar Constants that will be moved up from the body.
        size_t hidden_non_scalar_constant_count = 0;
        if (const auto fq_node = ov::as_type_ptr<ov::op::v0::FakeQuantize>(node)) {
            hidden_non_scalar_constant_count += ngraph::snippets::utils::get_non_scalar_constant_count_for_fq(fq_node);
        }

        ResultVector body_results;
        std::vector<std::set<Input<Node>>> subgraph_result_inputs;

        for (auto subgraph : input_subgraphs) {
            // we should sum up the non-scalar Constant counts from all input subgraphs,
            // because we will collapse them with our node and need the total count of non-scalar Constants
            hidden_non_scalar_constant_count += ov::as_type_ptr<ngraph::snippets::op::Subgraph>(subgraph)->get_non_scalar_constants_count();

            for (auto output : subgraph->outputs()) {
                bool first_side_consumer = true;

@@ -502,12 +530,15 @@ TokenizeSnippets::TokenizeSnippets() {
        if (body_results.size() != subgraph_result_inputs.size()) {
            throw ngraph_error("body results and node results size mismatch during subgraph collapse");
        }

        // todo: move this plugin-specific constraint to the plugin callback
        if (body_parameters.size() + body_results.size() > 12) {
        if (body_parameters.size() + body_results.size() + hidden_non_scalar_constant_count > 12) {
            const std::string message_reset = "new subgraph is created. Impossible to schedule subgraph with " +
                std::to_string(body_parameters.size()) + " inputs and " + std::to_string(body_results.size()) + " outputs.";
                std::to_string(body_parameters.size()) + " inputs, " + std::to_string(body_results.size()) + " outputs and " +
                std::to_string(hidden_non_scalar_constant_count) + " non-scalar constants.";
            const std::string message_abort = "failed to continue subgraph. Impossible to schedule subgraph with " +
                std::to_string(body_parameters.size()) + " inputs and " + std::to_string(body_results.size()) + " outputs.";
                std::to_string(body_parameters.size()) + " inputs, " + std::to_string(body_results.size()) + " outputs and " +
                std::to_string(hidden_non_scalar_constant_count) + " non-scalar constants.";
            return abort_with_strategy(message_reset, message_abort);
        }

@@ -542,6 +573,7 @@ TokenizeSnippets::TokenizeSnippets() {
            act_body1->get_parameters()[i]->set_friendly_name(body_parameters[i]->get_friendly_name());
        }
        subgraph->get_rt_info()["originalLayersNames"] = fusedNames;
        subgraph->set_non_scalar_constants_count(hidden_non_scalar_constant_count);

        remark(1) << "Replacement (merge) done for: "
                  << subgraph->get_friendly_name()
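A worked instance of the budget check above (hypothetical numbers): a candidate subgraph with 6 parameters and 4 results whose FakeQuantize would contribute 3 hidden non-scalar constants gives 6 + 4 + 3 = 13 > 12, so tokenization falls back to abort_with_strategy() instead of collapsing the node.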
src/common/snippets/src/pass/common_optimizations.cpp (new file, 87 lines)
@@ -0,0 +1,87 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "snippets/pass/common_optimizations.hpp"

#include <memory>
#include <ngraph/opsets/opset1.hpp>
#include <ngraph/pass/constant_folding.hpp>

#include "transformations/utils/utils.hpp"
#include "snippets/pass/fq_decomposition.hpp"
#include "snippets/op/subgraph.hpp"
#include "snippets/itt.hpp"

NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::CommonOptimizations, "Snippets::CommonOptimizations", 0);

namespace ngraph {
namespace snippets {
namespace pass {


// Move non-scalar Constants up from the body to the Subgraph and replace them with Parameters inside the body
void ConvertConstantsToParameters(const std::shared_ptr<ngraph::snippets::op::Subgraph>& subgraph) {
    OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::ConvertConstantsToParameters");
    auto body = subgraph->get_body();

    ParameterVector new_parameters;
    OutputVector new_external_inputs = subgraph->input_values();

    for (auto& op : body->get_ops()) {
        auto constant = ov::as_type_ptr<ov::op::v0::Constant>(op);
        if (!(constant && ngraph::shape_size(constant->get_shape()) != 1ul))
            continue;

        auto parameter = std::make_shared<opset1::Parameter>(constant->get_element_type(), constant->output(0).get_partial_shape());
        parameter->set_friendly_name(constant->get_friendly_name());
        ngraph::copy_runtime_info(constant, parameter);
        constant->output(0).replace(parameter->output(0));

        new_external_inputs.push_back(constant);
        new_parameters.push_back(parameter);
    }

    if (new_parameters.size() != 0) {
        body->add_parameters(new_parameters);
        body->validate_nodes_and_infer_types();
        subgraph->set_arguments(new_external_inputs);
    }
}

CommonOptimizations::CommonOptimizations() {
    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
        OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::CommonOptimizations");

        auto subgraph = ngraph::as_type_ptr<ngraph::snippets::op::Subgraph>(m.get_match_root());
        if (transformation_callback(subgraph)) {
            return false;
        }

        auto body = subgraph->get_body();
        const auto is_quantized = subgraph->is_quantized();

        // First we should transform all original Converts inside the body to ConvertTruncation to preserve the original behavior.
        // Then, if the Subgraph contains a FakeQuantize, we enable the transformations specific to quantized subgraphs.
        ngraph::pass::Manager manager;
        manager.register_pass<ngraph::snippets::pass::TransformConvertToConvertTruncation>();
        if (is_quantized) {
            manager.register_pass<ngraph::snippets::pass::CommonFakeQuantizeDecomposition>();
        }
        manager.run_passes(body);

        // At the moment only non-scalar Constants of FakeQuantize can be inside a Subgraph,
        // so we can enable the ConvertConstantsToParameters pass for quantized models only
        if (is_quantized) {
            ConvertConstantsToParameters(subgraph);
        }
        return true;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(ngraph::pattern::wrap_type<ngraph::snippets::op::Subgraph>(), "snippets::pass::CommonOptimizations");
    this->register_matcher(m, callback);
}

} // namespace pass
} // namespace snippets
} // namespace ngraph
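A minimal sketch of running this pass (the exact plugin pipeline is outside this commit, so this is an assumption):

    ngraph::pass::Manager manager;
    manager.register_pass<ngraph::snippets::pass::CommonOptimizations>();
    manager.run_passes(model);  // model contains the tokenized Subgraph ops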
@@ -3,9 +3,12 @@
//

#include <snippets/itt.hpp>
#include "snippets/snippets_isa.hpp"
#include "snippets/pass/convert_constants_to_scalars.hpp"
#include <ngraph/rt_info.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

#include "snippets/snippets_isa.hpp"
#include "snippets/pass/convert_constants.hpp"
#include "snippets/op/subgraph.hpp"


ngraph::snippets::pass::ConvertConstantsToScalars::ConvertConstantsToScalars() {
@@ -24,5 +27,5 @@ ngraph::snippets::pass::ConvertConstantsToScalars::ConvertConstantsToScalars() {

        return true;
    };
    register_matcher(std::make_shared<ov::pass::pattern::Matcher>(constants, matcher_name), callback);
    register_matcher(std::make_shared<ov::pass::pattern::Matcher>(constants), callback);
}
src/common/snippets/src/pass/fq_decomposition.cpp (new file, 308 lines)
@@ -0,0 +1,308 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "snippets/pass/fq_decomposition.hpp"
#include "snippets/op/convert_saturation.hpp"
#include "snippets/itt.hpp"

#include <ngraph/opsets/opset1.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/partial_shape.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pass/manager.hpp>
#include <numeric>

namespace {

bool isValidRangesInputs(const std::shared_ptr<ngraph::opset1::FakeQuantize>& fq) {
    auto il = fq->input_value(1);
    auto ih = fq->input_value(2);
    auto greater_equal = std::make_shared<ngraph::opset1::GreaterEqual>(il, ih);

    ngraph::OutputVector result(1);
    if (!greater_equal->constant_fold(result, greater_equal->input_values()))
        return false;

    auto res_node = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(result[0].get_node_shared_ptr());

    const std::vector<bool> comp_result = res_node->cast_vector<bool>();

    return !std::any_of(comp_result.begin(), comp_result.end(), [](const bool value) {
        return value;
    });
}

bool is_scalar_constant(const std::shared_ptr<ngraph::Node>& source_output_node) {
    return ngraph::is_type<ngraph::opset1::Constant>(source_output_node) &&
           ngraph::shape_size(source_output_node->get_shape()) == 1;
}

} // namespace

ngraph::snippets::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() {
    MATCHER_SCOPE(FakeQuantizeDecomposition);

    auto fake_quantize = ngraph::pattern::wrap_type<ngraph::opset1::FakeQuantize>(
        OutputVector{ngraph::pattern::any_input(),
                     ngraph::pattern::wrap_type<opset1::Constant>(),
                     ngraph::pattern::wrap_type<opset1::Constant>(),
                     ngraph::pattern::wrap_type<opset1::Constant>(),
                     ngraph::pattern::wrap_type<opset1::Constant>()});

    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
        OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::FakeQuantizeDecomposition")
        auto& pattern_to_output = m.get_pattern_value_map();
        const auto fake_quantize_node = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(
            pattern_to_output.at(fake_quantize).get_node_shared_ptr());

        if (!fake_quantize_node || transformation_callback(fake_quantize_node) ||
            !isValidRangesInputs(fake_quantize_node)) {
            return false;
        }

        Output<Node> data{fake_quantize_node->input_value(0)};
        const Output<Node> input_low{fake_quantize_node->input_value(1)};
        const Output<Node> input_high{fake_quantize_node->input_value(2)};
        const Output<Node> output_low{fake_quantize_node->input_value(3)};
        const Output<Node> output_high{fake_quantize_node->input_value(4)};
        auto input_type = data.get_element_type();

        std::vector<float> out_scales;
        std::vector<float> cl, ch, isc, ish, osc, osh;
        const bool status = getScalesAndShifts(fake_quantize_node, cl, ch, isc, ish, osc, osh);
        if (status) {
            out_scales = calculateScales(fake_quantize_node->get_output_element_type(0), cl, ch, isc, ish, osc, osh);
        }
        const bool do_dequantize = !(status && ((std::all_of(osc.cbegin(),
                                                             osc.cend(),
                                                             [](float val) {
                                                                 return val == 1.f;
                                                             }) &&
                                                 std::all_of(osh.cbegin(),
                                                             osh.cend(),
                                                             [](float val) {
                                                                 return val == 0.f;
                                                             })) ||
                                                out_scales.size() != 0));
        const bool do_rounding = do_dequantize || fake_quantize_node->get_output_element_type(0) == ngraph::element::f32;

        ngraph::NodeVector decomp_ops;
        if (input_type != input_low.get_element_type()) {
            input_type = input_low.get_element_type();
            data = std::make_shared<ngraph::snippets::op::ConvertSaturation>(data, input_type);
            decomp_ops.push_back(data.get_node_shared_ptr());
        }

        // if we substitute input_low or input_high into the formula, we get output = output_low or output = output_high
        // respectively, so we just clamp x
        const auto max = std::make_shared<ngraph::opset1::Maximum>(data, input_low);
        const auto min = std::make_shared<ngraph::opset1::Minimum>(max, input_high);
        decomp_ops.push_back(max);
        decomp_ops.push_back(min);

        std::shared_ptr<ngraph::Node> result = nullptr;
        if (out_scales.size() != 0) {
            PartialShape scale_shape = input_low.get_partial_shape();
            ngraph::PartialShape::broadcast_merge_into(scale_shape,
                                                       input_high.get_partial_shape(),
                                                       ov::op::AutoBroadcastType::NUMPY);
            const auto scales =
                std::make_shared<ngraph::opset1::Constant>(ngraph::element::f32, scale_shape.get_shape(), out_scales);
            decomp_ops.push_back(scales);

            result = std::make_shared<ngraph::opset1::Multiply>(min, scales);
            decomp_ops.push_back(result);
        } else {
            // (levels-1)
            const auto levels_minus_one =
                std::make_shared<ngraph::opset1::Constant>(input_type, Shape{}, fake_quantize_node->get_levels() - 1);
            decomp_ops.push_back(levels_minus_one);
            // (input_high - input_low)
            const auto subInHighLow = std::make_shared<ngraph::opset1::Subtract>(input_high, input_low);
            // (levels-1) / (input_high - input_low)
            const auto isc = std::make_shared<ngraph::opset1::Divide>(levels_minus_one, subInHighLow);
            // input_low * (levels-1) / (input_high - input_low)
            const auto ish = std::make_shared<ngraph::opset1::Multiply>(input_low, isc);
            decomp_ops.push_back(subInHighLow);
            decomp_ops.push_back(isc);
            decomp_ops.push_back(ish);

            // x * (levels-1) / (input_high - input_low)
            const auto after_isc_apply = std::make_shared<ngraph::opset1::Multiply>(min, isc);
            // x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)
            result = std::make_shared<ngraph::opset1::Subtract>(after_isc_apply, ish);
            decomp_ops.push_back(after_isc_apply);
            decomp_ops.push_back(result);
        }

        if (do_rounding) {
            // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low))
            result = std::make_shared<ngraph::opset5::Round>(result, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN);
            decomp_ops.push_back(result);
        }

        if (do_dequantize) {
            // (levels-1)
            const auto levels_minus_one =
                std::make_shared<ngraph::opset1::Constant>(input_type, Shape{}, fake_quantize_node->get_levels() - 1);
            // (output_high - output_low)
            const auto sub_out_high_low = std::make_shared<ngraph::opset1::Subtract>(output_high, output_low);
            // (output_high - output_low) / (levels-1)
            const auto osc = std::make_shared<ngraph::opset1::Divide>(sub_out_high_low, levels_minus_one);
            decomp_ops.push_back(sub_out_high_low);
            decomp_ops.push_back(osc);

            // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) *
            // (output_high - output_low) / (levels-1)
            const auto after_osc_apply = std::make_shared<ngraph::opset1::Multiply>(result, osc);
            // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) *
            // (output_high - output_low) / (levels-1) + output_low
            result = std::make_shared<ngraph::opset1::Add>(after_osc_apply, output_low);
            decomp_ops.push_back(after_osc_apply);
            decomp_ops.push_back(result);
        }

        if (result->get_output_element_type(0) != fake_quantize_node->get_output_element_type(0)) {
            result = std::make_shared<snippets::op::ConvertSaturation>(result, fake_quantize_node->get_output_element_type(0));
            decomp_ops.push_back(result);
        }

        result->set_friendly_name(m.get_match_root()->get_friendly_name());
        ngraph::copy_runtime_info(fake_quantize_node, decomp_ops);
        ngraph::replace_node(m.get_match_root(), result);
        return true;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(fake_quantize, matcher_name);
    register_matcher(m, callback);
}

bool ngraph::snippets::pass::FakeQuantizeDecomposition::isAllScalarConstant(const std::shared_ptr<const ngraph::Node>& node) {
    return is_scalar_constant(node->get_input_node_shared_ptr(1)) &&
           is_scalar_constant(node->get_input_node_shared_ptr(2)) &&
           is_scalar_constant(node->get_input_node_shared_ptr(3)) &&
           is_scalar_constant(node->get_input_node_shared_ptr(4));
}

bool ngraph::snippets::pass::FakeQuantizeDecomposition::getScalesAndShifts(
    const std::shared_ptr<const ngraph::opset1::FakeQuantize>& fq_node,
    std::vector<float>& cl,
    std::vector<float>& ch,
    std::vector<float>& isc,
    std::vector<float>& ish,
    std::vector<float>& osc,
    std::vector<float>& osh) {
    auto input_low_constant =
        std::dynamic_pointer_cast<ngraph::opset1::Constant>(fq_node->get_input_node_shared_ptr(1));
    auto input_high_constant =
        std::dynamic_pointer_cast<ngraph::opset1::Constant>(fq_node->get_input_node_shared_ptr(2));
    auto output_low_constant =
        std::dynamic_pointer_cast<ngraph::opset1::Constant>(fq_node->get_input_node_shared_ptr(3));
    auto output_high_constant =
        std::dynamic_pointer_cast<ngraph::opset1::Constant>(fq_node->get_input_node_shared_ptr(4));
    if (!input_low_constant || !input_high_constant || !output_low_constant || !output_high_constant)
        return false;

    auto input_low = input_low_constant->cast_vector<float>();
    auto input_high = input_high_constant->cast_vector<float>();
    auto output_low = output_low_constant->cast_vector<float>();
    auto output_high = output_high_constant->cast_vector<float>();
    auto levels = fq_node->get_levels();

    const auto input_size = std::max(input_low.size(), input_high.size());
    const auto output_size = std::max(output_low.size(), output_high.size());

    cl = input_low;
    ch = input_high;
    isc.resize(input_size, 0);
    ish.resize(input_size, 0);
    osc.resize(output_size, 0);
    osh.resize(output_size, 0);

    for (int i = 0; i < input_size; i++) {
        float il = input_low[input_low.size() == 1 ? 0 : i];
        float ih = input_high[input_high.size() == 1 ? 0 : i];

        isc[i] = (levels - 1) / (ih - il);
        ish[i] = -il * isc[i];
    }

    for (int i = 0; i < output_size; i++) {
        float ol = output_low[output_low.size() == 1 ? 0 : i];
        float oh = output_high[output_high.size() == 1 ? 0 : i];

        osc[i] = (oh - ol) / (levels - 1);
        osh[i] = ol;
    }

    return true;
}

std::vector<float> ngraph::snippets::pass::FakeQuantizeDecomposition::calculateScales(const ngraph::element::Type& out_type,
                                                                                      const std::vector<float>& cl,
                                                                                      const std::vector<float>& ch,
                                                                                      const std::vector<float>& isc,
                                                                                      const std::vector<float>& ish,
                                                                                      const std::vector<float>& osc,
                                                                                      const std::vector<float>& osh) {
    std::vector<float> out_scales;
    if (out_type == ngraph::element::u8 &&
        std::all_of(cl.cbegin(),
                    cl.cend(),
                    [](float val) {
                        return val == 0.0f;
                    }) &&
        std::all_of(ish.cbegin(),
                    ish.cend(),
                    [](float val) {
                        return val == 0.0f;
                    }) &&
        std::all_of(osc.cbegin(),
                    osc.cend(),
                    [](float val) {
                        return val == 1.0f;
                    }) &&
        std::all_of(osh.cbegin(), osh.cend(), [](float val) {
            return val == 0.0f;
        })) {
        out_scales = isc;
    }

    static const float thr = 0.0001f;
    if (out_type == ngraph::element::i8 &&
        std::all_of(ish.cbegin(), ish.cend(), [](float val) { return std::abs(val - 128.f) < thr; }) &&
        std::all_of(osc.cbegin(), osc.cend(), [](float val) { return val == 1.f; }) &&
        std::all_of(osh.cbegin(), osh.cend(), [](float val) { return std::abs(val + 128.f) < thr; })) {
        bool is_crop_aligned = true;
        for (int i = 0; i < std::max(cl.size(), isc.size()); i++) {
            if (std::abs(cl[cl.size() == 1 ? 0 : i] * isc[isc.size() == 1 ? 0 : i] + 128.f) > thr) {
                is_crop_aligned = false;
            }
        }

        for (int i = 0; i < std::max(ch.size(), isc.size()); i++) {
            if (std::abs(ch[ch.size() == 1 ? 0 : i] * isc[isc.size() == 1 ? 0 : i] - 127.f) > thr) {
                is_crop_aligned = false;
            }
        }

        if (is_crop_aligned) {
            out_scales = isc;
        }
    }

    return out_scales;
}

bool ngraph::snippets::pass::CommonFakeQuantizeDecomposition::run_on_model(const std::shared_ptr<ngraph::Function>& f) {
    RUN_ON_FUNCTION_SCOPE(CommonFakeQuantizeDecomposition);
    ngraph::pass::Manager manager;
    manager.set_per_pass_validation(false);
    manager.register_pass<ngraph::snippets::pass::FakeQuantizeDecomposition>();
    manager.register_pass<ngraph::pass::ConstantFolding>();
    manager.register_pass<ngraph::pass::Validate>();
    manager.run_passes(f);
    return false;
}
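A worked instance of the U8 fast path in calculateScales (illustrative numbers): levels = 256, il = 0, ih = 127.5, ol = 0, oh = 255 gives isc = 255 / 127.5 = 2, ish = 0, osc = 255 / 255 = 1, osh = 0; all of cl, ish, osh are 0 and osc is 1, so out_scales = isc and the whole FakeQuantize reduces to clamp(x) * 2 followed by the saturating convert to U8.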
@ -1,72 +0,0 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <snippets/itt.hpp>
|
||||
#include "snippets/remarks.hpp"
|
||||
|
||||
#include "snippets/pass/insert_convert_on_inputs.hpp"
|
||||
#include "snippets/snippets_isa.hpp"
|
||||
|
||||
#include "ngraph/type.hpp"
|
||||
#include "ngraph/node.hpp"
|
||||
|
||||
#include <ngraph/opsets/opset1.hpp>
|
||||
#include <ngraph/rt_info.hpp>
|
||||
#include <ngraph/pattern/op/wrap_type.hpp>
|
||||
#include <ngraph/pattern/op/or.hpp>
|
||||
|
||||
// We should recursivelly (after full sequences of ConvertTruncation) go through inputs and
|
||||
// insert ConvertSaturation with supported element type before eltwises
|
||||
// NOTE: JUST EXAMPLE:
|
||||
// Parameter I8
|
||||
// ConvertTruncation U8
|
||||
// / | \
|
||||
// ConvertTruncation F32 ConvertTruncation I32 ConvertTruncation BF16
|
||||
// Eltwise ConvertSaturation FP32 ConvertTruncation I32
|
||||
// <> Eltwise ConvertSaturation FP32
|
||||
// <> Eltwise
|
||||
bool insertConvertSaturationAfterNode(const std::shared_ptr<ov::Node>& node, const ov::element::Type element_type) {
|
||||
bool rewritten = false;
|
||||
for (const auto& output : node->outputs()) {
|
||||
for (auto consumer : output.get_target_inputs()) {
|
||||
const auto output_shared_node = consumer.get_node()->shared_from_this();
|
||||
// Go down through ConvertTruncation sequence
|
||||
if (auto existing_convert_t = ov::as_type_ptr<ngraph::snippets::op::ConvertTruncation>(output_shared_node)) {
|
||||
rewritten = insertConvertSaturationAfterNode(existing_convert_t, element_type);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if ConvertSaturation already exists with supported element type or not and insert ConvertSaturation with supported element type
|
||||
auto existing_convert_s = ov::as_type_ptr<ngraph::snippets::op::ConvertSaturation>(output_shared_node);
|
||||
if ((!existing_convert_s && !ov::is_type<ov::op::v0::Result>(output_shared_node) && consumer.get_element_type() != element_type) ||
|
||||
(existing_convert_s && existing_convert_s->get_destination_type() != element_type)) {
|
||||
const auto convert = std::make_shared<ngraph::snippets::op::ConvertSaturation>(node, element_type);
|
||||
consumer.replace_source_output(convert);
|
||||
rewritten |= true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rewritten;
|
||||
}
|
||||
|
||||
ngraph::snippets::pass::InsertConvertOnInputs::InsertConvertOnInputs(const ov::element::Type exec_type) {
|
||||
MATCHER_SCOPE(InsertConvertOnInputs);
|
||||
|
||||
auto param_pattern = ngraph::pattern::wrap_type<ngraph::opset1::Parameter>();
|
||||
auto scalar_pattern = pattern::wrap_type<opset1::Constant>(
|
||||
[=](Output<Node> output) -> bool { return ngraph::shape_size(output.get_shape()) == 1; });
|
||||
auto input = std::make_shared<pattern::op::Or>(OutputVector{ param_pattern, scalar_pattern });
|
||||
|
||||
ngraph::matcher_pass_callback callback = [this, exec_type](ngraph::pattern::Matcher& m) {
|
||||
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::InsertConvertOnInputs")
|
||||
auto root = m.get_match_root();
|
||||
|
||||
auto rewritten = insertConvertSaturationAfterNode(root, exec_type);
|
||||
|
||||
return rewritten;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<ngraph::pattern::Matcher>(input, matcher_name);
|
||||
register_matcher(m, callback);
|
||||
}
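
A hedged sketch of the expected effect on an i8 input with the default f32 execution type ('body' is an assumed std::shared_ptr<ov::Model>; names are illustrative):

// Before:  Parameter(i8) ---------------------------> Add
// After:   Parameter(i8) -> ConvertSaturation(f32) -> Add
ngraph::pass::Manager manager;
manager.register_pass<ngraph::snippets::pass::InsertConvertOnInputs>(ov::element::f32);
manager.run_passes(body);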
@ -17,124 +17,43 @@ using namespace ngraph;

namespace {

std::shared_ptr<ngraph::Node> numpy_broadcast_node(const ngraph::Output<ngraph::Node>& value,
                                                   const ngraph::Shape& output_shape, const ngraph::Shape& source_shape) {
std::shared_ptr<ngraph::Node> broadcast_node_last_dim(const ngraph::Output<ngraph::Node>& value,
                                                      const ov::Shape& target_shape, const ov::Shape& normalized_shape) {
    std::shared_ptr<ngraph::Node> broadcasted_node = value.get_node_shared_ptr();

    if (output_shape == value.get_shape()) {
    if (target_shape == value.get_shape()) {
        return broadcasted_node;
    }

    NGRAPH_CHECK(source_shape.size() == output_shape.size(),
                 "Ranks of source_shape and output_shape don't match: ",
                 source_shape.size(),
                 " vs ",
                 output_shape.size());

    bool do_broadcast = output_shape.size() > value.get_shape().size();
    if (!do_broadcast) {
        for (size_t index = 0; index < output_shape.size(); ++index) {
            if (source_shape.at(index) == 1 && output_shape.at(index) != 1) {
                do_broadcast = true;
                break;
            }
        }
    }

    remark(2) << "Insert explicit broadcast " << value.get_node()->get_type_name()
              << " " << broadcasted_node->get_shape() << " -> " << output_shape << std::endl;

    // it shouldn't be a problem for now since we don't consider StridedSlice and Broadcast here
    if (auto constant = ngraph::as_type_ptr<ngraph::opset1::Constant>(broadcasted_node)) {
        if (constant->get_shape() == ngraph::Shape() || ngraph::shape_size(constant->get_shape()) == 1) {
            remark(2) << "Insert explicit broadcast " << value.get_node()->get_type_name()
                      << " to scalar constant " << constant->get_shape() << " -- aborting!" << std::endl;

            return broadcasted_node;
        }
    }

    if (auto constant = ngraph::as_type_ptr<ngraph::snippets::op::Scalar>(broadcasted_node)) {
        if (constant->get_shape() == ngraph::Shape() || ngraph::shape_size(constant->get_shape()) == 1) {
            remark(2) << "Insert explicit broadcast " << value.get_node()->get_type_name()
                      << " to scalar constant " << constant->get_shape() << " -- aborting!" << std::endl;

            return broadcasted_node;
        }
    }

    if (do_broadcast) {
        // ShapeOf
        broadcasted_node = std::make_shared<ngraph::snippets::op::BroadcastMove>(broadcasted_node, output_shape);
    // Insert BroadcastMove only if the last dimension needs to be broadcasted. Higher-level dims broadcasting
    // will be handled by pointer arithmetic in TileScheduler
    if (*target_shape.rbegin() != *normalized_shape.rbegin()) {
        ov::Shape broadcasted_shape = normalized_shape;
        *broadcasted_shape.rbegin() = *target_shape.rbegin();
        broadcasted_node = std::make_shared<ngraph::snippets::op::BroadcastMove>(broadcasted_node, broadcasted_shape);
    }

    return broadcasted_node;
}

ngraph::Shape calculate_broadcast_shape(ngraph::Shape lhs_shape, ngraph::Shape rhs_shape) {
    ngraph::Shape result;
    auto lhs_rank = lhs_shape.size();
    auto rhs_rank = rhs_shape.size();
    auto max_rank = std::max(lhs_rank, rhs_rank);

    // left-pad the lhs_shape with ones
    lhs_shape.insert(begin(lhs_shape), max_rank - lhs_rank, 1);
    // left-pad the rhs_shape with ones
    rhs_shape.insert(begin(rhs_shape), max_rank - rhs_rank, 1);

    for (size_t index = 0; index < max_rank; ++index) {
        size_t lhs_dim = lhs_shape.at(index);
        size_t rhs_dim = rhs_shape.at(index);

        if (lhs_dim != rhs_dim && lhs_dim != 1 && rhs_dim != 1) {
            throw ngraph::ngraph_error("incompatible shapes");
        }

        result.push_back(std::max(lhs_dim, rhs_dim));
std::pair<ov::Shape, std::vector<ov::Shape>> get_numpy_broadcast_shapes(const std::vector<ov::Shape>& input_shapes) {
    ov::PartialShape target_shape = input_shapes.front();
    for (auto i = 1; i < input_shapes.size(); i++) {
        if (!ov::PartialShape::broadcast_merge_into(target_shape, input_shapes[i], op::AutoBroadcastType::NUMPY))
            throw ngraph::ngraph_error("InsertMoveBroadcast: Failed broadcast-merge input shapes");
    }
    return result;
}

std::pair<ngraph::Shape, std::vector<ngraph::Shape>> get_numpy_broadcast_shapes(const std::vector<ngraph::Shape>& input_shapes) {
    ngraph::Shape target_shape = std::accumulate(begin(input_shapes), end(input_shapes), ngraph::Shape{}, calculate_broadcast_shape);

    std::vector<ngraph::Shape> full_shapes;
    for (const ngraph::Shape& input : input_shapes) {
        ngraph::Shape padded_shape{input};
        padded_shape.insert(begin(padded_shape), target_shape.size() - padded_shape.size(), 1);
        full_shapes.push_back(move(padded_shape));
    std::vector<ov::Shape> normalized_shapes;
    for (const auto& input : input_shapes) {
        ov::Shape padded_shape{input};
        padded_shape.insert(padded_shape.begin(), target_shape.size() - padded_shape.size(), 1);
        normalized_shapes.push_back(std::move(padded_shape));
    }

    return {target_shape, full_shapes};
}

auto reset_broacast_config(const std::shared_ptr<ngraph::Node>& op) -> void {
    using namespace ngraph;

    bool is_scalar = false;
    for (auto input : op->inputs()) {
        if (input.get_shape() == Shape() || ngraph::shape_size(input.get_shape()) == 1) {
            is_scalar = true;
        }
    }

    if (!is_scalar) {
        if (auto binary = std::dynamic_pointer_cast<ngraph::op::util::BinaryElementwiseArithmetic>(op)) {
            binary->set_autob(ngraph::op::AutoBroadcastType::NONE);
        } else if (auto binary = std::dynamic_pointer_cast<ngraph::op::util::BinaryElementwiseComparison>(op)) {
            binary->set_autob(ngraph::op::AutoBroadcastType::NONE);
        } else if (auto binary = std::dynamic_pointer_cast<ngraph::op::util::BinaryElementwiseLogical>(op)) {
            binary->set_autob(ngraph::op::AutoBroadcastType::NONE);
        }
    }
    return {target_shape.get_shape(), normalized_shapes};
}
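
A worked example of the broadcast merge, using the same shapes as the InsertBroadcastMove test below:

std::vector<ov::Shape> shapes{{2, 3}, {1, 2, 1}};
const auto bcast = get_numpy_broadcast_shapes(shapes);
// bcast.first  == ov::Shape{1, 2, 3}      // NUMPY broadcast of all inputs
// bcast.second == {{1, 2, 3}, {1, 2, 1}}  // inputs left-padded with ones to the target rank
// Only the second input needs a BroadcastMove: its last dimension (1) differs from the target's (3).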
} // namespace

// adds explicit broadcasts if needed
// TODO: this indeed makes the model non-reshapable; we need to come up with a more clever way to insert a fake broadcast.
// On the other hand, if we replace a scalar constant with a Scalar op / or ShapeOf, we could have broadcasts that are reshapable
// TODO: generate FakeBroadcast if and only if broadcast is done by the w dimension
ngraph::snippets::pass::InsertMoveBroadcast::InsertMoveBroadcast() {
    MATCHER_SCOPE(InsertMoveBroadcast);
    ngraph::graph_rewrite_callback callback = [this](ngraph::pattern::Matcher &m) {
@ -145,28 +64,39 @@ ngraph::snippets::pass::InsertMoveBroadcast::InsertMoveBroadcast() {
            return false;
        }

        std::vector<ngraph::Shape> input_shapes;
        for (const auto& input : values) {
            input_shapes.push_back(input.get_shape());
        auto is_scalar_constant = [](const ov::Output<ov::Node>& v){
            if (auto constant = ov::as_type_ptr<ov::op::v0::Constant>(v.get_node_shared_ptr())) {
                if (constant->get_shape().empty() || ngraph::shape_size(constant->get_shape()) == 1) {
                    return true;
                }
            }
            return false;
        };
        std::vector<ov::Shape> input_shapes;
        std::vector<bool> ignore_as_scalar;
        for (const auto& val : values) {
            input_shapes.emplace_back(val.get_shape());
            ignore_as_scalar.push_back(is_scalar_constant(val));
        }

        // find the output tensor's shape, then broadcast all inputs so that they are compatible
        // find the output tensor's shape, then broadcast all inputs so that they are compatible with respect to the last dim
        auto bcast_shapes = get_numpy_broadcast_shapes(input_shapes);

        ngraph::OutputVector broadcasted_inputs;
        for (size_t i = 0; i < values.size(); ++i) {
            auto node = numpy_broadcast_node(values[i], bcast_shapes.first, bcast_shapes.second[i]);
            ngraph::copy_runtime_info(root, node);
            broadcasted_inputs.push_back(node);
            if (ignore_as_scalar[i]) {
                broadcasted_inputs.push_back(values[i]);
            } else {
                auto node = broadcast_node_last_dim(values[i], bcast_shapes.first, bcast_shapes.second[i]);
                ngraph::copy_runtime_info(root, node);
                broadcasted_inputs.push_back(node);
            }
        }

        auto new_args = ngraph::as_node_vector(broadcasted_inputs);
        for (size_t i = 0; i < new_args.size(); i++) {
            root->input(i).replace_source_output(new_args[i]->output(0));
        }

        reset_broacast_config(root);

        return true;
    };

@ -27,32 +27,20 @@ ngraph::snippets::pass::LoadMoveBroadcastToBroadcastLoad::LoadMoveBroadcastToBro
        const auto input = pm.at(load_pattern).get_node_shared_ptr();
        const auto param = pm.at(param_pattern).get_node_shared_ptr();

        // check if the load has more than one user to avoid load + broadcast load on the same parameter
        if (input->output(0).get_target_inputs().size() != 1) {
        // Cannot rewrite Broadcast + Load if the load has more than one user
        // or more than one input, or if the Broadcast has several inputs
        if (input->output(0).get_target_inputs().size() != 1 ||
            root->inputs().size() != 1 || input->inputs().size() != 1) {
            return false;
        }

        if (root->inputs().size() != 1 || input->inputs().size() != 1) {
            throw ngraph_error("cannot rewrite Broadcast load with more than one input");
        }

        auto inshape = root->input(0).get_shape();
        auto outshape = root->output(0).get_shape();

        auto broadcastload = std::make_shared<snippets::op::BroadcastLoad>(param, outshape);
        Shape bct(inshape.size(), 0);
        for (size_t k = 0; k < inshape.size(); k++) {
            if (inshape[k] != outshape[k] && inshape[k] == 1) {
                bct[k] = 1;
            }
        }
        // Todo: consider refactoring BroadcastLoad; it seems we don't need broadcast_info at this point.
        broadcastload->set_broadcast_info(bct);
        if (inshape.back() == 1 && outshape.back() != 1) {
            ngraph::copy_runtime_info(root, broadcastload);
            ngraph::replace_node(root, broadcastload);
            return true;
        } else {
            return false;
        }
        ngraph::copy_runtime_info(root, broadcastload);
        ngraph::replace_node(root, broadcastload);

        return true;
    });
}
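
Sketched effect of the updated matcher (shapes illustrative, mirroring the tests below):

// Fused, because the load's last dimension is broadcast (1 -> 2):
//   Parameter{1, 1} -> Load -> BroadcastMove{1, 2} -> ...   becomes   Parameter{1, 1} -> BroadcastLoad{1, 2} -> ...
// Not fused, because only a higher dimension is broadcast ({1, 2} -> {2, 2}, last dims equal):
//   Parameter{1, 2} -> Load -> BroadcastMove{2, 2} -> ...   stays unchanged (see NotFuseLoadWithBroadcastMoveByY).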
@ -1,31 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <snippets/itt.hpp>

#include "snippets/op/convert_saturation.hpp"
#include "snippets/pass/reset_type_relaxed_node_precision.hpp"
#include "ngraph_ops/type_relaxed.hpp"

#include <ngraph/rt_info.hpp>


ngraph::snippets::pass::ResetTypeRelaxedNodePrecision::ResetTypeRelaxedNodePrecision(const ov::element::Type exec_type) : exec_type(exec_type) { }

bool ngraph::snippets::pass::ResetTypeRelaxedNodePrecision::run_on_model(const std::shared_ptr<ov::Model> &m) {
    RUN_ON_FUNCTION_SCOPE(ResetTypeRelaxedNodePrecision);
    bool rewritten = false;
    for (auto& op : m->get_ordered_ops()) {
        if (auto node = std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(op)) {
            for (int i = 0; i < op->outputs().size(); i++) {
                node->set_overridden_output_type(exec_type, i);
                rewritten |= true;
            }
        } else {
            op->validate_and_infer_types();
        }
    }

    return rewritten;
}
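
A minimal usage sketch, assuming the header included above; 'body' is an assumed std::shared_ptr<ov::Model>:

ngraph::pass::Manager manager;
manager.register_pass<ngraph::snippets::pass::ResetTypeRelaxedNodePrecision>(ov::element::f32);
manager.run_passes(body);  // every TypeRelaxed op in the body now reports f32 on all of its outputs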
@ -5,7 +5,7 @@
#include "snippets/remarks.hpp"
#include <snippets/itt.hpp>

#include "snippets/pass/transform_convert_to_truncation.hpp"
#include "snippets/pass/transform_convert.hpp"
#include "snippets/snippets_isa.hpp"

#include <ngraph/opsets/opset1.hpp>
@ -14,15 +14,19 @@

ngraph::snippets::pass::TransformConvertToConvertTruncation::TransformConvertToConvertTruncation() {
    MATCHER_SCOPE(TransformConvertToConvertTruncation);
    auto convert = std::make_shared<pattern::op::Label>(pattern::any_input(),
        [](const std::shared_ptr<const Node> &n) {
            return ov::is_type<ngraph::opset1::Convert>(n) &&
                   !ov::is_type<op::ConvertTruncation>(n) &&
                   !ov::is_type<op::ConvertSaturation>(n);
        });

    register_matcher(std::make_shared<ngraph::pattern::Matcher>(
        ngraph::pattern::wrap_type<ngraph::opset1::Convert>()),
        ngraph::pattern::wrap_type<ngraph::opset1::Convert>(), matcher_name),
        [this](ngraph::pattern::Matcher &m) {
            OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::TransformConvertToConvertTruncation")
            const auto root = m.get_match_root();
            const auto convert = ngraph::as_type_ptr<ngraph::opset1::Convert>(root);
            if (!convert)
                return false;

            auto convert_truncation = std::make_shared<op::ConvertTruncation>(convert->get_input_source_output(0),
                                                                              convert->get_destination_type());
            convert_truncation->set_friendly_name(convert->get_friendly_name());
@ -31,4 +35,4 @@ ngraph::snippets::pass::TransformConvertToConvertTruncation::TransformConvertToC

            return true;
        });
}
}
57 src/common/snippets/src/utils.cpp Normal file
@ -0,0 +1,57 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "snippets/utils.hpp"

#include "snippets/pass/fq_decomposition.hpp"


auto ngraph::snippets::utils::get_non_scalar_constant_count_for_fq(const std::shared_ptr<ngraph::opset1::FakeQuantize>& fq) -> size_t {
    std::vector<float> out_scales;
    std::vector<float> cl, ch, isc, ish, osc, osh;
    const bool status = ngraph::snippets::pass::FakeQuantizeDecomposition::getScalesAndShifts(fq, cl, ch, isc, ish, osc, osh);
    if (status) {
        out_scales = ngraph::snippets::pass::FakeQuantizeDecomposition::calculateScales(fq->get_output_element_type(0), cl, ch, isc, ish, osc, osh);
        if (out_scales.size() != 0) {
            return out_scales.size() != 1;
        }
    }

    const bool only_quantized = status &&
                                std::all_of(osc.cbegin(), osc.cend(),
                                            [](float val) { return val == 1.f; }) &&
                                std::all_of(osh.cbegin(), osh.cend(),
                                            [](float val) { return val == 0.f; });
    const bool il = ngraph::shape_size(fq->input(1).get_shape()) != 1lu;
    const bool ih = ngraph::shape_size(fq->input(2).get_shape()) != 1lu;
    const bool ol = !only_quantized && ngraph::shape_size(fq->input(3).get_shape()) != 1lu;
    const bool oh = !only_quantized && ngraph::shape_size(fq->input(4).get_shape()) != 1lu;

    // FakeQuantize decomposition has the following formula:
    //      round(x * (levels-1) / (ih - il) - il * (levels-1) / (ih - il)) * (oh - ol) / (levels-1) + ol
    // After the decomposition there is a call of the ConstantFolding pass that generates new Constants:
    //      - isc := (levels-1) / (ih - il)
    //      - ish := -il * isc
    //      - osc := (oh - ol) / (levels-1)
    //      - osh := ol
    // New formula:
    //      round(x * isc + ish) * osc + osh
    // Thus, after FakeQuantize decomposition we have 6 Constants instead of the original 4:
    //      ih, il (for Max/Min), isc, ish, osc, osh
    // Some of them can be scalar or non-scalar. It depends on which of the original 4 Constants are non-scalar
    // To sum it up, the conditions below check all possible cases to calculate the count of newly generated non-scalar Constants
    if (ol && il && ih)
        return 6;
    else if ((ol && (il || ih)) || (il && ih && oh))
        return 5;
    else if ((il && oh) || (ih && oh) || (il && ih))
        return 4;
    else if (il || ih)
        return 3;
    else if (ol)
        return 2;
    else if (oh)
        return 1;
    return 0;
}
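
A worked case for the counting above (the FakeQuantize constant shapes are assumed for illustration):

// Suppose input_low/input_high are per-channel Constants of shape {1, C, 1, 1} while
// output_low/output_high are scalars: then il = ih = true and ol = oh = false, which
// hits the (il && ih) branch and reports 4 non-scalar Constants (il, ih, isc, ish).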
@ -52,40 +52,6 @@ TEST(TransformationTests, FuseLoadWithBroadcastMoveByX) {
    ASSERT_TRUE(res.first) << res.second;
}

TEST(TransformationTests, NotFuseLoadWithBroadcastMoveByY) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
        auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2});
        auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
        auto load0 = std::make_shared<snippets::isa::Load>(data0);
        auto load1 = std::make_shared<snippets::isa::Load>(data1);
        auto bct = std::make_shared<snippets::isa::BroadcastMove>(load0, load1->get_shape());
        auto add = std::make_shared<opset1::Add>(bct, load1);
        auto store = std::make_shared<snippets::isa::Store>(add);
        f = std::make_shared<Function>(NodeVector{store}, ParameterVector{data0, data1});

        pass::Manager m;
        m.register_pass<pass::InitNodeInfo>();
        m.register_pass<snippets::pass::LoadMoveBroadcastToBroadcastLoad>();
        m.run_passes(f);
        ASSERT_NO_THROW(check_rt_info(f));
    }

    {
        auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2});
        auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 2});
        auto load0 = std::make_shared<snippets::isa::Load>(data0);
        auto load1 = std::make_shared<snippets::isa::Load>(data1);
        auto bct = std::make_shared<snippets::isa::BroadcastMove>(load0, load1->get_shape());
        auto add = std::make_shared<opset1::Add>(bct, load1);
        auto store = std::make_shared<snippets::isa::Store>(add);
        f_ref = std::make_shared<Function>(NodeVector{store}, ParameterVector{data0, data1});
    }

    auto res = compare_functions(f, f_ref);
    ASSERT_TRUE(res.first) << res.second;
}

TEST(TransformationTests, NoFuseLoadWithBroadcastMoveMultipleUsers) {
    std::shared_ptr<Function> f(nullptr), f_ref(nullptr);
    {
@ -22,7 +22,7 @@ using namespace ngraph;
TEST_F(TransformationTestsF, InsertBroadcastMove) {
    {
        auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
        auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 3});
        auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 1});
        auto add = std::make_shared<opset1::Add>(data0, data1);
        function = std::make_shared<Function>(NodeVector{add}, ParameterVector{data0, data1});

@ -30,10 +30,9 @@ TEST_F(TransformationTestsF, InsertBroadcastMove) {
    }
    {
        auto data0 = std::make_shared<opset1::Parameter>(element::f32, Shape{2, 3});
        auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 1, 3});
        auto move0 = std::make_shared<snippets::isa::BroadcastMove>(data0, Shape{1, 2, 3});
        auto data1 = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 1});
        auto move1 = std::make_shared<snippets::isa::BroadcastMove>(data1, Shape{1, 2, 3});
        auto add = std::make_shared<opset1::Add>(move0, move1);
        auto add = std::make_shared<opset1::Add>(data0, move1);
        function_ref = std::make_shared<Function>(NodeVector{add}, ParameterVector{data0, data1});
    }
}
@ -0,0 +1,49 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include "common_test_utils/ngraph_test_utils.hpp"
#include "snippets/pass/common_optimizations.hpp"
#include "snippets/op/subgraph.hpp"
#include "fake_quantize_function.hpp"
#include "function_helper.hpp"

namespace ov {
namespace test {
namespace snippets {

class FakeQuantizeDecompositionTest : public TransformationTestsF {
public:
    void register_passes() {
        manager.register_pass<ngraph::snippets::pass::CommonOptimizations>();
    }

    void TearDown() override {
        TransformationTestsF::TearDown();

        auto subgraph = FunctionHelper::getSubgraph(function);
        auto body = subgraph == nullptr ? nullptr : std::dynamic_pointer_cast<ngraph::snippets::op::Subgraph>(subgraph)->get_body();

        auto subgraph_ref = FunctionHelper::getSubgraph(function_ref);
        auto body_ref = subgraph_ref == nullptr ? nullptr : std::dynamic_pointer_cast<ngraph::snippets::op::Subgraph>(subgraph_ref)->get_body();

        auto res = comparator.compare(body, body_ref);
        ASSERT_TRUE(res.valid) << res.message;
    }
};

TEST_F(FakeQuantizeDecompositionTest, smoke_Snippets_PerTensorFakeQuantizeDecomposition) {
    function = FakeQuantizeFunction::getSubgraphWithFakeQuantize(
        {1, 3, 16, 16}, element::f32, {{}, {}, {}, {}}, 1.f);

    function_ref = FakeQuantizeFunction::getSubgraphWithDecomposedFakeQuantize(
        {1, 3, 16, 16}, element::f32, {{}, {}, {}, {}}, 1.f);

    register_passes();
}

} // namespace snippets
} // namespace test
} // namespace ov
@ -41,31 +41,21 @@ TEST_P(InsertLoadStoreTests, ThreeInputsEltwise) {

namespace InsertLoadStoreTestsInstantiation {
using ov::Shape;
std::vector<Shape> inputShapes1{{1, 1, 2, 5, 1}, {1, 4, 1, 5, 1}};
std::vector<Shape> inputShapes2{{1, 1, 2, 5, 1}, {1, 4, 1, 5, 1}, {1, 4, 1, 5, 16}};
std::vector<Shape> inputShapes{{1, 4, 1, 5, 1}, {1, 4, 2, 5, 1}};
std::vector<Shape> broadcastShapes{{1, 4, 1, 5, 16}, {1, 4, 2, 5, 16}};
Shape exec_domain{1, 4, 2, 5, 16};
Shape emptyShape{};

INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastLoad, InsertLoadStoreTests,
                         ::testing::Combine(
                                 ::testing::Values(exec_domain),
                                 ::testing::ValuesIn(inputShapes1),
                                 ::testing::ValuesIn(inputShapes1),
                                 ::testing::Values(inputShapes[0]),
                                 ::testing::Values(inputShapes[1]),
                                 ::testing::Values(emptyShape),
                                 ::testing::Values(exec_domain),
                                 ::testing::Values(exec_domain)),
                                 ::testing::Values(broadcastShapes[0]),
                                 ::testing::Values(broadcastShapes[1])),
                         InsertLoadStoreTests::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastMove, InsertLoadStoreTests,
                         ::testing::Combine(
                                 ::testing::Values(exec_domain),
                                 ::testing::Values(Shape {1, 4, 1, 5, 16}),
                                 ::testing::ValuesIn(inputShapes2),
                                 ::testing::Values(emptyShape),
                                 ::testing::Values(exec_domain),
                                 ::testing::Values(exec_domain)),
                         InsertLoadStoreTests::getTestCaseName);
} // namespace InsertLoadStoreTestsInstantiation
} // namespace snippets
} // namespace test
@ -39,7 +39,7 @@ TEST_P(InsertMoveBroadcastTests, AddBroadcast) {

namespace InsertMoveBroadcastTestsInstantiation {
using ov::Shape;
std::vector<Shape> inputShapes0 {{1, 1, 1, 3}, {1, 1, 2, 3}, {1, 8, 1, 3}};
std::vector<Shape> inputShapes0 {{1, 8, 2, 1}};
std::vector<Shape> inputShapes1 {{1, 8, 2, 3}};
Shape broadcastShape {1, 8, 2, 3};
Shape emptyShape {};
@ -59,12 +59,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastOn1, InsertMoveBroadcastTests,
                         ::testing::Values(broadcastShape)),
                         InsertMoveBroadcastTests::getTestCaseName);

std::vector<Shape> inputShapesBoth0 {{4, 1, 2, 3}, {1, 8, 1, 3}, {1, 1, 2, 3}};
std::vector<Shape> inputShapesBoth1 {{1, 8, 1, 3}, {4, 1, 2, 3}, {4, 8, 1, 3}};
Shape broadcastShapeBoth{4, 8, 2, 3};
std::vector<insertMoveBroadcastParams> params = {std::make_tuple(inputShapesBoth0[0], inputShapesBoth1[0], broadcastShapeBoth, broadcastShapeBoth),
                                                 std::make_tuple(inputShapesBoth0[1], inputShapesBoth1[1], broadcastShapeBoth, broadcastShapeBoth),
                                                 std::make_tuple(inputShapesBoth0[2], inputShapesBoth1[2], broadcastShapeBoth, broadcastShapeBoth)};
std::vector<Shape> inputShapesBoth0 {{4, 1, 2, 1}, {1, 8, 1, 1}, {1, 1, 2, 3}};
std::vector<Shape> inputShapesBoth1 {{4, 8, 2, 3}, {4, 1, 2, 3}, {4, 8, 1, 1}};
std::vector<Shape> broadcastShapeBoth{{4, 1, 2, 3}, {1, 8, 1, 3}, {4, 8, 1, 3}};
std::vector<insertMoveBroadcastParams> params = {std::make_tuple(inputShapesBoth0[0], inputShapesBoth1[0], broadcastShapeBoth[0], emptyShape),
                                                 std::make_tuple(inputShapesBoth0[1], inputShapesBoth1[1], broadcastShapeBoth[1], emptyShape),
                                                 std::make_tuple(inputShapesBoth0[2], inputShapesBoth1[2], emptyShape, broadcastShapeBoth[2])};

INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastOnBoth, InsertMoveBroadcastTests,
                         ::testing::ValuesIn(params),
@ -10,6 +10,7 @@

#include <string>
#include <map>
#include <mutex>

namespace ov {
namespace intel_cpu {
@ -17,6 +17,7 @@

#include "snippets_transformations/op/load_convert.hpp"
#include "snippets_transformations/op/store_convert.hpp"
#include "ngraph_transformations/op/swish_cpu.hpp"

#include <ngraph/opsets/opset5.hpp>

@ -114,6 +115,7 @@ ov::intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_
    // jitters[ngraph::opset1::Tan::get_type_info_static()] = CREATE_EMITTER(); // not supported
    jitters[ngraph::opset1::Tanh::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_tanh_emitter);

    jitters[ov::intel_cpu::SwishNode::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_swish_emitter);
    jitters[ngraph::op::v4::HSwish::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_hswish_emitter);
    // jitters[ngraph::opset1::HardSigmoid::get_type_info_static()] = CREATE_EMITTER(); // not supported
    // jitters[ngraph::opset1::Selu::get_type_info_static()] = CREATE_EMITTER(); // not supported
@ -5,6 +5,7 @@
#pragma once

#include "ngraph/opsets/opset5.hpp"
#include "ngraph_transformations/op/swish_cpu.hpp"
#include "jit_dnnl_emitters.hpp"

namespace ov {
@ -102,6 +103,20 @@ public:
    }
};

class jit_swish_emitter : public jit_dnnl_emitter {
public:
    jit_swish_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
                      InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32)
        : jit_dnnl_emitter(host, host_isa, n, exec_prc) {
        kind = dnnl_eltwise_swish;
        auto op = ngraph::as_type_ptr<ov::intel_cpu::SwishNode>(n);
        alpha = op->get_alpha();
        beta = 0.f;

        set_injector();
    }
};

class jit_hswish_emitter : public jit_dnnl_emitter {
public:
    jit_hswish_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
@ -114,6 +129,7 @@ public:
        set_injector();
    }
};

class jit_gelu_v0_emitter : public jit_dnnl_emitter {
public:
    jit_gelu_v0_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr<ngraph::Node>& n,
@ -18,10 +18,12 @@ using namespace Xbyak::util;
namespace ov {
namespace intel_cpu {

namespace {
// heuristic threshold in bytes for choosing between a mask load and emulation with several simple partial loads
const int threshold_for_mask_emu_load = 14;
constexpr int threshold_for_mask_emu_load = 14;
// heuristic threshold in bytes for choosing between a mask store and emulation with several simple partial stores
const int threshold_for_mask_emu_store = 6;
constexpr int threshold_for_mask_emu_store = 6;
} // namespace

size_t load_emitter_params::hash() const {
    size_t seed = 0;
@ -387,13 +387,6 @@ void TileEmitter::emit_impl(const std::vector<size_t>& in,

BroadcastMoveEmitter::BroadcastMoveEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa,
                                           const std::shared_ptr<ov::Node>& n) : jit_emitter(h, isa, n) {
    if (n->get_input_shape(0).empty())
        use_broadcast = true;
    else if (*n->get_input_shape(0).rbegin() != *n->get_output_shape(0).rbegin())
        use_broadcast = true;
    else
        use_broadcast = false;

    if (n->get_input_element_type(0) != n->get_output_element_type(0))
        IE_THROW() << "BroadcastMoveEmitter supports only equal input and output types but gets: "
                   << n->get_input_element_type(0) << " and " << n->get_output_element_type(0);
@ -420,20 +413,14 @@ template <dnnl::impl::cpu::x64::cpu_isa_t isa>
void BroadcastMoveEmitter::emit_isa(const std::vector<size_t> &in, const std::vector<size_t> &out) const {
    using Vmm = typename dnnl::impl::utils::conditional3<isa == dnnl::impl::cpu::x64::sse41,
                                                         Xmm, isa == dnnl::impl::cpu::x64::avx2, Ymm, Zmm>::type;
    Vmm vmm_src0 = Vmm(in[0]);
    Xmm xmm_src0 = Xmm(in[0]);
    Vmm vmm_dst = Vmm(out[0]);

    if (use_broadcast) {
        switch (byte_size) {
            case 4: h->uni_vbroadcastss(vmm_dst, xmm_src0); break;
            case 2: h->vpbroadcastw(vmm_dst, xmm_src0); break;
            case 1: h->vpbroadcastb(vmm_dst, xmm_src0); break;
            default: assert(!"unsupported data type");
        }
    } else {
        if (vmm_src0 != vmm_dst)
            h->uni_vmovups(vmm_dst, vmm_src0);
    switch (byte_size) {
        case 4: h->uni_vbroadcastss(vmm_dst, xmm_src0); break;
        case 2: h->vpbroadcastw(vmm_dst, xmm_src0); break;
        case 1: h->vpbroadcastb(vmm_dst, xmm_src0); break;
        default: assert(!"unsupported data type");
    }
}
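
// Note on the dispatch above: byte_size selects the broadcast instruction
// (4 -> uni_vbroadcastss, 2 -> vpbroadcastw, 1 -> vpbroadcastb); mapping those sizes to
// element types (e.g. f32/i32, bf16, i8/u8) is an assumption rather than something stated here.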
@ -78,6 +78,7 @@ ExecNetwork::ExecNetwork(const InferenceEngine::CNNNetwork &network,
    bool isFloatModel = !ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(function);

    _cfg.isNewApi = !isLegacyAPI();
    _mutex = std::make_shared<std::mutex>();

    // WA for inference dynamic batch cases in new API
    if (_cfg.isNewApi) {
@ -176,10 +177,10 @@ ExecNetwork::GraphGuard::Lock ExecNetwork::GetGraph() const {
        auto makeGraph = [&] {
            try {
                {
                    std::lock_guard<std::mutex> lock{_cfgMutex};
                    std::lock_guard<std::mutex> lock{*_mutex.get()};
                    graphLock._graph.setConfig(_cfg);
                }
                graphLock._graph.CreateGraph(_network, extensionManager, _numaNodesWeights[numaNodeId]);
                graphLock._graph.CreateGraph(_network, extensionManager, _numaNodesWeights[numaNodeId], _mutex);
            } catch(...) {
                exception = std::current_exception();
            }
@ -198,7 +199,7 @@ ExecNetwork::GraphGuard::Lock ExecNetwork::GetGraph() const {

void ExecNetwork::setProperty(const std::map<std::string, std::string> &properties) {
    {
        std::lock_guard<std::mutex> lock{_cfgMutex};
        std::lock_guard<std::mutex> lock{*_mutex.get()};
        _cfg.readProperties(properties);
    }
    for (auto& g : _graphs) {
@ -53,7 +53,9 @@ protected:
    ExtensionManager::Ptr extensionManager;
    std::vector<InferenceEngine::IVariableStateInternal::Ptr> memoryStates;
    const InferenceEngine::CNNNetwork _network;
    mutable std::mutex _cfgMutex;
    // Generic synchronization primitive on the ExecNetwork level.
    // Usage example: helps to avoid data races during CPU Graph initialization in a multi-stream scenario
    mutable std::shared_ptr<std::mutex> _mutex;
    Config _cfg;
    std::atomic_int _numRequests = {0};
    std::string _name;
@ -67,7 +69,7 @@ protected:

    // WARNING: Do not use _graphs directly.
    mutable std::deque<GraphGuard> _graphs;
    mutable NumaNodesWeights _numaNodesWeights;
    mutable NumaNodesWeights _numaNodesWeights;

    /* WARNING: Use GetGraph() function to get access to graph in current stream.
     * NOTE: Main thread is interpreted as master thread of external stream so use this function to get access to graphs
@ -25,6 +25,7 @@
#include "nodes/input.h"
#include <nodes/reorder.h>
#include "nodes/convert.h"
#include "nodes/subgraph.h"

#include <ie_algorithm.hpp>
#include <blob_factory.hpp>
@ -68,7 +69,7 @@ Graph::~Graph() {

template<typename NET>
void Graph::CreateGraph(NET &net, const ExtensionManager::Ptr& extMgr,
                        WeightsSharing::Ptr &w_cache) {
                        WeightsSharing::Ptr &w_cache, const std::shared_ptr<std::mutex>& mutex) {
    OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "CreateGraph");

    if (IsReady())
@ -77,6 +78,7 @@ void Graph::CreateGraph(NET &net, const ExtensionManager::Ptr& extMgr,
    weightsCache = config.streamExecutorConfig._streams != 1 ? w_cache : nullptr;

    rtParamsCache = std::make_shared<MultiCache>(config.rtCacheCapacity);
    sharedMutex = mutex;

    Replicate(net, extMgr);
    InitGraph();
@ -119,9 +121,9 @@ void Graph::CreateGraph(const std::vector<NodePtr> &graphNodes,
}

template void Graph::CreateGraph(const std::shared_ptr<const ngraph::Function>&,
                                 const ExtensionManager::Ptr&, WeightsSharing::Ptr&);
                                 const ExtensionManager::Ptr&, WeightsSharing::Ptr&, const std::shared_ptr<std::mutex>& mutex);
template void Graph::CreateGraph(const CNNNetwork&,
                                 const ExtensionManager::Ptr&, WeightsSharing::Ptr&);
                                 const ExtensionManager::Ptr&, WeightsSharing::Ptr&, const std::shared_ptr<std::mutex>& mutex);

void Graph::Replicate(const std::shared_ptr<const ov::Model> &subgraph, const ExtensionManager::Ptr& extMgr) {
    this->_name = "subgraph";
@ -153,7 +155,9 @@ void Graph::Replicate(const std::shared_ptr<const ov::Model> &subgraph, const Ex
        if (isQuantized()) {
            node->setQuantizedGraphFlag(true);
        }

        node->setRuntimeCache(rtParamsCache);
        node->setSharedMutex(sharedMutex);

        graphNodes.push_back(node);

@ -265,7 +269,10 @@ void Graph::Replicate(const CNNNetwork &network, const ExtensionManager::Ptr& ex
        if (isQuantized()) {
            node->setQuantizedGraphFlag(true);
        }

        node->setRuntimeCache(rtParamsCache);
        node->setSharedMutex(sharedMutex);

        graphNodes.push_back(node);

        if (op->get_type_info() == ngraph::op::v0::Parameter::get_type_info_static()) {
@ -53,7 +53,8 @@ public:
    template<typename NET>
    void CreateGraph(NET &network,
                     const ExtensionManager::Ptr& extMgr,
                     WeightsSharing::Ptr &w_cache);
                     WeightsSharing::Ptr &w_cache,
                     const std::shared_ptr<std::mutex>& mutex);

    void CreateGraph(const std::vector<NodePtr> &graphNodes,
                     const std::vector<EdgePtr> &graphEdges,
@ -262,6 +263,7 @@ private:
    std::vector<NodePtr> executableGraphNodes;

    MultiCachePtr rtParamsCache;
    std::shared_ptr<std::mutex> sharedMutex = nullptr;

    void EnforceBF16();
};
@ -2,7 +2,9 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "snippets_mark_skipped.hpp"
#include <snippets/pass/collapse_subgraph.hpp>
#include "snippets/pass/collapse_subgraph.hpp"
#include "snippets/op/subgraph.hpp"
#include "snippets/utils.hpp"
#include <ngraph/opsets/opset1.hpp>
#include <utils/general_utils.h>
#include <utils/cpu_utils.hpp>
@ -15,6 +17,7 @@ namespace ov {
namespace intel_cpu {

namespace {
static const int DEFAULT_AXIS = 1;
NodeFusingType GetNodeFusingType(const std::shared_ptr<const Node> &node) {
    auto &rt = node->get_rt_info();
    const auto rinfo = rt.find("MayBeFusedInPlugin");
@ -110,13 +113,18 @@ bool canBePerformedAsScaleShift(const std::shared_ptr<const Node> &node, const i
           isBroadcastableToDataInput();
}

bool SupportsFusingWithConvolution_Simple(const std::shared_ptr<const Node> &node, const int channelAxis = 1) {
inline bool canBeMatMulExecutedInInt8(const ov::element::Type& firstType, const ov::element::Type& secondType) {
    return one_of(firstType, ov::element::i8, ov::element::u8) && secondType == ov::element::i8;
}

bool SupportsFusingWithConvolution_Simple(const std::shared_ptr<const Node> &node, const int channelAxis = DEFAULT_AXIS) {
    return SupportsFusingWithConvolution_SumActivation(node) ||
           ov::is_type<ngraph::op::Tanh>(node) ||
           ov::is_type<ngraph::op::v0::Gelu>(node) ||
           ov::is_type<ngraph::op::v7::Gelu>(node) ||
           ov::is_type<ngraph::op::Abs>(node) ||
           ov::is_type<ngraph::op::Sqrt>(node) ||
           ov::is_type<ngraph::op::FakeQuantize>(node) ||
           canBePerformedAsScaleShift(node, channelAxis);
}
// Convolution is a special case, since it supports peculiar fusings
@ -136,7 +144,7 @@ bool isSuitableBinaryConvolutionParent(const std::shared_ptr<const Node> &node)
    return is_suitable_node && has_only_child;
}
int getChannelAxis(const ov::AxisSet &axes, bool keep_dims) {
    int channelAxis = 1;
    int channelAxis = DEFAULT_AXIS;
    if (!keep_dims) {
        for (auto &axis : axes) {
            if (axis == 1) {
@ -150,7 +158,7 @@ int getChannelAxis(const ov::AxisSet &axes, bool keep_dims) {
    }
    return channelAxis;
}
bool isSuitableMiscParent(const std::shared_ptr<const Node> &node, int &channelAxis) {
bool isSuitableMiscParent(const std::shared_ptr<const Node> &node) {
    const bool is_suitable_node = ov::is_type<ngraph::op::v0::MVN>(node) ||
                                  ov::is_type<ngraph::op::v6::MVN>(node) ||
                                  ov::is_type<ngraph::op::v0::NormalizeL2>(node) ||
@ -160,13 +168,8 @@ bool isSuitableMiscParent(const std::shared_ptr<const Node> &node, int &channelA
                                  ov::is_type<ngraph::op::v4::LSTMCell>(node) ||
                                  ov::is_type<ngraph::opset1::ConvolutionBackpropData>(node) ||
                                  ov::is_type<ngraph::op::util::ArithmeticReductionKeepDims>(node) ||
                                  ov::is_type<ngraph::op::util::LogicalReductionKeepDims>(node) ||
                                  ov::is_type<ngraph::opset1::GroupConvolutionBackpropData>(node);
    if (const auto reduce = std::dynamic_pointer_cast<const ngraph::op::util::ArithmeticReductionKeepDims>(node)) {
        channelAxis = getChannelAxis(reduce->get_reduction_axes(), reduce->get_keep_dims());
    } else if (const auto reduce = std::dynamic_pointer_cast<const ngraph::op::util::LogicalReductionKeepDims>(node)) {
        channelAxis = getChannelAxis(reduce->get_reduction_axes(), reduce->get_keep_dims());
    }
                                  ov::is_type<ngraph::opset1::GroupConvolutionBackpropData>(node) ||
                                  ov::is_type<ngraph::opset1::AvgPool>(node);
    // has a single output, connected to a single child
    const auto out = node->outputs();
    const bool has_only_child = (out.size() == 1) && (out[0].get_target_inputs().size() == 1);
@ -180,6 +183,13 @@ bool isSuitableMatMulParent(const std::shared_ptr<const Node> &node) {
    const bool has_only_child = (out.size() == 1) && (out[0].get_target_inputs().size() == 1);
    return is_suitable_node && has_only_child;
}
// From the Reduce::canFuse() corner case. CanFuseSimpleOperation is covered by Misc
inline bool isSuitableReduceParent(const std::shared_ptr<const Node> &node) {
    bool is_suitable_reduce = ov::is_type<ov::op::util::ArithmeticReductionKeepDims>(node) && isSuitableMiscParent(node);
    bool is_not_min_max = !ov::is_type<ov::op::v1::ReduceMax>(node) && !ov::is_type<ov::op::v1::ReduceMin>(node);
    bool out_is_f32 = node->get_output_element_type(0) == ov::element::f32;
    return is_suitable_reduce && is_not_min_max && out_is_f32;
}
// Subtract as ZeroPoints for Convolution
bool isSuitableSubtractAsZeroPointsParent(const std::shared_ptr<const Node> &node) {
    const bool is_suitable_node = ov::is_type<ngraph::op::v1::Subtract>(node);
@ -197,21 +207,24 @@ bool isSuitableSubtractAsZeroPointsParent(const std::shared_ptr<const Node> &nod
    const auto weight_shape = child->get_input_shape(1);
    const bool is_depthwise = is_group_conv && weight_shape[1] == 1 && weight_shape[2] == 1;
    const bool deptwise_is_suitable = implication(is_depthwise, child->get_input_shape(0).size() < 5);
    if (!(is_conv && deptwise_is_suitable))
    if (!deptwise_is_suitable)
        return false;

    const bool first_input_is_suitable = node->get_input_node_shared_ptr(0)->get_output_element_type(0) == ov::element::u8;
    const auto zp_weights = node->get_input_node_shared_ptr(1);
    const auto zp_weight_shape = zp_weights->get_output_shape(0);
    bool second_input_is_suitable =
        ov::is_type<ngraph::op::v0::Constant>(zp_weights) &&
        zp_weights->get_output_element_type(0) == ov::element::u8 &&
        zp_weight_shape.size() >= 2;
    if (!(first_input_is_suitable && second_input_is_suitable))
        return false;
    auto correct_shape = ov::Shape(zp_weight_shape.size(), 1);
    correct_shape[1] = zp_weight_shape[1];
    return correct_shape == zp_weight_shape;
    if (zp_weight_shape.size() > 1)
        correct_shape[1] = zp_weight_shape[1];
    const bool zp_weights_is_suitable = ov::is_type<ov::op::v0::Constant>(zp_weights) &&
                                        zp_weights->get_element_type() == ov::element::u8 &&
                                        zp_weight_shape.size() >= 2 && correct_shape == zp_weight_shape;
    const bool first_conv_input_is_suitable = node->get_input_element_type(0) == ov::element::u8 &&
                                              zp_weights_is_suitable;

    const auto conv_weights = child->get_input_node_shared_ptr(1);
    bool second_conv_input_is_suitable = ov::is_type<ngraph::op::v0::Constant>(conv_weights) &&
                                         conv_weights->get_output_element_type(0) == ov::element::i8;
    return first_conv_input_is_suitable && second_conv_input_is_suitable;
}
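
An informal sketch of the graph pattern this helper accepts (assumed layout, for illustration):

//   activation (u8) ----> Subtract <---- Constant (u8 zero points, shape {1, C, 1, ...})
//                             |
//                             v
//                       Convolution <--- Constant (i8 weights)
//
// A Subtract matching this shape is later marked SkippedByPlugin in run_on_model(), so it
// stays attached to the convolution as zero points instead of being snippet-ized.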
|
||||
bool isSuitablePoolChild(const std::shared_ptr<const Node> &node) {
|
||||
const bool is_suitable_node = ov::is_type<ngraph::op::v1::MaxPool>(node);
|
||||
@ -220,11 +233,12 @@ bool isSuitablePoolChild(const std::shared_ptr<const Node> &node) {
|
||||
const bool has_only_child = (out.size() == 1) && (out[0].get_target_inputs().size() == 1);
|
||||
return is_suitable_node && has_only_child;
|
||||
}
|
||||
bool isSuitableChildForFusingSimple(const std::shared_ptr<const Node> &node, int channelAxis = 1) {
|
||||
bool isSuitableChildForFusingSimple(const std::shared_ptr<const Node> &node, const int channelAxis = DEFAULT_AXIS) {
|
||||
// Note: Fusing child is allowed to have several users, but that must be the end of the chain
|
||||
return SupportsFusingWithConvolution_Simple(node, channelAxis) && getNumNonConstInputs(node) == 1;
|
||||
}
|
||||
bool isSuitableChildForFusingMatMul(const std::shared_ptr<const Node> &node, NodeFusingType &updatedChainType) {
|
||||
bool isSuitableChildForFusingMatMul(const std::shared_ptr<const Node> &node, const bool canMatMulBeExecutedInI8,
|
||||
NodeFusingType &updatedChainType, int& fusingAxis) {
|
||||
int num_non_const_inputs = 0;
|
||||
bool can_be_converted_to_FC = false;
|
||||
ov::Shape bias_shape;
|
||||
@ -255,52 +269,66 @@ bool isSuitableChildForFusingMatMul(const std::shared_ptr<const Node> &node, Nod
|
||||
if (num_non_const_inputs != 1)
|
||||
return false;
|
||||
|
||||
// Matmul / FC bias fusion
|
||||
if (ov::is_type<ngraph::opset1::Add>(node) &&
|
||||
bias_shape.back() == matmul_shape.back() &&
|
||||
bias_shape.back() == shape_size(bias_shape)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// FuseMatMulAndSimpleOperation or FuseFullyConnectedAndSimpleOperation
|
||||
// Invoke SupportsFusingWithConvolution_Simple directly instead of isSuitableChildForFusingSimple to
|
||||
// eliminate getNumNonConstInputs() check
|
||||
int fusingAxis = can_be_converted_to_FC ? (matmul_shape.size() == 3 ? 2 : 1) : matmul_shape.size() - 1;
|
||||
|
||||
fusingAxis = can_be_converted_to_FC ? (matmul_shape.size() == 3 ? 2 : 1) : matmul_shape.size() - 1;
|
||||
if (SupportsFusingWithConvolution_Simple(node, fusingAxis)) {
|
||||
updatedChainType = NodeFusingType::FusedWithMisc;
|
||||
return true;
|
||||
}
|
||||
|
||||
// canFuse() from MatMul for case with rank > 2
|
||||
// Algorithm::EltwisePowerStatic is ignored
|
||||
if (!can_be_converted_to_FC &&
|
||||
node->get_output_shape(0).size() > 2) {
|
||||
if (ov::is_type<ov::op::v1::Add>(node) ||
|
||||
ov::is_type<ov::op::v1::Multiply>(node) ||
|
||||
ov::is_type<ov::op::v1::Subtract>(node) ||
|
||||
ov::is_type<ov::op::v1::Divide>(node) ||
|
||||
ov::is_type<ov::op::v0::PRelu>(node)) {
|
||||
const auto const1 = ov::is_type<ov::op::v0::Constant>(node->get_input_node_shared_ptr(0));
|
||||
const auto const2 = ov::is_type<ov::op::v0::Constant>(node->get_input_node_shared_ptr(1));
|
||||
int constPort = -1;
|
||||
if (const2) {
|
||||
constPort = 1;
|
||||
} else if (const1) {
|
||||
constPort = 0;
|
||||
}
|
||||
// MatMul specific checks from ::canFuse()
|
||||
if (!can_be_converted_to_FC) {
|
||||
// can with rank() > 2
|
||||
// Algorithm::EltwisePowerStatic is ignored
|
||||
if (node->get_output_shape(0).size() > 2) {
|
||||
if (ov::is_type<ov::op::v1::Add>(node) ||
|
||||
ov::is_type<ov::op::v1::Multiply>(node) ||
|
||||
ov::is_type<ov::op::v1::Subtract>(node) ||
|
||||
ov::is_type<ov::op::v1::Divide>(node) ||
|
||||
ov::is_type<ov::op::v0::PRelu>(node)) {
|
||||
const auto const1 = ov::is_type<ov::op::v0::Constant>(node->get_input_node_shared_ptr(0));
|
||||
const auto const2 = ov::is_type<ov::op::v0::Constant>(node->get_input_node_shared_ptr(1));
|
||||
int constPort = -1;
|
||||
if (const2) {
|
||||
constPort = 1;
|
||||
} else if (const1) {
|
||||
constPort = 0;
|
||||
}
|
||||
|
||||
if (constPort != -1) {
|
||||
auto const_shape = node->get_input_shape(constPort);
|
||||
if (ov::shape_size(const_shape) != 1) {
|
||||
if (constPort != -1) {
|
||||
auto const_shape = node->get_input_shape(constPort);
|
||||
if (ov::shape_size(const_shape) != 1) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if (ov::is_type<ov::op::v0::FakeQuantize>(node)) {
|
||||
const bool is_per_tensor_broadcasting = ngraph::snippets::utils::is_scalar_constant(node->get_input_node_shared_ptr(1)) &&
|
||||
ngraph::snippets::utils::is_scalar_constant(node->get_input_node_shared_ptr(2)) &&
|
||||
ngraph::snippets::utils::is_scalar_constant(node->get_input_node_shared_ptr(3)) &&
|
||||
ngraph::snippets::utils::is_scalar_constant(node->get_input_node_shared_ptr(4));
|
||||
if (!is_per_tensor_broadcasting) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// specific case for FQ
|
||||
if (ov::is_type<ov::op::v0::FakeQuantize>(node)) {
|
||||
if (one_of(node->get_output_element_type(0), ov::element::i8, ov::element::u8) && canMatMulBeExecutedInI8) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FullyConnectedBiasFusion
|
||||
if (!(can_be_converted_to_FC && ov::is_type<ngraph::opset1::Add>(node) &&
|
||||
bias_shape.back() == matmul_shape.back() &&
|
||||
bias_shape.back() == shape_size(bias_shape))) {
|
||||
return false;
|
||||
}
|
||||
// Fusing chain must be interrupted after the node, since reshape will be inserted
|
||||
if (bias_shape.size() >= 2)
|
||||
updatedChainType = NodeFusingType::FusedTerminator;
|
||||
return true;
|
||||
}
|
||||
bool isSuitableParentForFusingSumActivation(const std::shared_ptr<const Node> &node) {
|
||||
@ -334,11 +362,21 @@ bool isSuitableParentForFusingSumActivation(const std::shared_ptr<const Node> &n
|
||||
}
|
||||
return true;
|
||||
};
|
||||
auto isFusedFQNode = [&isFusedBiasNode](std::shared_ptr<Node> n) {
|
||||
if (!(ov::is_type<ngraph::op::v0::FakeQuantize>(n) &&
|
||||
GetNodeFusingType(n) == NodeFusingType::FusedWithConvolution))
|
||||
return false;
|
||||
const auto& parent = n->get_input_node_shared_ptr(0);
|
||||
const bool is_suitable_parent = isSuitableConvolutionParent(parent)
|
||||
|| isFusedBiasNode(parent)
|
||||
|| (GetNodeFusingType(parent) == NodeFusingType::FusedWithConvolution);
|
||||
return is_suitable_parent;
|
||||
};
|
||||
int num_conv_parents = 0;
|
||||
for (size_t i = 0; i < node->get_input_size(); i++) {
|
||||
const auto n = node->get_input_node_shared_ptr(i);
|
||||
//BinaryConvolution allows other ops to be fused before the Add, while Convolution doesn't
|
||||
num_conv_parents += (isSuitableConvolutionParent(n) || isFusedBiasNode(n) ||
|
||||
num_conv_parents += (isSuitableConvolutionParent(n) || isFusedBiasNode(n) || isFusedFQNode(n) ||
|
||||
GetNodeFusingType(n) == NodeFusingType::FusedWithBinaryConvolution);
|
||||
}
|
||||
return getNumNonConstInputs(node) == 2 && num_conv_parents >=1;
|
||||
@ -346,6 +384,9 @@ bool isSuitableParentForFusingSumActivation(const std::shared_ptr<const Node> &n
|
||||
bool isSuitableChildForFusingSumActivation(const std::shared_ptr<const Node> &node) {
|
||||
return SupportsFusingWithConvolution_SumActivation(node);
|
||||
}
|
||||
bool isSuitableReduceChild(const std::shared_ptr<const Node> &node, const int channelAxis = DEFAULT_AXIS) {
|
||||
return node->get_output_element_type(0) == ov::element::f32 && isSuitableChildForFusingSimple(node, channelAxis);
|
||||
}
|
||||
// Continue fusing chain of the passed type if the node has one child
|
||||
// Otherwise mark node as FusedTerminator (Fused, but fusing chain is interrupted)
|
||||
void PropagateIfHasOnlyChild(const std::shared_ptr<Node> &node, NodeFusingType nodeType) {
|
||||
@ -378,59 +419,77 @@ void MarkSubgraphOpAsSkipped(const std::shared_ptr<Node> &node) {
|
||||
|
||||
bool SnippetsMarkSkipped::run_on_model(const std::shared_ptr<ov::Model> &m) {
|
||||
RUN_ON_MODEL_SCOPE(SnippetsMarkSkipped);
|
||||
int channelAxis = 1;
|
||||
int channelAxis = DEFAULT_AXIS;
|
||||
for (auto &node : m->get_ordered_ops()) {
|
||||
if (ngraph::op::is_constant(node))
|
||||
continue;
|
||||
|
||||
if (ngraph::op::is_parameter(node)) {
|
||||
SetNodeFusingType(node, NodeFusingType::IgnoredAfterInputs);
|
||||
continue;
|
||||
} else if (isSuitableConvolutionParent(node)) {
|
||||
// Initiate fusing chain
|
||||
SetNodeFusingType(node, NodeFusingType::FusedWithConvolution);
|
||||
continue;
|
||||
channelAxis = DEFAULT_AXIS;
|
||||
} else if (isSuitableBinaryConvolutionParent(node)) {
|
||||
SetNodeFusingType(node, NodeFusingType::FusedWithBinaryConvolution);
|
||||
continue;
|
||||
} else if (isSuitableMiscParent(node, channelAxis)) {
|
||||
channelAxis = DEFAULT_AXIS;
|
||||
} else if (isSuitableReduceParent(node)) {
|
||||
const auto reduce = std::dynamic_pointer_cast<const ngraph::op::util::ArithmeticReductionKeepDims>(node);
|
||||
channelAxis = getChannelAxis(reduce->get_reduction_axes(), reduce->get_keep_dims());
|
||||
SetNodeFusingType(node, NodeFusingType::FusedWithReduce);
|
||||
} else if (isSuitableMiscParent(node)) {
|
||||
if (const auto reduce = std::dynamic_pointer_cast<const ngraph::op::util::ArithmeticReductionKeepDims>(node)) {
|
||||
channelAxis = getChannelAxis(reduce->get_reduction_axes(), reduce->get_keep_dims());
|
||||
} else {
|
||||
channelAxis = DEFAULT_AXIS;
|
||||
}
|
||||
SetNodeFusingType(node, NodeFusingType::FusedWithMisc);
|
||||
continue;
|
||||
} else if (isSuitableMatMulParent(node)) {
|
||||
SetNodeFusingType(node, NodeFusingType::FusedWithMatMul);
continue;
if (canBeMatMulExecutedInInt8(node->get_input_element_type(0), node->get_input_element_type(1)))
SetNodeFusingType(node, NodeFusingType::FusedWithMatMulI8);
else
SetNodeFusingType(node, NodeFusingType::FusedWithMatMul);
channelAxis = DEFAULT_AXIS;
} else if (isSuitableSubtractAsZeroPointsParent(node)) {
SetSnippetsNodeType(node, snippets::pass::SnippetsNodeType::SkippedByPlugin);
continue;
}
for (const auto fusingChainType : getContinuableChains(node)) {
if (isSuitableChildForFusingSimple(node, channelAxis)) {
PropagateIfHasOnlyChild(node, fusingChainType);
} else if (fusingChainType == NodeFusingType::FusedWithConvolution ||
fusingChainType == NodeFusingType::FusedWithBinaryConvolution) {
if (isSuitableParentForFusingSumActivation(node)) {
PropagateIfHasOnlyChild(node, NodeFusingType::FusedWithConvolutionSumActivation);
// Mimic FuseConvolutionAndSimpleOperationThroughMaxPool
} else if (isSuitablePoolChild(node)) {
channelAxis = DEFAULT_AXIS;
} else {
for (const auto fusingChainType : getContinuableChains(node)) {
if (fusingChainType == NodeFusingType::FusedWithReduce) {
if (isSuitableReduceChild(node, channelAxis))
PropagateIfHasOnlyChild(node, fusingChainType);
} else if (isSuitableChildForFusingSimple(node, channelAxis)) {
PropagateIfHasOnlyChild(node, fusingChainType);
} else if (fusingChainType == NodeFusingType::FusedWithConvolution ||
fusingChainType == NodeFusingType::FusedWithBinaryConvolution) {
if (isSuitableParentForFusingSumActivation(node)) {
PropagateIfHasOnlyChild(node, NodeFusingType::FusedWithConvolutionSumActivation);
// Mimic FuseConvolutionAndSimpleOperationThroughMaxPool
} else if (isSuitablePoolChild(node)) {
PropagateIfHasOnlyChild(node, fusingChainType);
}
} else if (fusingChainType == NodeFusingType::FusedWithConvolutionSumActivation &&
isSuitableChildForFusingSumActivation(node)) {
// TODO: Chain could be converted from FusedWithBinaryConvolution to FusedWithConvolution at this point
// Set FusedWithConvolution, so the fusing chain could be propagated
PropagateIfHasOnlyChild(node, NodeFusingType::FusedWithConvolution);
} else if (fusingChainType == NodeFusingType::FusedWithMatMul ||
fusingChainType == NodeFusingType::FusedWithMatMulI8) {
const bool isExecutedInINT8 = fusingChainType == NodeFusingType::FusedWithMatMulI8;
// Handle fusings for both MatMul and FullyConnected
NodeFusingType updatedChainType = fusingChainType;
if (isSuitableChildForFusingMatMul(node, isExecutedInINT8, updatedChainType, channelAxis))
PropagateIfHasOnlyChild(node, updatedChainType);
} else if (fusingChainType == NodeFusingType::IgnoredAfterInputs && (snippets::pass::AppropriateForSubgraph(node) ||
ov::is_type<ngraph::op::v0::Convert>(node) || ov::is_type<ngraph::op::v1::Transpose>(node))) {
// In OV API 2.0 a Convert node is inserted after an Input node with I8/U8 precision; moreover, on TF models a
// Transpose layer is inserted. This breaks the idea of leaving an Eltwise node with I8/U8 inputs and FP32 outputs instead of a Subgraph node
// TODO: Remove the additional check on Convert/Transpose here after enabling Subgraphs with I8/U8 inputs and FP32 outputs
SetNodeFusingType(node, NodeFusingType::IgnoredAfterInputs);
}
} else if (fusingChainType == NodeFusingType::FusedWithConvolutionSumActivation &&
isSuitableChildForFusingSumActivation(node)) {
// TODO: Chain could be converted from FusedWithBinaryConvolution to FusedWithConvolution at this point
// Set FusedWithConvolution, so the fusing chain could be propagated
PropagateIfHasOnlyChild(node, NodeFusingType::FusedWithConvolution);
} else if (fusingChainType == NodeFusingType::FusedWithMatMul) {
// Handle fusings for both MatMul and FullyConnected
NodeFusingType updatedChainType = fusingChainType;
if (isSuitableChildForFusingMatMul(node, updatedChainType))
PropagateIfHasOnlyChild(node, updatedChainType);
} else if (fusingChainType == NodeFusingType::IgnoredAfterInputs && (snippets::pass::AppropriateForSubgraph(node) ||
ov::is_type<ngraph::op::v0::Convert>(node) || ov::is_type<ngraph::op::v1::Transpose>(node))) {
// In OV API 2.0 a Convert node is inserted after an Input node with I8/U8 precision; moreover, on TF models a
// Transpose layer is inserted. This breaks the idea of leaving an Eltwise node with I8/U8 inputs and FP32 outputs instead of a Subgraph node
// TODO: Remove the additional check on Convert/Transpose here after enabling Subgraphs with I8/U8 inputs and FP32 outputs
SetNodeFusingType(node, NodeFusingType::IgnoredAfterInputs);
}
}

if (GetNodeFusingType(node) != NodeFusingType::NotSet) {
SetSnippetsNodeType(node, snippets::pass::SnippetsNodeType::SkippedByPlugin);
} else {

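For reference, a minimal sketch of what an INT8-eligibility predicate such as canBeMatMulExecutedInInt8 may check; the exact body is outside this diff, so the precision combination below is an assumption:

// Hypothetical sketch, not the actual implementation from this commit:
// a MatMul is typically INT8-executable when the first input is u8 or i8
// and the second input is i8.
static bool canBeMatMulExecutedInInt8_sketch(const ov::element::Type& firstType,
                                             const ov::element::Type& secondType) {
    return (firstType == ov::element::u8 || firstType == ov::element::i8) &&
           secondType == ov::element::i8;
}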
@ -37,7 +37,7 @@ enum class NodeFusingType : int64_t {
NotSet,
FusedTerminator,
FusedWithConvolution, FusedWithBinaryConvolution, FusedWithConvolutionSumActivation,
FusedWithMatMul, FusedWithMisc, IgnoredAfterInputs};
FusedWithMatMul, FusedWithMatMulI8, FusedWithReduce, FusedWithMisc, IgnoredAfterInputs};

} // namespace intel_cpu
} // namespace ov

@ -573,6 +573,10 @@ public:
rtParamsCache = cache;
}

void setSharedMutex(const std::shared_ptr<std::mutex>& mutex) {
sharedMutex = mutex;
}

protected:
bool canFuseSimpleOperation(const NodePtr& node) const;

@ -747,6 +751,8 @@ protected:

std::shared_ptr<IShapeInfer> shapeInference;

std::shared_ptr<std::mutex> sharedMutex = nullptr;

private:
std::vector<EdgeWeakPtr> parentEdges;
std::vector<EdgeWeakPtr> childEdges;

@ -70,8 +70,8 @@ void If::getSupportedDescriptors() {

const std::shared_ptr<const ov::Model>& thenBody = ifOp->get_then_body();
const std::shared_ptr<const ov::Model>& elseBody = ifOp->get_else_body();
subGraphThen.CreateGraph(thenBody, ext_mng, weightCache);
subGraphElse.CreateGraph(elseBody, ext_mng, weightCache);
subGraphThen.CreateGraph(thenBody, ext_mng, weightCache, sharedMutex);
subGraphElse.CreateGraph(elseBody, ext_mng, weightCache, sharedMutex);

const auto &inMapThen = subGraphThen.GetInputNodesMap();
for (const auto &param : ifOp->get_then_body()->get_parameters()) {

@ -20,9 +20,12 @@
#include <ngraph/rt_info.hpp>
#include <ie_ngraph_utils.hpp>

#include <shared_mutex>

#include <snippets/op/subgraph.hpp>
#include "emitters/cpu_generator.hpp"
#include "snippets_transformations/fuse_load_store_and_convert.hpp"
#include "ngraph_transformations/convert_to_swish_cpu.hpp"

using namespace InferenceEngine;
using namespace dnnl::impl::utils;
@ -34,30 +37,42 @@ namespace ov {
namespace intel_cpu {
namespace node {


Snippet::Snippet(const std::shared_ptr<ngraph::Node>& op, const dnnl::engine& eng, WeightsSharing::Ptr &cache)
: Node(op, eng, cache) {
host_isa = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core) ?
dnnl::impl::cpu::x64::avx512_core : dnnl::impl::cpu::x64::avx2;

// Create a deep local copy of the input snippet to perform canonicalization & code generation
// TODO: Probably better to implement a proper copy constructor
if (const auto tmp_snippet = ov::as_type_ptr<ngraph::snippets::op::Subgraph>(op)) {
ngraph::OutputVector subgraph_node_inputs;
for (const auto &input : tmp_snippet->input_values()) {
auto new_input = std::make_shared<ngraph::opset1::Parameter>(input.get_element_type(), input.get_partial_shape());
subgraph_node_inputs.push_back(new_input);
}
auto new_body = ov::clone_model(*tmp_snippet->get_body().get());
snippet = std::make_shared<ngraph::snippets::op::Subgraph>(subgraph_node_inputs, new_body);
ngraph::copy_runtime_info(tmp_snippet, snippet);
snippet->set_friendly_name(tmp_snippet->get_friendly_name());
snippet->set_generator(std::make_shared<CPUGenerator>(host_isa));
} else {
original_snippet = ov::as_type_ptr<ngraph::snippets::op::Subgraph>(op);
if (!original_snippet) {
IE_THROW(NotImplemented) << "Node is not an instance of snippets::op::Subgraph";
}
}

void Snippet::copy_snippet() {
ngraph::OutputVector subgraph_node_inputs;
for (const auto &input : original_snippet->input_values()) {
auto new_input = std::make_shared<ngraph::opset1::Parameter>(input.get_element_type(), input.get_partial_shape());
subgraph_node_inputs.push_back(new_input);
}
std::shared_ptr<ov::Model> new_body = nullptr;
// Ticket[79554]: TypeRelaxed ops aren't thread-safe, so we use a mutex to avoid collisions in throughput mode
if (original_snippet->has_type_relaxed_ops()) {
if (!sharedMutex) {
IE_THROW() << "Subgraph doesn't have shared mutex";
}
std::lock_guard<std::mutex> lock(*sharedMutex.get());
new_body = ov::clone_model(*original_snippet->get_body().get());
} else {
new_body = ov::clone_model(*original_snippet->get_body().get());
}
snippet = std::make_shared<ngraph::snippets::op::Subgraph>(subgraph_node_inputs, new_body);
ngraph::copy_runtime_info(original_snippet, snippet);
snippet->set_friendly_name(original_snippet->get_friendly_name());
snippet->set_generator(std::make_shared<CPUGenerator>(host_isa));
}

void Snippet::initSupportedPrimitiveDescriptors() {
copy_snippet();
if (!supportedPrimitiveDescriptors.empty())
return;

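The thread-safety workaround in copy_snippet() boils down to serializing clone_model() calls between streams; a standalone sketch of the same pattern, with illustrative names:

// Illustrative sketch: every stream that clones a body containing TypeRelaxed
// ops must take the same mutex, since TypeRelaxed::copy_with_new_inputs()
// is not thread-safe (see Ticket[79554]).
std::shared_ptr<ov::Model> clone_body_guarded(const std::shared_ptr<ov::Model>& body,
                                              const std::shared_ptr<std::mutex>& shared_mutex,
                                              bool has_type_relaxed_ops) {
    if (has_type_relaxed_ops) {
        std::lock_guard<std::mutex> lock(*shared_mutex);
        return ov::clone_model(*body);
    }
    return ov::clone_model(*body);
}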
@ -488,6 +503,7 @@ void Snippet::generate() {
ov::pass::Manager optManager;
optManager.register_pass<ov::intel_cpu::pass::FuseLoadConvert>();
optManager.register_pass<ov::intel_cpu::pass::FuseStoreConvert>();
optManager.register_pass<ConvertToSwishCPU>();

// LoadConvert uses the Load emitter, which supports conversion from any type only to f32
optManager.get_pass_config()->set_callback<ov::intel_cpu::pass::FuseLoadConvert>(

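The callback registered above decides per node whether FuseLoadConvert must be skipped; the diff is truncated at this point, so the concrete condition below is an assumption consistent with the f32-only comment:

// A sketch of such a callback under the stated assumption (illustrative only;
// the actual condition is not shown in this diff). Returning true disables
// the transformation for the given node.
auto skip_fuse_load_convert = [](const std::shared_ptr<const ov::Node>& n) -> bool {
    // The Load emitter converts only to f32, so skip any other destination type.
    return n->get_output_element_type(0) != ov::element::f32;
};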
@ -32,6 +32,10 @@ public:
void selectOptimalPrimitiveDescriptor() override;
InferenceEngine::Precision getRuntimePrecision() const override;

// To avoid collisions in throughput mode with copies of TypeRelaxed nodes,
// we should have a common shared mutex between streams
void setSharedMutex(const std::shared_ptr<std::mutex>& mutex);

// Here we convert to canonical form & JIT everything
void createPrimitive() override;

@ -46,6 +50,11 @@ private:

typedef void (*kernel)(const void *, const void *);

// Create a deep local copy of the input snippet to perform canonicalization & code generation
// TODO: Probably better to implement a proper copy constructor
// NOTE: The mutex should be initialized before the call
void copy_snippet();

void define_schedule();

void generate();
@ -54,6 +63,8 @@ private:
void schedule_6d(const jit_snippets_call_args& const_args) const;
void schedule_nt(const jit_snippets_call_args& const_args) const;

// Original subgraph node
std::shared_ptr<ngraph::snippets::op::Subgraph> original_snippet;
// Local copy of the subgraph node for canonicalization & code generation
std::shared_ptr<ngraph::snippets::op::Subgraph> snippet;

@ -363,7 +363,7 @@ void TensorIterator::getSupportedDescriptors() {
THROW_ERROR << "cannot be cast to ov::op::util::SubGraphOp";
}
const std::shared_ptr<const ov::Model> body = tiOp->get_function();
sub_graph.CreateGraph(body, ext_mng, weightCache);
sub_graph.CreateGraph(body, ext_mng, weightCache, sharedMutex);

const auto &inMap = sub_graph.GetInputNodesMap();
for (const auto &param : tiOp->get_function()->get_parameters()) {

@ -82,6 +82,8 @@
#include <transformations/op_conversions/fq_decomposition.hpp>
#include <transformations/utils/utils.hpp>
#include <snippets/pass/collapse_subgraph.hpp>
#include <snippets/pass/common_optimizations.hpp>
#include <snippets/pass/convert_constants.hpp>
#include "ngraph_transformations/snippets_mark_skipped.hpp"
#include <transformations/op_conversions/convert_roi_align_v9_to_v3.hpp>
#include <transformations/op_conversions/convert_roi_align_v3_to_v9.hpp>
@ -579,20 +581,12 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
}

ngraph::pass::Manager postLPTPassManager;
postLPTPassManager.register_pass<ngraph::pass::FakeQuantizeDecomposition>();
postLPTPassManager.register_pass<ngraph::pass::UnrollTensorIterator>();
postLPTPassManager.register_pass<ReshapePRelu>();

postLPTPassManager.get_pass_config()->set_callback<ngraph::pass::FakeQuantizeDecomposition>([](const_node_ptr &node) -> bool {
std::string errMsg;
return node::FakeQuantize::isSupportedOperation(node, errMsg);
});
postLPTPassManager.get_pass_config()->set_callback<ngraph::pass::UnrollTensorIterator>([](const_node_ptr &node) -> bool {
// The UnrollTI transformation is disabled by default and is turned on by the LowLatency transformation
return node->get_rt_info().count("UNROLL_TI") == 0;
});


postLPTPassManager.register_pass<MoveEltwiseUpThroughDataMov>();
postLPTPassManager.get_pass_config()->set_callback<MoveEltwiseUpThroughDataMov>([](const std::shared_ptr<const ngraph::Node>& node) -> bool {
if (node->get_input_size() >= 2) {
@ -625,13 +619,19 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
});
postLPTPassManager.run_passes(nGraphFunc);

if (!useLpt && _enableSnippets && dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) {
ngraph::pass::Manager tokenization_manager;
tokenization_manager.register_pass<SnippetsMarkSkipped>();
tokenization_manager.register_pass<ngraph::snippets::pass::EnumerateNodes>();
tokenization_manager.register_pass<ngraph::snippets::pass::TokenizeSnippets>();
tokenization_manager.get_pass_config()->set_callback<ngraph::snippets::pass::TokenizeSnippets>(
if (_enableSnippets && dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) {
ngraph::pass::Manager snippetsManager;
snippetsManager.register_pass<SnippetsMarkSkipped>();
snippetsManager.register_pass<ngraph::snippets::pass::EnumerateNodes>();
snippetsManager.register_pass<ngraph::snippets::pass::TokenizeSnippets>();
snippetsManager.get_pass_config()->set_callback<ngraph::snippets::pass::TokenizeSnippets>(
[](const std::shared_ptr<const ov::Node>& n) -> bool {
// The CPU plugin supports Swish in a Subgraph via conversion to SwishCPU, which assumes the second input to be constant
if (ov::is_type<const ov::op::v4::Swish>(n)) {
if (n->inputs().size() > 1 && !ov::is_type<const ov::op::v0::Constant>(n->get_input_node_shared_ptr(1)))
return true;
}

const auto& inputs = n->inputs();
// TODO: clarify whether we can evaluate snippets on const paths
const bool has_only_const_inputs = std::all_of(inputs.begin(), inputs.end(),
@ -650,8 +650,18 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
[&](const ov::Output<const ov::Node>& out) {return rank_is_too_large(out.get_tensor());});
return has_only_const_inputs || bad_input_rank || bad_output_rank;
});
tokenization_manager.run_passes(nGraphFunc);
snippetsManager.register_pass<ngraph::snippets::pass::CommonOptimizations>();
snippetsManager.run_passes(nGraphFunc);
}

ngraph::pass::Manager postSnippetsManager;
postSnippetsManager.register_pass<ngraph::pass::FakeQuantizeDecomposition>();
postSnippetsManager.get_pass_config()->set_callback<ngraph::pass::FakeQuantizeDecomposition>([](const_node_ptr& node) -> bool {
std::string errMsg;
return node::FakeQuantize::isSupportedOperation(node, errMsg);
});
postSnippetsManager.register_pass<ngraph::pass::ConstantFolding>();
postSnippetsManager.run_passes(nGraphFunc);
}

static void Transformation(CNNNetwork& clonedNetwork, const bool _enableLPT, const bool _enableBF16, const bool _enableSnippets, const bool isLegacyApi) {

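Net effect of the last two hunks, as far as the visible lines show: tokenization now runs whenever snippets are enabled (the !useLpt guard is dropped), CommonOptimizations joins the same manager, and FakeQuantizeDecomposition moves out of postLPTPassManager into a dedicated postSnippetsManager that runs after tokenization, so FakeQuantize ops captured into Subgraphs can be decomposed inside snippets while the remaining ones are decomposed and constant-folded afterwards. In outline:

// Pass ordering after this change (sketch):
// 1. postLPTPassManager   - UnrollTensorIterator, ReshapePRelu, MoveEltwiseUpThroughDataMov, ...
// 2. snippetsManager      - SnippetsMarkSkipped -> EnumerateNodes -> TokenizeSnippets -> CommonOptimizations
// 3. postSnippetsManager  - FakeQuantizeDecomposition (still-supported FQ nodes) -> ConstantFolding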
@ -0,0 +1,131 @@

// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "snippets/fake_quantize_decomposition_test.hpp"

using namespace LayerTestsDefinitions;
using namespace ngraph;

namespace {

namespace decompositionInSubgraph {
const std::vector<TestValues> testValuesDecompositionScalars = {
{
ov::element::f32,
ngraph::Shape{1, 3, 16, 16},
ov::element::f32,
1.f,
{{}, {}, {}, {}},
},
};
const std::vector<TestValues> testValuesDecompositionPerChannel = {
{
ov::element::f32,
ngraph::Shape{1, 3, 16, 16},
ov::element::f32,
1.f,
{{1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}},
},
{
ov::element::f32,
ngraph::Shape{1, 3, 16, 16},
ov::element::f32,
1.f,
{{1, 3, 1, 1}, {1, 3, 1, 1}, {}, {}},
},
};

std::vector<std::pair<std::shared_ptr<Node>, std::pair<std::string, std::string>>> operations = {
{std::make_shared<opset1::Abs>(), {"Subgraph", "Abs,fakeQuantize"}},
{std::make_shared<ngraph::op::v4::Swish>(), {"Subgraph", "Swish,fakeQuantize"}},
};

INSTANTIATE_TEST_SUITE_P(
smoke_Snippets_FQDecomposition_Scalars,
FakeQuantizeDecompositionTest,
::testing::Combine(
::testing::ValuesIn(testValuesDecompositionScalars),
::testing::ValuesIn(operations),
// reorder (nChw[16|8]c) + MaxPool + Subgraph + reorder(nchw)
::testing::Values(std::pair<size_t, size_t>{4, 1}),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
FakeQuantizeDecompositionTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(
smoke_Snippets_FQDecomposition_PerChannel,
FakeQuantizeDecompositionTest,
::testing::Combine(
::testing::Values(testValuesDecompositionPerChannel[0]),
::testing::ValuesIn(operations),
// reorder (nChw[16|8]c) + MaxPool + reorder(nChw[16|8]c) x6 + Subgraph + reorder(nchw)
::testing::Values(std::pair<size_t, size_t>{10, 1}),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
FakeQuantizeDecompositionTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(
smoke_Snippets_FQDecomposition_PerChannel_Input,
FakeQuantizeDecompositionTest,
::testing::Combine(
::testing::Values(testValuesDecompositionPerChannel[1]),
::testing::ValuesIn(operations),
// reorder (nChw[16|8]c) + MaxPool + reorder(nChw[16|8]c) x4 + Subgraph + reorder(nchw)
::testing::Values(std::pair<size_t, size_t>{8, 1}),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
FakeQuantizeDecompositionTest::getTestCaseName);
} // namespace decompositionInSubgraph


namespace legacyFuse {
const std::vector<TestValues> testValuesLegacyFuse = {
{
ov::element::f32,
ngraph::Shape{1, 3, 16, 16},
ov::element::f32,
1.f,
{{1, 3, 1, 1}, {1, 3, 1, 1}, {}, {}}
},
{
ov::element::f32,
ngraph::Shape{1, 3, 16, 16},
ov::element::f32,
1.f,
{{}, {}, {1, 3, 1, 1}, {1, 3, 1, 1}}
},
{
ov::element::f32,
ngraph::Shape{1, 3, 16, 16},
ov::element::f32,
1.f,
{{}, {}, {}, {}}
},
{
ov::element::f32,
ngraph::Shape{1, 3, 16, 16},
ov::element::f32,
1.f,
{{1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}}
},
};

std::vector<std::pair<std::shared_ptr<Node>, std::pair<std::string, std::string>>> operations = {
{std::make_shared<opset1::Convolution>(), {"Convolution", "Convolution,fakeQuantize"}},
};

INSTANTIATE_TEST_SUITE_P(
smoke_Snippets,
FakeQuantizeDecompositionTest,
::testing::Combine(
::testing::ValuesIn(testValuesLegacyFuse),
::testing::ValuesIn(operations),
// reorder (nChw[16|8]c) + MaxPool + reorder(nhwc) + reorder(ABcd16b16a) + Convolution + reorder(nchw)
::testing::Values(std::pair<size_t, size_t>{6, 0}),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
FakeQuantizeDecompositionTest::getTestCaseName);

} // namespace legacyFuse

} // namespace
@ -0,0 +1,107 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include "common_test_utils/ngraph_test_utils.hpp"
#include "snippets/pass/fq_decomposition.hpp"
#include "snippets/pass/collapse_subgraph.hpp"
#include "fake_quantize_function.hpp"
#include "snippets/op/subgraph.hpp"
#include "ngraph_transformations/snippets_mark_skipped.hpp"
#include "function_helper.hpp"

namespace ov {
namespace test {
namespace snippets {

class FakeQuantizeTokenizationTest : public TransformationTestsF {
public:
void register_passes() {
manager.register_pass<ov::intel_cpu::SnippetsMarkSkipped>();
manager.register_pass<ngraph::snippets::pass::EnumerateNodes>();
manager.register_pass<ngraph::snippets::pass::TokenizeSnippets>();
manager.get_pass_config()->set_callback<ngraph::snippets::pass::TokenizeSnippets>([](const std::shared_ptr<const ov::Node>& n) -> bool {
return false;
});
}

void TearDown() override {
TransformationTestsF::TearDown();

auto subgraph = FunctionHelper::getSubgraph(function);
auto body = subgraph == nullptr ? nullptr : std::dynamic_pointer_cast<ngraph::snippets::op::Subgraph>(subgraph)->get_body();

auto subgraph_ref = FunctionHelper::getSubgraph(function_ref);
auto body_ref = subgraph_ref == nullptr ? nullptr : std::dynamic_pointer_cast<ngraph::snippets::op::Subgraph>(subgraph_ref)->get_body();

if ((body != nullptr) && (body_ref != nullptr)) {
auto res = comparator.compare(body, body_ref);
ASSERT_TRUE(res.valid) << res.message;
} else {
ASSERT_EQ(nullptr, body);
ASSERT_EQ(nullptr, body_ref);
}
}
};

TEST_F(FakeQuantizeTokenizationTest, smoke_Snippets_FakeQuantize_PerTensor) {
function = FakeQuantizeFunction::getOperationAndFakeQuantize(
{ {1, 3, 16, 16} },
element::f32,
{ {}, {}, {}, {} },
true,
FunctionHelper::makePrerequisitesOriginal());

function_ref = FakeQuantizeFunction::getSubgraphWithFakeQuantize(
{ {1, 3, 16, 16} },
element::f32,
{ {}, {}, {}, {} },
true,
FunctionHelper::makePrerequisitesOriginal());

register_passes();
}

TEST_F(FakeQuantizeTokenizationTest, smoke_Snippets_FakeQuantize_PerChannels) {
function = FakeQuantizeFunction::getOperationAndFakeQuantize(
{ {1, 3, 16, 16} },
element::f32,
{ {1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1} },
true,
FunctionHelper::makePrerequisitesOriginal());

function_ref = FakeQuantizeFunction::getSubgraphWithFakeQuantize(
{ {1, 3, 16, 16} },
element::f32,
{ {1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1}, {1, 3, 1, 1} },
true,
FunctionHelper::makePrerequisitesOriginal());

register_passes();
}

TEST_F(FakeQuantizeTokenizationTest, smoke_Snippets_ConvolutionWithFakeQuantize) {
function = FakeQuantizeFunction::getOperationAndFakeQuantize(
{{1, 3, 16, 16}},
element::f32,
{{}, {}, {}, {}},
true,
FunctionHelper::makePrerequisitesOriginal(),
std::make_shared<ngraph::opset1::Convolution>());

function_ref = FakeQuantizeFunction::getOperationAndFakeQuantize(
{{1, 3, 16, 16}},
element::f32,
{{}, {}, {}, {}},
true,
FunctionHelper::makePrerequisitesOriginal(),
std::make_shared<ngraph::opset1::Convolution>());

register_passes();
}

} // namespace snippets
} // namespace test
} // namespace ov
@ -0,0 +1,50 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <tuple>
#include <string>

#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include "shared_test_classes/base/snippets_test_utils.hpp"

namespace LayerTestsDefinitions {

class ActualValues {
public:
ov::element::Type modelType;
ngraph::Shape inputShape;
ov::element::Type inputType;
float zeroPoint;
std::vector<ngraph::Shape> fakeQuantizeShapes;
};

class TestValues {
public:
ov::element::Type modelType;
ngraph::Shape inputShape;
ov::element::Type inputType;
float zeroPoint;
std::vector<ngraph::Shape> fakeQuantizeShapes;
};

typedef std::tuple<
TestValues, // test values
std::pair<std::shared_ptr<ngraph::Node>, std::pair<std::string, std::string>>, // operation
std::pair<size_t, size_t>, // number of nodes
std::string // target device
> testsParams;

class FakeQuantizeDecompositionTest : public testing::WithParamInterface<testsParams>, virtual public ov::test::SnippetsTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<testsParams> obj);

protected:
void SetUp() override;
};

} // namespace LayerTestsDefinitions
@ -0,0 +1,78 @@

// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "snippets/fake_quantize_decomposition_test.hpp"

#include <memory>
#include <tuple>
#include <vector>
#include <string>

#include <ie_core.hpp>
#include "ngraph_ops/type_relaxed.hpp"
#include "fake_quantize_function.hpp"
#include "function_helper.hpp"

namespace LayerTestsDefinitions {

std::string FakeQuantizeDecompositionTest::getTestCaseName(testing::TestParamInfo<testsParams> obj) {
std::ostringstream result;
const auto values = std::get<0>(obj.param);
const auto operation = std::get<1>(obj.param);
const auto operations_number = std::get<2>(obj.param);
const auto targetDevice = std::get<3>(obj.param);

const auto type_info = operation.first->get_type_info();
const auto operationString = ngraph::is_type<ngraph::opset1::Parameter>(operation.first) ?
"nullptr" :
(std::string(type_info.name) + "_" + std::string(type_info.version_id));

result << "IS=" << CommonTestUtils::vec2str(values.inputShape) << "_";
result << "netPRC=" << values.modelType << "_";
result << "D=" << targetDevice << "_";
result << "IN=" << values.inputType << "_";
result << "OP=" << operationString << "_";
result << "ON1=" << std::string(operation.second.first) << "_";
result << "ON2=" << std::string(operation.second.second) << "_";
result << "LP=" << values.zeroPoint;
result << "SH1=" << values.fakeQuantizeShapes[0] << "SH2=" << values.fakeQuantizeShapes[1]
<< "SH3=" << values.fakeQuantizeShapes[2] << "SH4=" << values.fakeQuantizeShapes[3];
return result.str();
}

void FakeQuantizeDecompositionTest::SetUp() {
auto& testsParams = this->GetParam();

const auto values = std::get<0>(testsParams);
const auto operation = std::get<1>(testsParams);
const auto operations_number = std::get<2>(testsParams);
targetDevice = std::get<3>(testsParams);

ref_num_nodes = operations_number.first;
ref_num_subgraphs = operations_number.second;

init_input_shapes({{values.inputShape, {values.inputShape}}});

std::shared_ptr<ngraph::Node> op = ngraph::is_type<ngraph::opset1::Parameter>(operation.first) ? nullptr : operation.first;
function = ov::test::snippets::FakeQuantizeFunction::getOperationAndFakeQuantize(
{values.inputShape},
values.inputType,
values.fakeQuantizeShapes,
values.zeroPoint,
ov::test::snippets::FunctionHelper::makePrerequisitesOriginal(),
op);
}

TEST_P(FakeQuantizeDecompositionTest, CompareWithRefImpl) {
run();

const auto operation = std::get<1>(this->GetParam());
auto elementType = std::string(operation.second.first);
validateOriginalLayersNamesByType(elementType, operation.second.second);

validateNumSubgraphs();
};

} // namespace LayerTestsDefinitions
@ -12,6 +12,9 @@ namespace test {
class SnippetsTestsCommon : virtual public ov::test::SubgraphBaseTest {
protected:
void validateNumSubgraphs();

void validateOriginalLayersNamesByType(const std::string& layerType, const std::string& originalLayersNames);

// The expected number of nodes and subgraphs in the exec graph depends on the plugin
// pipeline (the tokenization callback, for example). Therefore, they have to be provided manually.
size_t ref_num_nodes = 0;

@ -36,5 +36,23 @@ void SnippetsTestsCommon::validateNumSubgraphs() {
ASSERT_EQ(ref_num_subgraphs, num_subgraphs) << "Compiled model contains invalid number of subgraphs.";
}

void SnippetsTestsCommon::validateOriginalLayersNamesByType(const std::string& layerType, const std::string& originalLayersNames) {
const auto& compiled_model = compiledModel.get_runtime_model();
for (const auto& op : compiled_model->get_ops()) {
const auto& rtInfo = op->get_rt_info();

const auto& typeIt = rtInfo.find("layerType");
const auto type = typeIt->second.as<std::string>();
if (type == layerType) {
const auto& nameIt = rtInfo.find("originalLayersNames");
const auto name = nameIt->second.as<std::string>();
ASSERT_EQ(originalLayersNames, name);
return;
}
}

ASSERT_TRUE(false) << "Layer type '" << layerType << "' was not found in compiled model";
}

} // namespace test
} // namespace ov

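Note that the find("layerType") iterator is dereferenced without an end() check, so an op whose rt_info lacks that entry would be undefined behavior. A slightly more defensive variant of the lookup (a suggestion, not part of the commit):

const auto typeIt = rtInfo.find("layerType");
if (typeIt == rtInfo.end())
    continue;  // skip ops that carry no "layerType" entry
const auto type = typeIt->second.as<std::string>();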
@ -0,0 +1,43 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "ngraph/ngraph.hpp"

namespace ov {
namespace test {
namespace snippets {

class FakeQuantizeFunction {
public:
// Parameter => Operation => FakeQuantize => Result
static std::shared_ptr<ov::Model> getOperationAndFakeQuantize(
const ngraph::Shape& inputShape,
const element::Type inputType,
const std::vector<ngraph::Shape>& fakeQuantizeShapes,
const float zeroPoint,
const std::vector<std::shared_ptr<ngraph::Node>>& prerequisites,
std::shared_ptr<ngraph::Node> operation = nullptr);

// Parameter => Subgraph (Parameter => FakeQuantize => Result) => Result
static std::shared_ptr<ov::Model> getSubgraphWithFakeQuantize(
const ngraph::Shape& inputShape,
const element::Type inputType,
const std::vector<ngraph::Shape>& fakeQuantizeShapes,
const float zeroPoint,
const std::vector<std::shared_ptr<ngraph::Node>>& prerequisites = {},
const std::vector<std::shared_ptr<Node>>& beforeFakeQuantizeOperations = {});

// Parameter => Subgraph (Parameter => element-wise ops from FakeQuantize decomposition results => Result) => Result
static std::shared_ptr<ov::Model> getSubgraphWithDecomposedFakeQuantize(
const ngraph::Shape& inputShape,
const element::Type inputType,
const std::vector<ngraph::Shape>& fakeQuantizeShapes,
const float zeroPoint);
};

} // namespace snippets
} // namespace test
} // namespace ov
@ -0,0 +1,28 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/ngraph.hpp>

namespace ov {
namespace test {
namespace snippets {

// TODO: workaround while element-wise operations after `Parameter` are not added in Subgraph
class FunctionHelper {
public:
static std::vector<std::shared_ptr<Node>> makePrerequisitesOriginal();

static std::shared_ptr<Node> applyPrerequisites(
const std::shared_ptr<Node>& parent,
const std::vector<std::shared_ptr<Node>>& prerequisites);

// index: -1 - latest `Subgraph` operation
static std::shared_ptr<Node> getSubgraph(const std::shared_ptr<Model>& f, const int index = -1);
};

} // namespace snippets
} // namespace test
} // namespace ov
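A typical use of the helper, mirroring the tokenization test's TearDown() above: fetch the last tokenized Subgraph (index defaults to -1) and inspect its body:

// Returns the last Subgraph in topological order, or nullptr when an explicit
// index is requested but not found.
auto node = ov::test::snippets::FunctionHelper::getSubgraph(model);
if (auto subgraph = std::dynamic_pointer_cast<ngraph::snippets::op::Subgraph>(node)) {
    auto body = subgraph->get_body();  // the ov::Model wrapped by the Subgraph
}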
@ -0,0 +1,264 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "fake_quantize_function.hpp"
#include "common_test_utils/data_utils.hpp"
#include <snippets/snippets_isa.hpp>
#include <snippets/op/subgraph.hpp>
#include "ngraph_functions/builders.hpp"
#include "function_helper.hpp"

namespace ov {
namespace test {
namespace snippets {

namespace {
std::shared_ptr<ngraph::op::FakeQuantize> makeFakeQuantize(
const Output<Node>& parent,
const ngraph::Shape& inputShape,
const element::Type inputType,
const std::vector<ngraph::Shape>& fakeQuantizeShapes,
const float zeroPoint) {
auto generate = [](const ov::element::Type precision,
const ngraph::Shape& shape,
const float initialValue,
const std::string& name) {
const auto size = ngraph::shape_size(shape);
std::vector<float> values(size);
for (size_t i = 0; i < size; ++i) {
values[i] = static_cast<float>(initialValue + i);
}
auto constant = std::make_shared<ngraph::opset1::Constant>(precision, shape, values);
constant->set_friendly_name(name);
return constant;
};

const auto fakeQuantize = std::make_shared<ngraph::opset1::FakeQuantize>(
parent,
generate(inputType, fakeQuantizeShapes[0], zeroPoint, "inputLow"),
generate(inputType, fakeQuantizeShapes[1], 20.f, "inputHigh"),
generate(inputType, fakeQuantizeShapes[2], zeroPoint, "outputLow"),
generate(inputType, fakeQuantizeShapes[3], 20.f, "outputHigh"),
256ul);
fakeQuantize->set_friendly_name("fakeQuantize");

return fakeQuantize;
}

std::shared_ptr<ngraph::opset1::Convolution> makeConvolution(const Output<Node>& parent) {
const auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 3, 3, 1, 1 }, { 1.f });
const auto convolution = std::make_shared<ngraph::opset1::Convolution>(
parent,
weights,
ngraph::Strides{ 1, 1 },
ngraph::CoordinateDiff{ 0, 0 },
ngraph::CoordinateDiff{ 0, 0 },
ngraph::Strides{ 1, 1 });
convolution->set_friendly_name("Convolution");
return convolution;
}

std::shared_ptr<ngraph::opset1::GroupConvolution> makeGroupConvolution(const Output<Node>& parent) {
const auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{ 1, 3, 3, 1, 1 }, { 1.f });
const auto convolution = std::make_shared<ngraph::opset1::GroupConvolution>(
parent,
weights,
ngraph::Strides{ 1, 1 },
ngraph::CoordinateDiff{ 0, 0 },
ngraph::CoordinateDiff{ 0, 0 },
ngraph::Strides{ 1, 1 });
convolution->set_friendly_name("GroupConvolution");
return convolution;
}

std::shared_ptr<ngraph::opset1::MatMul> makeMatMul(const Output<Node>& parent1, const Output<Node>& parent2) {
const auto matMul = std::make_shared<ngraph::opset1::MatMul>(parent1, parent2);
matMul->set_friendly_name("MatMul");
return matMul;
}

Output<Node> initOperation(std::shared_ptr<Node> operation, const std::vector<Output<Node>>& parents) {
if (is_type<ngraph::opset1::Convolution>(operation)) {
assert(parents.size() == 1ul);
return makeConvolution(parents[0]);
}

if (is_type<ngraph::opset1::GroupConvolution>(operation)) {
assert(parents.size() == 1ul);
return makeGroupConvolution(parents[0]);
}

if (is_type<ngraph::opset1::MatMul>(operation)) {
assert(parents.size() == 2ul);
return makeMatMul(parents[0], parents[1]);
}

operation->set_argument(0, parents[0]);
auto elementType = std::string(operation->get_type_name());
operation->set_friendly_name(elementType);

return operation;
}

// TODO: workaround while element-wise operations after `Parameter` are not added in Subgraph
std::shared_ptr<Node> getOperations(const std::vector<std::shared_ptr<Node>>& operations, const Output<Node>& parent) {
Output<Node> currentParent = parent;
for (auto operation : operations) {
operation->set_argument(0, currentParent);
currentParent = operation;
}
return currentParent.get_node_shared_ptr();
}

} // namespace

std::shared_ptr<ov::Model> FakeQuantizeFunction::getOperationAndFakeQuantize(
const ngraph::Shape& inputShape,
const element::Type inputType,
const std::vector<ngraph::Shape>& fakeQuantizeShapes,
const float zeroPoint,
const std::vector<std::shared_ptr<ngraph::Node>>& prerequisites,
std::shared_ptr<ngraph::Node> operation) {
assert(fakeQuantizeShapes.size() == 4ul);

const auto parameter = std::make_shared<ngraph::opset1::Parameter>(inputType, inputShape);
parameter->set_friendly_name("parameter");

auto parent = FunctionHelper::applyPrerequisites(parameter, prerequisites);

const auto fakeQuantize = makeFakeQuantize(
operation == nullptr ? parent : initOperation(operation, { parent }),
inputShape,
inputType,
fakeQuantizeShapes,
zeroPoint);

fakeQuantize->set_friendly_name("fakeQuantize");

const auto result = std::make_shared<ngraph::opset1::Result>(fakeQuantize);
result->set_friendly_name("result");

auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{ result }, ParameterVector{ parameter }, "FakeQuantizeFunction");
function->validate_nodes_and_infer_types();

return function;
}

std::shared_ptr<ov::Model> FakeQuantizeFunction::getSubgraphWithFakeQuantize(
const ngraph::Shape& inputShape,
const element::Type inputType,
const std::vector<ngraph::Shape>& fakeQuantizeShapes,
const float zeroPoint,
const std::vector<std::shared_ptr<ngraph::Node>>& prerequisites,
const std::vector<std::shared_ptr<Node>>& beforeFakeQuantizeOperations) {
assert(fakeQuantizeShapes.size() == 4ul);

auto getSubgraphBody = [](
const ngraph::Shape& inputShape,
const element::Type inputType,
const std::vector<ngraph::Shape>& fakeQuantizeShapes,
const float zeroPoint,
const std::vector<std::shared_ptr<Node>>& beforeFakeQuantizeOperations) {
const auto parameter = std::make_shared<ngraph::opset1::Parameter>(inputType, inputShape);
parameter->set_friendly_name("parameter");

const auto fakeQuantize = makeFakeQuantize(
getOperations(beforeFakeQuantizeOperations, {parameter}), inputShape, inputType, fakeQuantizeShapes, zeroPoint);

const auto result = std::make_shared<ngraph::opset1::Result>(fakeQuantize);
result->set_friendly_name("result");

return std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{parameter}, "SubgraphWithFakeQuantizeBody");
};

const auto parameter = std::make_shared<ngraph::opset1::Parameter>(inputType, inputShape);
parameter->set_friendly_name("parameter");

auto parent = FunctionHelper::applyPrerequisites(parameter, prerequisites);

const auto subgraph = std::make_shared<ngraph::snippets::op::Subgraph>(
ngraph::OutputVector{ parent },
getSubgraphBody(inputShape, inputType, fakeQuantizeShapes, zeroPoint, beforeFakeQuantizeOperations));
subgraph->set_friendly_name("subgraph");

const auto result = std::make_shared<ngraph::opset1::Result>(subgraph);
result->set_friendly_name("result");

auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{ result }, ParameterVector{ parameter }, "SubgraphWithFakeQuantize");
function->validate_nodes_and_infer_types();
return function;
}

std::shared_ptr<ov::Model> FakeQuantizeFunction::getSubgraphWithDecomposedFakeQuantize(
const ngraph::Shape& inputShape,
const element::Type inputType,
const std::vector<ngraph::Shape>& fakeQuantizeShapes,
const float zeroPoint) {
assert(fakeQuantizeShapes.size() == 4ul);

auto getSubgraphBody = [](
const ngraph::Shape& inputShape,
const element::Type inputType,
const std::vector<ngraph::Shape>& fakeQuantizeShapes,
const float zeroPoint) {
const auto parameter = std::make_shared<ngraph::opset1::Parameter>(inputType, inputShape);
parameter->set_friendly_name("parameter");

const auto maximum = std::make_shared<ngraph::opset1::Maximum>(
parameter,
std::make_shared<ngraph::opset1::Constant>(element::f32, Shape{}, std::vector<float>{1.f}));
maximum->set_friendly_name("inputLow");

const auto minimum = std::make_shared<ngraph::opset1::Minimum>(
maximum,
std::make_shared<ngraph::opset1::Constant>(element::f32, Shape{}, std::vector<float>{20.f}));
minimum->set_friendly_name("inputHigh");

const auto multiply = std::make_shared<ngraph::opset1::Multiply>(
minimum,
std::make_shared<ngraph::opset1::Constant>(element::f32, Shape{}, std::vector<float>{13.4211f}));
multiply->set_friendly_name("multiply");

const auto subtract = std::make_shared<ngraph::opset1::Subtract>(
multiply,
std::make_shared<ngraph::opset1::Constant>(element::f32, Shape{}, std::vector<float>{13.4211f}));
subtract->set_friendly_name("subtract");

const auto round = std::make_shared<ngraph::opset5::Round>(subtract, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN);
round->set_friendly_name("round");

const auto divide = std::make_shared<ngraph::opset1::Multiply>(
round,
std::make_shared<ngraph::opset1::Constant>(element::f32, Shape{}, std::vector<float>{0.0745098f}));
divide->set_friendly_name("divide");

const auto add = std::make_shared<ngraph::opset1::Add>(
divide,
std::make_shared<ngraph::opset1::Constant>(element::f32, Shape{}, std::vector<float>{1.f}));
add->set_friendly_name("add");

const auto result = std::make_shared<ngraph::opset1::Result>(add);
result->set_friendly_name("result");

return std::make_shared<ngraph::Function>(
ngraph::ResultVector{result}, ngraph::ParameterVector{parameter}, "SubgraphWithDecomposedFakeQuantizeBody");
};

const auto parameter = std::make_shared<ngraph::opset1::Parameter>(inputType, inputShape);
parameter->set_friendly_name("parameter");

const auto subgraph = std::make_shared<ngraph::snippets::op::Subgraph>(
ngraph::OutputVector {parameter},
getSubgraphBody(inputShape, inputType, fakeQuantizeShapes, zeroPoint));
subgraph->set_friendly_name("subgraph");

const auto result = std::make_shared<ngraph::opset1::Result>(subgraph);
result->set_friendly_name("result");

return std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{parameter}, "SubgraphWithDecomposedFakeQuantize");
}

} // namespace snippets
} // namespace test
} // namespace ov
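The magic constants in the decomposed body follow from the FakeQuantize parameters produced by makeFakeQuantize (levels = 256, range [zeroPoint, 20] with zeroPoint = 1 in these tests): input scale = (levels - 1) / (inputHigh - inputLow) = 255 / 19 ≈ 13.4211, and output scale = (outputHigh - outputLow) / (levels - 1) = 19 / 255 ≈ 0.0745098. A quick standalone check of that arithmetic:

#include <cassert>
#include <cmath>

int main() {
    const float levels = 256.f;
    const float inLow = 1.f, inHigh = 20.f;    // zeroPoint = 1.f, "inputHigh" constant = 20.f
    const float outLow = 1.f, outHigh = 20.f;  // same range on the output side in these tests

    const float inputScale  = (levels - 1.f) / (inHigh - inLow);   // 255 / 19 = 13.4211...
    const float outputScale = (outHigh - outLow) / (levels - 1.f); // 19 / 255 = 0.0745098...

    assert(std::fabs(inputScale - 13.4211f) < 1e-3f);
    assert(std::fabs(outputScale - 0.0745098f) < 1e-6f);
    return 0;
}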
@ -0,0 +1,73 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "function_helper.hpp"
#include "common_test_utils/data_utils.hpp"
#include <snippets/snippets_isa.hpp>
#include <snippets/op/subgraph.hpp>
#include "ngraph_functions/builders.hpp"

namespace ov {
namespace test {
namespace snippets {

// TODO: workaround while element-wise operations after `Parameter` are not added in Subgraph
std::vector<std::shared_ptr<Node>> FunctionHelper::makePrerequisitesOriginal() {
std::vector<std::shared_ptr<Node>> nodes;

const auto parameter = std::make_shared<ngraph::opset1::Parameter>();
parameter->set_friendly_name("parameter");
nodes.push_back(parameter);

const auto maxPool = std::make_shared<ngraph::opset1::MaxPool>(
parameter,
Strides{ 1, 1 }, // strides
Shape{ 0, 0 }, // pads_begin
Shape{ 0, 0 }, // pads_end
Shape{ 1, 1 }); // kernel
maxPool->set_friendly_name("maxPool");
nodes.push_back(maxPool);

return nodes;
}

std::shared_ptr<Node> FunctionHelper::applyPrerequisites(const std::shared_ptr<Node>& parent, const std::vector<std::shared_ptr<Node>>& prerequisites) {
std::shared_ptr<ngraph::Node> currentParent;
if (prerequisites.empty()) {
currentParent = parent;
} else {
auto begin = prerequisites[0];
if (is_type<ngraph::opset1::Parameter>(begin)) {
begin = prerequisites[1];
}
begin->set_argument(0, parent);

currentParent = *prerequisites.rbegin();
}
return currentParent;
}

std::shared_ptr<Node> FunctionHelper::getSubgraph(const std::shared_ptr<Model>& f, const int index) {
int currentIndex = 0;
std::shared_ptr<ngraph::snippets::op::Subgraph> subgraph;
for (const auto& op : f->get_ordered_ops()) {
auto tmp_subgraph = as_type_ptr<ngraph::snippets::op::Subgraph>(op);
if (tmp_subgraph != nullptr) {
if (index == currentIndex) {
return tmp_subgraph;
}
subgraph = tmp_subgraph;
currentIndex++;
}
}

if (index != -1) {
return nullptr;
}
return subgraph;
}

} // namespace snippets
} // namespace test
} // namespace ov
@ -13,19 +13,19 @@ namespace snippets {

std::shared_ptr<ov::Model> AddFunctionLoweredBroadcast::initLowered() const {
auto data0 = std::make_shared<op::v0::Parameter>(precision, input_shapes[0]);
auto load0 = std::make_shared<ngraph::snippets::op::Load>(data0);
std::shared_ptr<Node> add_input0 = load0;
if (!broadcast_shapes[0].empty()) {
auto broadcast0 = std::make_shared<ngraph::snippets::op::BroadcastMove>(load0, broadcast_shapes[0]);
add_input0 = broadcast0;
std::shared_ptr<Node> add_input0 = nullptr;
if (!broadcast_shapes[0].empty() && broadcast_shapes[0].back() != input_shapes[0].back()) {
add_input0 = std::make_shared<ngraph::snippets::op::BroadcastLoad>(data0, broadcast_shapes[0]);
} else {
add_input0 = std::make_shared<ngraph::snippets::op::Load>(data0);
}

auto data1 = std::make_shared<op::v0::Parameter>(precision, input_shapes[1]);
auto load1 = std::make_shared<ngraph::snippets::op::Load>(data1);
std::shared_ptr<Node> add_input1 = load1;
if (!broadcast_shapes[1].empty()) {
auto broadcast1 = std::make_shared<ngraph::snippets::op::BroadcastMove>(load1, broadcast_shapes[1]);
add_input1 = broadcast1;
std::shared_ptr<Node> add_input1 = nullptr;
if (!broadcast_shapes[1].empty() && broadcast_shapes[1].back() != input_shapes[1].back()) {
add_input1 = std::make_shared<ngraph::snippets::op::BroadcastLoad>(data1, broadcast_shapes[1]);
} else {
add_input1 = std::make_shared<ngraph::snippets::op::Load>(data1);
}
auto add = std::make_shared<op::v1::Add>(add_input0, add_input1);
auto store = std::make_shared<ngraph::snippets::op::Store>(add);

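The updated reference builder encodes the new lowering rule: when broadcasting is requested and the innermost dimensions actually differ, the input is read through a single BroadcastLoad instead of a Load followed by BroadcastMove. Per input i, the choice reduces to one comparison:

// Sketch of the selection rule used above:
const bool use_broadcast_load =
    !broadcast_shapes[i].empty() &&
    broadcast_shapes[i].back() != input_shapes[i].back();
// use_broadcast_load ? BroadcastLoad(data_i, broadcast_shapes[i])
//                    : Load(data_i)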