diff --git a/src/common/snippets/include/snippets/pass/fq_decomposition.hpp b/src/common/snippets/include/snippets/pass/fq_decomposition.hpp index cc62bd11354..30e23b66c55 100644 --- a/src/common/snippets/include/snippets/pass/fq_decomposition.hpp +++ b/src/common/snippets/include/snippets/pass/fq_decomposition.hpp @@ -6,8 +6,6 @@ #include "openvino/op/fake_quantize.hpp" #include "openvino/pass/graph_rewrite.hpp" -#include "snippets/pass/transform_convert.hpp" -#include "transformations_visibility.hpp" namespace ov { namespace snippets { @@ -82,6 +80,8 @@ public: class CommonFakeQuantizeDecomposition: public ov::pass::ModelPass { public: bool run_on_model(const std::shared_ptr& m) override; + + static bool is_supported_fq(const std::shared_ptr& fq); }; } // namespace pass diff --git a/src/common/snippets/include/snippets/pass/validate.hpp b/src/common/snippets/include/snippets/pass/validate.hpp new file mode 100644 index 00000000000..f0803d88bb1 --- /dev/null +++ b/src/common/snippets/include/snippets/pass/validate.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pattern/matcher.hpp" + +namespace ov { +namespace snippets { +namespace pass { + +/** + * @interface Validate + * @brief The pass validates OV model on correctness after all common optimizations + * @ingroup snippets + */ +class Validate: public ov::pass::ModelPass { +public: + OPENVINO_RTTI("Validate", "0"); + Validate(const std::shared_ptr& pass_config) : m_pass_config(pass_config) {} + + bool run_on_model(const std::shared_ptr& m) override; + +private: + bool is_supported_constant(const std::shared_ptr& op); + bool is_supported_convert(const std::shared_ptr& op); + bool is_supported_matmul(const std::shared_ptr& op); + bool is_supported_softmax(const std::shared_ptr& op); + bool is_supported_fq(const std::shared_ptr& node); + bool is_supported_transpose(const std::shared_ptr& node); + bool is_supported_op(const std::shared_ptr& node); + + // Pass config of CommonOptimizations that contains information: which of common passes are disabled + std::shared_ptr m_pass_config; +}; + +} // namespace pass +} // namespace snippets +} // namespace ov diff --git a/src/common/snippets/include/snippets/utils.hpp b/src/common/snippets/include/snippets/utils.hpp index 00badfb0469..525de3e03b2 100644 --- a/src/common/snippets/include/snippets/utils.hpp +++ b/src/common/snippets/include/snippets/utils.hpp @@ -51,6 +51,10 @@ constexpr bool everyone_is(T val, P item, Args... item_others) { return val == item && everyone_is(val, item_others...); } +constexpr inline bool implication(bool cause, bool cond) { + return !cause || !!cond; +} + VectorDims get_planar_vdims(const VectorDims& shape, const std::vector& layout); VectorDims get_planar_vdims(const snippets::lowered::PortDescriptorPtr& port_desc); VectorDims get_planar_vdims(const snippets::lowered::ExpressionPort& expr_port); diff --git a/src/common/snippets/src/pass/collapse_subgraph.cpp b/src/common/snippets/src/pass/collapse_subgraph.cpp index b437c4c37d1..9acea3720e9 100644 --- a/src/common/snippets/src/pass/collapse_subgraph.cpp +++ b/src/common/snippets/src/pass/collapse_subgraph.cpp @@ -9,6 +9,7 @@ #include "snippets/pass/tokenization.hpp" #include "snippets/pass/transpose_decomposition.hpp" #include "snippets/pass/fuse_transpose_brgemm.hpp" +#include "snippets/pass/fq_decomposition.hpp" #include "snippets/op/subgraph.hpp" #include "snippets/utils.hpp" @@ -86,15 +87,7 @@ auto is_supported_op(const std::shared_ptr &n) -> bool { }; auto is_supported_fq_op = [](const std::shared_ptr& n) -> bool { - // TODO [92179]: Add support of FakeQuantize with non-constants inputs and with binarization algorithm. - const auto fq = ov::as_type_ptr(n); - return fq && fq->get_levels() != 2 && - is_type(n->get_input_node_shared_ptr(1)) && - is_type(n->get_input_node_shared_ptr(2)) && - is_type(n->get_input_node_shared_ptr(3)) && - is_type(n->get_input_node_shared_ptr(4)) && - (fq->get_auto_broadcast() == ov::op::AutoBroadcastType::NUMPY || - fq->get_auto_broadcast() == ov::op::AutoBroadcastType::NONE); + return CommonFakeQuantizeDecomposition::is_supported_fq(ov::as_type_ptr(n)); }; auto is_supported_ternary_eltwise_op = [](const std::shared_ptr &n) -> bool { diff --git a/src/common/snippets/src/pass/common_optimizations.cpp b/src/common/snippets/src/pass/common_optimizations.cpp index ffbb43efb7a..609496cd026 100644 --- a/src/common/snippets/src/pass/common_optimizations.cpp +++ b/src/common/snippets/src/pass/common_optimizations.cpp @@ -9,6 +9,8 @@ #include "snippets/pass/explicit_transpose_matmul_inputs.hpp" #include "snippets/pass/transpose_decomposition.hpp" #include "snippets/pass/fuse_transpose_brgemm.hpp" +#include "snippets/pass/transform_convert.hpp" +#include "snippets/pass/validate.hpp" #include "snippets/op/subgraph.hpp" #include "snippets/itt.hpp" @@ -372,7 +374,7 @@ CommonOptimizations::CommonOptimizations(const SnippetsTokenization::Config& con // Firstly, we should transform all original Converts inside body to ConvertTruncation to save original behavior. // Then if Subgraph contains FakeQuantize we enable specific transformation for quantized subgraphs. - ov::pass::Manager manager; + ov::pass::Manager manager(get_pass_config()); manager.register_pass(); manager.register_pass(); if (is_quantized) { @@ -392,6 +394,10 @@ CommonOptimizations::CommonOptimizations(const SnippetsTokenization::Config& con if (config.split_m_dimension) SplitDimensionM(subgraph, config.concurrency); } + + // Validate the body after all common optimizations + ov::snippets::pass::Validate(get_pass_config()).run_on_model(body); + return true; }; diff --git a/src/common/snippets/src/pass/fq_decomposition.cpp b/src/common/snippets/src/pass/fq_decomposition.cpp index 0f849ba1f06..a173396544d 100644 --- a/src/common/snippets/src/pass/fq_decomposition.cpp +++ b/src/common/snippets/src/pass/fq_decomposition.cpp @@ -12,35 +12,19 @@ #include "openvino/pass/manager.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "openvino/pass/validate.hpp" + #include "openvino/reference/autobroadcast_binop.hpp" #include "openvino/reference/broadcast.hpp" + #include "snippets/itt.hpp" +#include "snippets/utils.hpp" #include "snippets/op/convert_saturation.hpp" -namespace { -bool isValidRangesInputs(const std::shared_ptr& fq) { - auto il = fq->input_value(1); - auto ih = fq->input_value(2); - auto greater_equal = std::make_shared(il, ih); - - ov::OutputVector result(1); - if (!greater_equal->constant_fold(result, greater_equal->input_values())) - return false; - - auto res_node = std::dynamic_pointer_cast(result[0].get_node_shared_ptr()); - - const std::vector comp_result = res_node->cast_vector(); - - return !std::any_of(comp_result.begin(), comp_result.end(), [](const bool value) { - return value; - }); -} -} // namespace ov::snippets::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { MATCHER_SCOPE(FakeQuantizeDecomposition); - auto fake_quantize = ov::pass::pattern::wrap_type( + auto fake_quantize = ov::pass::pattern::wrap_type( OutputVector{ov::pass::pattern::any_input(), ov::pass::pattern::wrap_type(), ov::pass::pattern::wrap_type(), @@ -50,14 +34,16 @@ ov::snippets::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::FakeQuantizeDecomposition") auto& pattern_to_output = m.get_pattern_value_map(); - const auto fake_quantize_node = std::dynamic_pointer_cast( + const auto fake_quantize_node = std::dynamic_pointer_cast( pattern_to_output.at(fake_quantize).get_node_shared_ptr()); - if (!fake_quantize_node || transformation_callback(fake_quantize_node) || - !isValidRangesInputs(fake_quantize_node)) { + if (!fake_quantize_node || transformation_callback(fake_quantize_node)) { return false; } + OPENVINO_ASSERT(CommonFakeQuantizeDecomposition::is_supported_fq(fake_quantize_node), + "FQ Decomposition got invalid FakeQuantize node with the name " + fake_quantize_node->get_friendly_name()); + Output data{fake_quantize_node->input_value(0)}; const Output input_low{fake_quantize_node->input_value(1)}; const Output input_high{fake_quantize_node->input_value(2)}; @@ -94,8 +80,8 @@ ov::snippets::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { // if we set input_low or input_high in formula we got output = output_low and output = output_high // respectively so we just clamp x - const auto max = std::make_shared(data, input_low); - const auto min = std::make_shared(max, input_high); + const auto max = std::make_shared(data, input_low); + const auto min = std::make_shared(max, input_high); decomp_ops.push_back(max); decomp_ops.push_back(min); @@ -106,30 +92,30 @@ ov::snippets::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { input_high.get_partial_shape(), broadcast_type); const auto scales = - std::make_shared(ov::element::f32, scale_shape.get_shape(), out_scales); + std::make_shared(ov::element::f32, scale_shape.get_shape(), out_scales); decomp_ops.push_back(scales); - result = std::make_shared(min, scales); + result = std::make_shared(min, scales); decomp_ops.push_back(result); } else { // (levels-1) const auto levels_minus_one = - std::make_shared(input_type, Shape{}, fake_quantize_node->get_levels() - 1); + std::make_shared(input_type, Shape{}, fake_quantize_node->get_levels() - 1); decomp_ops.push_back(levels_minus_one); // (input_high - input_low) - const auto subInHighLow = std::make_shared(input_high, input_low); + const auto subInHighLow = std::make_shared(input_high, input_low); // (levels-1) / (input_high - input_low) - const auto isc = std::make_shared(levels_minus_one, subInHighLow); + const auto isc = std::make_shared(levels_minus_one, subInHighLow); // input_low * (levels-1) / (input_high - input_low) - const auto ish = std::make_shared(input_low, isc); + const auto ish = std::make_shared(input_low, isc); decomp_ops.push_back(subInHighLow); decomp_ops.push_back(isc); decomp_ops.push_back(ish); // x * (levels-1) / (input_high - input_low) - const auto after_isc_apply = std::make_shared(min, isc); + const auto after_isc_apply = std::make_shared(min, isc); // x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low) - result = std::make_shared(after_isc_apply, ish); + result = std::make_shared(after_isc_apply, ish); decomp_ops.push_back(after_isc_apply); decomp_ops.push_back(result); } @@ -143,20 +129,20 @@ ov::snippets::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { if (do_dequantize) { // (levels-1) const auto levels_minus_one = - std::make_shared(input_type, Shape{}, fake_quantize_node->get_levels() - 1); + std::make_shared(input_type, Shape{}, fake_quantize_node->get_levels() - 1); // (output_high - output_low) - const auto sub_out_high_low = std::make_shared(output_high, output_low); + const auto sub_out_high_low = std::make_shared(output_high, output_low); // (output_high - output_low) / (levels-1) - const auto osc = std::make_shared(sub_out_high_low, levels_minus_one); + const auto osc = std::make_shared(sub_out_high_low, levels_minus_one); decomp_ops.push_back(sub_out_high_low); decomp_ops.push_back(osc); // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * // (output_high - output_low) / (levels-1) - const auto after_osc_apply = std::make_shared(result, osc); + const auto after_osc_apply = std::make_shared(result, osc); // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * // (output_high - output_low) / (levels-1) + output_low - result = std::make_shared(after_osc_apply, output_low); + result = std::make_shared(after_osc_apply, output_low); decomp_ops.push_back(after_osc_apply); decomp_ops.push_back(result); } @@ -177,21 +163,17 @@ ov::snippets::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { } bool ov::snippets::pass::FakeQuantizeDecomposition::getScalesAndShifts( - const std::shared_ptr& fq_node, + const std::shared_ptr& fq_node, std::vector& cl, std::vector& ch, std::vector& isc, std::vector& ish, std::vector& osc, std::vector& osh) { - auto input_low_constant = - std::dynamic_pointer_cast(fq_node->get_input_node_shared_ptr(1)); - auto input_high_constant = - std::dynamic_pointer_cast(fq_node->get_input_node_shared_ptr(2)); - auto output_low_constant = - std::dynamic_pointer_cast(fq_node->get_input_node_shared_ptr(3)); - auto output_high_constant = - std::dynamic_pointer_cast(fq_node->get_input_node_shared_ptr(4)); + auto input_low_constant = ov::as_type_ptr(fq_node->get_input_node_shared_ptr(1)); + auto input_high_constant = ov::as_type_ptr(fq_node->get_input_node_shared_ptr(2)); + auto output_low_constant = ov::as_type_ptr(fq_node->get_input_node_shared_ptr(3)); + auto output_high_constant = ov::as_type_ptr(fq_node->get_input_node_shared_ptr(4)); if (!input_low_constant || !input_high_constant || !output_low_constant || !output_high_constant) return false; @@ -305,12 +287,12 @@ bool ov::snippets::pass::FakeQuantizeDecomposition::getScalesAndShifts( } std::vector ov::snippets::pass::FakeQuantizeDecomposition::calculateScales(const ov::element::Type& out_type, - const std::vector& cl, - const std::vector& ch, - const std::vector& isc, - const std::vector& ish, - const std::vector& osc, - const std::vector& osh) { + const std::vector& cl, + const std::vector& ch, + const std::vector& isc, + const std::vector& ish, + const std::vector& osc, + const std::vector& osh) { std::vector out_scales; if (out_type == ov::element::u8 && std::all_of(cl.cbegin(), @@ -360,6 +342,32 @@ std::vector ov::snippets::pass::FakeQuantizeDecomposition::calculateScale return out_scales; } +bool ov::snippets::pass::CommonFakeQuantizeDecomposition::is_supported_fq(const std::shared_ptr& fq) { + // TODO [92179]: Add support of FakeQuantize with non-constants inputs and with binarization algorithm. + auto is_valid_range_values = [](const std::shared_ptr& fq) { + const auto il = fq->input_value(1); + const auto ih = fq->input_value(2); + const auto greater_equal = std::make_shared(il, ih); + + ov::OutputVector result(1); + if (!greater_equal->constant_fold(result, greater_equal->input_values())) + return false; + + const auto res_node = std::dynamic_pointer_cast(result[0].get_node_shared_ptr()); + const auto comp_result = res_node->cast_vector(); + return !std::any_of(comp_result.begin(), comp_result.end(), [](const bool value) { + return value; + }); + }; + return fq && fq->get_levels() != 2 && + ov::is_type(fq->get_input_node_shared_ptr(1)) && + ov::is_type(fq->get_input_node_shared_ptr(2)) && + ov::is_type(fq->get_input_node_shared_ptr(3)) && + ov::is_type(fq->get_input_node_shared_ptr(4)) && + utils::one_of(fq->get_auto_broadcast(), ov::op::AutoBroadcastType::NUMPY, ov::op::AutoBroadcastType::NONE) && + is_valid_range_values(fq); +} + bool ov::snippets::pass::CommonFakeQuantizeDecomposition::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(CommonFakeQuantizeDecomposition); ov::pass::Manager manager; diff --git a/src/common/snippets/src/pass/validate.cpp b/src/common/snippets/src/pass/validate.cpp new file mode 100644 index 00000000000..d80549403d0 --- /dev/null +++ b/src/common/snippets/src/pass/validate.cpp @@ -0,0 +1,111 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/validate.hpp" + +#include "snippets/op/convert_saturation.hpp" +#include "snippets/op/convert_truncation.hpp" +#include "snippets/pass/explicit_transpose_matmul_inputs.hpp" +#include "snippets/pass/fq_decomposition.hpp" +#include "snippets/utils.hpp" +#include "snippets/itt.hpp" + +#include "openvino/op/fake_quantize.hpp" +#include "openvino/op/broadcast.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/softmax.hpp" +#include "openvino/core/validation_util.hpp" + + +namespace ov { +namespace snippets { +namespace pass { + +namespace { +#define VALIDATE(op, op_type, validator) \ + if (ov::is_type(op)) \ + OPENVINO_ASSERT(validator(op), "Snippets validation of OV body has been failed: " + \ + std::string(op->get_type_name()) + " op " + op->get_friendly_name() + " is not supported"); \ + else + +} // namespace + +bool Validate::is_supported_constant(const std::shared_ptr& op) { + const auto constant = ov::as_type_ptr(op); + const auto consumers = op->get_output_target_inputs(0); + return constant && + (ov::shape_size(constant->get_output_shape(0)) == 1 || + std::all_of(consumers.cbegin(), consumers.cend(), + [](const ov::Input& in) { + return ov::is_type(in.get_node()) || + ov::is_type(in.get_node()) || + ov::is_type(in.get_node()); + })); +} + +bool Validate::is_supported_convert(const std::shared_ptr& op) { + return ov::is_type(op) || ov::is_type(op); +} + +bool Validate::is_supported_matmul(const std::shared_ptr& op) { + // If ExplicitTransposeMatMulInputs pass is enabled, MatMul should have not transposed inputs + const auto matmul = ov::as_type_ptr(op); + return matmul && utils::implication(m_pass_config->is_enabled(), + !matmul->get_transpose_a() && !matmul->get_transpose_b()); +} + +bool Validate::is_supported_softmax(const std::shared_ptr& op) { + // Softmax is supported only with axis by last dim + const auto softmax_rank = op->get_input_partial_shape(0).rank(); + int64_t axis = 0; + if (const auto softmax_v8 = ov::as_type_ptr(op)) { + OPENVINO_SUPPRESS_DEPRECATED_START + axis = ov::normalize_axis(softmax_v8->get_friendly_name(), softmax_v8->get_axis(), softmax_rank); + OPENVINO_SUPPRESS_DEPRECATED_END + } else if (const auto softmax_v1 = ov::as_type_ptr(op)) { + axis = softmax_v1->get_axis(); + } else { + return false; + } + return axis == softmax_rank.get_length() - 1; +} + +bool Validate::is_supported_fq(const std::shared_ptr& node) { + // FQ is decomposed into ops in CommonFakeQuantizeDecomposition pass + return m_pass_config->is_disabled(); +} + +bool Validate::is_supported_transpose(const std::shared_ptr& node) { + // Transpose is supported only on Inputs or Outputs of body + const auto consumers = node->get_output_target_inputs(0); + return (ov::is_type(node->get_input_node_shared_ptr(0))) || + (consumers.size() == 1 && ov::is_type(consumers.cbegin()->get_node())); +} + +bool Validate::is_supported_op(const std::shared_ptr& node) { + return false; +} + +bool Validate::run_on_model(const std::shared_ptr& m) { + RUN_ON_MODEL_SCOPE(Validate); + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::op::Validate") + + for (const auto& op : m->get_ordered_ops()) { + VALIDATE(op, ov::op::v0::Constant, is_supported_constant) + VALIDATE(op, ov::op::v0::Convert, is_supported_convert) + VALIDATE(op, ov::op::v0::MatMul, is_supported_matmul) + VALIDATE(op, ov::op::v1::Softmax, is_supported_softmax) + VALIDATE(op, ov::op::v8::Softmax, is_supported_softmax) + VALIDATE(op, ov::op::v0::FakeQuantize, is_supported_fq) + VALIDATE(op, ov::op::v1::Transpose, is_supported_transpose) + VALIDATE(op, ov::op::v1::Reshape, is_supported_op); + } + return true; +} + +} // namespace pass +} // namespace snippets +} // namespace ov