diff --git a/src/plugins/intel_gna/src/backend/gna_limitations.cpp b/src/plugins/intel_gna/src/backend/gna_limitations.cpp index 0ecf83e1436..004fcdba370 100644 --- a/src/plugins/intel_gna/src/backend/gna_limitations.cpp +++ b/src/plugins/intel_gna/src/backend/gna_limitations.cpp @@ -15,12 +15,215 @@ #include "gna/gna_config.hpp" #include "gna_graph_tools.hpp" #include "gna_lib_ver_selector.hpp" +#include "ie_ngraph_utils.hpp" #include "log/log.hpp" +#include "ops/util/util.hpp" namespace ov { namespace intel_gna { using namespace common; namespace limitations { +namespace { +std::ostream& operator<<(std::ostream& os, const std::set& t) { + for (auto it = t.begin(); it != t.end(); ++it) { + if (it != t.begin()) { + os << ", " << *it; + } else { + os << *it; + } + } + return os; +} +} // namespace + +const std::set SupportedElementTypes::supported_parameter_types = {ov::element::u8, + ov::element::i16, + ov::element::f32}; + +bool SupportedElementTypes::is_parameter_type_supported(ov::element::Type elem_type, bool is_exception_allowed) { + if (supported_parameter_types.count(elem_type) == 0) { + if (is_exception_allowed) { + THROW_GNA_EXCEPTION << "The plugin does not support input precision with " << elem_type.get_type_name() + << " format. Supported precisions " << supported_parameter_types << "\n"; + } + return false; + } + return true; +} + +const std::set SupportedElementTypes::supported_constant_types = {ov::element::i8, + ov::element::u8, + ov::element::i16, + ov::element::u16, + ov::element::i32, + ov::element::f32, + ov::element::f64}; + +bool SupportedElementTypes::is_constant_type_supported(ov::element::Type elem_type, bool is_exception_allowed) { + if (supported_constant_types.count(elem_type) == 0) { + if (is_exception_allowed) { + THROW_GNA_EXCEPTION << "The plugin does not support constant precision with " << elem_type.get_type_name() + << " format. 
Supported precisions " << supported_constant_types << "\n"; + } + return false; + } + return true; +} + +bool is_conv_supported(const std::shared_ptr& conv_ie, + const ov::intel_gna::common::DeviceVersion& effective_compile_target, + const InferenceEngine::Precision gna_precision, + bool is_exception_allowed) { + OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!"); + size_t batch_size = conv_ie->input_value(0).get_shape()[0]; + if (batch_size != 1) { + if (is_exception_allowed) { + THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() + + ", type: " + conv_ie->get_type_name() + ", and batch size(" + + std::to_string(batch_size) + ") != 1 not supported"; + } + return false; + } + auto check_dilation = [&](size_t filter_dilation_height, size_t filter_stride_width) -> bool { + cnn2d::RangeLimit2D dilation_limit{{convDilationHeight, convDilationHeight, "dilation height"}, + {convDilationWidth, convDilationWidth, "dilation width"}}; + std::string error = dilation_limit.GetErrorOrEmpty(filter_dilation_height, filter_stride_width); + return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed, + error, + conv_ie->get_friendly_name(), + conv_ie->get_type_name()); + }; + auto input_shape = conv_ie->input_value(0).get_shape(); + auto filter_shape = conv_ie->input_value(1).get_shape(); + if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) || + (4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) { + pass::helper::ConvData conv_data; + pass::helper::GetConvData(conv_ie, conv_data); + if (gna_convolution_layer::isMappableFrom2DTo1D(conv_data.input_height, + conv_data.input_width, + conv_data.input_channel_count, + conv_data.filter_height, + conv_data.filter_width, + conv_data.filter_stride_height, + conv_data.filter_stride_width)) { + return check_dilation(conv_data.filter_dilation_height, conv_data.filter_dilation_width); + } + const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target); + if (cnn2dValidatorPtr) { + return cnn2dValidatorPtr->ValidateCnn2D(conv_ie->get_friendly_name(), + conv_data.input_height, + conv_data.input_width, + conv_data.input_channel_count, + conv_data.filter_height, + conv_data.filter_width, + conv_data.filter_channel_count, + conv_data.filter_stride_height, + conv_data.filter_stride_width, + conv_data.filter_dilation_height, + conv_data.filter_dilation_width, + OvGnaTypeIntFromBytes(gna_precision.size()), + is_exception_allowed); + } + } + return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]); +} + +bool is_pooling_supported(const std::shared_ptr max_pool, + const ov::intel_gna::common::DeviceVersion& effective_compile_target, + bool is_exception_allowed) { + OPENVINO_ASSERT(max_pool, "MaxPool node is empty!"); + auto kernels = max_pool->get_kernel(); + if (2 == kernels.size() && kernels[0] > 1 && kernels[1] > 1) { + const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target); + if (cnn2dValidatorPtr) { + auto strides = max_pool->get_strides(); + return cnn2dValidatorPtr->ValidatePooling2D(max_pool->get_friendly_name(), + kernels[0], + kernels[1], + strides[0], + strides[1], + is_exception_allowed); + } + } + return true; +} + +bool is_fc_supported(const std::shared_ptr& fully_connected, bool is_exception_allowed) { + OPENVINO_ASSERT(fully_connected, "FullyConnected node is empty!"); + size_t output_batch_size = fully_connected->get_output_shape(0)[0]; + if (output_batch_size > 8) { + if 
(is_exception_allowed) { + THROW_GNA_EXCEPTION << "topology with layer: " + fully_connected->get_friendly_name() + + ", type: " + fully_connected->get_type_name() + ", and batch size(" + + std::to_string(output_batch_size) + ") not supported"; + } + return false; + } + return true; +} + +bool is_split_supported(const std::shared_ptr& node, bool is_exception_allowed) { + OPENVINO_ASSERT(node, "Split node is empty!"); + bool is_aligned = true; + for (size_t i = 0; i < node->get_output_size(); i++) { + is_aligned &= ov::intel_gna::ngraph_util::is_aligned_split(node, i); + } + return is_aligned; +} + +bool is_op_supported(const std::shared_ptr& node, + const ov::intel_gna::common::DeviceVersion& effective_compile_target, + const InferenceEngine::Precision gna_precision, + bool is_exception_allowed) { + if (ov::op::util::is_parameter(node)) { + return SupportedElementTypes::is_parameter_type_supported(node->get_element_type(), is_exception_allowed); + } else if (ov::op::util::is_constant(node)) { + return SupportedElementTypes::is_constant_type_supported(node->get_element_type(), is_exception_allowed); + } else if (auto conv_ie = std::dynamic_pointer_cast(node)) { + return is_conv_supported(conv_ie, effective_compile_target, gna_precision, is_exception_allowed); + } else if (auto fully_connected = std::dynamic_pointer_cast(node)) { + return is_fc_supported(fully_connected, is_exception_allowed); + } else if (ov::intel_gna::ngraph_util::is_pooling(node)) { + return is_pooling_supported(std::dynamic_pointer_cast(node), + effective_compile_target, + is_exception_allowed); + } else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) || + ov::intel_gna::ngraph_util::is_eltwise_add(node) || ov::intel_gna::ngraph_util::is_eltwise_mul(node) || + ov::intel_gna::ngraph_util::is_crop_affined(node) || + ov::intel_gna::ngraph_util::is_activation(node.get()) || + ov::intel_gna::ngraph_util::is_gna_precision_agnostic( + node) || // check concat/split are aligned when transformations will be moved to ngraph + (std::dynamic_pointer_cast(node) != nullptr) || + (std::dynamic_pointer_cast(node) != nullptr) || + (std::dynamic_pointer_cast(node) != nullptr) || + (std::dynamic_pointer_cast(node) != nullptr)) { + return true; + } else if (ov::intel_gna::ngraph_util::is_gna_precision_agnostic(node)) { + if ((std::dynamic_pointer_cast(node) != nullptr) || + (std::dynamic_pointer_cast(node) != nullptr)) { + return is_split_supported(node, is_exception_allowed); + } + // TODO check concat are aligned when transformation will be moved to ngraph + return true; + } + return false; +} + +void check_all_ops_supported(const std::shared_ptr& model, + const ov::intel_gna::common::DeviceVersion& effective_compile_target, + const InferenceEngine::Precision gna_precision) { + std::stringstream error; + // Walk through the transformed model + for (auto& op : model->get_ops()) { + if (!is_op_supported(op, effective_compile_target, gna_precision, true)) { + error << "The plugin does not support layer " << op->get_friendly_name() << " (type " << op->get_type_name() + << ")!" 
<< std::endl; + } + } + if (!error.str().empty()) { + THROW_GNA_EXCEPTION << error.str(); + } +} namespace cnn2d { bool IsEqualToLimit::isValid(const uint32_t val) const { @@ -655,16 +858,9 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe // If there are no inputs start search from an output startLayer = getCreatorLayer(outputs.begin()->second).lock(); } else { - auto network_input_precision = inputs.begin()->second->getPrecision(); - - if (network_input_precision != InferenceEngine::Precision::FP32 && - network_input_precision != InferenceEngine::Precision::I16 && - network_input_precision != InferenceEngine::Precision::U8) { - errMessage = "The plugin does not support input precision with " + - std::string(network_input_precision.name()) + - " format. Supported input precisions FP32, I16, U8\n"; - return false; - } + SupportedElementTypes::is_parameter_type_supported( + InferenceEngine::details::convertPrecision(inputs.begin()->second->getPrecision()), + true); auto& secondLayers = getInputTo(inputs.begin()->second->getInputData()); if (secondLayers.empty()) { diff --git a/src/plugins/intel_gna/src/backend/gna_limitations.hpp b/src/plugins/intel_gna/src/backend/gna_limitations.hpp index e210bda8802..5db7a456f00 100644 --- a/src/plugins/intel_gna/src/backend/gna_limitations.hpp +++ b/src/plugins/intel_gna/src/backend/gna_limitations.hpp @@ -13,6 +13,10 @@ #include "common/gna_target.hpp" #include "dnn_types.hpp" #include "gna_lib_ver_selector.hpp" +#include "legacy/ngraph_ops/convolution_ie.hpp" +#include "legacy/ngraph_ops/fully_connected.hpp" +#include "ngraph/opsets/opset7.hpp" +#include "ngraph/opsets/opset9.hpp" namespace ov { namespace intel_gna { @@ -72,6 +76,80 @@ inline bool IsTransposeSupported(const std::vector& shape) { return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize; } +class SupportedElementTypes { +public: + static bool is_parameter_type_supported(ov::element::Type type, bool is_exception_allowed = false); + static bool is_constant_type_supported(ov::element::Type type, bool is_exception_allowed = false); + +private: + static const std::set supported_parameter_types; + static const std::set supported_constant_types; +}; + +/** + * @brief Validates if legacy convolution is supported by GNA + * @param conv_ie legacy convolution node + * @param effective_compile_target GNA compile target + * @param gna_precision GNA inference precision + * @param is_exception_allowed flag specifying whether an exception is allowed + * @return true if supported + */ +bool is_conv_supported(const std::shared_ptr& conv_ie, + const ov::intel_gna::common::DeviceVersion& effective_compile_target, + const InferenceEngine::Precision gna_precision, + bool is_exception_allowed = false); +/** + * @brief Validates if max pooling is supported by GNA + * @param max_pool max pooling node + * @param effective_compile_target GNA compile target + * @param is_exception_allowed flag specifying whether an exception is allowed + * @return true if supported + */ +bool is_pooling_supported(const std::shared_ptr max_pool, + const ov::intel_gna::common::DeviceVersion& effective_compile_target, + bool is_exception_allowed = false); + +/** + * @brief Validates if fully connected is supported by GNA + * @param fully_connected fully connected node + * @param is_exception_allowed flag specifying whether an exception is allowed + * @return true if supported + */ +bool is_fc_supported(const std::shared_ptr& fully_connected, + 
bool is_exception_allowed = false); + +/** + * @brief Validates if split is supported by GNA + * @param node split node + * @param is_exception_allowed flag specifying whether an exception is allowed + * @return true if supported + */ +bool is_split_supported(const std::shared_ptr& node, bool is_exception_allowed = false); + +/** + * @brief Validates if operation is supported by GNA + * @param node operation node + * @param effective_compile_target GNA compile target + * @param gna_precision GNA inference precision + * @param is_exception_allowed flag specifying whether an exception is allowed + * @return true if supported + */ +bool is_op_supported(const std::shared_ptr& node, + const ov::intel_gna::common::DeviceVersion& effective_compile_target, + const InferenceEngine::Precision gna_precision, + bool is_exception_allowed = false); + +/** + * @brief Check if all operations are supported by GNA + * @param model ngraph model + * @param effective_compile_target GNA compile target + * @param gna_precision GNA inference precision + */ +void check_all_ops_supported(const std::shared_ptr& model, + const ov::intel_gna::common::DeviceVersion& effective_compile_target, + const InferenceEngine::Precision gna_precision); + namespace cnn2d { struct IsEqualToLimit { @@ -147,12 +225,13 @@ struct RectLimitByChannelsAndPrecision { class AbstractValidator { protected: static void ThrowIfNotEmpty(const std::string& prefix, const std::string& error); + +public: static bool ValidationSuccesful(const bool throwOnError, const std::string& error, const std::string& operation, const std::string& type); -public: virtual ~AbstractValidator() = default; virtual bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, diff --git a/src/plugins/intel_gna/src/frontend/model_quantizer.hpp b/src/plugins/intel_gna/src/frontend/model_quantizer.hpp index 951af012d27..253f88780f9 100644 --- a/src/plugins/intel_gna/src/frontend/model_quantizer.hpp +++ b/src/plugins/intel_gna/src/frontend/model_quantizer.hpp @@ -15,6 +15,7 @@ #include "gna_graph_tools.hpp" #include "gna_itt.hpp" #include "gna_plugin_config.hpp" +#include "gna_transformations_pipeline.hpp" #include "layer_quantizer.hpp" #include "scale_factor_calc.hpp" #include "weights_converter.hpp" @@ -27,17 +28,11 @@ namespace frontend { * Quantize entire network */ class ModelQuantizer { - const Config& gna_config; - const bool fake_quantized; + ov::intel_gna::TransformationsPipeline& gna_transformer; public: - ModelQuantizer(const Config& gna_config, const bool fake_quantized) - : gna_config(gna_config), - fake_quantized(fake_quantized) {} - template - InferenceEngine::CNNNetwork quantize(const InferenceEngine::CNNNetwork& model, - const PreQuantisationCb& cb, - const GnaInputs& inputs) const { + ModelQuantizer(ov::intel_gna::TransformationsPipeline& transformer) : gna_transformer(transformer) {} + InferenceEngine::CNNNetwork quantize(const InferenceEngine::CNNNetwork& model, const GnaInputs& inputs) const { OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ModelQuantizer::quantize"); auto visitor = [&](InferenceEngine::CNNLayerPtr layer_ptr) { auto new_layer = InferenceEngine::injectData(layer_ptr); @@ -46,12 +41,12 @@ public: }; InferenceEngine::CNNNetwork copied_net = InferenceEngine::CNNNetCopy(model); - cb(copied_net, true, gna_config.gnaFlags.input_low_precision); + gna_transformer.apply_legacy(copied_net, true); copied_net = InferenceEngine::CNNNetCopy(copied_net, visitor); // Allow client code to access copied topology, to avoid copies if user would like to chain quantisation with //
another preprocessing - cb(copied_net, false, gna_config.gnaFlags.input_low_precision); + gna_transformer.apply_legacy(copied_net, false); auto sorted_new_net = InferenceEngine::details::CNNNetSortTopologically(copied_net); log::debug() << "Sorted layers: " << std::endl; @@ -67,7 +62,7 @@ public: // Propagate scale factor and quantize layers propagateScaleFactor(sorted_new_net); - frontend::LayerQuantizer lq(gna_config); + frontend::LayerQuantizer lq(gna_transformer.config); for (auto&& layer : sorted_new_net) { lq.quantize(*layer); @@ -78,7 +73,7 @@ public: private: void propagateScaleFactor(std::vector& net) const { - ScaleFactorCalculator sf(net, gna_config, fake_quantized); + ScaleFactorCalculator sf(net, gna_transformer.config, gna_transformer.is_fake_quantized()); uint32_t inf_loop_count = 0; std::vector inf_loop_pattern; std::vector inf_loop_history; diff --git a/src/plugins/intel_gna/src/gna_graph_compiler.cpp b/src/plugins/intel_gna/src/gna_graph_compiler.cpp index 86fe8c88a6c..32b2e6be8be 100644 --- a/src/plugins/intel_gna/src/gna_graph_compiler.cpp +++ b/src/plugins/intel_gna/src/gna_graph_compiler.cpp @@ -385,11 +385,6 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer) THROW_GNA_LAYER_EXCEPTION(layer) << "with batch size not equals 1 is not supported"; } - if (convolution._dilation_x != 1 || convolution._dilation_y != 1) { - // TODO: Issue 24839 - THROW_GNA_LAYER_EXCEPTION(layer) << "with dilation is not supported on GNA"; - } - if (convolution._kernel_x > in_width * in_height) { THROW_GNA_LAYER_EXCEPTION(layer) << "Kernel dimensions X (" << convolution._kernel_x << ")" << " is bigger than total input dimensions WxH (" << in_width << "x" diff --git a/src/plugins/intel_gna/src/gna_plugin.cpp b/src/plugins/intel_gna/src/gna_plugin.cpp index 76990fb704d..4141f677109 100644 --- a/src/plugins/intel_gna/src/gna_plugin.cpp +++ b/src/plugins/intel_gna/src/gna_plugin.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -19,26 +18,11 @@ #include #include #include -#include #include #include #include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include @@ -58,10 +42,10 @@ #include "gna_model_serial.hpp" #include "gna_plugin_config.hpp" #include "gna_tensor_tools.hpp" +#include "gna_transformations_pipeline.hpp" #include "layers/gna_layer_type.hpp" #include "log/log.hpp" #include "memory/gna_memory_state.hpp" -#include "optimizer/gna_pass_manager.hpp" #include "orientation_helper.hpp" #include "preprocessing.hpp" #include "request/model_wrapper_factory.hpp" @@ -69,37 +53,8 @@ #include "request/worker_pool_impl.hpp" #include "runtime/gna_float_runtime.hpp" #include "scale_factor_helper.hpp" -#include "transformations/broadcast_const.hpp" -#include "transformations/common_optimizations/concat_reduce_fusion.hpp" -#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" -#include "transformations/convert_dwsc_to_scaleshifts.hpp" -#include "transformations/convert_matmul_to_pointwise_convolution.hpp" -#include "transformations/convert_padded_to_valid_convolution.hpp" -#include "transformations/convert_precision.hpp" -#include "transformations/decompose_2d_convolution.hpp" -#include "transformations/decompose_mvn.hpp" -#include "transformations/disable_decompression_convert_constant_folding.hpp" -#include "transformations/handle_transposes_around_matmul.hpp" -#include 
"transformations/insert_copy_layer.hpp" -#include "transformations/insert_identity_layer.hpp" -#include "transformations/insert_reshape_around_matmul.hpp" -#include "transformations/insert_transpose_after_convolution_or_pooling.hpp" -#include "transformations/markup_fusable_transpose.hpp" -#include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp" -#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" -#include "transformations/op_conversions/gru_cell_decomposition.hpp" -#include "transformations/op_conversions/lstm_cell_decomposition.hpp" -#include "transformations/op_conversions/softsign_decomposition.hpp" -#include "transformations/pwl_approximation.hpp" -#include "transformations/remove_converts.hpp" -#include "transformations/remove_extra_reshapes.hpp" -#include "transformations/remove_single_input_concat.hpp" -#include "transformations/reorder_activation_and_pooling.hpp" -#include "transformations/split_convolution_with_large_buffer_size.hpp" -#include "transformations/split_eltwise.hpp" -#include "transformations/substitute_softsign.hpp" -#include "transformations/swap_input_matmul_gna.hpp" -#include "transformations/unfuse_reshape_and_transpose.hpp" + +using namespace ov::intel_gna::ngraph_util; inline uint32_t ToByteSize(const Gna2DataType type) { switch (type) { @@ -706,130 +661,20 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) { const auto effectiveCompileTarget = config.target->get_effective_compile_target(); graphCompiler.SetValidatorTarget(effectiveCompileTarget); - bool isNgraphPassesUsed = false; - bool fake_quantized = false; + auto transformer = TransformationsPipeline(config, effectiveCompileTarget); if (_network.getFunction()) { CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network); - const auto& graph = clonedNetwork.getFunction(); - ngraph::pass::Manager manager; - manager.register_pass(); - - fake_quantized = ov::op::util::has_op_with_type(graph); - // In OV API 2.0(IRv10) default convertion to fp32 (inputs, outputs and weights) is disabled - // and we need to run the ConvertPrecision transformation to support old networks. 
- manager.register_pass( - precisions_array{{ngraph::element::f16, ngraph::element::f32}}); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(effectiveCompileTarget, - config.gnaPrecision); - manager.register_pass(effectiveCompileTarget, - config.gnaPrecision); - manager.register_pass(effectiveCompileTarget, config.gnaPrecision); - // TODO enable this transformation for networks with convolutions - if (!ov::op::util::has_op_with_type(graph)) { - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - } - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - /* - Put BroadcastAddMultiplyConst here after ConvertOpSet..() transformations since there are conficts with them. - ngraph::pass::ConvertOpSet1ToLegacy -> ngraph::pass::BiasFusions -> - ngraph::pass::ConvAddFusion, ngraph::pass::ConvMultiplyFusion - That transormations fuse bias into convolution and recognizes const node as [1, C, 1, 1]. - TODO: move that transformation just beyond RemoveSingleInputConcat pass after removing ConvertOpSet1ToLegacy - transormations - */ - manager.register_pass(); - /* - SplitEltwise has dependency on BroadcastAddMultiplyConst for case when spliting of Constant - input is doing - */ - manager.register_pass(); - /* The following transformations perform insertion of Identity layer in 3 steps: - 1. Mark inputs with rt_info attribute where precision change from i32 to i16/i8 is happened - 2. Insert Identity after operation which have consumers marked with precision change - 3. 
Cleanup appropriate attribute from rt_info - */ - manager.register_pass(config.gnaFlags.input_low_precision); - manager.register_pass(); - manager.register_pass(); - // Breaks fusing of layers before result - manager.register_pass(); - if (!config.gnaFlags.sw_fp32 && !config.gnaFlags.uniformPwlDesign) { - manager.register_pass(config.gnaFlags.pwlMaxErrorPercent); - manager.register_pass(config.gnaFlags.pwlMaxErrorPercent); - } - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - const auto& pass_config = manager.get_pass_config(); - - // Allowing FP16 Converts to be folded and FP16 constants to upgrade to FP32 data type - pass_config->disable(); - pass_config->disable(); - - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - // Consider to enable after per-channel quantization on FakeQuantize layer is supported in GNAPlugin, see issue - // 52034 - pass_config->disable(); - // TransposeReduction can be enabled when Transpose-Conv-Transpose patterns will be handled in ngraph - // transformations - pass_config->disable(); - // Operations Max and Min aren't supported - pass_config->disable(); - // pass_config->disable(); - manager.run_passes(graph); - convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(graph, clonedNetwork); - isNgraphPassesUsed = true; + auto model = clonedNetwork.getFunction(); + transformer.apply(model); + limitations::check_all_ops_supported(model, effectiveCompileTarget, config.gnaPrecision); + convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(model, clonedNetwork); } IE_SUPPRESS_DEPRECATED_START InferenceEngine::CNNNetwork network = convertedNetwork ? InferenceEngine::CNNNetwork{convertedNetwork} : _network; IE_SUPPRESS_DEPRECATED_END - NetPass::ConvertPrecision(network, Precision::I64, Precision::I32); - NetPass::ConvertPrecision(network, Precision::U64, Precision::I32); - NetPass::ConvertPrecision(network, Precision::U32, Precision::I32); + transformer.convert_precision_legacy(network); // Check the network std::string error; @@ -849,7 +694,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) { // Set Scale Factors for inputs according to configuration. 
ov::intel_gna::helpers::ApplyInputScaleFactors(*inputs_ptr_, config); - if (fake_quantized) { + if (transformer.is_fake_quantized()) { UpdateInputScaleFromNetwork(network); } @@ -857,56 +702,6 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) { FillInputsAndOutputsTranspositionInfo(network); } - // network optimisation phases - int passIdx = 0; - auto run_passes = [&](const CNNNetwork& network, bool runBeforeCopy, bool lowPrecision) { - auto passes = make_shared(PassManagerSettings{runBeforeCopy, lowPrecision}, network); - passes->registerPass(); - if (!isNgraphPassesUsed) { - passes->registerPass(); - passes->registerPass(); - passes->registerPass(); - passes->registerPass(); - passes->registerPass(); - passes->registerPass(); - } - - if (fake_quantized) - passes->registerPass(); - - // fake quantisation aware passes - passes->registerPass(); - passes->registerPass(); - - passes->registerPass(); - - passes->registerPass(); - - if (!isNgraphPassesUsed) { - passes->registerPass(); - passes->registerPass(); - } - - passes->registerPass(); - - if (!isNgraphPassesUsed) { - passes->registerPass(); - } - passes->registerPass(); - passes->registerPass(); - passes->registerPass(); - passes->registerPass(); - // Keep legacy inserting of Identity layer here - // because concat and split aliging passes are not moved to ngraph yet - passes->registerPass(); - passes->registerPass(); - passes->registerPass(); - passes->registerPass(); - passes->registerPass(); - passes->registerPass(); - passIdx = passes->run(passIdx); - }; - InferenceEngine::CNNNetwork newNet; if (gnaFlags->sw_fp32) { @@ -916,11 +711,11 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) { }; newNet = InferenceEngine::CNNNetCopy(network, visitor); // to run all passes need to have two calls to pass manager - run_passes(newNet, true, gnaFlags->input_low_precision); - run_passes(newNet, false, gnaFlags->input_low_precision); + transformer.apply_legacy(newNet, true); + transformer.apply_legacy(newNet, false); } else { - ov::intel_gna::frontend::ModelQuantizer modelQuantizer(config, fake_quantized); - newNet = modelQuantizer.quantize(network, run_passes, *inputs_ptr_); + ov::intel_gna::frontend::ModelQuantizer modelQuantizer(transformer); + newNet = modelQuantizer.quantize(network, *inputs_ptr_); } auto inputLayers = CNNNetGetAllInputLayers(newNet); @@ -1822,12 +1617,29 @@ std::vector> GNAPlugin::GetOutputs() { return results; } -InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork(const InferenceEngine::CNNNetwork& network, - const std::map& config) const { +InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork( + const InferenceEngine::CNNNetwork& network, + const std::map& config_map) const { InferenceEngine::QueryNetworkResult res; - if (network.getFunction()) { - IE_THROW(NotImplemented) << " ngraph::Function is not supported natively"; + Config qn_config(config); + qn_config.UpdateFromMap(config_map); + + const auto effectiveCompileTarget = qn_config.target->get_effective_compile_target(); + auto model = network.getFunction(); + if (model) { + auto supported = GetSupportedNodes( + model, + [&](std::shared_ptr& model) { + TransformationsPipeline(qn_config, effectiveCompileTarget).apply(model); + }, + [&](const std::shared_ptr& op) { + return limitations::is_op_supported(op, effectiveCompileTarget, qn_config.gnaPrecision); + }); + for (auto&& op_name : supported) { + res.supportedLayersMap.emplace(op_name, GetName()); + } + return res; } std::unordered_set allLayers; diff --git 
a/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp b/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp new file mode 100644 index 00000000000..f057193242a --- /dev/null +++ b/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp @@ -0,0 +1,244 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gna_transformations_pipeline.hpp" + +#include "gna_itt.hpp" +#include "legacy/net_pass.h" +#include "legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp" +#include "ngraph/opsets/opset7.hpp" +#include "openvino/pass/manager.hpp" +#include "optimizer/gna_pass_manager.hpp" +#include "transformations/broadcast_const.hpp" +#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp" +#include "transformations/common_optimizations/common_optimizations.hpp" +#include "transformations/common_optimizations/concat_reduce_fusion.hpp" +#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" +#include "transformations/common_optimizations/fq_mul_fusion.hpp" +#include "transformations/common_optimizations/fq_reshape_fusion.hpp" +#include "transformations/common_optimizations/pull_transpose_through_fq.hpp" +#include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp" +#include "transformations/common_optimizations/transpose_sinking.hpp" +#include "transformations/control_flow/unroll_tensor_iterator.hpp" +#include "transformations/convert_dwsc_to_scaleshifts.hpp" +#include "transformations/convert_matmul_to_pointwise_convolution.hpp" +#include "transformations/convert_padded_to_valid_convolution.hpp" +#include "transformations/convert_precision.hpp" +#include "transformations/decompose_2d_convolution.hpp" +#include "transformations/decompose_mvn.hpp" +#include "transformations/disable_decompression_convert_constant_folding.hpp" +#include "transformations/handle_transposes_around_matmul.hpp" +#include "transformations/init_node_info.hpp" +#include "transformations/insert_copy_layer.hpp" +#include "transformations/insert_identity_layer.hpp" +#include "transformations/insert_reshape_around_matmul.hpp" +#include "transformations/insert_transpose_after_convolution_or_pooling.hpp" +#include "transformations/markup_fusable_transpose.hpp" +#include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp" +#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" +#include "transformations/op_conversions/gru_cell_decomposition.hpp" +#include "transformations/op_conversions/lstm_cell_decomposition.hpp" +#include "transformations/op_conversions/softsign_decomposition.hpp" +#include "transformations/opset_conversions/convert_opset2_to_opset1.hpp" +#include "transformations/opset_conversions/convert_opset3_to_opset2.hpp" +#include "transformations/pwl_approximation.hpp" +#include "transformations/remove_converts.hpp" +#include "transformations/remove_extra_reshapes.hpp" +#include "transformations/remove_single_input_concat.hpp" +#include "transformations/reorder_activation_and_pooling.hpp" +#include "transformations/split_convolution_with_large_buffer_size.hpp" +#include "transformations/split_eltwise.hpp" +#include "transformations/substitute_softsign.hpp" +#include "transformations/swap_input_matmul_gna.hpp" +#include "transformations/unfuse_reshape_and_transpose.hpp" +#include "transformations/utils/utils.hpp" + +namespace ov { +namespace intel_gna { + +void TransformationsPipeline::apply(const std::shared_ptr& model) { + 
OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "TransformationsPipeline::apply"); + + fake_quantized = ov::op::util::has_op_with_type(model); + + ov::pass::Manager manager; + manager.register_pass(); + + // In OV API 2.0(IRv10) default convertion to fp32 (inputs, outputs and weights) is disabled + // and we need to run the ConvertPrecision transformation to support old networks. + manager.register_pass(precisions_array{{ngraph::element::f16, ngraph::element::f32}}); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(effective_compile_target, + config.gnaPrecision); + manager.register_pass(effective_compile_target, + config.gnaPrecision); + manager.register_pass(effective_compile_target, config.gnaPrecision); + // TODO enable this transformation for networks with convolutions + if (!ov::op::util::has_op_with_type(model)) { + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + /* + Put BroadcastAddMultiplyConst here after ConvertOpSet..() transformations since there are conficts with them. + ngraph::pass::ConvertOpSet1ToLegacy -> ngraph::pass::BiasFusions -> + ngraph::pass::ConvAddFusion, ngraph::pass::ConvMultiplyFusion + That transormations fuse bias into convolution and recognizes const node as [1, C, 1, 1]. + TODO: move that transformation just beyond RemoveSingleInputConcat pass after removing ConvertOpSet1ToLegacy + transormations + */ + manager.register_pass(); + /* + SplitEltwise has dependency on BroadcastAddMultiplyConst for case when spliting of Constant + input is doing + */ + manager.register_pass(); + /* The following transformations perform insertion of Identity layer in 3 steps: + 1. Mark inputs with rt_info attribute where precision change from i32 to i16/i8 is happened + 2. Insert Identity after operation which have consumers marked with precision change + 3. 
Cleanup appropriate attribute from rt_info + */ + manager.register_pass(config.gnaFlags.input_low_precision); + manager.register_pass(); + manager.register_pass(); + // Breaks fusing of layers before result + manager.register_pass(); + if (!config.gnaFlags.sw_fp32 && !config.gnaFlags.uniformPwlDesign) { + manager.register_pass(config.gnaFlags.pwlMaxErrorPercent); + manager.register_pass(config.gnaFlags.pwlMaxErrorPercent); + } + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(precisions_array{{ov::element::i64, ov::element::i32}, + {ov::element::u64, ov::element::i32}, + {ov::element::u32, ov::element::i32}}); + const auto& pass_config = manager.get_pass_config(); + + // Allowing FP16 Converts to be folded and FP16 constants to upgrade to FP32 data type + pass_config->disable(); + pass_config->disable(); + + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + // Consider to enable after per-channel quantization on FakeQuantize layer is supported in GNAPlugin, see issue + // 52034 + pass_config->disable(); + // TransposeReduction can be enabled when Transpose-Conv-Transpose patterns will be handled in ngraph + // transformations + pass_config->disable(); + // Operations Max and Min aren't supported + pass_config->disable(); + + manager.run_passes(model); + + is_ngraph_passes_used = true; +} + +IE_SUPPRESS_DEPRECATED_START +void TransformationsPipeline::apply_legacy(const InferenceEngine::CNNNetwork& network, bool runBeforeCopy) { + OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "TransformationsPipeline::apply_legacy"); + auto passes = + std::make_shared(PassManagerSettings{runBeforeCopy, config.gnaFlags.input_low_precision}, network); + passes->registerPass(); + if (!is_ngraph_passes_used) { + passes->registerPass(); + passes->registerPass(); + passes->registerPass(); + passes->registerPass(); + passes->registerPass(); + passes->registerPass(); + } + + if (fake_quantized) + passes->registerPass(); + + // fake quantisation aware passes + passes->registerPass(); + passes->registerPass(); + + passes->registerPass(); + + passes->registerPass(); + + if (!is_ngraph_passes_used) { + passes->registerPass(); + passes->registerPass(); + } + + passes->registerPass(); + + if (!is_ngraph_passes_used) { + passes->registerPass(); + } + passes->registerPass(); + passes->registerPass(); + passes->registerPass(); + passes->registerPass(); + // Keep legacy inserting of Identity layer here + // because concat and split aliging passes are not moved to ngraph yet + passes->registerPass(); + passes->registerPass(); + passes->registerPass(); + passes->registerPass(); + passes->registerPass(); + passes->registerPass(); + legacy_pass_index = passes->run(legacy_pass_index); +} + +void TransformationsPipeline::convert_precision_legacy(InferenceEngine::CNNNetwork& network) { + if (!is_ngraph_passes_used) { + InferenceEngine::NetPass::ConvertPrecision(network, + InferenceEngine::Precision::I64, + InferenceEngine::Precision::I32); + InferenceEngine::NetPass::ConvertPrecision(network, + InferenceEngine::Precision::U64, + InferenceEngine::Precision::I32); + InferenceEngine::NetPass::ConvertPrecision(network, + InferenceEngine::Precision::U32, + InferenceEngine::Precision::I32); + } +} +IE_SUPPRESS_DEPRECATED_END +} // namespace intel_gna +} // namespace ov diff --git a/src/plugins/intel_gna/src/gna_transformations_pipeline.hpp 
b/src/plugins/intel_gna/src/gna_transformations_pipeline.hpp new file mode 100644 index 00000000000..14c937b27b1 --- /dev/null +++ b/src/plugins/intel_gna/src/gna_transformations_pipeline.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "cpp/ie_cnn_network.h" +#include "gna_plugin_config.hpp" +#include "openvino/core/model.hpp" + +namespace ov { +namespace intel_gna { + +class TransformationsPipeline { +public: + explicit TransformationsPipeline(const Config& config, + const ov::intel_gna::common::DeviceVersion& effective_compile_target = + ov::intel_gna::common::DeviceVersion::NotSet) + : config(config), + effective_compile_target(effective_compile_target) {} + void apply(const std::shared_ptr& model); + IE_SUPPRESS_DEPRECATED_START + void apply_legacy(const InferenceEngine::CNNNetwork& network, bool runBeforeCopy); + void convert_precision_legacy(InferenceEngine::CNNNetwork& network); + IE_SUPPRESS_DEPRECATED_END + bool is_fake_quantized() { + return fake_quantized; + }; + const ov::intel_gna::Config& config; + +private: + bool is_ngraph_passes_used = false; + bool fake_quantized = false; + int legacy_pass_index = 0; + ov::intel_gna::common::DeviceVersion effective_compile_target; +}; + +} // namespace intel_gna +} // namespace ov diff --git a/src/plugins/intel_gna/src/ops/util/util.hpp b/src/plugins/intel_gna/src/ops/util/util.hpp index 1bb02709b09..f108b735070 100644 --- a/src/plugins/intel_gna/src/ops/util/util.hpp +++ b/src/plugins/intel_gna/src/ops/util/util.hpp @@ -3,26 +3,29 @@ // #pragma once -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include #include #include "backend/gna_limitations.hpp" +#include "gna_plugin_config.hpp" +#include "layers/gna_convolution_layer.hpp" #include "layers/gna_permute.hpp" +#include "legacy/ngraph_ops/convolution_ie.hpp" +#include "legacy/ngraph_ops/crop_ie.hpp" +#include "legacy/ngraph_ops/eltwise.hpp" +#include "legacy/ngraph_ops/fully_connected.hpp" +#include "legacy/ngraph_ops/power.hpp" +#include "legacy/ngraph_ops/relu_ie.hpp" +#include "legacy/ngraph_ops/scaleshift.hpp" +#include "ngraph/opsets/opset7.hpp" +#include "ngraph/opsets/opset8.hpp" +#include "ngraph/opsets/opset9.hpp" #include "ops/copy.hpp" #include "ops/identity.hpp" #include "ops/pwl.hpp" +#include "transformations/rt_info/gna_transpose_fusable.hpp" +#include "transformations/utils/transformation_helper.hpp" +#include "transformations/utils/utils.hpp" namespace ov { namespace intel_gna { @@ -200,7 +203,7 @@ inline bool is_Tbit_fq(const std::shared_ptr& node) { if (!fq_node) return false; auto levels = fq_node->get_levels(); - return std::numeric_limits::max() == levels; + return (std::numeric_limits::max() == levels) || (std::numeric_limits::max() == levels - 1); } inline bool is_32bit_fq(const std::shared_ptr& node) { diff --git a/src/plugins/intel_gna/src/transformations/utils/transformation_helper.cpp b/src/plugins/intel_gna/src/transformations/utils/transformation_helper.cpp index c85b095e143..3fc71f4f59f 100644 --- a/src/plugins/intel_gna/src/transformations/utils/transformation_helper.cpp +++ b/src/plugins/intel_gna/src/transformations/utils/transformation_helper.cpp @@ -13,7 +13,32 @@ namespace intel_gna { namespace pass { namespace helper { +void GetConvData(std::shared_ptr conv, ConvData& conv_data) { + OPENVINO_ASSERT(conv); + conv_data.output_height = conv->get_output_shape(0)[2]; + 
conv_data.output_width = conv->get_output_shape(0)[3]; + conv_data.input_channel_count = conv->input_value(0).get_shape()[1]; + conv_data.input_height = conv->input_value(0).get_shape()[2]; + conv_data.input_width = conv->input_value(0).get_shape()[3]; + conv_data.filter_count = conv->input_value(1).get_shape()[0]; + conv_data.filter_channel_count = conv->input_value(1).get_shape()[1]; + conv_data.filter_height = conv->input_value(1).get_shape()[2]; + conv_data.filter_width = conv->input_value(1).get_shape()[3]; + conv_data.filter_dilation_height = conv->get_dilations()[0]; + conv_data.filter_dilation_width = conv->get_dilations()[1]; + conv_data.filter_stride_height = conv->get_strides()[0]; + conv_data.filter_stride_width = conv->get_strides()[1]; + conv_data.output_channel_count = conv_data.filter_count; + conv_data.pads_begin_height = conv->get_pads_begin()[0]; + conv_data.pads_begin_width = conv->get_pads_begin()[1]; + conv_data.pads_end_height = conv->get_pads_end()[0]; + conv_data.pads_end_width = conv->get_pads_end()[1]; + conv_data.padding_type = conv->get_auto_pad(); + conv_data.element_type = conv->get_element_type(); +} + void GetConvData(std::shared_ptr conv, ConvData& conv_data) { + OPENVINO_ASSERT(conv); conv_data.output_height = conv->get_output_shape(0)[2]; conv_data.output_width = conv->get_output_shape(0)[3]; conv_data.input_channel_count = conv->input_value(0).get_shape()[1]; diff --git a/src/plugins/intel_gna/src/transformations/utils/transformation_helper.hpp b/src/plugins/intel_gna/src/transformations/utils/transformation_helper.hpp index d549cbd9f65..95881199384 100644 --- a/src/plugins/intel_gna/src/transformations/utils/transformation_helper.hpp +++ b/src/plugins/intel_gna/src/transformations/utils/transformation_helper.hpp @@ -4,6 +4,7 @@ #pragma once +#include #include namespace ov { @@ -42,6 +43,14 @@ struct ConvData { */ void GetConvData(std::shared_ptr conv, ConvData& conv_data); +/** + * @brief gets all legacy convolution related data into a struct for further processing + * @param conv legacy convolution node to get data of + * @param conv_data convolution data structure to put data into + * @return void + */ +void GetConvData(std::shared_ptr conv, ConvData& conv_data); + /** * @brief ngraph matcher predicate fusing existing predicates for consumers count and rank of a layer * @param expected_count expected consumers count for of node diff --git a/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/I8_quantisation_test.cpp b/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/I8_quantisation_test.cpp index 22bb2f2d649..840e34ffd41 100644 --- a/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/I8_quantisation_test.cpp +++ b/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/I8_quantisation_test.cpp @@ -39,9 +39,10 @@ class I8QuantisationTest : public GNATest<> { gna_config.gnaPrecision = InferenceEngine::Precision::I16; gna_config.gnaFlags.input_low_precision = false; - return ModelQuantizer(gna_config, false).quantize( + auto transformer = ov::intel_gna::TransformationsPipeline(gna_config); + + return ModelQuantizer(transformer).quantize( model, - [](const InferenceEngine::CNNNetwork&, bool run_before_copy, bool low_precision) {}, inputs); } @@ -100,7 +101,7 @@ TEST_F(I8QuantisationTest, FCDimensionIs1){ auto weights = make_shared_blob({ Precision::U8, {440}, C }); weights->allocate(); fillWeights(weights); - + Core ie; auto network = ie.ReadNetwork(FCOnlyModel(), weights); @@ -111,7 +112,7 @@ TEST_F(I8QuantisationTest, 
outputAffinePrecisionIs32Bits){ auto weights = make_shared_blob({ Precision::U8, {440}, C }); weights->allocate(); fillWeights(weights); - + Core ie; auto network = ie.ReadNetwork(Fc2DOutputModel(), weights); @@ -126,7 +127,7 @@ TEST_F(I8QuantisationTest, fp16tofp32_on_fullyConnected_model) { auto weights = make_shared_blob({ Precision::U8, {220}, Layout::C }); weights->allocate(); fillWeights(weights); - + Core ie; auto network = ie.ReadNetwork(FCOnlyModelFP16(), weights); @@ -137,7 +138,7 @@ TEST_F(I8QuantisationTest, LSTMCell_quantize) { auto weights = make_shared_blob({ Precision::U8, {33664}, C }); weights->allocate(); fillWeights(weights); - + Core ie; auto network = ie.ReadNetwork(LSTMCellOnlyModel(), weights); @@ -148,7 +149,7 @@ TEST_F(I8QuantisationTest, LSTMCell_unaligned_quantize) { auto weights = make_shared_blob({ Precision::U8, {3480}, C }); weights->allocate(); fillWeights(weights); - + Core ie; auto network = ie.ReadNetwork(LSTMCellOnlyModelUnaligned(), weights); @@ -159,7 +160,7 @@ TEST_F(I8QuantisationTest, TI_quantize) { auto weights = make_shared_blob({ Precision::U8, {249748}, C }); weights->allocate(); fillWeights(weights); - + Core ie; auto network = ie.ReadNetwork(TIModelWithLSTMCell2(), weights); diff --git a/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/graph_tools/graph_copy_tests.cpp b/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/graph_tools/graph_copy_tests.cpp index 3fe997b4f8b..0da43c865db 100644 --- a/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/graph_tools/graph_copy_tests.cpp +++ b/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/graph_tools/graph_copy_tests.cpp @@ -41,10 +41,11 @@ protected: gna_config.gnaPrecision = InferenceEngine::Precision::I16; gna_config.gnaFlags.input_low_precision = false; - return ModelQuantizer(gna_config, false) + auto transformer = ov::intel_gna::TransformationsPipeline(gna_config); + + return ModelQuantizer(transformer) .quantize( model, - [](InferenceEngine::CNNNetwork&, bool run_before_copy, bool inputs_int8_precision) {}, inputs); } diff --git a/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/i16_quantisation_test.cpp b/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/i16_quantisation_test.cpp index 050c16e7597..6a633c843a2 100644 --- a/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/i16_quantisation_test.cpp +++ b/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/i16_quantisation_test.cpp @@ -20,7 +20,7 @@ class I16QuantisationTest : public GNATest<> { protected: InferenceEngine::CNNLayerPtr quantize (InferenceEngine::CNNLayerPtr lp) { auto newLayer = InferenceEngine::injectData(lp); - Config gna_config; + Config gna_config; gna_config.gnaPrecision = InferenceEngine::Precision::I16; gna_config.gnaFlags.input_low_precision = false; LayerQuantizer lq(gna_config); @@ -41,9 +41,10 @@ class I16QuantisationTest : public GNATest<> { gna_config.gnaPrecision = InferenceEngine::Precision::I16; gna_config.gnaFlags.input_low_precision = false; - return ModelQuantizer(gna_config, false).quantize( + auto transformer = ov::intel_gna::TransformationsPipeline(gna_config); + + return ModelQuantizer(transformer).quantize( model, - [](const InferenceEngine::CNNNetwork&, bool run_before_copy, bool low_precision) {}, inputs); } @@ -367,7 +368,7 @@ TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) { auto weights = make_shared_blob({ Precision::U8, {220}, Layout::C }); weights->allocate(); fillWeights(weights); - + Core ie; auto network = 
ie.ReadNetwork(FCOnlyModelFP16(), weights); @@ -431,7 +432,7 @@ TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) { auto weights = make_shared_blob({ Precision::U8, {3480}, C }); weights->allocate(); fillWeights(weights); - + Core ie; auto network = ie.ReadNetwork(LSTMCellOnlyModelUnaligned(), weights); @@ -468,7 +469,7 @@ TEST_F(I16QuantisationTest, TI_quantize) { auto weights = make_shared_blob({ Precision::U8, {249748}, C }); weights->allocate(); fillWeights(weights); - + Core ie; auto network = ie.ReadNetwork(TIModelWithLSTMCell2(), weights); diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/convolution_negative.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/convolution_negative.cpp index 4b59a91ce93..71535935b07 100644 --- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/convolution_negative.cpp +++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/single_layer_tests/convolution_negative.cpp @@ -209,9 +209,8 @@ GNA_NEG_INSTANTIATE(InputW, Fine, InvalidInputW, "Unsupported input width", GNA_ GNA_NEG_INSTANTIATE(InputC, Fine, InvalidInputC, "Unsupported number of input channels", GNA_3_0) GNA_NEG_INSTANTIATE(Padding, InvalidPadding, Fine, "Unsupported convolution input padding", GNA_3_0) GNA_NEG_INSTANTIATE(Stride, InvalidStride, Fine, "Unsupported convolution stride shape", GNA_3_0) -GNA_NEG_INSTANTIATE(Dilation, InvalidDilation, Fine, "dilation is not supported on GNA", GNA_3_0) -GNA_NEG_INSTANTIATE(Dilation35, InvalidDilation, Fine, "dilation is not supported on GNA", GNA_3_5) +GNA_NEG_INSTANTIATE(Dilation, InvalidDilation, Fine, "Unsupported dilation", GNA_3_0) +GNA_NEG_INSTANTIATE(Dilation35, InvalidDilation, Fine, "Unsupported dilation", GNA_3_5) GNA_NEG_INSTANTIATE(PaddingSize, InvalidPaddingSize, Fine, "Unsupported convolution input padding", GNA_3_0) GNA_NEG_INSTANTIATE(PaddingSize35, InvalidPaddingSize, Fine, "Unsupported convolution input padding", GNA_3_5) - } // namespace diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp index 366d6726ecc..b900d14e1f0 100644 --- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -24,8 +24,6 @@ std::vector disabledTestPatterns() { R"(.*(EltwiseLayerTest).*eltwiseOpType=Prod.*secondaryInputType=PARAMETER.*opType=SCALAR.*)", // TODO: Issue: 34348 R"(.*IEClassGetAvailableDevices.*)", - // TODO: Issue 32923 - R"(.*IEClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK.*)", // TODO: Issue 39358 R"(.*unaligned.*MultipleConcatTest.*)", R"(.*ActivationConcatsEltwise.*CS=35.*)", @@ -33,8 +31,6 @@ std::vector disabledTestPatterns() { R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.8\).*)", R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.16\).*)", R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.32\).*)", - // TODO: Issue: 29577 - R"(.*CoreThreadingTests.smoke_QueryNetwork.*)", // TODO: Issue: 46416 R"(.*InferRequestVariableStateTest.inferreq_smoke_VariableState_2infers*.*)", // TODO: Issue 24839 @@ -70,12 +66,13 @@ std::vector disabledTestPatterns() { R"(.*OVExecutableNetworkBaseTest.*CanGetInputsInfoAndCheck.*)", R"(.*OVExecutableNetworkBaseTest.*getOutputsFromSplitFunctionWithSeveralOutputs.*)", 
R"(.*OVExecutableNetworkBaseTest.*canLoadNetworkFromMemory.*)", - R"(.*OVClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK.*GetMetricNoThrow.*)", + R"(.*(OVClass|IEClass)HeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK.*GetMetricNoThrow.*)", + R"(.*LoadNetwork*.*LoadNetwork(HETEROWithDeviceIDNoThrow|WithBigDeviceID|WithInvalidDeviceID)*.*)", + R"(.*QueryNetwork*.*QueryNetwork(HETEROWithDeviceIDNoThrow|WithBigDeviceID|WithInvalidDeviceID)*.*)", + R"(.*LoadNetworkTest.*QueryNetwork(MULTIWithHETERO|HETEROWithMULTI)NoThrow_V10.*)", R"(.*Behavior.*OVExecutableNetworkBaseTest.*get(Inputs|Outputs)FromFunctionWithSeveral(Inputs|Outputs).*)", // TODO: temporary disabled. Need to be enabled when PR 9282 is merged R"(.*OVExecGraphImportExportTest.*readFromV10IR.*)", - // TODO: Issue: 29577 - R"(.*QueryNetwork.*)", // Issue connected with OV2.0 R"(.*EltwiseLayerTest.*NetType=f16.*)", // TODO: Issue: 69639 @@ -91,12 +88,10 @@ std::vector disabledTestPatterns() { R"(.*CompileModelCacheTestBase.*(SplitConvConcat|KSOFunction).*)", R"(.*CompileModelCacheTestBase.*(SingleConv|NestedSplitConvConcat).*)", R"(.*CompileModelCacheTestBase.*(Bias|ReadConcatSplitAssign).*)", - R"(.*OVClassLoadNetworkTest.*LoadNetwork.*)", // does not work due to GNA 3.0 convolution and other primitives limitations, partially can be resolved by // switching GNA library to GNA3.5 R"(.*CachingSupportCase.*LoadNet.*(Bias|Split|Concat|KSO|SingleConv).*)", R"(.*CachingSupportCase.*LoadNet.*(ConvPoolRelu|TIwithLSTMcell1)_f32_batch2.*)", - R"(.*IEClassLoadNetworkTest.*LoadNetwork(HETERO|MULTI|WithDeviceIDNoThrow|WithInvalidDeviceIDThrows).*)", R"(.*smoke_Multi_BehaviorTests.*)", }; } diff --git a/src/plugins/intel_gna/tests/unit/gna_get_aligned_split_sizes.cpp b/src/plugins/intel_gna/tests/unit/gna_get_aligned_split_sizes.cpp index bb15946b5e4..03c10bf44fb 100644 --- a/src/plugins/intel_gna/tests/unit/gna_get_aligned_split_sizes.cpp +++ b/src/plugins/intel_gna/tests/unit/gna_get_aligned_split_sizes.cpp @@ -8,6 +8,8 @@ // to suppress deprecated definition errors #define IMPLEMENT_INFERENCE_ENGINE_PLUGIN #include "layers/gna_split_layer.hpp" +#include "ngraph/opsets/opset9.hpp" +#include "ops/util/util.hpp" namespace { @@ -31,4 +33,61 @@ TEST(GetAlignedSplitSizesTest, testAlignedSplitSizes) { } } +using VariadicSplitParameters = std::tuple, // split lengths + bool // supported + >; + +const std::vector variadic_split_data = { + VariadicSplitParameters{ov::Shape{1024}, 0, std::vector{192, 192, 320, 320}, true}, + VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector{640, 192, 192}, true}, + VariadicSplitParameters{ov::Shape{1024}, 0, std::vector{500, 24, 500}, false}, + VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector{700, 300, 24}, false}, +}; + +TEST(CheckSplitSupported, CheckVariadicSplitSupported) { + ov::Shape input_shape; + uint32_t axis; + std::vector split_lengths; + bool result; + for (const auto& item : variadic_split_data) { + std::tie(input_shape, axis, split_lengths, result) = item; + auto split = std::make_shared( + std::make_shared(ngraph::element::f32, input_shape), + ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({1}), {axis}), + ngraph::opset9::Constant::create(ngraph::element::i64, + ngraph::Shape({split_lengths.size()}), + split_lengths)); + ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result); + } +} + +using SplitParameters = std::tuple; + +const std::vector split_data = { + SplitParameters{ov::Shape{1024}, 0, 4, true}, + 
SplitParameters{ov::Shape{1, 1024}, 1, 16, true}, + SplitParameters{ov::Shape{1024}, 0, 64, false}, + SplitParameters{ov::Shape{1, 1024}, 1, 256, false}, +}; + +TEST(CheckSplitSupported, CheckSplitSupported) { + ov::Shape input_shape; + uint32_t axis; + uint32_t num_splits; + bool result; + for (const auto& item : split_data) { + std::tie(input_shape, axis, num_splits, result) = item; + auto split = std::make_shared( + std::make_shared(ngraph::element::f32, input_shape), + ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}), + num_splits); + ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result); + } +} } // namespace
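
Editor's note (not part of the patch): the sketch below illustrates how the pieces introduced by this change are intended to compose. A TransformationsPipeline built from the plugin Config is applied to the ov::Model, the transformed graph is validated with limitations::check_all_ops_supported() (the throwing variant used by GNAPlugin::LoadNetwork()), and limitations::is_op_supported() is the per-node predicate that QueryNetwork() now hands to GetSupportedNodes(). The surrounding function name and the exact include paths are assumptions for illustration; only plugin-internal types and calls that appear in the diff above are used.

// Illustrative sketch only; assumes the intel_gna plugin's internal headers are on the include path.
#include "backend/gna_limitations.hpp"
#include "gna_transformations_pipeline.hpp"

static void transform_and_validate(const ov::intel_gna::Config& config, const std::shared_ptr<ov::Model>& model) {
    using namespace ov::intel_gna;
    const auto target = config.target->get_effective_compile_target();

    // Run the ngraph transformation pipeline that replaced the inline pass list in GNAPlugin::LoadNetwork().
    TransformationsPipeline transformer(config, target);
    transformer.apply(model);

    // Throwing check: reports every unsupported layer in a single GNA exception.
    limitations::check_all_ops_supported(model, target, config.gnaPrecision);

    // Non-throwing, per-operation check: the predicate QueryNetwork() passes to GetSupportedNodes().
    for (const auto& op : model->get_ops()) {
        if (!limitations::is_op_supported(op, target, config.gnaPrecision, /*is_exception_allowed=*/false)) {
            // handle or report the unsupported operation
        }
    }
}

Splitting the checks this way lets LoadNetwork() fail fast with a full list of offending layers, while QueryNetwork() can silently classify each node as supported or not for heterogeneous execution.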