diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index a8ca176d9b8..85863034081 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -14,78 +14,18 @@ #include "ie_metric_helpers.hpp" #include "ie_plugin_config.hpp" -#include -#include -#include -#include -#include -#include #include #include -#include -#include - -#include - -#include -#include -#include -#include "transformations/common_optimizations/convert_quantize_dequantize.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "transformations/op_conversions/softmax_decomposition.hpp" -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #include "cldnn_engine.h" #include "cldnn_executable_network.h" +#include "cldnn_transformations_pipeline.h" #include "cldnn_custom_layer.h" #include "cldnn_itt.h" #include "gpu/gpu_config.hpp" +#include + #include "cldnn/runtime/device_query.hpp" #include "cldnn/runtime/debug_configuration.hpp" @@ -137,350 +77,15 @@ cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map -static bool disableReduceDecomposition(const std::shared_ptr node) { - if (auto op = std::dynamic_pointer_cast(node)) { - bool fp16_batch_not_1 = op->get_element_type() == ngraph::element::f16 && op->input(0).get_shape()[0] != 1; - return !fp16_batch_not_1; - } - return false; -} - InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const InferenceEngine::CNNNetwork& network, const CLDNNPlugin::Config& config) const { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::CloneAndTransformNetwork"); CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network); if (clonedNetwork.getFunction()) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork"); auto nGraphFunc = clonedNetwork.getFunction(); - - using const_node_ptr = const std::shared_ptr; - - bool enableInt8; - { - ngraph::pass::Manager manager; - enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(nGraphFunc); - if (enableInt8) { - manager.register_pass( - std::vector{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 }); - } - - manager.register_pass(); - manager.register_pass(); - - if (!config.enable_loop_unrolling) { - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - } - - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - - if (config.enable_loop_unrolling) { - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - } - - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - - static const precisions_array convert_precision_list { - {ngraph::element::i64, ngraph::element::i32}, - {ngraph::element::u64, ngraph::element::i32}, - {ngraph::element::u16, ngraph::element::i32}, - {ngraph::element::u32, ngraph::element::i32}, - {ngraph::element::boolean, ngraph::element::u8}, - {ngraph::element::i4, ngraph::element::i8}, - {ngraph::element::u4, ngraph::element::u8}, - }; - - manager.register_pass(convert_precision_list); - - auto pass_config = manager.get_pass_config(); - - // SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5 - pass_config->set_callback( - [](const_node_ptr &node) -> bool { - return node->input_value(0).get_shape().size() <= 5lu && - node->input_value(0).get_shape().size() == node->get_output_shape(0).size(); - }); - - pass_config->set_callback( - [](const_node_ptr &node) -> bool { - const auto & rank = node->input(0).get_partial_shape().rank().get_length(); - return rank <= 5lu; - }); - - pass_config->set_callback( - [](const_node_ptr &node) -> bool { - return disableReduceDecomposition(node); - }); - - pass_config->set_callback( - [](const_node_ptr &node) -> bool { - return disableReduceDecomposition(node); - }); - - pass_config->set_callback( - [](const_node_ptr &node) -> bool { - return disableReduceDecomposition(node); - }); - - auto isCellPrimitiveSupported = [](const_node_ptr &node) -> bool { - if (std::dynamic_pointer_cast(node)) { - return false; - } else if (std::dynamic_pointer_cast(node)) { - return false; - } else if (const auto &lstm_cell = std::dynamic_pointer_cast(node)) { - return lstm_cell->get_clip() == 0.0f && lstm_cell->get_activations() == std::vector{"sigmoid", "tanh", "tanh"}; - } else if (const auto &lstm_cell_v1 = std::dynamic_pointer_cast(node)) { - return lstm_cell_v1->get_clip() == 0.0f && lstm_cell_v1->get_activations() == std::vector{"sigmoid", "tanh", "tanh"}; - } - return false; - }; - - // Sequences supported by the plugin shouldn't be converted to TensorIterator. - // sequence_length input is not supported in all Sequences, so if is_seq_len_provided() == true, we - // should always convert to TensorIterator. - // RNN/GRU Sequences are not supported in GPU plugin - // LSTM Sequence supported with clip == 0, and activations have default values (sigmoid, tanh, tanh) - auto isSequencePrimitiveSupported = [](const_node_ptr &node) -> bool { - const auto& data = node->input(0); - const auto& data_pshape = data.get_partial_shape(); - if (data_pshape.rank().is_static() && data_pshape.rank().get_length() > 1 && !data_pshape[1].is_static()) - return false; - auto max_seq_len = data.get_shape().at(1); - if (std::dynamic_pointer_cast(node)) { - return false; - } else if (std::dynamic_pointer_cast(node)) { - return false; - } else if (const auto &lstm_seq = std::dynamic_pointer_cast(node)) { - return lstm_seq->get_clip() == 0.0f && - lstm_seq->get_activations() == std::vector{"sigmoid", "tanh", "tanh"} && - !ngraph::op::util::is_seq_len_provided(lstm_seq->get_input_node_shared_ptr(3), - max_seq_len); - } - return false; - }; - - pass_config->set_callback( - [isCellPrimitiveSupported](const_node_ptr &node) -> bool { - return isCellPrimitiveSupported(node); - }); - - pass_config->set_callback( - [isSequencePrimitiveSupported](const_node_ptr &node) -> bool { - return isSequencePrimitiveSupported(node); - }); - - pass_config->set_callback( - [isCellPrimitiveSupported](const_node_ptr &node) -> bool { - if (const auto& ti_op = std::dynamic_pointer_cast(node)) { - size_t count_rnn = 0; - for (const auto &op : ti_op->get_body()->get_ops()) - count_rnn += isCellPrimitiveSupported(op); - return count_rnn != 1; - } - return true; - }); - - pass_config->set_callback( - [](const_node_ptr &node) -> bool { - const auto mvn = std::dynamic_pointer_cast(node); - if (mvn != nullptr && node->get_input_size() == 2) { - if (auto axesNode = dynamic_cast(mvn->get_input_node_ptr(1))) { - auto axesVal = axesNode->cast_vector(); - auto& mvnShape = mvn->get_output_shape(0); - for (int32_t& axis : axesVal) - axis = axis < 0 ? axis + mvnShape.size() : axis; - std::sort(axesVal.begin(), axesVal.end()); - if (mvnShape.size() == 1) - return false; - if (mvnShape.size() > 5 || (mvnShape.size() != axesVal.size() + 1 && mvnShape.size() != axesVal.size() + 2)) - return false; - int value = mvnShape.size() - 1; - for (int i = axesVal.size() - 1; i >= 0; i--, value--) { - if (axesVal[i] != value) - return false; - } - return true; - } - } - return false; - }); - - pass_config->enable(); - pass_config->set_callback( - [](const_node_ptr &node) -> bool { - return node->input_value(0).get_partial_shape().rank().get_length() <= 5; - }); - - // List of enabled/disabled transformations - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->enable(); - - if (!config.enable_loop_unrolling) { - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - } - - pass_config->enable(); - - if (enableInt8) { - pass_config->set_callback([](const_node_ptr &node) -> bool { - return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(node); - }); - - pass_config->set_callback([](const_node_ptr &node) -> bool { - return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForSubtract(node); - }); - } - - manager.run_passes(nGraphFunc); - } - - if (enableInt8) { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork::LPT"); - using namespace ngraph::pass::low_precision; - - // Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers - // With this key users can work-around such issues - if (!config.enable_fp16_for_quantized_models) { - ngraph::pass::Manager manager; - manager.register_pass(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }}); - manager.run_passes(nGraphFunc); - } - - auto supportedPrecisions = std::vector({ - OperationPrecisionRestriction::create({ - {0, {ngraph::element::u8, ngraph::element::i8}}, - {1, {ngraph::element::i8}}, - }), - OperationPrecisionRestriction::create({ - {0, {ngraph::element::u8, ngraph::element::i8}}, - {1, {ngraph::element::i8}} - }), - OperationPrecisionRestriction::create({ - {0, {ngraph::element::u8, ngraph::element::i8}}, - {1, {ngraph::element::i8}} - }), - OperationPrecisionRestriction::create({}) - }); - - auto perTensorQuantization = std::vector({ - OperationPerTensorQuantizationRestriction::create({0}), - OperationPerTensorQuantizationRestriction::create({0}), - }); - - ngraph::pass::Manager lptManager; - - auto lptPassConfig = lptManager.get_pass_config(); - lptPassConfig->disable(); - lptPassConfig->set_callback([](const_node_ptr& node) -> bool { - if (const auto mulitply = std::dynamic_pointer_cast(node)) { - return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(mulitply); - } - return false; - }); - lptPassConfig->set_callback([](const_node_ptr& node) -> bool { - auto fillStaticChannel = [](const ngraph::PartialShape& shape, size_t& channel) -> bool { - const auto rank = shape.rank(); - if (rank.is_dynamic()) { - return false; - } - if (rank.get_length() < 2ul) { - return false; - } - const auto dimension = shape[1]; - if (dimension.is_dynamic()) { - return false; - } - channel = dimension.get_length(); - return true; - }; - - size_t inputChannels; - if (!fillStaticChannel(node->get_input_partial_shape(0), inputChannels)) { - return true; - } - - size_t outputChannels; - if (!fillStaticChannel(node->get_output_partial_shape(0), outputChannels)) { - return true; - } - - - if ((inputChannels % 4 != 0) || (outputChannels % 16 != 0)) { - return true; - } - - return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node); - }); - lptPassConfig->set_callback([](const_node_ptr& node) -> bool { - return MatMulTransformation::is3DTensorOnActivations(node); - }); - - lptManager.register_pass(supportedPrecisions, perTensorQuantization); - lptManager.run_passes(nGraphFunc); - } - - { - OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork::RunPasses"); - ngraph::pass::Manager manager; - // This ConstantFolding pass is added to fold reshapes added for constant inputs on NMS internal operation which prevents upper-bound calculation - // TODO: check why we have these reshapes - manager.register_pass(); - - manager.register_pass(); - auto pass_config = manager.get_pass_config(); - pass_config->set_callback( - [config](const std::shared_ptr &node) -> bool { - auto sub_graph_op = std::dynamic_pointer_cast(node); - int64_t num_iter = sub_graph_op->get_num_iterations(); - if (num_iter == 1) { - return false; - } - return !config.enable_loop_unrolling; - }); - - manager.run_passes(nGraphFunc); - } + TransformationsPipeline transformations(config); + transformations.apply(nGraphFunc); } GPU_DEBUG_GET_INSTANCE(debug_config); diff --git a/inference-engine/src/cldnn_engine/cldnn_transformations_pipeline.cpp b/inference-engine/src/cldnn_engine/cldnn_transformations_pipeline.cpp new file mode 100644 index 00000000000..f1ab5b1620d --- /dev/null +++ b/inference-engine/src/cldnn_engine/cldnn_transformations_pipeline.cpp @@ -0,0 +1,429 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cldnn_transformations_pipeline.h" + +#include "ie_metric_helpers.hpp" +#include "ie_plugin_config.hpp" +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include "transformations/common_optimizations/convert_quantize_dequantize.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "transformations/op_conversions/softmax_decomposition.hpp" +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cldnn_itt.h" + +namespace { +template +static bool disableReduceDecomposition(const std::shared_ptr node) { + if (auto op = std::dynamic_pointer_cast(node)) { + bool fp16_batch_not_1 = op->get_element_type() == ngraph::element::f16 && op->input(0).get_shape()[0] != 1; + return !fp16_batch_not_1; + } + return false; +} +} // namespace + +namespace CLDNNPlugin { + +void TransformationsPipeline::apply(std::shared_ptr func) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "TransformationsPipeline::apply"); + using const_node_ptr = const std::shared_ptr; + + bool enableInt8; + { + ngraph::pass::Manager manager; + enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(func); + if (enableInt8) { + manager.register_pass( + std::vector{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 }); + } + + manager.register_pass(); + manager.register_pass(); + + if (!config.enable_loop_unrolling) { + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } + + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + + if (config.enable_loop_unrolling) { + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + } + + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + + static const precisions_array convert_precision_list { + {ngraph::element::i64, ngraph::element::i32}, + {ngraph::element::u64, ngraph::element::i32}, + {ngraph::element::u16, ngraph::element::i32}, + {ngraph::element::u32, ngraph::element::i32}, + {ngraph::element::boolean, ngraph::element::u8}, + {ngraph::element::i4, ngraph::element::i8}, + {ngraph::element::u4, ngraph::element::u8}, + }; + + manager.register_pass(convert_precision_list); + + auto pass_config = manager.get_pass_config(); + + // SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5 + pass_config->set_callback( + [](const_node_ptr &node) -> bool { + return node->input_value(0).get_shape().size() <= 5lu && + node->input_value(0).get_shape().size() == node->get_output_shape(0).size(); + }); + + pass_config->set_callback( + [](const_node_ptr &node) -> bool { + const auto & rank = node->input(0).get_partial_shape().rank().get_length(); + return rank <= 5lu; + }); + + pass_config->set_callback( + [](const_node_ptr &node) -> bool { + return disableReduceDecomposition(node); + }); + + pass_config->set_callback( + [](const_node_ptr &node) -> bool { + return disableReduceDecomposition(node); + }); + + pass_config->set_callback( + [](const_node_ptr &node) -> bool { + return disableReduceDecomposition(node); + }); + + auto isCellPrimitiveSupported = [](const_node_ptr &node) -> bool { + if (std::dynamic_pointer_cast(node)) { + return false; + } else if (std::dynamic_pointer_cast(node)) { + return false; + } else if (const auto &lstm_cell = std::dynamic_pointer_cast(node)) { + return lstm_cell->get_clip() == 0.0f && lstm_cell->get_activations() == std::vector{"sigmoid", "tanh", "tanh"}; + } else if (const auto &lstm_cell_v1 = std::dynamic_pointer_cast(node)) { + return lstm_cell_v1->get_clip() == 0.0f && lstm_cell_v1->get_activations() == std::vector{"sigmoid", "tanh", "tanh"}; + } + return false; + }; + + // Sequences supported by the plugin shouldn't be converted to TensorIterator. + // sequence_length input is not supported in all Sequences, so if is_seq_len_provided() == true, we + // should always convert to TensorIterator. + // RNN/GRU Sequences are not supported in GPU plugin + // LSTM Sequence supported with clip == 0, and activations have default values (sigmoid, tanh, tanh) + auto isSequencePrimitiveSupported = [](const_node_ptr &node) -> bool { + const auto& data = node->input(0); + const auto& data_pshape = data.get_partial_shape(); + if (data_pshape.rank().is_static() && data_pshape.rank().get_length() > 1 && !data_pshape[1].is_static()) + return false; + auto max_seq_len = data.get_shape().at(1); + if (std::dynamic_pointer_cast(node)) { + return false; + } else if (std::dynamic_pointer_cast(node)) { + return false; + } else if (const auto &lstm_seq = std::dynamic_pointer_cast(node)) { + return lstm_seq->get_clip() == 0.0f && + lstm_seq->get_activations() == std::vector{"sigmoid", "tanh", "tanh"} && + !ngraph::op::util::is_seq_len_provided(lstm_seq->get_input_node_shared_ptr(3), + max_seq_len); + } + return false; + }; + + pass_config->set_callback( + [isCellPrimitiveSupported](const_node_ptr &node) -> bool { + return isCellPrimitiveSupported(node); + }); + + pass_config->set_callback( + [isSequencePrimitiveSupported](const_node_ptr &node) -> bool { + return isSequencePrimitiveSupported(node); + }); + + pass_config->set_callback( + [isCellPrimitiveSupported](const_node_ptr &node) -> bool { + if (const auto& ti_op = std::dynamic_pointer_cast(node)) { + size_t count_rnn = 0; + for (const auto &op : ti_op->get_body()->get_ops()) + count_rnn += isCellPrimitiveSupported(op); + return count_rnn != 1; + } + return true; + }); + + pass_config->set_callback( + [](const_node_ptr &node) -> bool { + const auto mvn = std::dynamic_pointer_cast(node); + if (mvn != nullptr && node->get_input_size() == 2) { + if (auto axesNode = dynamic_cast(mvn->get_input_node_ptr(1))) { + auto axesVal = axesNode->cast_vector(); + auto& mvnShape = mvn->get_output_shape(0); + for (int32_t& axis : axesVal) + axis = axis < 0 ? axis + mvnShape.size() : axis; + std::sort(axesVal.begin(), axesVal.end()); + if (mvnShape.size() == 1) + return false; + if (mvnShape.size() > 5 || (mvnShape.size() != axesVal.size() + 1 && mvnShape.size() != axesVal.size() + 2)) + return false; + int value = mvnShape.size() - 1; + for (int i = axesVal.size() - 1; i >= 0; i--, value--) { + if (axesVal[i] != value) + return false; + } + return true; + } + } + return false; + }); + + pass_config->enable(); + pass_config->set_callback( + [](const_node_ptr &node) -> bool { + return node->input_value(0).get_partial_shape().rank().get_length() <= 5; + }); + + // List of enabled/disabled transformations + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->enable(); + + if (!config.enable_loop_unrolling) { + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + } + + pass_config->enable(); + + if (enableInt8) { + pass_config->set_callback([](const_node_ptr &node) -> bool { + return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForMultiply(node); + }); + + pass_config->set_callback([](const_node_ptr &node) -> bool { + return ngraph::pass::low_precision::NetworkHelper::areQuantizeAndDequantizeSupportedForSubtract(node); + }); + } + + manager.run_passes(func); + } + + if (enableInt8) { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "TransformationsPipeline::apply::lpt"); + using namespace ngraph::pass::low_precision; + + // Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers + // With this key users can work-around such issues + if (!config.enable_fp16_for_quantized_models) { + ngraph::pass::Manager manager; + manager.register_pass(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }}); + manager.run_passes(func); + } + + auto supportedPrecisions = std::vector({ + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8, ngraph::element::i8}}, + {1, {ngraph::element::i8}}, + }), + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8, ngraph::element::i8}}, + {1, {ngraph::element::i8}} + }), + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8, ngraph::element::i8}}, + {1, {ngraph::element::i8}} + }), + OperationPrecisionRestriction::create({}) + }); + + auto perTensorQuantization = std::vector({ + OperationPerTensorQuantizationRestriction::create({0}), + OperationPerTensorQuantizationRestriction::create({0}), + }); + + ngraph::pass::Manager lptManager; + + auto lptPassConfig = lptManager.get_pass_config(); + lptPassConfig->disable(); + lptPassConfig->set_callback([](const_node_ptr& node) -> bool { + if (const auto mulitply = std::dynamic_pointer_cast(node)) { + return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(mulitply); + } + return false; + }); + lptPassConfig->set_callback([](const_node_ptr& node) -> bool { + auto fillStaticChannel = [](const ngraph::PartialShape& shape, size_t& channel) -> bool { + const auto rank = shape.rank(); + if (rank.is_dynamic()) { + return false; + } + if (rank.get_length() < 2ul) { + return false; + } + const auto dimension = shape[1]; + if (dimension.is_dynamic()) { + return false; + } + channel = dimension.get_length(); + return true; + }; + + size_t inputChannels; + if (!fillStaticChannel(node->get_input_partial_shape(0), inputChannels)) { + return true; + } + + size_t outputChannels; + if (!fillStaticChannel(node->get_output_partial_shape(0), outputChannels)) { + return true; + } + + + if ((inputChannels % 4 != 0) || (outputChannels % 16 != 0)) { + return true; + } + + return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node); + }); + lptPassConfig->set_callback([](const_node_ptr& node) -> bool { + return MatMulTransformation::is3DTensorOnActivations(node); + }); + + lptManager.register_pass(supportedPrecisions, perTensorQuantization); + lptManager.run_passes(func); + } + + { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "TransformationsPipeline::apply::run_passes"); + ngraph::pass::Manager manager; + // This ConstantFolding pass is added to fold reshapes added for constant inputs on NMS internal operation which prevents upper-bound calculation + // TODO: check why we have these reshapes + manager.register_pass(); + + manager.register_pass(); + auto pass_config = manager.get_pass_config(); + pass_config->set_callback( + [this](const std::shared_ptr &node) -> bool { + auto sub_graph_op = std::dynamic_pointer_cast(node); + int64_t num_iter = sub_graph_op->get_num_iterations(); + if (num_iter == 1) { + return false; + } + return !config.enable_loop_unrolling; + }); + + manager.run_passes(func); + } +} +} // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_transformations_pipeline.h b/inference-engine/src/cldnn_engine/cldnn_transformations_pipeline.h new file mode 100644 index 00000000000..2f429988170 --- /dev/null +++ b/inference-engine/src/cldnn_engine/cldnn_transformations_pipeline.h @@ -0,0 +1,24 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include + +#include "cldnn_config.h" + +namespace CLDNNPlugin { + +class TransformationsPipeline { +public: + explicit TransformationsPipeline(const Config &conf) : config(conf) {} + void apply(std::shared_ptr func); + +private: + Config config; +}; + +} // namespace CLDNNPlugin