diff --git a/docs/template_plugin/src/template_plugin.cpp b/docs/template_plugin/src/template_plugin.cpp
index 061d2c40d4d..6da16cce891 100644
--- a/docs/template_plugin/src/template_plugin.cpp
+++ b/docs/template_plugin/src/template_plugin.cpp
@@ -67,7 +67,7 @@ std::shared_ptr<ngraph::Function> TransformNetwork(const std::shared_ptr<const n
     passManager.register_pass();
     // Template plugin handles only FP32 networks
-    passManager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+    passManager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
     // Example: register plugin specific transformation
     passManager.register_pass();
     passManager.register_pass();
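The template plugin hunk above shows the pattern this patch applies everywhere:
instead of registering one ConvertPrecision pass per (from, to) pair, callers hand
the pass a precisions_array, a vector of {from, to} pairs defined in
convert_precision.hpp further down. A minimal sketch of the two entry points,
assuming only the names this patch introduces:

    // Sketch, not part of the patch: registering ConvertPrecision after this change.
    ngraph::pass::Manager manager;

    // New list form: one pass object handles several conversions.
    manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array{
        {ngraph::element::i64, ngraph::element::i32},
        {ngraph::element::f16, ngraph::element::f32}});

    // The old (from, to) constructor is kept and simply wraps the pair
    // into a one-element precisions_array.
    manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16,
                                                          ngraph::element::f32);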
diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
index b963d96a16f..0bea81eface 100644
--- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -175,7 +175,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
         manager.register_pass();
         manager.register_pass();
 
-        std::vector<std::pair<ngraph::element::Type, ngraph::element::Type>> convert_precision_list {
+        static const precisions_array convert_precision_list {
             {ngraph::element::i64, ngraph::element::i32},
             {ngraph::element::u64, ngraph::element::i32},
             {ngraph::element::u16, ngraph::element::i32},
@@ -185,9 +185,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
             {ngraph::element::u4, ngraph::element::u8},
         };
 
-        for (auto& precision : convert_precision_list) {
-            manager.register_pass<ngraph::pass::ConvertPrecision>(precision.first, precision.second);
-        }
+        manager.register_pass<ngraph::pass::ConvertPrecision>(convert_precision_list);
 
         auto pass_config = manager.get_pass_config();
 
@@ -366,7 +364,7 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
         // Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers
         // With this key users can work-around such issues
         if (!config.enable_fp16_for_quantized_models) {
-            manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+            manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
         }
         auto lptPrerequisites = manager.register_pass();
         const std::vector<ngraph::element::Type> supportedTypes = { ngraph::element::i8, ngraph::element::u8 };
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
index a597a1233e4..162c3a530ba 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -65,6 +66,7 @@
 #include
 #include
 #include
+#include
 #include
@@ -122,6 +124,28 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
             std::vector<ngraph::element::Type>{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 });
     }
 
+    auto get_convert_precisions = []() {
+        precisions_array array = {
+            {ngraph::element::i64,     ngraph::element::i32},
+            {ngraph::element::u64,     ngraph::element::i32},
+            {ngraph::element::i16,     ngraph::element::i32},
+            {ngraph::element::u16,     ngraph::element::i32},
+            {ngraph::element::u32,     ngraph::element::i32},
+            {ngraph::element::f64,     ngraph::element::f32},
+            {ngraph::element::f16,     ngraph::element::f32},
+            {ngraph::element::boolean, ngraph::element::u8},
+            {ngraph::element::i4,      ngraph::element::i8},
+            {ngraph::element::u4,      ngraph::element::u8}
+        };
+
+        if (!with_cpu_x86_avx512_core())
+            array.push_back({ngraph::element::bf16, ngraph::element::f32});
+
+        return array;
+    };
+
+    static const auto precisions = get_convert_precisions();
+
     // WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
     manager.register_pass();
     manager.register_pass();
@@ -140,27 +164,7 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) {
     manager.register_pass();
     manager.register_pass();
     manager.register_pass();
-
-    std::vector<std::pair<ngraph::element::Type, ngraph::element::Type>> convert_precision_list{
-        {ngraph::element::i64, ngraph::element::i32},
-        {ngraph::element::u64, ngraph::element::i32},
-        {ngraph::element::i16, ngraph::element::i32},
-        {ngraph::element::u16, ngraph::element::i32},
-        {ngraph::element::u32, ngraph::element::i32},
-        {ngraph::element::f64, ngraph::element::f32},
-        {ngraph::element::f16, ngraph::element::f32},
-        {ngraph::element::boolean, ngraph::element::u8},
-        {ngraph::element::i4, ngraph::element::i8},
-        {ngraph::element::u4, ngraph::element::u8},
-    };
-
-    // In case BF16 is not supported by the target CPU we explicitly convert it to FP32
-    if (!with_cpu_x86_avx512_core())
-        convert_precision_list.push_back({ngraph::element::bf16, ngraph::element::f32});
-
-    for (auto &precision : convert_precision_list) {
-        manager.register_pass<ngraph::pass::ConvertPrecision>(precision.first, precision.second);
-    }
+    manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
 
     auto pass_config = manager.get_pass_config();
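Two details in the MKLDNN hunk are worth noting: the conversion table is built by a
lambda so the CPU feature check runs once, and the result is cached in a
function-local static that every later Transformation() call reuses. When
avx512_core is absent the CPU has no native bf16 support, so bf16 is folded into
the f32 conversions. A condensed sketch of the same initialize-once pattern, using
only names visible in the patch:

    // Sketch: build the table once, on first use, including the runtime CPU check.
    static const auto precisions = []() {
        precisions_array array = {{ngraph::element::f16, ngraph::element::f32}};
        if (!with_cpu_x86_avx512_core())    // no native bf16 on this CPU
            array.push_back({ngraph::element::bf16, ngraph::element::f32});
        return array;
    }();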
diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp
index 8496558e614..9f73446f95a 100644
--- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp
+++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp
@@ -42,7 +42,7 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ngraph::Function> &nGraphF
         manager.register_pass();
     }
     manager.register_pass();
-    manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
+    manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::i64, ngraph::element::i32 }});
     manager.run_passes(nGraphFunc);
 }
diff --git a/inference-engine/src/transformations/include/transformations/convert_precision.hpp b/inference-engine/src/transformations/include/transformations/convert_precision.hpp
index 41603f508d6..56ea4dba7b9 100644
--- a/inference-engine/src/transformations/include/transformations/convert_precision.hpp
+++ b/inference-engine/src/transformations/include/transformations/convert_precision.hpp
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
@@ -69,18 +70,24 @@ class NGRAPH_API ConvertPrecision;
  * LessEqual
  */
-using type_to_fuse_map = std::map<ngraph::NodeTypeInfo, std::function<bool(const std::shared_ptr<ngraph::Node>&, ngraph::element::Type, size_t idx)>>;
+using type_to_fuse_map = std::unordered_map<ngraph::NodeTypeInfo, std::function<bool(const std::shared_ptr<ngraph::Node>&, ngraph::element::Type, size_t idx)>>;
+using precisions_array = std::vector<std::pair<ngraph::element::Type, ngraph::element::Type>>;
+
 class ngraph::pass::ConvertPrecision : public ngraph::pass::FunctionPass {
 public:
     NGRAPH_RTTI_DECLARATION;
     ConvertPrecision(ngraph::element::Type_t from, ngraph::element::Type_t to, type_to_fuse_map additional_type_to_fuse_map = {})
         : FunctionPass(),
-        m_from(from),
-        m_to(to),
+        m_precisions(precisions_array {{ from, to }}),
+        m_additional_type_to_fuse_map(additional_type_to_fuse_map) {}
+
+    ConvertPrecision(const precisions_array& precisions, const type_to_fuse_map& additional_type_to_fuse_map = {})
+        : FunctionPass(),
+        m_precisions(precisions),
         m_additional_type_to_fuse_map(additional_type_to_fuse_map) {}
 
     bool run_on_function(std::shared_ptr<ngraph::Function> f) override;
 
 private:
-    element::Type m_from, m_to;
+    precisions_array m_precisions;
     type_to_fuse_map m_additional_type_to_fuse_map;
 };
diff --git a/inference-engine/src/vpu/common/include/vpu/ngraph/utilities.hpp b/inference-engine/src/vpu/common/include/vpu/ngraph/utilities.hpp
index bf37a6ea981..f5290534cbe 100644
--- a/inference-engine/src/vpu/common/include/vpu/ngraph/utilities.hpp
+++ b/inference-engine/src/vpu/common/include/vpu/ngraph/utilities.hpp
@@ -15,10 +15,11 @@
 #include "vpu/ngraph/operations/out_shape_of_reshape.hpp"
 
 #include
 #include
+#include
 
 namespace vpu {
 
-using typeToFuseMap = std::map<ngraph::NodeTypeInfo, std::function<bool(const std::shared_ptr<ngraph::Node>&, ngraph::element::Type, size_t idx)>>;
+using typeToFuseMap = std::unordered_map<ngraph::NodeTypeInfo, std::function<bool(const std::shared_ptr<ngraph::Node>&, ngraph::element::Type, size_t idx)>>;
 
 bool fuseTypeToStaticShapeNonMaxSuppression(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
 bool fuseTypeToStaticShapeNonZero(const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to, size_t idx);
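The header above is the core of the change: type_to_fuse_map moves from std::map to
std::unordered_map (NodeTypeInfo keys need only lookup, not ordering), and
precisions_array becomes the canonical way to describe a batch of conversions. A
hypothetical fuse-map entry, to illustrate the callback contract; the NonZero op
and its type_info member are assumptions for the sketch only:

    // Sketch: a custom entry consulted by ConvertPrecision before it falls back
    // to inserting Convert nodes. Return true when the type was fused in place.
    type_to_fuse_map my_fuse_map = {
        {ngraph::opset4::NonZero::type_info,
         [](const std::shared_ptr<ngraph::Node>& node, ngraph::element::Type to,
            size_t idx) -> bool {
             return false; // pretend we could not fuse; a Convert gets inserted
         }},
    };
    manager.register_pass<ngraph::pass::ConvertPrecision>(
        precisions_array{{ngraph::element::i64, ngraph::element::i32}}, my_fuse_map);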
diff --git a/inference-engine/src/vpu/graph_transformer/src/frontend/frontend.cpp b/inference-engine/src/vpu/graph_transformer/src/frontend/frontend.cpp
index 91e9be21adb..92e1448e127 100644
--- a/inference-engine/src/vpu/graph_transformer/src/frontend/frontend.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/frontend/frontend.cpp
@@ -192,14 +192,17 @@ ie::CNNNetwork FrontEnd::convertNetwork(ie::CNNNetwork& network) {
     manager.register_pass();
     manager.register_pass();
     // ConvertPrecision must be executed before ConvertOpSet1ToLegacy due to this pass works with operations from opsets only
-    manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32, myriadTypeToFuseMap);
-    manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::u64, ngraph::element::i32, myriadTypeToFuseMap);
-    manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::u32, ngraph::element::i32, myriadTypeToFuseMap);
-    manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::i32, myriadTypeToFuseMap);
+    static const precisions_array precisions = {
+        { ngraph::element::i64, ngraph::element::i32 },
+        { ngraph::element::u64, ngraph::element::i32 },
+        { ngraph::element::u32, ngraph::element::i32 },
+        { ngraph::element::boolean, ngraph::element::i32 }
+    };
+    manager.register_pass<ngraph::pass::ConvertPrecision>(precisions, myriadTypeToFuseMap);
     manager.register_pass();
     // ConvertOpSet1ToLegacy can produce constants with I64 precision
-    manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32, myriadTypeToFuseMap);
+    manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::i64, ngraph::element::i32 }}, myriadTypeToFuseMap);
     manager.register_pass();
 
     auto pass_config = manager.get_pass_config();
diff --git a/inference-engine/tests/functional/inference_engine/cnn_network/convert_ngraph_to_cnn_network_tests.cpp b/inference-engine/tests/functional/inference_engine/cnn_network/convert_ngraph_to_cnn_network_tests.cpp
index 5a298ed4f41..18a4f6ace4f 100644
--- a/inference-engine/tests/functional/inference_engine/cnn_network/convert_ngraph_to_cnn_network_tests.cpp
+++ b/inference-engine/tests/functional/inference_engine/cnn_network/convert_ngraph_to_cnn_network_tests.cpp
@@ -174,7 +174,7 @@ TEST(ConvertFunctionToCNNNetworkTests, ConvertTopKWithOneInput) {
     manager.register_pass();
     manager.register_pass();
 
-    std::vector<std::pair<ngraph::element::Type, ngraph::element::Type>> convert_precision_list {
+    static const precisions_array convert_precision_list {
        {ngraph::element::i64, ngraph::element::i32},
        {ngraph::element::u64, ngraph::element::i32},
        {ngraph::element::u16, ngraph::element::i32},
@@ -183,12 +183,9 @@ TEST(ConvertFunctionToCNNNetworkTests, ConvertTopKWithOneInput) {
        {ngraph::element::boolean, ngraph::element::u8},
     };
 
-    for (auto & precision : convert_precision_list) {
-        manager.register_pass<ngraph::pass::ConvertPrecision>(precision.first, precision.second);
-    }
-
+    manager.register_pass<ngraph::pass::ConvertPrecision>(convert_precision_list);
     manager.register_pass();
-    manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
+    manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::i64, ngraph::element::i32 }});
 
     manager.run_passes(f);
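One behavioural subtlety of the list form shows up in the tests that follow. The
new run_on_function (see convert_precision.cpp at the end of this patch) collects
the set of precisions used in the function once, up front, then walks the pair
list and skips pairs whose source type is absent from that snapshot. A chain such
as boolean -> i32 -> i64 therefore still needs two sequential passes when i32 only
appears as the result of the first conversion, while independent pairs can share
one pass and one traversal:

    // Sketch: chained conversions need sequential passes...
    manager.register_pass<ngraph::pass::ConvertPrecision>(
        precisions_array{{ngraph::element::boolean, ngraph::element::i32}});
    manager.register_pass<ngraph::pass::ConvertPrecision>(
        precisions_array{{ngraph::element::i32, ngraph::element::i64}});
    // ...whereas independent pairs can be batched into a single pass.
    manager.register_pass<ngraph::pass::ConvertPrecision>(
        precisions_array{{ngraph::element::i64, ngraph::element::i32},
                         {ngraph::element::f16, ngraph::element::f32}});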
diff --git a/inference-engine/tests/functional/inference_engine/transformations/convert_precision.cpp b/inference-engine/tests/functional/inference_engine/transformations/convert_precision.cpp
index ea9fda3016b..1bbe676543c 100644
--- a/inference-engine/tests/functional/inference_engine/transformations/convert_precision.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/convert_precision.cpp
@@ -54,8 +54,13 @@ TEST(TransformationTests, ConvertPrecision_NMS3) {
         f = std::make_shared(NodeVector{nms}, ParameterVector{boxes, scores});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::i64, ngraph::element::i32 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
     }
@@ -77,8 +82,13 @@ TEST(TransformationTests, ConvertPrecision_NMS4) {
         f = std::make_shared(NodeVector{nms}, ParameterVector{boxes, scores});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::i64, ngraph::element::i32 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
     }
@@ -95,8 +105,13 @@ TEST(TransformationTests, ConvertPrecision_ShapeOf) {
         f = std::make_shared(NodeVector{shape_of}, ParameterVector{input});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::i64, ngraph::element::i32 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
     }
@@ -113,8 +128,13 @@ TEST(TransformationTests, ConvertPrecision_Convert) {
         f = std::make_shared(NodeVector{convert}, ParameterVector{input});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::i64, ngraph::element::i32 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
     }
@@ -132,7 +152,7 @@ TEST(TransformationTests, ConvertPrecision_ConvertElimination) {
         f = std::make_shared(NodeVector{convert}, ParameterVector{input});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
         manager.run_passes(f);
         ASSERT_FALSE(has_type(f));
     }
@@ -158,8 +178,13 @@ TEST(TransformationTests, ConvertPrecision_TopK) {
         f = std::make_shared(OutputVector{topk->output(0), topk->output(1)}, ParameterVector{input});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::i64, ngraph::element::i32 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
     }
@@ -176,8 +201,13 @@ TEST(TransformationTests, ConvertPrecision_NonZero) {
         f = std::make_shared(OutputVector{non_zero}, ParameterVector{input});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::i64, ngraph::element::i32 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
     }
@@ -195,8 +225,13 @@ TEST(TransformationTests, ConvertPrecision_Bucketize) {
         f = std::make_shared(OutputVector{b}, ParameterVector{input});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::i64, ngraph::element::i32 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
     }
@@ -223,8 +258,13 @@ TEST(TransformationTests, ConvertPrecision_Roundings) {
         f = std::make_shared(OutputVector{ss}, ParameterVector{input});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::i64, ngraph::element::i32 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
 
         auto casted_end = std::dynamic_pointer_cast<opset4::Constant>(ss->input_value(2).get_node_shared_ptr());
@@ -279,8 +319,13 @@ TEST(TransformationTests, ConvertPrecision_TIBody) {
                                             ngraph::ParameterVector{X, Y});
 
         ngraph::pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i64, ngraph::element::i32);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::i64, ngraph::element::i32 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
 
         ASSERT_FALSE(has_type(f));
@@ -300,8 +345,13 @@ TEST(TransformationTests, ConvertPrecision_Equal) {
         f = std::make_shared(OutputVector{node}, ParameterVector{input1, input2});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::u8);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::boolean, ngraph::element::u8 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
     }
@@ -320,8 +370,13 @@ TEST(TransformationTests, ConvertPrecision_NotEqual) {
         f = std::make_shared(OutputVector{node}, ParameterVector{input1, input2});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::u8);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::boolean, ngraph::element::u8 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
     }
@@ -340,8 +395,13 @@ TEST(TransformationTests, ConvertPrecision_Greater) {
         f = std::make_shared(OutputVector{node}, ParameterVector{input1, input2});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::u8);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::boolean, ngraph::element::u8 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
     }
@@ -360,8 +420,13 @@ TEST(TransformationTests, ConvertPrecision_GreaterEqual) {
         f = std::make_shared(OutputVector{node}, ParameterVector{input1, input2});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::u8);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::boolean, ngraph::element::u8 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
     }
@@ -380,8 +445,13 @@ TEST(TransformationTests, ConvertPrecision_Less) {
         f = std::make_shared(OutputVector{node}, ParameterVector{input1, input2});
 
        pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::u8);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::boolean, ngraph::element::u8 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
     }
@@ -400,8 +470,13 @@ TEST(TransformationTests, ConvertPrecision_LessEqual) {
         f = std::make_shared(OutputVector{node}, ParameterVector{input1, input2});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::u8);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+
+        static const precisions_array precisions = {
+            { ngraph::element::boolean, ngraph::element::u8 },
+            { ngraph::element::f16, ngraph::element::f32 }
+        };
+
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions);
         manager.run_passes(f);
     }
@@ -420,7 +495,7 @@ TEST(TransformationTests, ConvertPrecision_LogicalAnd) {
         f = std::make_shared(OutputVector{node}, ParameterVector{input1, input2});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::u8);
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::boolean, ngraph::element::u8 }});
         manager.run_passes(f);
     }
@@ -438,7 +513,7 @@ TEST(TransformationTests, ConvertPrecision_LogicalOr) {
         f = std::make_shared(OutputVector{node}, ParameterVector{input1, input2});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::u8);
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::boolean, ngraph::element::u8 }});
         manager.run_passes(f);
     }
@@ -456,7 +531,7 @@ TEST(TransformationTests, ConvertPrecision_LogicalXor) {
         f = std::make_shared(OutputVector{node}, ParameterVector{input1, input2});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::u8);
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::boolean, ngraph::element::u8 }});
         manager.run_passes(f);
     }
@@ -473,7 +548,7 @@ TEST(TransformationTests, ConvertPrecision_LogicalNot) {
         f = std::make_shared(OutputVector{node}, ParameterVector{input1});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::u8);
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::boolean, ngraph::element::u8 }});
         manager.run_passes(f);
     }
@@ -491,7 +566,7 @@ TEST(TransformationTests, ConvertPrecision_Select) {
         f = std::make_shared(OutputVector{select}, ParameterVector{input1});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::u8);
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::boolean, ngraph::element::u8 }});
         manager.run_passes(f);
     }
@@ -509,8 +584,8 @@ TEST(TransformationTests, ConvertPrecision_TypeRelaxedWithSelect) {
         f = std::make_shared(OutputVector{select}, ParameterVector{input1});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::i32);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i32, ngraph::element::i64);
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::boolean, ngraph::element::i32 }});
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::i32, ngraph::element::i64 }});
         manager.run_passes(f);
     }
@@ -529,8 +604,8 @@ TEST(TransformationTests, ConvertPrecision_TypeRelaxed) {
         f = std::make_shared(OutputVector{type_relaxed}, ParameterVector{input1});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::boolean, ngraph::element::i32);
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::i32, ngraph::element::i64);
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::boolean, ngraph::element::i32 }});
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::i32, ngraph::element::i64 }});
         manager.run_passes(f);
 
         ASSERT_FALSE(has_type(f));
@@ -555,7 +630,7 @@ TEST(TransformationTests, ConvertPrecision_Variables) {
         f = std::make_shared(NodeVector{mul}, ParameterVector{inp});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
         manager.run_passes(f);
     }
@@ -573,7 +648,7 @@ void constant_convert_test(element::Type type_from, element::Type type_to, const
         f = std::make_shared(NodeVector{c}, ParameterVector{});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(type_from, type_to);
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ type_from, type_to }});
         manager.run_passes(f);
     }
     auto ops = f->get_ordered_ops();
@@ -603,7 +678,7 @@ void constant_convert_test(element::Type_t type_from, element::Type_t type_to, F
         f = std::make_shared(NodeVector{c}, ParameterVector{});
 
         pass::Manager manager;
-        manager.register_pass<ngraph::pass::ConvertPrecision>(type_from, type_to);
+        manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ type_from, type_to }});
         manager.run_passes(f);
     }
     auto ops = f->get_ordered_ops();
diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/convert.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/convert.hpp
index 8591f88c794..ecaf92ffd61 100644
--- a/ngraph/core/reference/include/ngraph/runtime/reference/convert.hpp
+++ b/ngraph/core/reference/include/ngraph/runtime/reference/convert.hpp
@@ -28,6 +28,10 @@ namespace ngraph
             void convert<uint8_t, float16>(const uint8_t* arg, float16* out, size_t count);
             template <>
             void convert<float16, float>(const float16* arg, float* out, size_t count);
+            template <>
+            void convert<float, int8_t>(const float* arg, int8_t* out, size_t count);
+            template <>
+            void convert<float16, int8_t>(const float16* arg, int8_t* out, size_t count);
 
             // overload to handle ngraph::boolean (it is stored as char)
             template <typename TI, typename TO>
@@ -39,7 +43,6 @@ namespace ngraph
                     out[i] = static_cast<TO>(static_cast<char>(arg[i]));
                 }
             }
-
         } // namespace reference
     } // namespace runtime
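The two declarations added to convert.hpp above get their definitions in
convert.cpp below. Calling them is uniform with the other reference conversions,
and the template arguments are deduced from the pointers. A small usage sketch
(the include path is my assumption):

    #include <cstdint>
    #include <vector>
    #include "ngraph/runtime/reference/convert.hpp"

    void example()
    {
        std::vector<float> src = {1.25f, -2.5f, 100.0f};
        std::vector<int8_t> dst(src.size());
        // Dispatches to the JIT kernel when one was generated for this CPU,
        // otherwise falls back to a scalar static_cast loop (see convert_impl below).
        ngraph::runtime::reference::convert(src.data(), dst.data(), src.size());
    }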
diff --git a/ngraph/core/reference/src/runtime/reference/convert.cpp b/ngraph/core/reference/src/runtime/reference/convert.cpp
index 809c9ca3dc5..78625f38c9e 100644
--- a/ngraph/core/reference/src/runtime/reference/convert.cpp
+++ b/ngraph/core/reference/src/runtime/reference/convert.cpp
@@ -16,6 +16,11 @@ namespace ngraph
            template <typename src_t, typename dst_t>
            void jit_convert_vec(jit::Generator&, const Xbyak::RegExp&, const Xbyak::RegExp&);
 
+           template <typename src_t, typename dst_t>
+           void jit_convert_vec_prepare(jit::Generator&)
+           {
+           }
+
            template <>
            void jit_convert_vec<uint8_t, float16>(jit::Generator& gen,
                                                   const Xbyak::RegExp& src,
@@ -47,6 +52,61 @@ namespace ngraph
                gen.vmovups(gen.yword[dst], f32vec);
            }
 
+           template <>
+           void jit_convert_vec_prepare<float, int8_t>(jit::Generator& gen)
+           {
+               auto order = gen.ymm1;
+               auto addr = gen.r15;
+
+               static const int8_t offsets[32] = {0,  4,  8,  12, -1, -1, -1, -1, -1, -1, -1,
+                                                  -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  4,
+                                                  8,  12, -1, -1, -1, -1, -1, -1, -1, -1};
+
+               gen.mov(addr, (size_t)offsets);      // get offsets[] address
+               gen.vmovdqu(order, gen.yword[addr]); // save offsets[] to ymm register
+           }
+
+           template <>
+           void jit_convert_vec<float, int8_t>(jit::Generator& gen,
+                                               const Xbyak::RegExp& src,
+                                               const Xbyak::RegExp& dst)
+           {
+               auto order = gen.ymm1;
+               auto p32vec = gen.ymm2;
+               auto p32vec_lo = gen.xmm2;
+               auto p32vec_hi = gen.xmm3;
+
+               gen.vcvtps2dq(p32vec, gen.yword[src]);     // convert 8 floats to 8 ints
+               gen.vpshufb(p32vec, p32vec, order);        // Shuffle the bytes according to the order
+               gen.vextracti128(p32vec_hi, p32vec, 1);    // extract upper part of p32vec
+               gen.vpor(p32vec_lo, p32vec_lo, p32vec_hi); // p32vec_lo = p32vec_lo | p32vec_hi
+               gen.movq(gen.qword[dst], p32vec_lo);       // save the result
+           }
+
+           template <>
+           void jit_convert_vec_prepare<float16, int8_t>(jit::Generator& gen)
+           {
+               jit_convert_vec_prepare<float, int8_t>(gen);
+           }
+
+           template <>
+           void jit_convert_vec<float16, int8_t>(jit::Generator& gen,
+                                                 const Xbyak::RegExp& src,
+                                                 const Xbyak::RegExp& dst)
+           {
+               auto order = gen.ymm1;
+               auto p32vec = gen.ymm2;
+               auto p32vec_lo = gen.xmm2;
+               auto p32vec_hi = gen.xmm3;
+
+               gen.vcvtph2ps(p32vec, gen.xword[src]);     // convert 8 fp16's to 8 floats
+               gen.vcvtps2dq(p32vec, p32vec);             // convert 8 floats to 8 ints
+               gen.vpshufb(p32vec, p32vec, order);        // Shuffle the bytes according to the order
+               gen.vextracti128(p32vec_hi, p32vec, 1);    // extract upper part of p32vec
+               gen.vpor(p32vec_lo, p32vec_lo, p32vec_hi); // p32vec_lo = p32vec_lo | p32vec_hi
+               gen.movq(gen.qword[dst], p32vec_lo);       // save the result
+           }
+
            class jit_convert_array : public jit::Generator
            {
                typedef struct context
@@ -61,6 +121,7 @@ namespace ngraph
                    void (*convert_vec)(jit::Generator&,
                                        const Xbyak::RegExp&,
                                        const Xbyak::RegExp&);
+                   void (*prepare)(jit::Generator&);
                } context_t;
 
                jit_convert_array(const context_t& ctx)
@@ -77,6 +138,8 @@ namespace ngraph
                    preamble();
 
+                   ctx.prepare(*this);
+
                    mov(reg_src, ptr[param + offsetof(args_t, src)]);
                    mov(reg_dst, ptr[param + offsetof(args_t, out)]);
                    mov(reg_sz, ptr[param + offsetof(args_t, count)]);
@@ -137,7 +200,8 @@ namespace ngraph
                    static const jit_convert_array::context_t context{
                        {sizeof(src_t), &jit::Generator::copy<src_t>},
                        {sizeof(dst_t), &jit::Generator::copy<dst_t>},
-                       jit_convert_vec<src_t, dst_t>};
+                       jit_convert_vec<src_t, dst_t>,
+                       jit_convert_vec_prepare<src_t, dst_t>};
 
                    static jit_convert_array generator(context);
 
@@ -146,44 +210,49 @@ namespace ngraph
                    return nullptr;
                }
            };
+
+           template <typename TI, typename TO>
+           void convert_impl(const TI* arg, TO* out, size_t count)
+           {
+               auto converter = jit_convert_array::get<TI, TO>();
+
+               if (converter)
+               {
+                   jit_convert_array::args_t args = {arg, out, count};
+                   converter(&args);
+               }
+               else
+               {
+                   for (size_t i = 0; i < count; ++i)
+                   {
+                       out[i] = static_cast<TO>(arg[i]);
+                   }
+               }
+           }
        } // namespace
 
        template <>
        void convert<uint8_t, float16>(const uint8_t* arg, float16* out, size_t count)
        {
-           auto converter = jit_convert_array::get<uint8_t, float16>();
-
-           if (converter)
-           {
-               jit_convert_array::args_t args = {arg, out, count};
-               converter(&args);
-           }
-           else
-           {
-               for (size_t i = 0; i < count; ++i)
-               {
-                   out[i] = static_cast<float16>(arg[i]);
-               }
-           }
+           convert_impl(arg, out, count);
        }
 
        template <>
        void convert<float16, float>(const float16* arg, float* out, size_t count)
        {
-           auto converter = jit_convert_array::get<float16, float>();
+           convert_impl(arg, out, count);
+       }
 
-           if (converter)
-           {
-               jit_convert_array::args_t args = {arg, out, count};
-               converter(&args);
-           }
-           else
-           {
-               for (size_t i = 0; i < count; ++i)
-               {
-                   out[i] = static_cast<float>(arg[i]);
-               }
-           }
+       template <>
+       void convert<float, int8_t>(const float* arg, int8_t* out, size_t count)
+       {
+           convert_impl(arg, out, count);
+       }
+
+       template <>
+       void convert<float16, int8_t>(const float16* arg, int8_t* out, size_t count)
+       {
+           convert_impl(arg, out, count);
        }
    } // namespace reference
 } // namespace runtime
diff --git a/ngraph/core/reference/src/runtime/reference/jit_generator.cpp b/ngraph/core/reference/src/runtime/reference/jit_generator.cpp
index 783fa6a715d..0f4d5a26f53 100644
--- a/ngraph/core/reference/src/runtime/reference/jit_generator.cpp
+++ b/ngraph/core/reference/src/runtime/reference/jit_generator.cpp
@@ -158,6 +158,26 @@ namespace ngraph
            pop(rsi);
        }
 
+       template <>
+       void Generator::copy<int8_t>(const Xbyak::Reg64& dst,
+                                    const Xbyak::Reg64& src,
+                                    const Xbyak::Reg64& size)
+       {
+           push(rsi);
+           push(r15);
+
+           xor_(rsi, rsi);
+
+           foreach (rsi, 1, size, [&, this](const Xbyak::Reg64& idx) {
+               mov(r15b, byte[src + idx * sizeof(int8_t)]);
+               mov(byte[dst + idx * sizeof(int8_t)], r15b);
+           })
+           ;
+
+           pop(r15);
+           pop(rsi);
+       }
+
        template <>
        void Generator::copy(const Xbyak::Reg64& dst,
                             const Xbyak::Reg64& src,
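What the vpshufb/vextracti128/vpor sequence above computes, restated in scalar
C++ as a sketch: vcvtps2dq leaves eight rounded int32 lanes in a ymm register;
the shuffle table keeps byte 0 of every dword (offsets 0, 4, 8, 12 in each
128-bit half, with -1 producing zero), and OR-ing the two halves packs the eight
low bytes into one 64-bit value that movq stores. Truncation to int8_t is thus
"take the low byte of each rounded int32":

    #include <cstdint>

    // Scalar equivalent of the byte-packing step (sketch, for illustration only).
    inline uint64_t pack_low_bytes(const int32_t (&v)[8])
    {
        uint64_t out = 0;
        for (int i = 0; i < 8; ++i)
            out |= (uint64_t)(uint8_t)v[i] << (8 * i); // low byte of each dword
        return out;
    }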
diff --git a/ngraph/core/src/pass/convert_fp32_to_fp16.cpp b/ngraph/core/src/pass/convert_fp32_to_fp16.cpp
index 077b36593da..c841852a2f3 100644
--- a/ngraph/core/src/pass/convert_fp32_to_fp16.cpp
+++ b/ngraph/core/src/pass/convert_fp32_to_fp16.cpp
@@ -15,7 +15,8 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertFP32ToFP16, "ConvertFP32ToFP16", 0);
 bool ngraph::pass::ConvertFP32ToFP16::run_on_function(std::shared_ptr<ngraph::Function> f)
 {
     ngraph::pass::Manager m(get_pass_config());
-    m.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f32, ngraph::element::f16);
+    m.register_pass<ngraph::pass::ConvertPrecision>(
+        precisions_array{{ngraph::element::f32, ngraph::element::f16}});
     m.run_passes(f);
     return false;
 }
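ConvertFP32ToFP16 remains a thin wrapper around ConvertPrecision (it also forwards
its pass config to the nested manager), so after this patch the two registrations
below should be effectively equivalent; a sketch:

    // Either of these converts an FP32 function to FP16.
    m.register_pass<ngraph::pass::ConvertFP32ToFP16>();
    m.register_pass<ngraph::pass::ConvertPrecision>(
        precisions_array{{ngraph::element::f32, ngraph::element::f16}});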
diff --git a/ngraph/core/src/pass/convert_precision.cpp b/ngraph/core/src/pass/convert_precision.cpp
index 2d5a0ab5eae..8fdef4c9356 100644
--- a/ngraph/core/src/pass/convert_precision.cpp
+++ b/ngraph/core/src/pass/convert_precision.cpp
@@ -125,6 +125,194 @@ bool fuse_type_to_reduce_logical(const std::shared_ptr<ngraph::Node>& node,
     return false;
 }
 
+namespace
+{
+    void validate_nodes_and_infer_types(const std::vector<std::shared_ptr<Node>>& ops)
+    {
+        for (auto& node : ops)
+        {
+            node->revalidate_and_infer_types();
+        }
+    }
+
+    bool convert_precision(pass::PassBase& pass,
+                           const std::shared_ptr<Function>& f,
+                           const type_to_fuse_map& type_to_fuse,
+                           const type_to_fuse_map& type_to_extend,
+                           element::Type from,
+                           element::Type to)
+    {
+        // As Constant operations can be shared between multiple nGraph Functions so before
+        // changing precision we need to understand which Constant consumers belongs
+        // to the current nGraph Function
+        std::unordered_map<Node*, std::vector<Input<Node>>> const_to_internal_output;
+
+        auto register_constants =
+            [&const_to_internal_output](const std::vector<std::shared_ptr<Node>>& ops) {
+                for (auto& node : ops)
+                {
+                    for (auto& input : node->inputs())
+                    {
+                        if (auto const_node = std::dynamic_pointer_cast<opset4::Constant>(
+                                input.get_source_output().get_node_shared_ptr()))
+                        {
+                            const_to_internal_output[const_node.get()].emplace_back(input);
+                        }
+                    }
+                }
+            };
+
+        auto convert_node_output_precision = [&](const std::shared_ptr<Node>& node) {
+            for (auto output : node->outputs())
+            {
+                if (output.get_element_type() == from)
+                {
+                    // Handle case with Constants as they can have consumers from other nGraph
+                    // Function object
+                    auto it = const_to_internal_output.find(node.get());
+                    if (it != const_to_internal_output.end())
+                    {
+                        return fuse_type_to_constant(node, to, it->second);
+                    }
+
+                    // Check that node type exists in map and we can fuse type into node
+                    auto t2f_it = type_to_fuse.find(node->get_type_info());
+                    if (t2f_it != type_to_fuse.end() &&
+                        t2f_it->second(node, to, output.get_index()))
+                    {
+                        // We need to break if original node was replaced
+                        return true;
+                    }
+                }
+            }
+            return false;
+        };
+
+        auto convert_node_input_precision = [&](const std::shared_ptr<Node>& node) {
+            for (auto input : node->inputs())
+            {
+                if (input.get_element_type() == from)
+                {
+                    // For some operations we need to extend their input types to support new type
+                    auto it = type_to_extend.find(node->get_type_info());
+                    if (it != type_to_extend.end() && it->second(node, to, input.get_index()))
+                    {
+                        return true;
+                    }
+                }
+            }
+            return false;
+        };
+
+        std::function<bool(const std::shared_ptr<Function>&, bool)> convert_function_precision =
+            [&](const std::shared_ptr<Function>& f, bool is_subgraph) {
+                bool is_changed = false;
+
+                auto ops = f->get_ordered_ops();
+
+                // Iterate over all nodes in topological order and then iterate over node outputs.
+                // If output type mismatch given type we try to fuse type into this operation
+                // otherwise we insert Convert operation.
+                for (auto& node : ops)
+                {
+                    pass.transformation_callback(node);
+                    // Recursively apply transformation for sub-graph based operations
+                    if (auto sub_graph_node = std::dynamic_pointer_cast<op::util::SubGraphOp>(node))
+                    {
+                        if (auto sub_graph = sub_graph_node->get_function())
+                        {
+                            is_changed |= convert_function_precision(sub_graph, true);
+                        }
+                    }
+                    is_changed |= convert_node_input_precision(node);
+                }
+
+                if (is_changed)
+                    ops = f->get_ordered_ops();
+
+                // Register internal constants only after fixing input type that could lead to nodes
+                // replacement
+                register_constants(ops);
+
+                bool is_output_precision_changed = false;
+
+                for (auto& node : ops)
+                {
+                    is_output_precision_changed |= convert_node_output_precision(node);
+                }
+
+                if (is_output_precision_changed)
+                {
+                    ops = f->get_ordered_ops();
+                    is_changed |= is_output_precision_changed;
+                }
+
+                if (!is_subgraph)
+                {
+                    if (is_changed)
+                        validate_nodes_and_infer_types(ops);
+
+                    // TODO: we need to split NopElimination pass to separate MatcherPasses and call
+                    // Convert elimination here
+                    for (auto& node : ops)
+                    {
+                        if (auto convert = std::dynamic_pointer_cast<opset4::Convert>(node))
+                        {
+                            // WA for topK, dont remove fake convert
+                            if (convert->input(0).get_element_type() ==
+                                    convert->get_convert_element_type() &&
+                                convert->input_value(0).get_node_shared_ptr()->get_output_size() ==
+                                    1)
+                            {
+                                replace_output_update_name(convert->output(0),
+                                                           convert->input_value(0));
+                            }
+                        }
+                    }
+                }
+
+                return is_changed;
+            };
+
+        return convert_function_precision(f, false);
+    }
+
+    struct EnumClassHash
+    {
+        template <class T>
+        std::size_t operator()(T t) const
+        {
+            return static_cast<std::size_t>(t);
+        }
+    };
+
+    using precisions_set_t = std::unordered_set<element::Type_t, EnumClassHash>;
+
+    precisions_set_t find_all_used_precisions(const std::shared_ptr<Function>& fn)
+    {
+        precisions_set_t used_precisions;
+
+        ngraph::traverse_nodes(fn, [&](std::shared_ptr<Node> node) {
+            for (auto output : node->outputs())
+            {
+                used_precisions.emplace(output.get_element_type());
+            }
+            if (auto sub_graph_node = std::dynamic_pointer_cast<op::util::SubGraphOp>(node))
+            {
+                if (auto sub_graph = sub_graph_node->get_function())
+                {
+                    auto sub_graph_precisions = find_all_used_precisions(sub_graph);
+                    used_precisions.insert(sub_graph_precisions.begin(),
+                                           sub_graph_precisions.end());
+                }
+            }
+        });
+
+        return used_precisions;
+    }
+
+} // namespace
+
 NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertPrecision, "ConvertPrecision", 0);
 
 bool ngraph::pass::ConvertPrecision::run_on_function(std::shared_ptr<ngraph::Function> f)
@@ -161,117 +349,25 @@ bool ngraph::pass::ConvertPrecision::run_on_function(std::shared_ptr
-    std::unordered_map<std::shared_ptr<Node>, std::vector<Input<Node>>>
-        const_to_internal_output;
+    bool is_changed = false;
 
-    std::function<void(const std::shared_ptr<Function>&)> register_constants =
-        [&const_to_internal_output](const std::shared_ptr<Function>& f) {
-            for (auto& node : f->get_ordered_ops())
-            {
-                for (auto& input : node->inputs())
-                {
-                    if (auto const_node = std::dynamic_pointer_cast<opset4::Constant>(
-                            input.get_source_output().get_node_shared_ptr()))
-                    {
-                        const_to_internal_output[const_node].emplace_back(input);
-                    }
-                }
-            }
-        };
+    auto const used_precisions = find_all_used_precisions(f);
 
-    auto convert_node_output_precision = [this, &const_to_internal_output, &type_to_fuse](
-                                             const std::shared_ptr<Node>& node) {
-        for (auto output : node->outputs())
-        {
-            if (output.get_element_type() == m_from)
-            {
-                // Handle case with Constants as they can have consumers from other nGraph Function
-                // object
-                if (ngraph::op::is_constant(node) && const_to_internal_output.count(node))
-                {
-                    fuse_type_to_constant(node, m_to, const_to_internal_output.at(node));
-                    break;
-                }
-
-                // Check that node type exists in map and we can fuse type into node
-                if (type_to_fuse.count(node->get_type_info()) &&
-                    type_to_fuse.at(node->get_type_info())(node, m_to, output.get_index()))
-                {
-                    // We need to break if original node was replaced
-                    break;
-                }
-            }
-        }
-    };
-
-    auto convert_node_input_precision = [this](const std::shared_ptr<Node>& node) {
-        for (auto input : node->inputs())
-        {
-            if (input.get_element_type() == m_from)
-            {
-                // For some operations we need to extend their input types to support new type
-                if (type_to_extend.count(node->get_type_info()) &&
-                    type_to_extend.at(node->get_type_info())(node, m_to, input.get_index()))
-                {
-                    break;
-                }
-            }
-        }
-    };
-
-    std::function<void(const std::shared_ptr<Function>&)> convert_function_precision =
-        [this,
-         &register_constants,
-         &convert_node_output_precision,
-         &convert_node_input_precision,
-         &convert_function_precision](const std::shared_ptr<Function>& f) {
-            // Iterate over all nodes in topological order and then iterate over node outputs.
-            // If output type mismatch given type we try to fuse type into this operation
-            // otherwise we insert Convert operation.
-            for (auto& node : f->get_ordered_ops())
-            {
-                transformation_callback(node);
-                // Recursively apply transformation for sub-graph based operations
-                if (auto sub_graph_node = std::dynamic_pointer_cast<op::util::SubGraphOp>(node))
-                {
-                    if (auto sub_graph = sub_graph_node->get_function())
-                    {
-                        convert_function_precision(sub_graph);
-                    }
-                }
-                convert_node_input_precision(node);
-            }
-            // Register internal constants only after fixing input type that could lead to nodes
-            // replacement
-            register_constants(f);
-
-            for (auto& node : f->get_ordered_ops())
-            {
-                convert_node_output_precision(node);
-            }
-        };
-
-    convert_function_precision(f);
-    f->validate_nodes_and_infer_types();
-
-    // TODO: we need to split NopElimination pass to separate MatcherPasses and call Convert
-    // elimination here
-    for (auto& node : f->get_ordered_ops())
+    for (auto const& p : m_precisions)
     {
-        if (auto convert = std::dynamic_pointer_cast<opset4::Convert>(node))
-        {
-            // WA for topK, dont remove fake convert
-            if (convert->input(0).get_element_type() == convert->get_convert_element_type() &&
-                convert->input_value(0).get_node_shared_ptr()->get_output_size() == 1)
-            {
-                replace_output_update_name(convert->output(0), convert->input_value(0));
-            }
-        }
+        if (used_precisions.count(p.first))
+            is_changed =
+                is_changed |
+                convert_precision(*this, f, type_to_fuse, type_to_extend, p.first, p.second);
     }
-    return true;
+
+    (void)is_changed; // ignored
+
+    // Returning value is false because pass::Manager always apply Validation pass
+    // if function was changed. This helps to avoid excess Validations after applying
+    // this pass. In future when we will return more meaningful status code it will be
+    // replaced with real status reported by manager.run_passes() method call.
+    return false;
 }
 
 bool fuse_type_to_shapeof(const std::shared_ptr<ngraph::Node>& node, element::Type to, size_t idx)
@@ -532,15 +628,6 @@ namespace
         return new_constant;
     }
 
-    struct EnumClassHash
-    {
-        template <class T>
-        std::size_t operator()(T t) const
-        {
-            return static_cast<std::size_t>(t);
-        }
-    };
-
     /**
      * @brief Method converts low precision integer types
      * The method uses the next logic for conversion:
@@ -631,8 +718,8 @@ namespace
                               element::Type to)
     {
         // Supported integer precisions
-        static const std::unordered_set<element::Type_t, EnumClassHash>
-            supported_integer_precisions = {element::i4, element::u4, element::u1};
+        static const precisions_set_t supported_integer_precisions = {
+            element::i4, element::u4, element::u1};
         // Get source element type and source data
         auto src_type = constant->get_element_type();
         const auto* src_data = reinterpret_cast<const uint8_t*>(constant->get_data_ptr());
diff --git a/ngraph/test/backend/convert.in.cpp b/ngraph/test/backend/convert.in.cpp
index 46159d0cbc2..730fc13cd50 100644
--- a/ngraph/test/backend/convert.in.cpp
+++ b/ngraph/test/backend/convert.in.cpp
@@ -439,3 +439,22 @@ NGRAPH_TEST(${BACKEND_NAME}, convert_u8_to_u64)
 
     ConvertTest(input, input_shape, input_type, expected_output, expected_output_type);
 }
+
+NGRAPH_TEST(${BACKEND_NAME}, convert_float32_int8)
+{
+    std::vector<float> f32vec = {-100.5, -20.5, -15, -10.5, -0.5, 0, 0.5, 10.5, 15, 20.5, 100.5};
+    std::vector<int8_t> result(f32vec.size());
+    std::vector<int8_t> i8vec(std::begin(f32vec), std::end(f32vec));
+    runtime::reference::convert(f32vec.data(), result.data(), f32vec.size());
+    EXPECT_EQ(result, i8vec);
+}
+
+NGRAPH_TEST(${BACKEND_NAME}, convert_fp16_int8)
+{
+    std::vector<float> f32vec = {-100.5, -20.5, -15, -10.5, -0.5, 0, 0.5, 10.5, 15, 20.5, 100.5};
+    std::vector<float16> f16vec(std::begin(f32vec), std::end(f32vec));
+    std::vector<int8_t> i8vec(std::begin(f16vec), std::end(f16vec));
+    std::vector<int8_t> result(i8vec.size());
+    runtime::reference::convert(f16vec.data(), result.data(), f16vec.size());
+    EXPECT_EQ(result, i8vec);
+}