[CPU][ARM] Set FP16 inference precision by default for non-convolution networks on ARM (#19069)

Author: Aleksandr Voron
Date:   2023-08-14 20:22:39 +02:00 (committed by GitHub)
parent e49b2c05f1
commit e48b2dfc34
7 changed files with 67 additions and 22 deletions
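
For context, the sketch below shows roughly how the new default is visible through the public OpenVINO API. It is an illustrative example, not part of the commit: "model.xml" is a placeholder path, and the f16 default is only reported on ARM builds compiled with OV_CPU_ARM_ENABLE_FP16; x86 builds keep reporting bf16 or f32 depending on hardware support.

#include <openvino/openvino.hpp>
#include <iostream>

int main() {
    ov::Core core;

    // Plugin-level default; expected to be f16 on ARM fp16 builds after this change,
    // bf16 or f32 on x86 depending on hardware support.
    std::cout << "CPU default inference precision: "
              << core.get_property("CPU", ov::hint::inference_precision) << std::endl;

    // "model.xml" is a placeholder; any IR/ONNX model can be used here.
    auto model = core.read_model("model.xml");

    // An explicit hint always overrides the default chosen by the plugin.
    auto compiled = core.compile_model(model, "CPU",
                                       ov::hint::inference_precision(ov::element::f32));
    std::cout << "Compiled with: "
              << compiled.get_property(ov::hint::inference_precision) << std::endl;
    return 0;
}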

View File

@@ -22,7 +22,7 @@ endif()
 if(ARM)
     set(OV_CPU_ARM_TARGET_ARCH_DEFAULT armv7a)
-else()
+elseif(AARCH64)
     if(APPLE)
         # Apple M1 / M2 is assumed
         set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8.2-a)

View File

@@ -69,7 +69,7 @@ void Config::applyDebugCapsProperties() {
 }
 #endif
-void Config::readProperties(const std::map<std::string, std::string> &prop) {
+void Config::readProperties(const std::map<std::string, std::string> &prop, ModelType modelType) {
     const auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys();
     const auto hintsConfigKeys = perfHintsConfig.SupportedKeys();
     for (const auto& kvp : prop) {
@@ -252,6 +252,13 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
             } else {
                 inferencePrecision = ov::element::f32;
             }
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+            //fp16 precision is used as default precision on ARM for non-convolution networks
+            //fp16 ACL convolution is slower than fp32
+            if (modelType != ModelType::CNN) {
+                inferencePrecision = ov::element::f16;
+            }
+#endif
         }
     if (!prop.empty())
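
Condensed into a standalone form, the precision selection added above behaves roughly as in the sketch below. All names here (resolve_default_precision, the Precision enum, the hard-coded key string) are illustrative stand-ins rather than the plugin's actual types; the point is that an explicit INFERENCE_PRECISION_HINT still wins, and the f16 default only applies to non-convolution models on ARM fp16 builds.

#include <cassert>
#include <map>
#include <string>

// Illustrative stand-ins for the plugin-internal types touched by this change.
enum class ModelType { CNN, Unknown };
enum class Precision { f32, f16 };

// Hypothetical helper mirroring the branch added to Config::readProperties():
// a user-supplied hint always wins; otherwise ARM fp16 builds default to f16
// for non-convolution models, since ACL fp16 convolutions are slower than fp32.
Precision resolve_default_precision(const std::map<std::string, std::string>& props,
                                    ModelType model_type,
                                    bool arm_fp16_build) {
    const auto it = props.find("INFERENCE_PRECISION_HINT");
    if (it != props.end())
        return it->second == "f16" ? Precision::f16 : Precision::f32;  // explicit user choice
    if (arm_fp16_build && model_type != ModelType::CNN)
        return Precision::f16;  // new ARM default for non-convolution networks
    return Precision::f32;      // previous behaviour, kept for convolution networks
}

int main() {
    assert(resolve_default_precision({}, ModelType::Unknown, /*arm_fp16_build=*/true) == Precision::f16);
    assert(resolve_default_precision({}, ModelType::CNN, /*arm_fp16_build=*/true) == Precision::f32);
    assert(resolve_default_precision({{"INFERENCE_PRECISION_HINT", "f32"}},
                                     ModelType::Unknown, /*arm_fp16_build=*/true) == Precision::f32);
    return 0;
}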

View File

@@ -45,6 +45,11 @@ struct Config {
         PER_PLATFORM,
     };
+    enum class ModelType {
+        CNN,
+        Unknown
+    };
     bool collectPerfCounters = false;
     bool exclusiveAsyncRequests = false;
     SnippetsMode snippetsMode = SnippetsMode::Enable;
@@ -83,7 +88,7 @@ struct Config {
     // is reserved.
     bool DAZOn = false;
-    void readProperties(const std::map<std::string, std::string> &config);
+    void readProperties(const std::map<std::string, std::string> &config, ModelType modelType = ModelType::Unknown);
     void updateProperties();
     std::map<std::string, std::string> _config;
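
The defaulted modelType parameter is what keeps existing call sites compiling unchanged. The minimal stand-alone sketch below illustrates the pattern; DemoConfig is a hypothetical stand-in, not the plugin's Config struct.

#include <iostream>
#include <map>
#include <string>

// Minimal stand-in showing why the defaulted modelType parameter is backward compatible.
struct DemoConfig {
    enum class ModelType { CNN, Unknown };

    void readProperties(const std::map<std::string, std::string>& config,
                        ModelType modelType = ModelType::Unknown) {
        std::cout << "props: " << config.size()
                  << ", model type: " << (modelType == ModelType::CNN ? "CNN" : "Unknown")
                  << std::endl;
    }
};

int main() {
    DemoConfig conf;
    std::map<std::string, std::string> props{{"PERF_COUNT", "NO"}};

    conf.readProperties(props);                              // legacy call site, implies Unknown
    conf.readProperties(props, DemoConfig::ModelType::CNN);  // new call site with detected type
    return 0;
}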

View File

@@ -1905,6 +1905,9 @@ void MVN::initSupportedPrimitiveDescriptors() {
     canUseAclExecutor = !supportedPrimitiveDescriptors.empty();
     if (canUseAclExecutor)
         return;
+    else
+        // Reference MVN implementation does not support fp16, so set fp32 explicitly
+        inputPrecision = outputPrecision = Precision::FP32;
 #endif // OV_CPU_WITH_ACL
     impl_desc_type impl_type;

View File

@@ -433,12 +433,20 @@ static bool shouldEnableLPT(const std::map<std::string, std::string>& modelConfi
     IE_THROW() << "Wrong value for property key LP_TRANSFORMS_MODE. Expected values: YES/NO";
 }
-static ov::element::Type getInferencePrecision(const std::map<std::string, std::string>& modelConfig, const Config& engineConfig) {
+static ov::element::Type getInferencePrecision(const std::map<std::string, std::string>& modelConfig,
+                                               const Config& engineConfig,
+                                               Config::ModelType modelType) {
     Config tempConf = engineConfig;
-    tempConf.readProperties(modelConfig);
+    tempConf.readProperties(modelConfig, modelType);
     return tempConf.inferencePrecision;
 }
+static Config::ModelType getModelType(const std::shared_ptr<const Model>& model) {
+    return op::util::has_op_with_type<op::v1::Convolution>(model) ||
+           op::util::has_op_with_type<op::v1::ConvolutionBackpropData>(model) ?
+           Config::ModelType::CNN : Config::ModelType::Unknown;
+}
 static Config::SnippetsMode getSnippetsMode(const std::map<std::string, std::string>& modelConfig, const Config& engineConfig) {
     const auto& snippetsMode = modelConfig.find(InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE);
     if (snippetsMode == modelConfig.end()) // not set explicitly
@@ -484,10 +492,10 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network);
     const bool enableLPT = shouldEnableLPT(config, engConfig);
-    ov::element::Type inferencePrecision = getInferencePrecision(config, engConfig);
-    const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig);
     auto nGraphFunc = clonedNetwork.getFunction();
+    Config::ModelType modelType = getModelType(nGraphFunc);
+    ov::element::Type inferencePrecision = getInferencePrecision(config, engConfig, modelType);
+    const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig);
     DEBUG_LOG(PrintableModel(*nGraphFunc, "org_"));
@@ -499,7 +507,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
     Config conf = engConfig;
-    conf.readProperties(config);
+    conf.readProperties(config, modelType);
     CalculateStreams(conf, nGraphFunc);
     Transformations transformations(nGraphFunc, enableLPT, inferencePrecision, isLegacyAPI(), snippetsMode, conf);
@@ -755,19 +763,20 @@ void Engine::AddExtension(const InferenceEngine::IExtensionPtr& extension) {
 QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::map<std::string, std::string>& config) const {
     WeightsSharing::Ptr fake_w_cache;
+    auto model = network.getFunction();
+    if (model == nullptr) {
+        IE_THROW() << "Only ngraph-based models are supported!";
+    }
     Config conf = engConfig;
-    conf.readProperties(config);
+    Config::ModelType modelType = getModelType(model);
+    conf.readProperties(config, modelType);
     const auto& lptProp = config.find(InferenceEngine::PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE);
     const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/
                            || Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled */;
     const Config::SnippetsMode snippetsMode = getSnippetsMode(config, conf);
-    auto model = network.getFunction();
-    if (model == nullptr) {
-        IE_THROW() << "Only ngraph-based models are supported!";
-    }
     auto context =
         std::make_shared<GraphContext>(conf, extensionManager, fake_w_cache, false);
@@ -807,10 +816,10 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istr
     CNNNetwork cnnnetwork;
     deserializer >> cnnnetwork;
-    Config conf = engConfig;
-    conf.readProperties(config);
     auto function = cnnnetwork.getFunction();
+    Config::ModelType modelType = getModelType(function);
+    Config conf = engConfig;
+    conf.readProperties(config, modelType);
     CalculateStreams(conf, function, true);
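
The getModelType() helper above boils down to a single scan of the graph. The sketch below approximates it with only the public graph API; is_convolution_network() is a hypothetical stand-in for the plugin's op::util::has_op_with_type<> check, and the tiny model exists only to have a convolution to detect.

#include <openvino/openvino.hpp>
#include <openvino/op/constant.hpp>
#include <openvino/op/convolution.hpp>
#include <openvino/op/parameter.hpp>

#include <iostream>
#include <memory>
#include <vector>

// Hypothetical stand-in for the plugin's check: scan the graph for (de)convolutions.
static bool is_convolution_network(const std::shared_ptr<const ov::Model>& model) {
    for (const auto& node : model->get_ops()) {
        if (ov::is_type<ov::op::v1::Convolution>(node) ||
            ov::is_type<ov::op::v1::ConvolutionBackpropData>(node)) {
            return true;
        }
    }
    return false;
}

int main() {
    // A minimal 1x3x8x8 input convolved with a 4x3x3x3 kernel, just to have a Convolution op.
    auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 3, 8, 8});
    auto weights = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{4, 3, 3, 3},
                                                std::vector<float>(4 * 3 * 3 * 3, 0.1f));
    auto conv = std::make_shared<ov::op::v1::Convolution>(data, weights,
                                                          ov::Strides{1, 1},
                                                          ov::CoordinateDiff{0, 0},
                                                          ov::CoordinateDiff{0, 0},
                                                          ov::Strides{1, 1});
    auto model = std::make_shared<ov::Model>(ov::OutputVector{conv}, ov::ParameterVector{data});

    // Convolution networks keep the f32 default on ARM; everything else now defaults to f16.
    std::cout << (is_convolution_network(model) ? "CNN: keeps f32 default on ARM"
                                                : "non-CNN: gets f16 default on ARM")
              << std::endl;
    return 0;
}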

View File

@@ -156,7 +156,11 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinity) {
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigHintInferencePrecision) {
     ov::Core ie;
     auto value = ov::element::f32;
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+    const auto precision = ov::element::f16;
+#else
     const auto precision = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
+#endif
     ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::inference_precision));
     ASSERT_EQ(precision, value);
@@ -190,20 +194,25 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigEnableProfiling) {
     ASSERT_EQ(enableProfiling, value);
 }
-const auto bf16_if_supported = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+const auto expected_precision_for_performance_mode = ov::element::f16;
+#else
+const auto expected_precision_for_performance_mode = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
+#endif
 const auto bf16_if_can_be_emulated = InferenceEngine::with_cpu_x86_avx512_core() ? ov::element::bf16 : ov::element::f32;
 using ExpectedModeAndType = std::pair<ov::hint::ExecutionMode, ov::element::Type>;
 const std::map<ov::hint::ExecutionMode, ExpectedModeAndType> exectedTypeByMode {
     {ov::hint::ExecutionMode::PERFORMANCE, {ov::hint::ExecutionMode::PERFORMANCE,
-                                            bf16_if_supported}},
+                                            expected_precision_for_performance_mode}},
     {ov::hint::ExecutionMode::ACCURACY,    {ov::hint::ExecutionMode::ACCURACY,
                                             ov::element::f32}},
 };
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigExecutionModeExpectCorrespondingInferencePrecision) {
     ov::Core ie;
-    const auto inference_precision_default = bf16_if_supported;
+    const auto inference_precision_default = expected_precision_for_performance_mode;
     const auto execution_mode_default = ov::hint::ExecutionMode::PERFORMANCE;
     auto execution_mode_value = ov::hint::ExecutionMode::PERFORMANCE;
     auto inference_precision_value = ov::element::undefined;
@@ -230,7 +239,7 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigExecutionModeExpectCorrespondi
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigExecutionModeAndInferencePrecision) {
     ov::Core ie;
-    const auto inference_precision_default = bf16_if_supported;
+    const auto inference_precision_default = expected_precision_for_performance_mode;
     const auto execution_mode_default = ov::hint::ExecutionMode::PERFORMANCE;
     auto expect_execution_mode = [&](const ov::hint::ExecutionMode expected_value) {
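
The expectations encoded above can be reproduced against an installed build with a couple of property calls. The sketch below assumes the behaviour the test table describes: PERFORMANCE keeps the platform default (f16 on ARM fp16 builds, bf16 on bf16-capable x86, f32 otherwise), while ACCURACY pins f32; it simply prints whatever the local build reports.

#include <openvino/openvino.hpp>
#include <iostream>

int main() {
    ov::Core core;

    // PERFORMANCE mode: the plugin picks the platform default precision.
    core.set_property("CPU", ov::hint::execution_mode(ov::hint::ExecutionMode::PERFORMANCE));
    std::cout << "PERFORMANCE -> "
              << core.get_property("CPU", ov::hint::inference_precision) << std::endl;

    // ACCURACY mode: precision is pinned to f32 regardless of platform.
    core.set_property("CPU", ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY));
    std::cout << "ACCURACY    -> "
              << core.get_property("CPU", ov::hint::inference_precision) << std::endl;
    return 0;
}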

View File

@@ -11,6 +11,12 @@ void CoreConfiguration(LayerTestsUtils::LayerTestsCommon* test) {
     if (!configuration.count(InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16)) {
         configuration.insert({InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO});
     }
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+    //force fp32 inference precision if it is not configured specially
+    if (!configuration.count(ov::hint::inference_precision.name())) {
+        configuration.insert({ov::hint::inference_precision.name(), ov::element::f32.to_string()});
+    }
+#endif
 }
 namespace ov {
@@ -22,6 +28,12 @@ void core_configuration(ov::test::SubgraphBaseTest* test) {
         test->configuration.insert({InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO});
     }
 #endif
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+    //force fp32 inference precision if it is not configured specially
+    if (!test->configuration.count(ov::hint::inference_precision.name())) {
+        test->configuration.insert({ov::hint::inference_precision.name(), ov::element::f32.to_string()});
+    }
+#endif
 }
 } // namespace test