[CPU][ARM] Set FP16 inference precision by default for non-convolution networks on ARM (#19069)
commit e48b2dfc34 (parent e49b2c05f1)
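This only changes the built-in default: an application can still pin the precision explicitly through ov::hint::inference_precision, which takes priority over the plugin's choice. A minimal sketch of such an override (the model path and the choice of f32 are placeholders, not part of this change):

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // "model.xml" is a placeholder path; any IR or ONNX model works here.
    auto model = core.read_model("model.xml");
    // An explicit hint overrides the plugin default, so the new ARM f16
    // behaviour only applies when no inference_precision hint is given.
    auto compiled = core.compile_model(model, "CPU",
                                       ov::hint::inference_precision(ov::element::f32));
    return 0;
}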
@@ -22,7 +22,7 @@ endif()
 if(ARM)
     set(OV_CPU_ARM_TARGET_ARCH_DEFAULT armv7a)
-else()
+elseif(AARCH64)
     if(APPLE)
         # Apple M1 / M2 is assumed
         set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8.2-a)
@@ -69,7 +69,7 @@ void Config::applyDebugCapsProperties() {
 }
 #endif

-void Config::readProperties(const std::map<std::string, std::string> &prop) {
+void Config::readProperties(const std::map<std::string, std::string> &prop, ModelType modelType) {
     const auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys();
     const auto hintsConfigKeys = perfHintsConfig.SupportedKeys();
     for (const auto& kvp : prop) {
@@ -252,6 +252,13 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
             } else {
                 inferencePrecision = ov::element::f32;
             }
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+            //fp16 precision is used as default precision on ARM for non-convolution networks
+            //fp16 ACL convolution is slower than fp32
+            if (modelType != ModelType::CNN) {
+                inferencePrecision = ov::element::f16;
+            }
+#endif
         }

     if (!prop.empty())
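Read together, the hunk above amounts to the following standalone sketch of the precision choice. The function name and the armFp16Enabled flag are illustrative stand-ins (the real code checks the compile-time OV_CPU_ARM_ENABLE_FP16 define), and an explicitly configured hint is assumed to take priority, as the test changes further below also rely on:

#include <openvino/core/type/element_type.hpp>

enum class ModelType { CNN, Unknown };

// Illustrative only: mirrors the defaulting rule added in Config::readProperties above.
// userPrecision is ov::element::undefined when no hint was supplied.
static ov::element::Type pick_default_precision(ov::element::Type userPrecision,
                                                ModelType modelType,
                                                bool armFp16Enabled) {
    if (userPrecision != ov::element::undefined)
        return userPrecision;                 // an explicit hint is respected
    if (armFp16Enabled && modelType != ModelType::CNN)
        return ov::element::f16;              // fp16 ACL convolution is slower than fp32
    return ov::element::f32;                  // convolution networks keep fp32
}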
@@ -45,6 +45,11 @@ struct Config {
         PER_PLATFORM,
     };

+    enum class ModelType {
+        CNN,
+        Unknown
+    };
+
     bool collectPerfCounters = false;
     bool exclusiveAsyncRequests = false;
     SnippetsMode snippetsMode = SnippetsMode::Enable;
@@ -83,7 +88,7 @@ struct Config {
     // is reserved.
     bool DAZOn = false;

-    void readProperties(const std::map<std::string, std::string> &config);
+    void readProperties(const std::map<std::string, std::string> &config, ModelType modelType = ModelType::Unknown);
     void updateProperties();

     std::map<std::string, std::string> _config;
@@ -1905,6 +1905,9 @@ void MVN::initSupportedPrimitiveDescriptors() {
     canUseAclExecutor = !supportedPrimitiveDescriptors.empty();
     if (canUseAclExecutor)
         return;
+    else
+        // Reference MVN implementation does not support fp16, so set fp32 explicitly
+        inputPrecision = outputPrecision = Precision::FP32;
 #endif // OV_CPU_WITH_ACL

     impl_desc_type impl_type;
@@ -433,12 +433,20 @@ static bool shouldEnableLPT(const std::map<std::string, std::string>& modelConfi
         IE_THROW() << "Wrong value for property key LP_TRANSFORMS_MODE. Expected values: YES/NO";
 }

-static ov::element::Type getInferencePrecision(const std::map<std::string, std::string>& modelConfig, const Config& engineConfig) {
+static ov::element::Type getInferencePrecision(const std::map<std::string, std::string>& modelConfig,
+                                               const Config& engineConfig,
+                                               Config::ModelType modelType) {
     Config tempConf = engineConfig;
-    tempConf.readProperties(modelConfig);
+    tempConf.readProperties(modelConfig, modelType);
     return tempConf.inferencePrecision;
 }

+static Config::ModelType getModelType(const std::shared_ptr<const Model>& model) {
+    return op::util::has_op_with_type<op::v1::Convolution>(model) ||
+           op::util::has_op_with_type<op::v1::ConvolutionBackpropData>(model) ?
+           Config::ModelType::CNN : Config::ModelType::Unknown;
+}
+
 static Config::SnippetsMode getSnippetsMode(const std::map<std::string, std::string>& modelConfig, const Config& engineConfig) {
     const auto& snippetsMode = modelConfig.find(InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE);
     if (snippetsMode == modelConfig.end()) // not set explicitly
@@ -484,10 +492,10 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std

     CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network);
     const bool enableLPT = shouldEnableLPT(config, engConfig);
-    ov::element::Type inferencePrecision = getInferencePrecision(config, engConfig);
-    const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig);

     auto nGraphFunc = clonedNetwork.getFunction();
+    Config::ModelType modelType = getModelType(nGraphFunc);
+    ov::element::Type inferencePrecision = getInferencePrecision(config, engConfig, modelType);
+    const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig);

     DEBUG_LOG(PrintableModel(*nGraphFunc, "org_"));

@@ -499,7 +507,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
     Config conf = engConfig;

-    conf.readProperties(config);
+    conf.readProperties(config, modelType);
     CalculateStreams(conf, nGraphFunc);

     Transformations transformations(nGraphFunc, enableLPT, inferencePrecision, isLegacyAPI(), snippetsMode, conf);
@@ -755,19 +763,20 @@ void Engine::AddExtension(const InferenceEngine::IExtensionPtr& extension) {
 QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::map<std::string, std::string>& config) const {
     WeightsSharing::Ptr fake_w_cache;

+    auto model = network.getFunction();
+    if (model == nullptr) {
+        IE_THROW() << "Only ngraph-based models are supported!";
+    }
+
     Config conf = engConfig;
-    conf.readProperties(config);
+    Config::ModelType modelType = getModelType(model);
+    conf.readProperties(config, modelType);

     const auto& lptProp = config.find(InferenceEngine::PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE);
     const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/
             || Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled */;
     const Config::SnippetsMode snippetsMode = getSnippetsMode(config, conf);

-    auto model = network.getFunction();
-    if (model == nullptr) {
-        IE_THROW() << "Only ngraph-based models are supported!";
-    }
-
     auto context =
         std::make_shared<GraphContext>(conf, extensionManager, fake_w_cache, false);
@@ -807,10 +816,10 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istr
     CNNNetwork cnnnetwork;
     deserializer >> cnnnetwork;

-    Config conf = engConfig;
-    conf.readProperties(config);
-
     auto function = cnnnetwork.getFunction();
+    Config::ModelType modelType = getModelType(function);
+    Config conf = engConfig;
+    conf.readProperties(config, modelType);

     CalculateStreams(conf, function, true);
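The value that ends up in Config::inferencePrecision is reported back through the same property, so the new default can, in principle, be observed from application code. A short sketch, assuming a loadable model at a placeholder path:

#include <openvino/openvino.hpp>
#include <iostream>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");   // placeholder path
    auto compiled = core.compile_model(model, "CPU");
    // On an OV_CPU_ARM_ENABLE_FP16 build, a model without Convolution or
    // ConvolutionBackpropData ops is expected to report f16 here, unless
    // the application set ov::hint::inference_precision itself.
    std::cout << compiled.get_property(ov::hint::inference_precision) << std::endl;
    return 0;
}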
@@ -156,7 +156,11 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinity) {
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigHintInferencePrecision) {
     ov::Core ie;
     auto value = ov::element::f32;
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+    const auto precision = ov::element::f16;
+#else
     const auto precision = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
+#endif

     ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::inference_precision));
     ASSERT_EQ(precision, value);

@@ -190,20 +194,25 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigEnableProfiling) {
     ASSERT_EQ(enableProfiling, value);
 }

-const auto bf16_if_supported = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+const auto expected_precision_for_performance_mode = ov::element::f16;
+#else
+const auto expected_precision_for_performance_mode = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
+#endif
+
 const auto bf16_if_can_be_emulated = InferenceEngine::with_cpu_x86_avx512_core() ? ov::element::bf16 : ov::element::f32;
 using ExpectedModeAndType = std::pair<ov::hint::ExecutionMode, ov::element::Type>;

 const std::map<ov::hint::ExecutionMode, ExpectedModeAndType> exectedTypeByMode {
     {ov::hint::ExecutionMode::PERFORMANCE, {ov::hint::ExecutionMode::PERFORMANCE,
-                                            bf16_if_supported}},
+                                            expected_precision_for_performance_mode}},
     {ov::hint::ExecutionMode::ACCURACY,    {ov::hint::ExecutionMode::ACCURACY,
                                             ov::element::f32}},
 };

 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigExecutionModeExpectCorrespondingInferencePrecision) {
     ov::Core ie;
-    const auto inference_precision_default = bf16_if_supported;
+    const auto inference_precision_default = expected_precision_for_performance_mode;
     const auto execution_mode_default = ov::hint::ExecutionMode::PERFORMANCE;
     auto execution_mode_value = ov::hint::ExecutionMode::PERFORMANCE;
     auto inference_precision_value = ov::element::undefined;

@@ -230,7 +239,7 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigExecutionModeExpectCorrespondi

 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigExecutionModeAndInferencePrecision) {
     ov::Core ie;
-    const auto inference_precision_default = bf16_if_supported;
+    const auto inference_precision_default = expected_precision_for_performance_mode;
     const auto execution_mode_default = ov::hint::ExecutionMode::PERFORMANCE;

     auto expect_execution_mode = [&](const ov::hint::ExecutionMode expected_value) {
@@ -11,6 +11,12 @@ void CoreConfiguration(LayerTestsUtils::LayerTestsCommon* test) {
     if (!configuration.count(InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16)) {
         configuration.insert({InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO});
     }
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+    //force fp32 inference precision if it is not configured specially
+    if (!configuration.count(ov::hint::inference_precision.name())) {
+        configuration.insert({ov::hint::inference_precision.name(), ov::element::f32.to_string()});
+    }
+#endif
 }

 namespace ov {

@@ -22,6 +28,12 @@ void core_configuration(ov::test::SubgraphBaseTest* test) {
         test->configuration.insert({InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO});
     }
 #endif
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+    //force fp32 inference precision if it is not configured specially
+    if (!test->configuration.count(ov::hint::inference_precision.name())) {
+        test->configuration.insert({ov::hint::inference_precision.name(), ov::element::f32.to_string()});
+    }
+#endif
 }

 } // namespace test