[CPU][ARM] Set FP16 inference precision by default for non-convolution networks on ARM (#19069)

Author: Aleksandr Voron
Date:   2023-08-14 20:22:39 +02:00 (committed by GitHub)
parent e49b2c05f1
commit e48b2dfc34
7 changed files with 67 additions and 22 deletions
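
For context, the sketch below shows roughly how the new default is visible through the public OpenVINO API. It is an illustrative example, not part of the commit: "model.xml" is a placeholder path, and the f16 default is only reported on ARM builds compiled with OV_CPU_ARM_ENABLE_FP16; x86 builds keep reporting bf16 or f32 depending on hardware support.

#include <openvino/openvino.hpp>
#include <iostream>

int main() {
    ov::Core core;

    // Plugin-level default; expected to be f16 on ARM fp16 builds after this change,
    // bf16 or f32 on x86 depending on hardware support.
    std::cout << "CPU default inference precision: "
              << core.get_property("CPU", ov::hint::inference_precision) << std::endl;

    // "model.xml" is a placeholder; any IR/ONNX model can be used here.
    auto model = core.read_model("model.xml");

    // An explicit hint always overrides the default chosen by the plugin.
    auto compiled = core.compile_model(model, "CPU",
                                       ov::hint::inference_precision(ov::element::f32));
    std::cout << "Compiled with: "
              << compiled.get_property(ov::hint::inference_precision) << std::endl;
    return 0;
}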

View File

@@ -22,7 +22,7 @@ endif()
 if(ARM)
     set(OV_CPU_ARM_TARGET_ARCH_DEFAULT armv7a)
-else()
+elseif(AARCH64)
     if(APPLE)
         # Apple M1 / M2 is assumed
         set(OV_CPU_ARM_TARGET_ARCH_DEFAULT arm64-v8.2-a)

View File

@@ -69,7 +69,7 @@ void Config::applyDebugCapsProperties() {
 }
 #endif
-void Config::readProperties(const std::map<std::string, std::string> &prop) {
+void Config::readProperties(const std::map<std::string, std::string> &prop, ModelType modelType) {
     const auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys();
     const auto hintsConfigKeys = perfHintsConfig.SupportedKeys();
     for (const auto& kvp : prop) {
@@ -252,6 +252,13 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
             } else {
                 inferencePrecision = ov::element::f32;
             }
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+            //fp16 precision is used as default precision on ARM for non-convolution networks
+            //fp16 ACL convolution is slower than fp32
+            if (modelType != ModelType::CNN) {
+                inferencePrecision = ov::element::f16;
+            }
+#endif
         }
     if (!prop.empty())
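
Condensed into a standalone form, the precision selection added above behaves roughly as in the sketch below. All names here (resolve_default_precision, the Precision enum, the hard-coded key string) are illustrative stand-ins rather than the plugin's actual types; the point is that an explicit INFERENCE_PRECISION_HINT still wins, and the f16 default only applies to non-convolution models on ARM fp16 builds.

#include <cassert>
#include <map>
#include <string>

// Illustrative stand-ins for the plugin-internal types touched by this change.
enum class ModelType { CNN, Unknown };
enum class Precision { f32, f16 };

// Hypothetical helper mirroring the branch added to Config::readProperties():
// a user-supplied hint always wins; otherwise ARM fp16 builds default to f16
// for non-convolution models, since ACL fp16 convolutions are slower than fp32.
Precision resolve_default_precision(const std::map<std::string, std::string>& props,
                                    ModelType model_type,
                                    bool arm_fp16_build) {
    const auto it = props.find("INFERENCE_PRECISION_HINT");
    if (it != props.end())
        return it->second == "f16" ? Precision::f16 : Precision::f32;  // explicit user choice
    if (arm_fp16_build && model_type != ModelType::CNN)
        return Precision::f16;  // new ARM default for non-convolution networks
    return Precision::f32;      // previous behaviour, kept for convolution networks
}

int main() {
    assert(resolve_default_precision({}, ModelType::Unknown, /*arm_fp16_build=*/true) == Precision::f16);
    assert(resolve_default_precision({}, ModelType::CNN, /*arm_fp16_build=*/true) == Precision::f32);
    assert(resolve_default_precision({{"INFERENCE_PRECISION_HINT", "f32"}},
                                     ModelType::Unknown, /*arm_fp16_build=*/true) == Precision::f32);
    return 0;
}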

View File

@@ -45,6 +45,11 @@ struct Config {
         PER_PLATFORM,
     };
+    enum class ModelType {
+        CNN,
+        Unknown
+    };
     bool collectPerfCounters = false;
     bool exclusiveAsyncRequests = false;
     SnippetsMode snippetsMode = SnippetsMode::Enable;
@@ -83,7 +88,7 @@ struct Config {
     // is reserved.
     bool DAZOn = false;
-    void readProperties(const std::map<std::string, std::string> &config);
+    void readProperties(const std::map<std::string, std::string> &config, ModelType modelType = ModelType::Unknown);
     void updateProperties();
     std::map<std::string, std::string> _config;
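
The defaulted modelType parameter is what keeps existing call sites compiling unchanged. The minimal stand-alone sketch below illustrates the pattern; DemoConfig is a hypothetical stand-in, not the plugin's Config struct.

#include <iostream>
#include <map>
#include <string>

// Minimal stand-in showing why the defaulted modelType parameter is backward compatible.
struct DemoConfig {
    enum class ModelType { CNN, Unknown };

    void readProperties(const std::map<std::string, std::string>& config,
                        ModelType modelType = ModelType::Unknown) {
        std::cout << "props: " << config.size()
                  << ", model type: " << (modelType == ModelType::CNN ? "CNN" : "Unknown")
                  << std::endl;
    }
};

int main() {
    DemoConfig conf;
    std::map<std::string, std::string> props{{"PERF_COUNT", "NO"}};

    conf.readProperties(props);                              // legacy call site, implies Unknown
    conf.readProperties(props, DemoConfig::ModelType::CNN);  // new call site with detected type
    return 0;
}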

View File

@@ -1905,6 +1905,9 @@ void MVN::initSupportedPrimitiveDescriptors() {
     canUseAclExecutor = !supportedPrimitiveDescriptors.empty();
     if (canUseAclExecutor)
         return;
+    else
+        // Reference MVN implementation does not support fp16, so set fp32 explicitly
+        inputPrecision = outputPrecision = Precision::FP32;
 #endif // OV_CPU_WITH_ACL
     impl_desc_type impl_type;

View File

@@ -433,12 +433,20 @@ static bool shouldEnableLPT(const std::map<std::string, std::string>& modelConfi
     IE_THROW() << "Wrong value for property key LP_TRANSFORMS_MODE. Expected values: YES/NO";
 }
-static ov::element::Type getInferencePrecision(const std::map<std::string, std::string>& modelConfig, const Config& engineConfig) {
+static ov::element::Type getInferencePrecision(const std::map<std::string, std::string>& modelConfig,
+                                               const Config& engineConfig,
+                                               Config::ModelType modelType) {
     Config tempConf = engineConfig;
-    tempConf.readProperties(modelConfig);
+    tempConf.readProperties(modelConfig, modelType);
     return tempConf.inferencePrecision;
 }
+static Config::ModelType getModelType(const std::shared_ptr<const Model>& model) {
+    return op::util::has_op_with_type<op::v1::Convolution>(model) ||
+           op::util::has_op_with_type<op::v1::ConvolutionBackpropData>(model) ?
+           Config::ModelType::CNN : Config::ModelType::Unknown;
+}
 static Config::SnippetsMode getSnippetsMode(const std::map<std::string, std::string>& modelConfig, const Config& engineConfig) {
     const auto& snippetsMode = modelConfig.find(InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE);
     if (snippetsMode == modelConfig.end()) // not set explicitly
@@ -484,10 +492,10 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network);
     const bool enableLPT = shouldEnableLPT(config, engConfig);
-    ov::element::Type inferencePrecision = getInferencePrecision(config, engConfig);
-    const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig);
     auto nGraphFunc = clonedNetwork.getFunction();
+    Config::ModelType modelType = getModelType(nGraphFunc);
+    ov::element::Type inferencePrecision = getInferencePrecision(config, engConfig, modelType);
+    const Config::SnippetsMode snippetsMode = getSnippetsMode(config, engConfig);
     DEBUG_LOG(PrintableModel(*nGraphFunc, "org_"));
@@ -499,7 +507,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
     Config conf = engConfig;
-    conf.readProperties(config);
+    conf.readProperties(config, modelType);
     CalculateStreams(conf, nGraphFunc);
     Transformations transformations(nGraphFunc, enableLPT, inferencePrecision, isLegacyAPI(), snippetsMode, conf);
@@ -755,19 +763,20 @@ void Engine::AddExtension(const InferenceEngine::IExtensionPtr& extension) {
 QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::map<std::string, std::string>& config) const {
     WeightsSharing::Ptr fake_w_cache;
+    auto model = network.getFunction();
+    if (model == nullptr) {
+        IE_THROW() << "Only ngraph-based models are supported!";
+    }
     Config conf = engConfig;
-    conf.readProperties(config);
+    Config::ModelType modelType = getModelType(model);
+    conf.readProperties(config, modelType);
     const auto& lptProp = config.find(InferenceEngine::PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE);
     const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/
                            || Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled */;
     const Config::SnippetsMode snippetsMode = getSnippetsMode(config, conf);
-    auto model = network.getFunction();
-    if (model == nullptr) {
-        IE_THROW() << "Only ngraph-based models are supported!";
-    }
     auto context =
         std::make_shared<GraphContext>(conf, extensionManager, fake_w_cache, false);
@@ -807,10 +816,10 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork(std::istr
     CNNNetwork cnnnetwork;
     deserializer >> cnnnetwork;
-    Config conf = engConfig;
-    conf.readProperties(config);
     auto function = cnnnetwork.getFunction();
+    Config::ModelType modelType = getModelType(function);
+    Config conf = engConfig;
+    conf.readProperties(config, modelType);
     CalculateStreams(conf, function, true);
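
The getModelType() helper above boils down to a single scan of the graph. The sketch below approximates it with only the public graph API; is_convolution_network() is a hypothetical stand-in for the plugin's op::util::has_op_with_type<> check, and the tiny model exists only to have a convolution to detect.

#include <openvino/openvino.hpp>
#include <openvino/op/constant.hpp>
#include <openvino/op/convolution.hpp>
#include <openvino/op/parameter.hpp>

#include <iostream>
#include <memory>
#include <vector>

// Hypothetical stand-in for the plugin's check: scan the graph for (de)convolutions.
static bool is_convolution_network(const std::shared_ptr<const ov::Model>& model) {
    for (const auto& node : model->get_ops()) {
        if (ov::is_type<ov::op::v1::Convolution>(node) ||
            ov::is_type<ov::op::v1::ConvolutionBackpropData>(node)) {
            return true;
        }
    }
    return false;
}

int main() {
    // A minimal 1x3x8x8 input convolved with a 4x3x3x3 kernel, just to have a Convolution op.
    auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 3, 8, 8});
    auto weights = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{4, 3, 3, 3},
                                                std::vector<float>(4 * 3 * 3 * 3, 0.1f));
    auto conv = std::make_shared<ov::op::v1::Convolution>(data, weights,
                                                          ov::Strides{1, 1},
                                                          ov::CoordinateDiff{0, 0},
                                                          ov::CoordinateDiff{0, 0},
                                                          ov::Strides{1, 1});
    auto model = std::make_shared<ov::Model>(ov::OutputVector{conv}, ov::ParameterVector{data});

    // Convolution networks keep the f32 default on ARM; everything else now defaults to f16.
    std::cout << (is_convolution_network(model) ? "CNN: keeps f32 default on ARM"
                                                : "non-CNN: gets f16 default on ARM")
              << std::endl;
    return 0;
}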

View File

@@ -156,7 +156,11 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinity) {
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigHintInferencePrecision) {
     ov::Core ie;
     auto value = ov::element::f32;
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+    const auto precision = ov::element::f16;
+#else
     const auto precision = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
+#endif
     ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::inference_precision));
     ASSERT_EQ(precision, value);
@@ -190,20 +194,25 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigEnableProfiling) {
     ASSERT_EQ(enableProfiling, value);
 }
-const auto bf16_if_supported = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+const auto expected_precision_for_performance_mode = ov::element::f16;
+#else
+const auto expected_precision_for_performance_mode = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
+#endif
 const auto bf16_if_can_be_emulated = InferenceEngine::with_cpu_x86_avx512_core() ? ov::element::bf16 : ov::element::f32;
 using ExpectedModeAndType = std::pair<ov::hint::ExecutionMode, ov::element::Type>;
 const std::map<ov::hint::ExecutionMode, ExpectedModeAndType> exectedTypeByMode {
     {ov::hint::ExecutionMode::PERFORMANCE, {ov::hint::ExecutionMode::PERFORMANCE,
-                                            bf16_if_supported}},
+                                            expected_precision_for_performance_mode}},
     {ov::hint::ExecutionMode::ACCURACY,    {ov::hint::ExecutionMode::ACCURACY,
                                             ov::element::f32}},
 };
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigExecutionModeExpectCorrespondingInferencePrecision) {
     ov::Core ie;
-    const auto inference_precision_default = bf16_if_supported;
+    const auto inference_precision_default = expected_precision_for_performance_mode;
     const auto execution_mode_default = ov::hint::ExecutionMode::PERFORMANCE;
     auto execution_mode_value = ov::hint::ExecutionMode::PERFORMANCE;
     auto inference_precision_value = ov::element::undefined;
@@ -230,7 +239,7 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigExecutionModeExpectCorrespondi
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigExecutionModeAndInferencePrecision) {
     ov::Core ie;
-    const auto inference_precision_default = bf16_if_supported;
+    const auto inference_precision_default = expected_precision_for_performance_mode;
     const auto execution_mode_default = ov::hint::ExecutionMode::PERFORMANCE;
     auto expect_execution_mode = [&](const ov::hint::ExecutionMode expected_value) {
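
The expectations encoded above can be reproduced against an installed build with a couple of property calls. The sketch below assumes the behaviour the test table describes: PERFORMANCE keeps the platform default (f16 on ARM fp16 builds, bf16 on bf16-capable x86, f32 otherwise), while ACCURACY pins f32; it simply prints whatever the local build reports.

#include <openvino/openvino.hpp>
#include <iostream>

int main() {
    ov::Core core;

    // PERFORMANCE mode: the plugin picks the platform default precision.
    core.set_property("CPU", ov::hint::execution_mode(ov::hint::ExecutionMode::PERFORMANCE));
    std::cout << "PERFORMANCE -> "
              << core.get_property("CPU", ov::hint::inference_precision) << std::endl;

    // ACCURACY mode: precision is pinned to f32 regardless of platform.
    core.set_property("CPU", ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY));
    std::cout << "ACCURACY    -> "
              << core.get_property("CPU", ov::hint::inference_precision) << std::endl;
    return 0;
}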

View File

@@ -11,6 +11,12 @@ void CoreConfiguration(LayerTestsUtils::LayerTestsCommon* test) {
     if (!configuration.count(InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16)) {
         configuration.insert({InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO});
     }
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+    //force fp32 inference precision if it is not configured specially
+    if (!configuration.count(ov::hint::inference_precision.name())) {
+        configuration.insert({ov::hint::inference_precision.name(), ov::element::f32.to_string()});
+    }
+#endif
 }
 namespace ov {
@@ -22,6 +28,12 @@ void core_configuration(ov::test::SubgraphBaseTest* test) {
         test->configuration.insert({InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO});
     }
 #endif
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+    //force fp32 inference precision if it is not configured specially
+    if (!test->configuration.count(ov::hint::inference_precision.name())) {
+        test->configuration.insert({ov::hint::inference_precision.name(), ov::element::f32.to_string()});
+    }
+#endif
 }
 } // namespace test