[CPU][ARM] Choose eltwise layout based on model type (#19234)

The order of the Eltwise node's supported primitive descriptors affects
the performance of the model.
Often only one of the port descriptors matches the layout
of the parent descriptors, i.e. when the two parent ports have mixed
layouts "nchw nhwc".
So either the nchw or the nhwc layout will be used for the eltwise node,
and a reorder will be required on one of the ports.
The shapes of the ports can also be different (when one of the inputs is
broadcast), so reorders on different ports have a different
performance impact.
The layout of the eltwise node's child has an effect on the performance
as well, since it may or may not require a reorder on its input.
This commit is contained in:
Egor Duplenskii
2023-09-22 07:33:16 +02:00
committed by GitHub
parent aec4c6c843
commit 6de8579b1d
3 changed files with 21 additions and 10 deletions

View File

@@ -69,7 +69,7 @@ void Config::applyDebugCapsProperties() {
}
#endif
void Config::readProperties(const std::map<std::string, std::string> &prop, ModelType modelType) {
void Config::readProperties(const std::map<std::string, std::string> &prop, const ModelType modelType) {
const auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys();
const auto hintsConfigKeys = perfHintsConfig.SupportedKeys();
for (const auto& kvp : prop) {
@@ -267,6 +267,7 @@ void Config::readProperties(const std::map<std::string, std::string> &prop, Mode
streamExecutorConfig._streams = 1;
streamExecutorConfig._streams_changed = true;
#endif
this->modelType = modelType;
CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
updateProperties();

View File

@@ -88,7 +88,7 @@ struct Config {
// is reserved.
bool DAZOn = false;
void readProperties(const std::map<std::string, std::string> &config, ModelType modelType = ModelType::Unknown);
void readProperties(const std::map<std::string, std::string> &config, const ModelType modelType = ModelType::Unknown);
void updateProperties();
std::map<std::string, std::string> _config;
@@ -96,6 +96,7 @@ struct Config {
bool isLegacyApi = false;
int modelPreferThreads = -1;
ModelType modelType = ModelType::Unknown;
#ifdef CPU_DEBUG_CAPS
DebugCapsConfig debugCaps;

View File

@@ -11,6 +11,7 @@
#include <ie_parallel.hpp>
#include "config.h"
#include "cpu_types.h"
#include "utils/bfloat16.hpp"
#include "ie_ngraph_utils.hpp"
@@ -2171,15 +2172,23 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
#if defined (OV_CPU_WITH_ACL)
eltwiseAttrs = {algorithm, alpha, beta, gamma};
if (isChannelsFirstApplicable) {
auto channelFirstDesc = initDesc(ChannelsFirst, true);
if (channelFirstDesc.getExecutorFactory())
supportedPrimitiveDescriptors.emplace_back(channelFirstDesc);
}
auto planarDesc = initDesc(Planar, true);
if (planarDesc.getExecutorFactory())
supportedPrimitiveDescriptors.emplace_back(planarDesc);
auto addDesc = [&initDesc](std::vector<NodeDesc>& supportedPrimitiveDescriptors, const LayoutType layoutType) {
auto nodeDesc = initDesc(layoutType, true);
if (nodeDesc.getExecutorFactory())
supportedPrimitiveDescriptors.emplace_back(nodeDesc);
};
// @todo should be handled in scope of selectPreferPrimitiveDescriptor
if (context->getConfig().modelType == Config::ModelType::CNN) {
if (isChannelsFirstApplicable)
addDesc(supportedPrimitiveDescriptors, ChannelsFirst);
addDesc(supportedPrimitiveDescriptors, Planar);
} else {
addDesc(supportedPrimitiveDescriptors, Planar);
if (isChannelsFirstApplicable)
addDesc(supportedPrimitiveDescriptors, ChannelsFirst);
}
canUseAclExecutor = !supportedPrimitiveDescriptors.empty();
if (canUseAclExecutor)