[CPU][ARM] Choose eltwise layout based on model type (#19234)
The order of the Eltwise node's supported primitive descriptors affects the performance of the model. Often only one of the port descriptors matches the layout of the parent descriptors, for example when the two parent ports have mixed layouts ("nchw" and "nhwc"). In that case either the nchw or the nhwc layout is used for the eltwise node, and a reorder is inserted on one of the ports. The shapes of the ports can also differ (when one of the inputs is broadcast), so reorders on different ports have a different performance impact. The layout of the eltwise node's child affects performance as well, since it may or may not require a reorder on its input.
This commit is contained in:
@@ -69,7 +69,7 @@ void Config::applyDebugCapsProperties() {
|
||||
}
|
||||
#endif
|
||||
|
||||
void Config::readProperties(const std::map<std::string, std::string> &prop, ModelType modelType) {
|
||||
void Config::readProperties(const std::map<std::string, std::string> &prop, const ModelType modelType) {
|
||||
const auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys();
|
||||
const auto hintsConfigKeys = perfHintsConfig.SupportedKeys();
|
||||
for (const auto& kvp : prop) {
|
||||
@@ -267,6 +267,7 @@ void Config::readProperties(const std::map<std::string, std::string> &prop, Mode
|
||||
streamExecutorConfig._streams = 1;
|
||||
streamExecutorConfig._streams_changed = true;
|
||||
#endif
|
||||
this->modelType = modelType;
|
||||
|
||||
CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
|
||||
updateProperties();
|
||||
|
||||
@@ -88,7 +88,7 @@ struct Config {
|
||||
// is reserved.
|
||||
bool DAZOn = false;
|
||||
|
||||
void readProperties(const std::map<std::string, std::string> &config, ModelType modelType = ModelType::Unknown);
|
||||
void readProperties(const std::map<std::string, std::string> &config, const ModelType modelType = ModelType::Unknown);
|
||||
void updateProperties();
|
||||
|
||||
std::map<std::string, std::string> _config;
|
||||
@@ -96,6 +96,7 @@ struct Config {
|
||||
bool isLegacyApi = false;
|
||||
|
||||
int modelPreferThreads = -1;
|
||||
ModelType modelType = ModelType::Unknown;
|
||||
|
||||
#ifdef CPU_DEBUG_CAPS
|
||||
DebugCapsConfig debugCaps;
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
#include <ie_parallel.hpp>
|
||||
|
||||
#include "config.h"
|
||||
#include "cpu_types.h"
|
||||
#include "utils/bfloat16.hpp"
|
||||
#include "ie_ngraph_utils.hpp"
|
||||
@@ -2171,15 +2172,23 @@ void Eltwise::initSupportedPrimitiveDescriptors() {
|
||||
|
||||
#if defined (OV_CPU_WITH_ACL)
|
||||
eltwiseAttrs = {algorithm, alpha, beta, gamma};
|
||||
if (isChannelsFirstApplicable) {
|
||||
auto channelFirstDesc = initDesc(ChannelsFirst, true);
|
||||
if (channelFirstDesc.getExecutorFactory())
|
||||
supportedPrimitiveDescriptors.emplace_back(channelFirstDesc);
|
||||
}
|
||||
|
||||
auto planarDesc = initDesc(Planar, true);
|
||||
if (planarDesc.getExecutorFactory())
|
||||
supportedPrimitiveDescriptors.emplace_back(planarDesc);
|
||||
auto addDesc = [&initDesc](std::vector<NodeDesc>& supportedPrimitiveDescriptors, const LayoutType layoutType) {
|
||||
auto nodeDesc = initDesc(layoutType, true);
|
||||
if (nodeDesc.getExecutorFactory())
|
||||
supportedPrimitiveDescriptors.emplace_back(nodeDesc);
|
||||
};
|
||||
|
||||
// @todo should be handled in scope of selectPreferPrimitiveDescriptor
|
||||
if (context->getConfig().modelType == Config::ModelType::CNN) {
|
||||
if (isChannelsFirstApplicable)
|
||||
addDesc(supportedPrimitiveDescriptors, ChannelsFirst);
|
||||
addDesc(supportedPrimitiveDescriptors, Planar);
|
||||
} else {
|
||||
addDesc(supportedPrimitiveDescriptors, Planar);
|
||||
if (isChannelsFirstApplicable)
|
||||
addDesc(supportedPrimitiveDescriptors, ChannelsFirst);
|
||||
}
|
||||
|
||||
canUseAclExecutor = !supportedPrimitiveDescriptors.empty();
|
||||
if (canUseAclExecutor)
|
||||
|
||||
Reference in New Issue
Block a user