[CPU] API 2.0 migration for cpu plugin properties (#20022)
This commit is contained in:
parent
45d6aa2171
commit
b02ddc5831
@ -14,12 +14,38 @@
|
||||
#include <vector>
|
||||
|
||||
#include "openvino/runtime/common.hpp"
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
#include "openvino/runtime/system_conf.hpp"
|
||||
#include "openvino/runtime/threading/itask_executor.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace threading {
|
||||
|
||||
/**
|
||||
* @brief Number of streams in Performance-core (big core)
|
||||
*/
|
||||
static constexpr Property<size_t, PropertyMutability::RW> big_core_streams{"BIG_CORE_STREAMS"};
|
||||
|
||||
/**
|
||||
* @brief Number of streams in Efficient-core (small core) on hybrid cores machine
|
||||
*/
|
||||
static constexpr Property<size_t, PropertyMutability::RW> small_core_streams{"SMALL_CORE_STREAMS"};
|
||||
|
||||
/**
|
||||
* @brief Number of threads per stream in big cores
|
||||
*/
|
||||
static constexpr Property<size_t, PropertyMutability::RW> threads_per_stream_big{"THREADS_PER_STREAM_BIG"};
|
||||
|
||||
/**
|
||||
* @brief Number of threads per stream in small cores on hybrid cores machine
|
||||
*/
|
||||
static constexpr Property<size_t, PropertyMutability::RW> threads_per_stream_small{"THREADS_PER_STREAM_SMALL"};
|
||||
|
||||
/**
|
||||
* @brief Small core start offset when binding cpu cores
|
||||
*/
|
||||
static constexpr Property<size_t, PropertyMutability::RW> small_core_offset{"SMALL_CORE_OFFSET"};
|
||||
|
||||
/**
|
||||
* @interface IStreamsExecutor
|
||||
* @ingroup ov_dev_api_threading
|
||||
|
@ -288,8 +288,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
|
||||
} else if (name == ov::hint::inference_precision) {
|
||||
return decltype(ov::hint::inference_precision)::value_type(config.inferencePrecision);
|
||||
} else if (name == ov::hint::performance_mode) {
|
||||
const auto perfHint = ov::util::from_string(config.perfHintsConfig.ovPerfHint, ov::hint::performance_mode);
|
||||
return perfHint;
|
||||
return decltype(ov::hint::performance_mode)::value_type(config.hintPerfMode);
|
||||
} else if (name == ov::hint::enable_cpu_pinning.name()) {
|
||||
const bool use_pin = config.enableCpuPinning;
|
||||
return decltype(ov::hint::enable_cpu_pinning)::value_type(use_pin);
|
||||
@ -302,8 +301,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
|
||||
} else if (name == ov::hint::execution_mode) {
|
||||
return config.executionMode;
|
||||
} else if (name == ov::hint::num_requests) {
|
||||
const auto perfHintNumRequests = config.perfHintsConfig.ovPerfHintNumRequests;
|
||||
return decltype(ov::hint::num_requests)::value_type(perfHintNumRequests);
|
||||
return decltype(ov::hint::num_requests)::value_type(config.hintNumRequests);
|
||||
} else if (name == ov::execution_devices) {
|
||||
return decltype(ov::execution_devices)::value_type{m_plugin->get_device_name()};
|
||||
} else if (name == ov::intel_cpu::denormals_optimization) {
|
||||
|
@ -4,22 +4,18 @@
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
|
||||
#include "ie_plugin_config.hpp"
|
||||
#include "cpu/cpu_config.hpp"
|
||||
#include "ie_common.h"
|
||||
#include "ie_parallel.hpp"
|
||||
#include "ie_system_conf.h"
|
||||
|
||||
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
|
||||
#include "cpu/x64/cpu_isa_traits.hpp"
|
||||
#include "openvino/core/parallel.hpp"
|
||||
#include "openvino/core/type/element_type_traits.hpp"
|
||||
#include "openvino/runtime/intel_cpu/properties.hpp"
|
||||
#include "openvino/runtime/internal_properties.hpp"
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
#include "utils/debug_capabilities.h"
|
||||
#include "cpu/x64/cpu_isa_traits.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
@ -36,7 +32,7 @@ Config::Config() {
|
||||
#endif
|
||||
|
||||
// for the TBB code-path, additional configuration depending on the OS and CPU types
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
# if defined(__APPLE__) || defined(_WIN32)
|
||||
// 'CORES' is not implemented for Win/MacOS; so the 'NONE' or 'NUMA' is default
|
||||
auto numaNodes = get_available_numa_nodes();
|
||||
@ -51,7 +47,6 @@ Config::Config() {
|
||||
streamExecutorConfig._threadBindingType = IStreamsExecutor::HYBRID_AWARE;
|
||||
}
|
||||
#endif
|
||||
|
||||
CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
|
||||
|
||||
updateProperties();
|
||||
@ -73,49 +68,68 @@ void Config::applyDebugCapsProperties() {
|
||||
void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
|
||||
const auto streamExecutorConfigKeys =
|
||||
streamExecutorConfig.get_property(ov::supported_properties.name()).as<std::vector<std::string>>();
|
||||
const auto hintsConfigKeys = perfHintsConfig.SupportedKeys();
|
||||
for (const auto& kvp : prop) {
|
||||
const auto& key = kvp.first;
|
||||
const auto& val = kvp.second.as<std::string>();
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
const auto& val = kvp.second;
|
||||
if (streamExecutorConfigKeys.end() !=
|
||||
std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) {
|
||||
streamExecutorConfig.set_property(key, val);
|
||||
streamExecutorConfig.set_property(key, val.as<std::string>());
|
||||
if (key == ov::affinity.name()) {
|
||||
const auto affinity_val = ov::util::from_string(val, ov::affinity);
|
||||
if (affinity_val == ov::Affinity::CORE || affinity_val == ov::Affinity::HYBRID_AWARE) {
|
||||
enableCpuPinning = true;
|
||||
changedCpuPinning = true;
|
||||
} else if (affinity_val == ov::Affinity::NUMA) {
|
||||
enableCpuPinning = false;
|
||||
changedCpuPinning = true;
|
||||
changedCpuPinning = true;
|
||||
try {
|
||||
const auto affinity_val = val.as<ov::Affinity>();
|
||||
enableCpuPinning =
|
||||
(affinity_val == ov::Affinity::CORE || affinity_val == ov::Affinity::HYBRID_AWARE) ? true
|
||||
: false;
|
||||
} catch (const ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val.as<std::string>(),
|
||||
"for property key ",
|
||||
key,
|
||||
". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE.");
|
||||
}
|
||||
}
|
||||
} else if (hintsConfigKeys.end() != std::find(hintsConfigKeys.begin(), hintsConfigKeys.end(), key)) {
|
||||
perfHintsConfig.SetConfig(key, val);
|
||||
} else if (key == ov::hint::enable_cpu_pinning.name()) {
|
||||
if (val == InferenceEngine::PluginConfigParams::YES) {
|
||||
enableCpuPinning = true;
|
||||
changedCpuPinning = true;
|
||||
} else if (val == InferenceEngine::PluginConfigParams::NO) {
|
||||
enableCpuPinning = false;
|
||||
changedCpuPinning = true;
|
||||
} else {
|
||||
} else if (key == ov::hint::performance_mode.name()) {
|
||||
try {
|
||||
hintPerfMode = val.as<ov::hint::PerformanceMode>();
|
||||
} catch (const ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val,
|
||||
val.as<std::string>(),
|
||||
"for property key ",
|
||||
key,
|
||||
". Expected only ov::hint::PerformanceMode::LATENCY/THROUGHPUT/CUMULATIVE_THROUGHPUT.");
|
||||
}
|
||||
} else if (key == ov::hint::num_requests.name()) {
|
||||
try {
|
||||
ov::Any value = val.as<std::string>();
|
||||
int val_i = value.as<int>();
|
||||
if (val_i < 0)
|
||||
OPENVINO_THROW("invalid value.");
|
||||
hintNumRequests = static_cast<uint32_t>(val_i);
|
||||
} catch (const ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val.as<std::string>(),
|
||||
"for property key ",
|
||||
ov::hint::num_requests.name(),
|
||||
". Expected only > 0.");
|
||||
}
|
||||
} else if (key == ov::hint::enable_cpu_pinning.name()) {
|
||||
try {
|
||||
enableCpuPinning = val.as<bool>();
|
||||
changedCpuPinning = true;
|
||||
} catch (ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val.as<std::string>(),
|
||||
"for property key ",
|
||||
ov::hint::enable_cpu_pinning.name(),
|
||||
". Expected only true/false.");
|
||||
}
|
||||
} else if (key == ov::hint::scheduling_core_type.name()) {
|
||||
const auto core_type = ov::util::from_string(val, ov::hint::scheduling_core_type);
|
||||
if (core_type == ov::hint::SchedulingCoreType::ANY_CORE ||
|
||||
core_type == ov::hint::SchedulingCoreType::PCORE_ONLY ||
|
||||
core_type == ov::hint::SchedulingCoreType::ECORE_ONLY) {
|
||||
schedulingCoreType = core_type;
|
||||
} else {
|
||||
try {
|
||||
schedulingCoreType = val.as<ov::hint::SchedulingCoreType>();
|
||||
} catch (ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val,
|
||||
val.as<std::string>(),
|
||||
"for property key ",
|
||||
ov::hint::scheduling_core_type.name(),
|
||||
". Expected only ",
|
||||
@ -126,15 +140,12 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
|
||||
ov::hint::SchedulingCoreType::ECORE_ONLY);
|
||||
}
|
||||
} else if (key == ov::hint::enable_hyper_threading.name()) {
|
||||
if (val == InferenceEngine::PluginConfigParams::YES) {
|
||||
enableHyperThreading = true;
|
||||
try {
|
||||
enableHyperThreading = val.as<bool>();
|
||||
changedHyperThreading = true;
|
||||
} else if (val == InferenceEngine::PluginConfigParams::NO) {
|
||||
enableHyperThreading = false;
|
||||
changedHyperThreading = true;
|
||||
} else {
|
||||
} catch (ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val,
|
||||
val.as<std::string>(),
|
||||
"for property key ",
|
||||
ov::hint::enable_hyper_threading.name(),
|
||||
". Expected only true/false.");
|
||||
@ -142,8 +153,8 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
|
||||
} else if (key == ov::intel_cpu::sparse_weights_decompression_rate.name()) {
|
||||
float val_f = 0.0f;
|
||||
try {
|
||||
val_f = std::stof(val);
|
||||
} catch (const std::exception&) {
|
||||
val_f = val.as<float>();
|
||||
} catch (const ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value for property key ",
|
||||
ov::intel_cpu::sparse_weights_decompression_rate.name(),
|
||||
". Expected only float numbers");
|
||||
@ -156,127 +167,154 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
|
||||
fcSparseWeiDecompressionRate = val_f;
|
||||
}
|
||||
} else if (key == ov::enable_profiling.name()) {
|
||||
if (val == InferenceEngine::PluginConfigParams::YES)
|
||||
collectPerfCounters = true;
|
||||
else if (val == InferenceEngine::PluginConfigParams::NO)
|
||||
collectPerfCounters = false;
|
||||
else
|
||||
OPENVINO_THROW("Wrong value for property key ", ov::enable_profiling.name(), ". Expected only YES/NO");
|
||||
} else if (key == ov::exclusive_async_requests.name()) {
|
||||
if (val == InferenceEngine::PluginConfigParams::YES)
|
||||
exclusiveAsyncRequests = true;
|
||||
else if (val == InferenceEngine::PluginConfigParams::NO)
|
||||
exclusiveAsyncRequests = false;
|
||||
else
|
||||
OPENVINO_THROW("Wrong value for property key ",
|
||||
ov::exclusive_async_requests.name(),
|
||||
". Expected only YES/NO");
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
try {
|
||||
collectPerfCounters = val.as<bool>();
|
||||
} catch (ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val.as<std::string>(),
|
||||
" for property key ",
|
||||
ov::enable_profiling.name(),
|
||||
". Expected only true/false");
|
||||
}
|
||||
} else if (key == ov::internal::exclusive_async_requests.name()) {
|
||||
try {
|
||||
exclusiveAsyncRequests = val.as<bool>();
|
||||
} catch (ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val.as<std::string>(),
|
||||
" for property key ",
|
||||
ov::internal::exclusive_async_requests.name(),
|
||||
". Expected only true/false");
|
||||
}
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
} else if (key.compare(InferenceEngine::PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT) == 0) {
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
// empty string means that dumping is switched off
|
||||
dumpToDot = val;
|
||||
} else if (key.compare(InferenceEngine::PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE) == 0) {
|
||||
if (val == InferenceEngine::PluginConfigParams::NO)
|
||||
lpTransformsMode = LPTransformsMode::Off;
|
||||
else if (val == InferenceEngine::PluginConfigParams::YES)
|
||||
lpTransformsMode = LPTransformsMode::On;
|
||||
else
|
||||
OPENVINO_THROW("Wrong value for property key ",
|
||||
InferenceEngine::PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE);
|
||||
dumpToDot = val.as<std::string>();
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
} else if (key == ov::intel_cpu::lp_transforms_mode.name()) {
|
||||
try {
|
||||
lpTransformsMode = val.as<bool>() ? LPTransformsMode::On : LPTransformsMode::Off;
|
||||
} catch (ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val.as<std::string>(),
|
||||
" for property key ",
|
||||
key,
|
||||
". Expected value only ov::intel_cpu::Config::LPTransformsMode::On/Off");
|
||||
}
|
||||
} else if (key == ov::device::id.name()) {
|
||||
device_id = val;
|
||||
device_id = val.as<std::string>();
|
||||
if (!device_id.empty()) {
|
||||
OPENVINO_THROW("CPU plugin supports only '' as device id");
|
||||
}
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
} else if (key == InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16) {
|
||||
if (val == InferenceEngine::PluginConfigParams::YES) {
|
||||
bool enable;
|
||||
try {
|
||||
enable = val.as<bool>();
|
||||
} catch (ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val.as<std::string>(),
|
||||
" for property key ",
|
||||
key,
|
||||
". Expected only true/false");
|
||||
}
|
||||
if (enable) {
|
||||
if (mayiuse(avx512_core)) {
|
||||
inferencePrecision = ov::element::bf16;
|
||||
} else {
|
||||
OPENVINO_THROW("Platform doesn't support BF16 format");
|
||||
}
|
||||
} else if (val == InferenceEngine::PluginConfigParams::NO) {
|
||||
inferencePrecision = ov::element::f32;
|
||||
} else {
|
||||
OPENVINO_THROW("Wrong value for property key ",
|
||||
InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16,
|
||||
". Expected only YES/NO");
|
||||
inferencePrecision = ov::element::f32;
|
||||
}
|
||||
inferencePrecisionSetExplicitly = true;
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
} else if (key == ov::hint::inference_precision.name()) {
|
||||
if (val == "bf16") {
|
||||
if (mayiuse(avx512_core)) {
|
||||
inferencePrecision = ov::element::bf16;
|
||||
inferencePrecisionSetExplicitly = true;
|
||||
}
|
||||
} else if (val == "f16") {
|
||||
try {
|
||||
auto const prec = val.as<ov::element::Type>();
|
||||
inferencePrecisionSetExplicitly = true;
|
||||
if (prec == ov::element::bf16) {
|
||||
if (mayiuse(avx512_core)) {
|
||||
inferencePrecision = ov::element::bf16;
|
||||
}
|
||||
} else if (prec == ov::element::f16) {
|
||||
#if defined(OPENVINO_ARCH_X86_64)
|
||||
if (mayiuse(avx512_core_fp16) || mayiuse(avx512_core_amx_fp16)) {
|
||||
inferencePrecision = ov::element::f16;
|
||||
inferencePrecisionSetExplicitly = true;
|
||||
}
|
||||
if (mayiuse(avx512_core_fp16) || mayiuse(avx512_core_amx_fp16)) {
|
||||
inferencePrecision = ov::element::f16;
|
||||
}
|
||||
#elif defined(OV_CPU_ARM_ENABLE_FP16)
|
||||
// TODO: add runtime FP16 feature support check for ARM
|
||||
inferencePrecision = ov::element::f16;
|
||||
inferencePrecisionSetExplicitly = true;
|
||||
// TODO: add runtime FP16 feature support check for ARM
|
||||
inferencePrecision = ov::element::f16;
|
||||
#endif
|
||||
} else if (val == "f32") {
|
||||
inferencePrecision = ov::element::f32;
|
||||
inferencePrecisionSetExplicitly = true;
|
||||
} else {
|
||||
OPENVINO_THROW("Wrong value for property key ",
|
||||
} else if (prec == ov::element::f32) {
|
||||
inferencePrecision = ov::element::f32;
|
||||
} else {
|
||||
OPENVINO_THROW("invalid value");
|
||||
}
|
||||
} catch (ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val.as<std::string>(),
|
||||
" for property key ",
|
||||
ov::hint::inference_precision.name(),
|
||||
". Supported values: bf16, f32");
|
||||
". Supported values: bf16, f16, f32");
|
||||
}
|
||||
} else if (InferenceEngine::PluginConfigInternalParams::KEY_CPU_RUNTIME_CACHE_CAPACITY == key) {
|
||||
} else if (ov::intel_cpu::cpu_runtime_cache_capacity.name() == key) {
|
||||
int val_i = -1;
|
||||
try {
|
||||
val_i = std::stoi(val);
|
||||
} catch (const std::exception&) {
|
||||
OPENVINO_THROW("Wrong value for property key ",
|
||||
InferenceEngine::PluginConfigInternalParams::KEY_CPU_RUNTIME_CACHE_CAPACITY,
|
||||
ov::Any value = val.as<std::string>();
|
||||
val_i = value.as<int>();
|
||||
} catch (const ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val.as<std::string>(),
|
||||
" for property key ",
|
||||
ov::intel_cpu::cpu_runtime_cache_capacity.name(),
|
||||
". Expected only integer numbers");
|
||||
}
|
||||
// any negative value will be treated
|
||||
// as zero that means disabling the cache
|
||||
rtCacheCapacity = std::max(val_i, 0);
|
||||
} else if (ov::intel_cpu::denormals_optimization.name() == key) {
|
||||
if (val == InferenceEngine::PluginConfigParams::YES) {
|
||||
denormalsOptMode = DenormalsOptMode::DO_On;
|
||||
} else if (val == InferenceEngine::PluginConfigParams::NO) {
|
||||
denormalsOptMode = DenormalsOptMode::DO_Off;
|
||||
} else {
|
||||
try {
|
||||
denormalsOptMode = val.as<bool>() ? DenormalsOptMode::DO_On : DenormalsOptMode::DO_Off;
|
||||
} catch (ov::Exception&) {
|
||||
denormalsOptMode = DenormalsOptMode::DO_Keep;
|
||||
OPENVINO_THROW("Wrong value for property key ",
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val.as<std::string>(),
|
||||
" for property key ",
|
||||
ov::intel_cpu::denormals_optimization.name(),
|
||||
". Expected only YES/NO");
|
||||
". Expected only true/false");
|
||||
}
|
||||
} else if (key == ov::intel_cpu::snippets_mode.name()) {
|
||||
try {
|
||||
auto const mode = val.as<ov::intel_cpu::SnippetsMode>();
|
||||
if (mode == ov::intel_cpu::SnippetsMode::ENABLE)
|
||||
snippetsMode = SnippetsMode::Enable;
|
||||
else if (mode == ov::intel_cpu::SnippetsMode::IGNORE_CALLBACK)
|
||||
snippetsMode = SnippetsMode::IgnoreCallback;
|
||||
else if (mode == ov::intel_cpu::SnippetsMode::DISABLE)
|
||||
snippetsMode = SnippetsMode::Disable;
|
||||
else
|
||||
OPENVINO_THROW("invalid value");
|
||||
} catch (ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val.as<std::string>(),
|
||||
" for property key ",
|
||||
ov::intel_cpu::snippets_mode.name(),
|
||||
". Expected values: ov::intel_cpu::SnippetsMode::ENABLE/DISABLE/IGNORE_CALLBACK");
|
||||
}
|
||||
} else if (key == InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE) {
|
||||
if (val == InferenceEngine::PluginConfigInternalParams::ENABLE)
|
||||
snippetsMode = SnippetsMode::Enable;
|
||||
else if (val == InferenceEngine::PluginConfigInternalParams::IGNORE_CALLBACK)
|
||||
snippetsMode = SnippetsMode::IgnoreCallback;
|
||||
else if (val == InferenceEngine::PluginConfigInternalParams::DISABLE)
|
||||
snippetsMode = SnippetsMode::Disable;
|
||||
else
|
||||
OPENVINO_THROW("Wrong value for property key ",
|
||||
InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE,
|
||||
". Expected values: ENABLE/DISABLE/IGNORE_CALLBACK");
|
||||
} else if (key == ov::hint::execution_mode.name()) {
|
||||
if (val == "PERFORMANCE") {
|
||||
executionMode = ov::hint::ExecutionMode::PERFORMANCE;
|
||||
} else if (val == "ACCURACY") {
|
||||
executionMode = ov::hint::ExecutionMode::ACCURACY;
|
||||
} else {
|
||||
OPENVINO_THROW("Wrong value for property key ",
|
||||
try {
|
||||
executionMode = val.as<ov::hint::ExecutionMode>();
|
||||
} catch (ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value ",
|
||||
val.as<std::string>(),
|
||||
"for property key ",
|
||||
ov::hint::execution_mode.name(),
|
||||
". Supported values: PERFORMANCE, ACCURACY");
|
||||
". Supported values: ov::hint::ExecutionMode::PERFORMANCE/ACCURACY");
|
||||
}
|
||||
} else {
|
||||
OPENVINO_THROW("NotFound: Unsupported property ", key, " by CPU plugin.");
|
||||
}
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
// apply execution mode after all the params are handled to prevent possible conflicts
|
||||
// when both execution_mode and inference_precision are specified
|
||||
@ -320,48 +358,50 @@ void Config::updateProperties() {
|
||||
if (!_config.empty())
|
||||
return;
|
||||
|
||||
using namespace InferenceEngine;
|
||||
switch (streamExecutorConfig._threadBindingType) {
|
||||
case IStreamsExecutor::ThreadBindingType::NONE:
|
||||
_config.insert({ PluginConfigParams::KEY_CPU_BIND_THREAD, PluginConfigParams::NO });
|
||||
_config.insert({ov::internal::cpu_bind_thread.name(), "NO"});
|
||||
break;
|
||||
case IStreamsExecutor::ThreadBindingType::CORES:
|
||||
_config.insert({ PluginConfigParams::KEY_CPU_BIND_THREAD, PluginConfigParams::YES });
|
||||
_config.insert({ov::internal::cpu_bind_thread.name(), "YES"});
|
||||
break;
|
||||
case IStreamsExecutor::ThreadBindingType::NUMA:
|
||||
_config.insert({ PluginConfigParams::KEY_CPU_BIND_THREAD, PluginConfigParams::NUMA });
|
||||
_config.insert({ov::internal::cpu_bind_thread.name(), ov::util::to_string(ov::Affinity::NUMA)});
|
||||
break;
|
||||
case IStreamsExecutor::ThreadBindingType::HYBRID_AWARE:
|
||||
_config.insert({ PluginConfigParams::KEY_CPU_BIND_THREAD, PluginConfigParams::HYBRID_AWARE });
|
||||
_config.insert({ov::internal::cpu_bind_thread.name(), ov::util::to_string(ov::Affinity::HYBRID_AWARE)});
|
||||
break;
|
||||
}
|
||||
if (collectPerfCounters == true)
|
||||
_config.insert({ PluginConfigParams::KEY_PERF_COUNT, PluginConfigParams::YES });
|
||||
_config.insert({ov::enable_profiling.name(), "YES"});
|
||||
else
|
||||
_config.insert({ PluginConfigParams::KEY_PERF_COUNT, PluginConfigParams::NO });
|
||||
_config.insert({ov::enable_profiling.name(), "NO"});
|
||||
if (exclusiveAsyncRequests == true)
|
||||
_config.insert({ PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS, PluginConfigParams::YES });
|
||||
_config.insert({ov::internal::exclusive_async_requests.name(), "YES"});
|
||||
else
|
||||
_config.insert({ PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS, PluginConfigParams::NO });
|
||||
_config.insert({ov::internal::exclusive_async_requests.name(), "NO"});
|
||||
|
||||
_config.insert({ PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, std::to_string(streamExecutorConfig._streams) });
|
||||
_config.insert({ov::device::id.name(), device_id});
|
||||
|
||||
_config.insert({ PluginConfigParams::KEY_CPU_THREADS_NUM, std::to_string(streamExecutorConfig._threads) });
|
||||
_config.insert({ov::num_streams.name(), std::to_string(streamExecutorConfig._streams)});
|
||||
_config.insert({ov::inference_num_threads.name(), std::to_string(streamExecutorConfig._threads)});
|
||||
_config.insert({ov::hint::performance_mode.name(), ov::util::to_string(hintPerfMode)});
|
||||
_config.insert({ov::hint::num_requests.name(), std::to_string(hintNumRequests)});
|
||||
|
||||
_config.insert({ PluginConfigParams::KEY_DEVICE_ID, device_id });
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
_config.insert({ PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT, dumpToDot });
|
||||
IE_SUPPRESS_DEPRECATED_END;
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
if (inferencePrecision == ov::element::bf16) {
|
||||
_config.insert({ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES });
|
||||
_config.insert(
|
||||
{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::YES});
|
||||
} else {
|
||||
_config.insert({ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO });
|
||||
_config.insert(
|
||||
{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO});
|
||||
}
|
||||
|
||||
_config.insert({ PluginConfigParams::KEY_PERFORMANCE_HINT, perfHintsConfig.ovPerfHint });
|
||||
_config.insert({ PluginConfigParams::KEY_PERFORMANCE_HINT_NUM_REQUESTS,
|
||||
std::to_string(perfHintsConfig.ovPerfHintNumRequests) });
|
||||
_config.insert({InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS,
|
||||
std::to_string(streamExecutorConfig._streams)});
|
||||
_config.insert(
|
||||
{InferenceEngine::PluginConfigParams::KEY_CPU_THREADS_NUM, std::to_string(streamExecutorConfig._threads)});
|
||||
_config.insert({InferenceEngine::PluginConfigParams::KEY_DUMP_EXEC_GRAPH_AS_DOT, dumpToDot});
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
|
||||
} // namespace intel_cpu
|
||||
|
@ -4,17 +4,18 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/runtime/threading/istreams_executor.hpp>
|
||||
#include <ie_performance_hints.hpp>
|
||||
#include <openvino/runtime/properties.hpp>
|
||||
#include <openvino/util/common_util.hpp>
|
||||
#include "openvino/core/type/element_type.hpp"
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
#include "openvino/runtime/threading/istreams_executor.hpp"
|
||||
#include "openvino/util/common_util.hpp"
|
||||
|
||||
#include "internal_properties.hpp"
|
||||
#include "utils/debug_caps_config.h"
|
||||
#include <openvino/core/type/element_type.hpp>
|
||||
|
||||
#include <bitset>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
@ -62,7 +63,8 @@ struct Config {
|
||||
size_t rtCacheCapacity = 0ul;
|
||||
#endif
|
||||
ov::threading::IStreamsExecutor::Config streamExecutorConfig;
|
||||
InferenceEngine::PerfHintsConfig perfHintsConfig;
|
||||
ov::hint::PerformanceMode hintPerfMode = ov::hint::PerformanceMode::LATENCY;
|
||||
uint32_t hintNumRequests = 0;
|
||||
bool enableCpuPinning = true;
|
||||
bool changedCpuPinning = false;
|
||||
ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
|
||||
|
@ -87,7 +87,7 @@ bool get_cpu_pinning(bool& input_value,
|
||||
result_value = latency ? false : true;
|
||||
}
|
||||
}
|
||||
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
|
||||
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
|
||||
# if defined(_WIN32)
|
||||
if (proc_type_table.size() > 1) {
|
||||
result_value = false;
|
||||
|
@ -12,7 +12,6 @@
|
||||
|
||||
#include "cpu_map_scheduling.hpp"
|
||||
#include "graph.h"
|
||||
#include "ie_system_conf.h"
|
||||
#include "openvino/runtime/threading/cpu_streams_info.hpp"
|
||||
#include "openvino/runtime/threading/istreams_executor.hpp"
|
||||
#include "performance_heuristics.hpp"
|
||||
@ -146,7 +145,8 @@ std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
|
||||
}
|
||||
}
|
||||
|
||||
if (((input_streams_changed == false) && (input_perf_hint == CONFIG_VALUE(LATENCY)) &&
|
||||
if (((input_streams_changed == false) &&
|
||||
(input_perf_hint == ov::util::to_string(ov::hint::PerformanceMode::LATENCY)) &&
|
||||
((latencyThreadingMode == Config::LatencyThreadingMode::PER_PLATFORM) || (proc_type_table.size() == 1))) ||
|
||||
((input_streams_changed == true) && (input_streams == 1))) {
|
||||
n_streams = 1;
|
||||
@ -175,7 +175,8 @@ std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
|
||||
stream_info[PROC_TYPE] = ALL_PROC;
|
||||
}
|
||||
}
|
||||
} else if ((input_streams_changed == false) && (input_perf_hint == CONFIG_VALUE(LATENCY)) &&
|
||||
} else if ((input_streams_changed == false) &&
|
||||
(input_perf_hint == ov::util::to_string(ov::hint::PerformanceMode::LATENCY)) &&
|
||||
(latencyThreadingMode == Config::LatencyThreadingMode::PER_SOCKET)) {
|
||||
for (auto& row : proc_socket_table) {
|
||||
n_threads_per_stream = std::max(n_threads_per_stream, row[ALL_PROC]);
|
||||
@ -188,7 +189,8 @@ std::vector<std::vector<int>> get_streams_info_table(const int input_streams,
|
||||
}
|
||||
n_streams = input_threads > 0 ? static_cast<int>(input_threads / n_threads_per_stream) : n_streams;
|
||||
n_streams = input_infer_requests > 0 ? std::min(input_infer_requests, n_streams) : n_streams;
|
||||
} else if ((input_streams_changed == false) && (input_perf_hint == CONFIG_VALUE(LATENCY)) &&
|
||||
} else if ((input_streams_changed == false) &&
|
||||
(input_perf_hint == ov::util::to_string(ov::hint::PerformanceMode::LATENCY)) &&
|
||||
(latencyThreadingMode == Config::LatencyThreadingMode::PER_NUMA_NODE)) {
|
||||
if (proc_type_table.size() == 1) {
|
||||
n_streams = 1;
|
||||
@ -497,7 +499,7 @@ std::vector<std::vector<int>> generate_stream_info(const int streams,
|
||||
|
||||
proc_type_table = apply_hyper_threading(config.enableHyperThreading,
|
||||
config.changedHyperThreading,
|
||||
config.perfHintsConfig.ovPerfHint,
|
||||
ov::util::to_string(config.hintPerfMode),
|
||||
proc_type_table);
|
||||
executor_config._cpu_reservation = get_cpu_pinning(config.enableCpuPinning,
|
||||
config.changedCpuPinning,
|
||||
@ -511,9 +513,9 @@ std::vector<std::vector<int>> generate_stream_info(const int streams,
|
||||
executor_config._streams_info_table = get_streams_info_table(executor_config._streams,
|
||||
executor_config._streams_changed,
|
||||
executor_config._threads,
|
||||
config.perfHintsConfig.ovPerfHintNumRequests,
|
||||
config.hintNumRequests,
|
||||
model_prefer_threads,
|
||||
config.perfHintsConfig.ovPerfHint,
|
||||
ov::util::to_string(config.hintPerfMode),
|
||||
config.latencyThreadingMode,
|
||||
proc_type_table);
|
||||
return proc_type_table;
|
||||
|
72
src/plugins/intel_cpu/src/internal_properties.hpp
Normal file
72
src/plugins/intel_cpu/src/internal_properties.hpp
Normal file
@ -0,0 +1,72 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "openvino/runtime/intel_cpu/properties.hpp"
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
/**
|
||||
* @brief Defines how many records can be stored in the CPU runtime parameters cache per CPU runtime parameter type per
|
||||
* stream.
|
||||
*/
|
||||
static constexpr Property<int32_t, PropertyMutability::RW> cpu_runtime_cache_capacity{"CPU_RUNTIME_CACHE_CAPACITY"};
|
||||
|
||||
/**
|
||||
* @brief Enables (true) or disables (false) low-precision transformations.
|
||||
*/
|
||||
static constexpr Property<bool, PropertyMutability::RW> lp_transforms_mode{"LP_TRANSFORMS_MODE"};
|
||||
|
||||
/**
|
||||
* @brief Enum to define possible snippets mode hints.
|
||||
*/
|
||||
enum class SnippetsMode {
|
||||
ENABLE = 0, //!< Enable
|
||||
IGNORE_CALLBACK = 1, //!< Ignore callback
|
||||
DISABLE = 2, //!< Disable
|
||||
};
|
||||
|
||||
/** @cond INTERNAL */
|
||||
inline std::ostream& operator<<(std::ostream& os, const SnippetsMode& mode) {
|
||||
switch (mode) {
|
||||
case SnippetsMode::ENABLE:
|
||||
return os << "ENABLE";
|
||||
case SnippetsMode::IGNORE_CALLBACK:
|
||||
return os << "IGNORE_CALLBACK";
|
||||
case SnippetsMode::DISABLE:
|
||||
return os << "DISABLE";
|
||||
default:
|
||||
OPENVINO_THROW("Unsupported snippets mode value");
|
||||
}
|
||||
}
|
||||
|
||||
inline std::istream& operator>>(std::istream& is, SnippetsMode& mode) {
|
||||
std::string str;
|
||||
is >> str;
|
||||
if (str == "ENABLE") {
|
||||
mode = SnippetsMode::ENABLE;
|
||||
} else if (str == "IGNORE_CALLBACK") {
|
||||
mode = SnippetsMode::IGNORE_CALLBACK;
|
||||
} else if (str == "DISABLE") {
|
||||
mode = SnippetsMode::DISABLE;
|
||||
} else {
|
||||
OPENVINO_THROW("Unsupported snippets mode: ", str);
|
||||
}
|
||||
return is;
|
||||
}
|
||||
/** @endcond */
|
||||
|
||||
/**
|
||||
* @brief Define tokenization mode for Snippets.
|
||||
* @param ENABLE - default pipeline
|
||||
* @param IGNORE_CALLBACK - disable the Snippets markup transformation and tokenization callback
|
||||
* @param DISABLE - turn off the Snippets
|
||||
*/
|
||||
static constexpr Property<SnippetsMode, PropertyMutability::RW> snippets_mode{"SNIPPETS_MODE"};
|
||||
|
||||
} // namespace intel_cpu
|
||||
} // namespace ov
|
@ -2,20 +2,15 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "ie_metric_helpers.hpp" // must be included first
|
||||
|
||||
#include "plugin.h"
|
||||
|
||||
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
|
||||
#include "extension.h"
|
||||
#include "extension_mngr.h"
|
||||
|
||||
#include "ie_ngraph_utils.hpp"
|
||||
#include "ie_plugin_config.hpp"
|
||||
#include "ie_system_conf.h"
|
||||
#include "itt.h"
|
||||
#include "openvino/runtime/threading/cpu_streams_info.hpp"
|
||||
#include "internal_properties.hpp"
|
||||
#include "openvino/runtime/intel_cpu/properties.hpp"
|
||||
#include "openvino/runtime/internal_properties.hpp"
|
||||
|
||||
#include "openvino/runtime/properties.hpp"
|
||||
#include "openvino/runtime/threading/cpu_streams_info.hpp"
|
||||
#include "openvino/runtime/threading/executor_manager.hpp"
|
||||
@ -34,13 +29,13 @@
|
||||
|
||||
#include <cpu/x64/cpu_isa_traits.hpp>
|
||||
|
||||
using namespace ov::threading;
|
||||
|
||||
#if defined(OV_CPU_WITH_ACL)
|
||||
#include "nodes/executors/acl/acl_ie_scheduler.hpp"
|
||||
#include "arm_compute/runtime/CPP/CPPScheduler.h"
|
||||
#endif
|
||||
|
||||
using namespace ov::threading;
|
||||
|
||||
namespace ov {
|
||||
namespace intel_cpu {
|
||||
|
||||
@ -187,16 +182,14 @@ Engine::~Engine() {
|
||||
}
|
||||
|
||||
/**
 * @brief Tells whether the number of streams is explicitly present in a configuration map.
 * @param config Property map to inspect.
 * @return true when @p config has an entry for either the legacy
 *         KEY_CPU_THROUGHPUT_STREAMS key or ov::num_streams; false otherwise.
 */
static bool streamsSet(const ov::AnyMap& config) {
    // The legacy key is only referenced inside the deprecation-suppression guards.
    OPENVINO_SUPPRESS_DEPRECATED_START
    if (config.count(InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS))
        return true;
    OPENVINO_SUPPRESS_DEPRECATED_END
    return config.count(ov::num_streams.name()) != 0;
}
|
||||
|
||||
void Engine::apply_performance_hints(ov::AnyMap& config, const std::shared_ptr<ov::Model>& model) const {
|
||||
auto getNumStreamsLatency = [&]() {
|
||||
return std::pair<std::string, std::string>(CONFIG_VALUE(CPU_THROUGHPUT_NUMA),
|
||||
ov::util::to_string(ov::streams::NUMA));
|
||||
};
|
||||
|
||||
auto getNumStreamsThroughput = [&]() {
|
||||
const auto isa = dnnl::get_effective_cpu_isa();
|
||||
float isaSpecificThreshold = 1.0f;
|
||||
@ -250,17 +243,27 @@ void Engine::apply_performance_hints(ov::AnyMap& config, const std::shared_ptr<o
|
||||
|
||||
auto num_requests = config.find(ov::hint::num_requests.name());
|
||||
if (num_requests != config.end()) { // arrived with config to the LoadNetwork (and thus higher pri)
|
||||
auto val = InferenceEngine::PerfHintsConfig::CheckPerformanceHintRequestValue(num_requests->second.as<std::string>());
|
||||
int val = -1;
|
||||
try {
|
||||
ov::Any value = num_requests->second.as<std::string>();
|
||||
val = value.as<int>();
|
||||
if (val < 0)
|
||||
OPENVINO_THROW("invalid value!");
|
||||
} catch (const ov::Exception&) {
|
||||
OPENVINO_THROW("Wrong value of ",
|
||||
num_requests->second.as<std::string>(),
|
||||
" for property key ",
|
||||
ov::hint::num_requests.name(),
|
||||
". Expected only positive integer numbers");
|
||||
}
|
||||
if (val > 0)
|
||||
streams_info.num_streams = std::min(streams_info.num_streams, val);
|
||||
} else if (engConfig.perfHintsConfig.ovPerfHintNumRequests) { // set thru SetConfig to the plugin, 2nd priority
|
||||
streams_info.num_streams =
|
||||
std::min(streams_info.num_streams, engConfig.perfHintsConfig.ovPerfHintNumRequests);
|
||||
} else if (engConfig.hintNumRequests > 0) { // set thru SetConfig to the plugin, 2nd priority
|
||||
streams_info.num_streams = std::min(streams_info.num_streams, static_cast<int>(engConfig.hintNumRequests));
|
||||
}
|
||||
return std::pair<std::string, StreamCfg>(std::to_string(streams_info.num_streams), streams_info);
|
||||
};
|
||||
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
auto getPerfHintName = [&]() {
|
||||
const bool streamsExplicitlySetForModel = streamsSet(config);
|
||||
// checking streams (to avoid overriding what user might explicitly set in the incoming config or previously via
|
||||
@ -269,59 +272,56 @@ void Engine::apply_performance_hints(ov::AnyMap& config, const std::shared_ptr<o
|
||||
return std::string();
|
||||
|
||||
const auto& perf_hint = config.find(ov::hint::performance_mode.name());
|
||||
// the perf_hint may have just arrived to the LoadNetwork, or was set with the plugin's SetConfig
|
||||
if (perf_hint == config.end() && engConfig.perfHintsConfig.ovPerfHint.empty())
|
||||
return std::string();
|
||||
/* performance hints set for network have higher priority than engine ones.
|
||||
* This applies for all the configuration parameters */
|
||||
const auto perf_hint_name =
|
||||
(perf_hint != config.end())
|
||||
? InferenceEngine::PerfHintsConfig::CheckPerformanceHintValue(perf_hint->second.as<std::string>())
|
||||
: engConfig.perfHintsConfig.ovPerfHint;
|
||||
(perf_hint != config.end()) ? perf_hint->second.as<std::string>() : ov::util::to_string(engConfig.hintPerfMode);
|
||||
return perf_hint_name;
|
||||
};
|
||||
|
||||
// We compute both hints values because the optimal number of streams are computed based on ov::Model
|
||||
// while we export model in cpu internal opset so we need to save precomputed optimal # streams for both hint modes
|
||||
const auto latency_hints = getNumStreamsLatency();
|
||||
const auto latency_hints = ov::util::to_string(ov::streams::NUMA);
|
||||
const auto tput_hints = getNumStreamsThroughput();
|
||||
|
||||
// save hints parameters to model rt_info
|
||||
ov::AnyMap hints_props;
|
||||
const auto latency_name = std::string(CONFIG_VALUE(LATENCY)) + "_" + std::string(ov::num_streams.name());
|
||||
const auto tput_name = std::string(CONFIG_VALUE(THROUGHPUT)) + "_" + std::string(ov::num_streams.name());
|
||||
hints_props.insert({latency_name, latency_hints.second});
|
||||
const auto latency_name =
|
||||
ov::util::to_string(ov::hint::PerformanceMode::LATENCY) + "_" + std::string(ov::num_streams.name());
|
||||
const auto tput_name =
|
||||
ov::util::to_string(ov::hint::PerformanceMode::THROUGHPUT) + "_" + std::string(ov::num_streams.name());
|
||||
hints_props.insert({latency_name, latency_hints});
|
||||
hints_props.insert({tput_name, std::to_string(tput_hints.second.num_streams)});
|
||||
model->set_rt_info(hints_props, "intel_cpu_hints_config");
|
||||
|
||||
const auto perf_hint_name = getPerfHintName();
|
||||
if (perf_hint_name == CONFIG_VALUE(LATENCY)) {
|
||||
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = latency_hints.first;
|
||||
config[ov::num_streams.name()] = latency_hints.second;
|
||||
} else if (perf_hint_name == CONFIG_VALUE(THROUGHPUT)) {
|
||||
if (perf_hint_name == ov::util::to_string(ov::hint::PerformanceMode::LATENCY)) {
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = CONFIG_VALUE(CPU_THROUGHPUT_NUMA);
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
config[ov::num_streams.name()] = latency_hints;
|
||||
} else if (perf_hint_name == ov::util::to_string(ov::hint::PerformanceMode::THROUGHPUT)) {
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = tput_hints.first;
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
config[ov::num_streams.name()] = tput_hints.first;
|
||||
config[CONFIG_KEY_INTERNAL(BIG_CORE_STREAMS)] = std::to_string(tput_hints.second.big_core_streams);
|
||||
config[CONFIG_KEY_INTERNAL(SMALL_CORE_STREAMS)] = std::to_string(tput_hints.second.small_core_streams);
|
||||
config[CONFIG_KEY_INTERNAL(THREADS_PER_STREAM_BIG)] =
|
||||
std::to_string(tput_hints.second.threads_per_stream_big);
|
||||
config[CONFIG_KEY_INTERNAL(THREADS_PER_STREAM_SMALL)] =
|
||||
config[ov::threading::big_core_streams.name()] = std::to_string(tput_hints.second.big_core_streams);
|
||||
config[ov::threading::small_core_streams.name()] = std::to_string(tput_hints.second.small_core_streams);
|
||||
config[ov::threading::threads_per_stream_big.name()] = std::to_string(tput_hints.second.threads_per_stream_big);
|
||||
config[ov::threading::threads_per_stream_small.name()] =
|
||||
std::to_string(tput_hints.second.threads_per_stream_small);
|
||||
config[CONFIG_KEY_INTERNAL(SMALL_CORE_OFFSET)] = std::to_string(tput_hints.second.small_core_offset);
|
||||
config[ov::threading::small_core_offset.name()] = std::to_string(tput_hints.second.small_core_offset);
|
||||
}
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
|
||||
void Engine::get_performance_streams(Config& config, const std::shared_ptr<ov::Model>& model) const{
|
||||
const auto perf_hint_name = config.perfHintsConfig.ovPerfHint;
|
||||
const int latency_streams = get_default_latency_streams(config.latencyThreadingMode);
|
||||
int streams;
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
if (config.streamExecutorConfig._streams_changed) {
|
||||
streams = config.streamExecutorConfig._streams;
|
||||
} else if (perf_hint_name == CONFIG_VALUE(LATENCY)) {
|
||||
} else if (config.hintPerfMode == ov::hint::PerformanceMode::LATENCY) {
|
||||
streams = latency_streams;
|
||||
} else if (perf_hint_name == CONFIG_VALUE(THROUGHPUT)) {
|
||||
} else if (config.hintPerfMode == ov::hint::PerformanceMode::THROUGHPUT) {
|
||||
streams = 0;
|
||||
} else {
|
||||
streams = config.streamExecutorConfig._streams == 1 ? 0 : config.streamExecutorConfig._streams;
|
||||
@ -331,23 +331,26 @@ void Engine::get_performance_streams(Config& config, const std::shared_ptr<ov::M
|
||||
get_num_streams(streams, model, config);
|
||||
}
|
||||
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
config._config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = std::to_string(config.streamExecutorConfig._streams);
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
}
|
||||
|
||||
void Engine::calculate_streams(Config& conf, const std::shared_ptr<ov::Model>& model, bool imported) const{
|
||||
void Engine::calculate_streams(Config& conf, const std::shared_ptr<ov::Model>& model, bool imported) const {
|
||||
// import config props from caching model
|
||||
if (imported && !is_cpu_map_available()) {
|
||||
if (model->has_rt_info("intel_cpu_hints_config") && !conf.perfHintsConfig.ovPerfHint.empty()) {
|
||||
const auto mode_name = conf.perfHintsConfig.ovPerfHint;
|
||||
if (mode_name == CONFIG_VALUE(LATENCY) || mode_name == CONFIG_VALUE(THROUGHPUT)) {
|
||||
if (model->has_rt_info("intel_cpu_hints_config")) {
|
||||
const auto perf_mode = conf.hintPerfMode;
|
||||
if (perf_mode == ov::hint::PerformanceMode::LATENCY || perf_mode == ov::hint::PerformanceMode::THROUGHPUT) {
|
||||
const auto& hints_config = model->get_rt_info<ov::AnyMap>("intel_cpu_hints_config");
|
||||
const auto hints_param_name = mode_name + "_" + std::string(ov::num_streams.name());
|
||||
const auto hints_param_name =
|
||||
ov::util::to_string(perf_mode) + "_" + std::string(ov::num_streams.name());
|
||||
const auto it = hints_config.find(hints_param_name);
|
||||
if (it != hints_config.end()) {
|
||||
conf.readProperties({{std::string(ov::num_streams.name()), it->second.as<std::string>()}});
|
||||
} else {
|
||||
OPENVINO_THROW("Cache file doesn't contain precalculated number of streams for mode ", mode_name);
|
||||
OPENVINO_THROW("Cache file doesn't contain precalculated number of streams for mode ",
|
||||
ov::util::to_string(perf_mode));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -363,7 +366,7 @@ void Engine::calculate_streams(Config& conf, const std::shared_ptr<ov::Model>& m
|
||||
if (it_model_prefer != hints_config.end()) {
|
||||
try {
|
||||
cache_model_prefer = it_model_prefer->second.as<int>();
|
||||
} catch (const std::exception&) {
|
||||
} catch (const ov::Exception&) {
|
||||
OPENVINO_THROW("Cache file doesn't have valid value for " + model_prefer_name);
|
||||
}
|
||||
|
||||
@ -463,17 +466,17 @@ StreamCfg Engine::get_streams_num(ov::threading::IStreamsExecutor::ThreadBinding
|
||||
}
|
||||
|
||||
/**
 * @brief Decides whether the low precision transformations should run for a model.
 *
 * The value supplied in the model configuration wins; the engine-wide setting is used only
 * when the model configuration does not mention the property.
 *
 * @param modelConfig Properties passed for the model.
 * @param engineConfig Plugin-level configuration used as the fallback.
 * @return true when low precision transformations are enabled.
 * @note A value that cannot be converted to bool is reported through OPENVINO_THROW.
 */
static bool shouldEnableLPT(const ov::AnyMap& modelConfig, const Config& engineConfig) {
    const auto enableLPT = modelConfig.find(ov::intel_cpu::lp_transforms_mode.name());
    if (enableLPT == modelConfig.end())  // model config has higher priority
        return engineConfig.lpTransformsMode == Config::LPTransformsMode::On;

    try {
        return enableLPT->second.as<bool>();
    } catch (const ov::Exception&) {
        // Re-report the conversion failure with the offending value and the property name.
        OPENVINO_THROW("Wrong value ",
                       enableLPT->second.as<std::string>(),
                       " for property key LP_TRANSFORMS_MODE. Expected values: YES/NO");
    }
}
|
||||
|
||||
static ov::element::Type getInferencePrecision(const ov::AnyMap& modelConfig,
|
||||
@ -491,16 +494,16 @@ static Config::ModelType getModelType(const std::shared_ptr<const Model>& model)
|
||||
}
|
||||
|
||||
static Config::SnippetsMode getSnippetsMode(const ov::AnyMap& modelConfig, const Config& engineConfig) {
|
||||
const auto& snippetsMode = modelConfig.find(InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE);
|
||||
const auto& snippetsMode = modelConfig.find(ov::intel_cpu::snippets_mode.name());
|
||||
if (snippetsMode == modelConfig.end()) // not set explicitly
|
||||
return Config::SnippetsMode::Enable; // enable by default
|
||||
|
||||
const auto& val = snippetsMode->second.as<std::string>();
|
||||
if (val == InferenceEngine::PluginConfigInternalParams::IGNORE_CALLBACK)
|
||||
if (val == ov::util::to_string(ov::intel_cpu::SnippetsMode::IGNORE_CALLBACK))
|
||||
return Config::SnippetsMode::IgnoreCallback;
|
||||
else if (val == InferenceEngine::PluginConfigInternalParams::DISABLE)
|
||||
else if (val == ov::util::to_string(ov::intel_cpu::SnippetsMode::DISABLE))
|
||||
return Config::SnippetsMode::Disable;
|
||||
else if (val == InferenceEngine::PluginConfigInternalParams::ENABLE)
|
||||
else if (val == ov::util::to_string(ov::intel_cpu::SnippetsMode::ENABLE))
|
||||
return Config::SnippetsMode::Enable;
|
||||
else
|
||||
OPENVINO_THROW("Wrong value for property key SNIPPETS_MODE. Expected values: ENABLE/DISABLE/IGNORE_CALLBACK");
|
||||
@ -548,6 +551,7 @@ Engine::compile_model(const std::shared_ptr<const ov::Model>& model, const ov::A
|
||||
Config conf = engConfig;
|
||||
|
||||
Transformations transformations(cloned_model, enableLPT, inferencePrecision, is_legacy_api(), snippetsMode, conf);
|
||||
|
||||
transformations.UpToLpt();
|
||||
|
||||
if (!is_cpu_map_available()) {
|
||||
@ -667,8 +671,7 @@ ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options)
|
||||
} else if (name == ov::hint::inference_precision) {
|
||||
return decltype(ov::hint::inference_precision)::value_type(engConfig.inferencePrecision);
|
||||
} else if (name == ov::hint::performance_mode) {
|
||||
const auto perfHint = ov::util::from_string(engConfig.perfHintsConfig.ovPerfHint, ov::hint::performance_mode);
|
||||
return perfHint;
|
||||
return engConfig.hintPerfMode;
|
||||
} else if (name == ov::hint::enable_cpu_pinning) {
|
||||
const bool pin_value = engConfig.enableCpuPinning;
|
||||
return decltype(ov::hint::enable_cpu_pinning)::value_type(pin_value);
|
||||
@ -679,8 +682,7 @@ ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options)
|
||||
const bool ht_value = engConfig.enableHyperThreading;
|
||||
return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value);
|
||||
} else if (name == ov::hint::num_requests) {
|
||||
const auto perfHintNumRequests = engConfig.perfHintsConfig.ovPerfHintNumRequests;
|
||||
return decltype(ov::hint::num_requests)::value_type(perfHintNumRequests);
|
||||
return decltype(ov::hint::num_requests)::value_type(engConfig.hintNumRequests);
|
||||
} else if (name == ov::hint::execution_mode) {
|
||||
return engConfig.executionMode;
|
||||
}
|
||||
@ -700,13 +702,13 @@ ov::Any Engine::get_metric_legacy(const std::string& name, const ov::AnyMap& opt
|
||||
METRIC_KEY(RANGE_FOR_STREAMS),
|
||||
METRIC_KEY(IMPORT_EXPORT_SUPPORT),
|
||||
};
|
||||
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
|
||||
} else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
|
||||
IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, deviceFullName);
|
||||
} else if (name == METRIC_KEY(AVAILABLE_DEVICES)) {
|
||||
std::vector<std::string> availableDevices = { "" };
|
||||
IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
|
||||
} else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) {
|
||||
return metrics;
|
||||
} else if (name == ov::device::full_name.name()) {
|
||||
return decltype(ov::device::full_name)::value_type(deviceFullName);
|
||||
} else if (name == ov::available_devices.name()) {
|
||||
std::vector<std::string> availableDevices = {""};
|
||||
return decltype(ov::available_devices)::value_type(std::move(availableDevices));
|
||||
} else if (name == ov::device::capabilities.name()) {
|
||||
std::vector<std::string> capabilities;
|
||||
if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16))
|
||||
capabilities.push_back(METRIC_VALUE(BF16));
|
||||
@ -716,27 +718,27 @@ ov::Any Engine::get_metric_legacy(const std::string& name, const ov::AnyMap& opt
|
||||
capabilities.push_back(METRIC_VALUE(FP16));
|
||||
capabilities.push_back(METRIC_VALUE(INT8));
|
||||
capabilities.push_back(METRIC_VALUE(BIN));
|
||||
IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
|
||||
return decltype(ov::device::capabilities)::value_type(std::move(capabilities));
|
||||
} else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
|
||||
std::vector<std::string> configKeys;
|
||||
for (auto && opt : engConfig._config)
|
||||
for (auto&& opt : engConfig._config)
|
||||
configKeys.push_back(opt.first);
|
||||
IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys);
|
||||
} else if (name == METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)) {
|
||||
return configKeys;
|
||||
} else if (name == ov::range_for_async_infer_requests.name()) {
|
||||
std::tuple<unsigned int, unsigned int, unsigned int> range = std::make_tuple(1, 1, 1);
|
||||
IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, range);
|
||||
} else if (name == METRIC_KEY(RANGE_FOR_STREAMS)) {
|
||||
return decltype(ov::range_for_async_infer_requests)::value_type(range);
|
||||
} else if (name == ov::range_for_streams.name()) {
|
||||
std::tuple<unsigned int, unsigned int> range = std::make_tuple(1, parallel_get_max_threads());
|
||||
IE_SET_METRIC_RETURN(RANGE_FOR_STREAMS, range);
|
||||
return decltype(ov::range_for_streams)::value_type(range);
|
||||
} else if (name == METRIC_KEY(IMPORT_EXPORT_SUPPORT)) {
|
||||
IE_SET_METRIC_RETURN(IMPORT_EXPORT_SUPPORT, true);
|
||||
} else if (ov::internal::supported_properties == name) {
|
||||
return true;
|
||||
} else if (ov::internal::supported_properties.name() == name) {
|
||||
return decltype(ov::internal::supported_properties)::value_type{
|
||||
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
|
||||
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
|
||||
} else if (name == ov::internal::caching_properties) {
|
||||
std::vector<ov::PropertyName> cachingProperties = { METRIC_KEY(FULL_DEVICE_NAME) };
|
||||
return decltype(ov::internal::caching_properties)::value_type(cachingProperties);
|
||||
std::vector<ov::PropertyName> cachingProperties = {ov::device::full_name.name()};
|
||||
return decltype(ov::internal::caching_properties)::value_type(std::move(cachingProperties));
|
||||
}
|
||||
|
||||
return {};
|
||||
@ -800,13 +802,13 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio
|
||||
} else if (name == ov::device::capabilities) {
|
||||
std::vector<std::string> capabilities;
|
||||
if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16))
|
||||
capabilities.push_back(METRIC_VALUE(BF16));
|
||||
capabilities.push_back(ov::device::capability::BF16);
|
||||
if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core))
|
||||
capabilities.push_back(METRIC_VALUE(WINOGRAD));
|
||||
capabilities.push_back(METRIC_VALUE(FP32));
|
||||
capabilities.push_back(METRIC_VALUE(FP16));
|
||||
capabilities.push_back(METRIC_VALUE(INT8));
|
||||
capabilities.push_back(METRIC_VALUE(BIN));
|
||||
capabilities.push_back(ov::device::capability::WINOGRAD);
|
||||
capabilities.push_back(ov::device::capability::FP32);
|
||||
capabilities.push_back(ov::device::capability::FP16);
|
||||
capabilities.push_back(ov::device::capability::INT8);
|
||||
capabilities.push_back(ov::device::capability::BIN);
|
||||
capabilities.push_back(ov::device::capability::EXPORT_IMPORT);
|
||||
return decltype(ov::device::capabilities)::value_type(capabilities);
|
||||
} else if (name == ov::range_for_async_infer_requests) {
|
||||
@ -849,10 +851,9 @@ ov::SupportedOpsMap Engine::query_model(const std::shared_ptr<const ov::Model>&
|
||||
Config::ModelType modelType = getModelType(model);
|
||||
conf.readProperties(config, modelType);
|
||||
|
||||
const auto& lptProp = config.find(InferenceEngine::PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE);
|
||||
const auto& lptProp = config.find(ov::intel_cpu::lp_transforms_mode.name());
|
||||
const bool enableLPT =
|
||||
(lptProp != config.end() &&
|
||||
lptProp->second.as<std::string>() == InferenceEngine::PluginConfigParams::YES) /* enabled in the orig_config*/
|
||||
(lptProp != config.end() && lptProp->second.as<bool>() == true) /* enabled in the orig_config*/
|
||||
|| Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled */;
|
||||
const Config::SnippetsMode snippetsMode = getSnippetsMode(config, conf);
|
||||
|
||||
|
@ -43,9 +43,9 @@ void make_config(StreamGenerateionTestCase& test_data, ov::intel_cpu::Config& co
|
||||
config.changedCpuPinning = test_data.input_cpu_changed;
|
||||
config.enableHyperThreading = test_data.input_ht_value;
|
||||
config.changedHyperThreading = test_data.input_ht_changed;
|
||||
config.perfHintsConfig.ovPerfHint = ov::util::to_string(test_data.input_pm_hint);
|
||||
config.hintPerfMode = test_data.input_pm_hint;
|
||||
config.latencyThreadingMode = test_data.input_latency_threading_mode;
|
||||
config.perfHintsConfig.ovPerfHintNumRequests = test_data.input_request;
|
||||
config.hintNumRequests = test_data.input_request;
|
||||
config.streamExecutorConfig._streams = test_data.input_stream;
|
||||
config.streamExecutorConfig._streams_changed = test_data.input_stream_changed;
|
||||
config.streamExecutorConfig._threads = test_data.input_thread;
|
||||
@ -70,8 +70,7 @@ public:
|
||||
ASSERT_EQ(test_data.output_cpu_value, config.streamExecutorConfig._cpu_reservation);
|
||||
ASSERT_EQ(test_data.output_ht_value, config.enableHyperThreading);
|
||||
ASSERT_EQ(test_data.output_type, config.schedulingCoreType);
|
||||
ASSERT_EQ(test_data.output_pm_hint,
|
||||
ov::util::from_string(config.perfHintsConfig.ovPerfHint, ov::hint::performance_mode));
|
||||
ASSERT_EQ(test_data.output_pm_hint, config.hintPerfMode);
|
||||
}
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user