diff --git a/docs/IE_DG/supported_plugins/GNA.md b/docs/IE_DG/supported_plugins/GNA.md
index 5e3af7dd46b..6dfdba55787 100644
--- a/docs/IE_DG/supported_plugins/GNA.md
+++ b/docs/IE_DG/supported_plugins/GNA.md
@@ -32,7 +32,7 @@ Devices with Intel® GNA support:
* [Intel® Core™ Processors (formerly codenamed Cannon Lake)](https://ark.intel.com/content/www/us/en/ark/products/136863/intel-core-i3-8121u-processor-4m-cache-up-to-3-20-ghz.html)
* [10th Generation Intel® Core™ Processors (formerly codenamed Ice Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/74979/ice-lake.html):
-
+
* [11th Generation Intel® Core™ Processors (formerly codenamed Tiger Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/88759/tiger-lake.html).
* [12th Generation Intel® Core™ Processors (formerly codenamed Alder Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/147470/products-formerly-alder-lake.html).
@@ -154,7 +154,7 @@ The following tables provide a more explicit representation of the Intel(R) GNA
|1|2|1|1|1|1|128|240|240|240|240|240|240|240|
|1|2|1|1|1|1|256|240|240|240|240|240|240|170|
|1|2|1|1|1|1|384|240|240|240|240|240|170|113|
-|1|3|1|1|1|1|128|240|240|240|240|240| | |
+|1|3|1|1|1|1|128|240|240|240|240|240| | |
|1|3|1|1|1|1|256|240|240|240|240|240| | |
|1|3|1|1|1|1|384|240|240|240|240|240| | |
|2|1|1|1|1|1|128|192|192|192|192|192|192|192|
@@ -166,7 +166,7 @@ The following tables provide a more explicit representation of the Intel(R) GNA
|3|1|1|1|1|1|128|128|128|128|128|128|128|85|
|3|1|1|1|1|1|256|128|128|128|128|128|128|85|
|3|1|1|1|1|1|384|128|128|128|128|128|128|85|
-|3|3|1|1|1|1|128|130|130|130|130|87 | | |
+|3|3|1|1|1|1|128|130|130|130|130|87 | | |
|3|3|1|1|1|1|256|130|130|130|130|87 | | |
|3|3|1|1|1|1|384|130|130|130|130|87 | | |
|4|1|1|1|1|1|128|96|96|96|96|96|96|64|
@@ -202,7 +202,7 @@ You can use the following options `KEY_GNA_EXEC_TARGET` and `KEY_GNA_COMPILE_TAR
.. tab:: Python
- ``GNA_EXEC_TARGET``, ``GNA_COMPILE_TARGET``
+ ``GNA_EXEC_TARGET``, ``GNA_COMPILE_TARGET``
@endsphinxdirective
@@ -256,12 +256,12 @@ Starting with 2021.4 release of OpenVINO, GNA plugin users are encouraged to use
============================ ==============================================================================================================================================
Mode Description
============================ ==============================================================================================================================================
- ``KEY_GNA_AUTO`` Uses Intel® GNA if available, otherwise uses software execution mode on CPU.
- ``KEY_GNA_HW`` Uses Intel® GNA if available, otherwise raises an error.
- ``KEY_GNA_SW`` *Deprecated*. Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA, but not in the bit-exact mode.
- ``KEY_GNA_SW_EXACT`` Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA in the bit-exact mode.
- ``KEY_GNA_HW_WITH_SW_FBACK`` Uses Intel® GNA if available, otherwise raises an error. If the hardware queue is not empty, automatically falls back to CPU in the bit-exact mode.
- ``KEY_GNA_SW_FP32`` Executes the GNA-compiled graph on CPU but substitutes parameters and calculations from low precision to floating point (``FP32``).
+ ``KEY_GNA_AUTO`` Uses Intel® GNA if available, otherwise uses software execution mode on CPU.
+ ``KEY_GNA_HW`` Uses Intel® GNA if available, otherwise raises an error.
+ ``KEY_GNA_SW`` *Deprecated*. Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA, but not in the bit-exact mode.
+ ``KEY_GNA_SW_EXACT`` Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA in the bit-exact mode.
+ ``KEY_GNA_HW_WITH_SW_FBACK`` Uses Intel® GNA if available, otherwise raises an error. If the hardware queue is not empty, automatically falls back to CPU in the bit-exact mode.
+ ``KEY_GNA_SW_FP32`` Executes the GNA-compiled graph on CPU but substitutes parameters and calculations from low precision to floating point (``FP32``).
============================ ==============================================================================================================================================
.. tab:: Python
@@ -269,15 +269,15 @@ Starting with 2021.4 release of OpenVINO, GNA plugin users are encouraged to use
======================== ==============================================================================================================================================
Mode Description
======================== ==============================================================================================================================================
- ``GNA_AUTO`` Uses Intel® GNA if available, otherwise uses software execution mode on CPU.
- ``GNA_HW`` Uses Intel® GNA if available, otherwise raises an error.
- ``GNA_SW`` *Deprecated*. Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA, but not in the bit-exact mode.
- ``GNA_SW_EXACT`` Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA in the bit-exact mode.
- ``GNA_HW_WITH_SW_FBACK`` Uses Intel® GNA if available, otherwise raises an error. If the hardware queue is not empty, automatically falls back to CPU in the bit-exact mode.
- ``GNA_SW_FP32`` Executes the GNA-compiled graph on CPU but substitutes parameters and calculations from low precision to floating point (``FP32``).
+ ``GNA_AUTO`` Uses Intel® GNA if available, otherwise uses software execution mode on CPU.
+ ``GNA_HW`` Uses Intel® GNA if available, otherwise raises an error.
+ ``GNA_SW`` *Deprecated*. Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA, but not in the bit-exact mode.
+ ``GNA_SW_EXACT`` Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA in the bit-exact mode.
+ ``GNA_HW_WITH_SW_FBACK`` Uses Intel® GNA if available, otherwise raises an error. If the hardware queue is not empty, automatically falls back to CPU in the bit-exact mode.
+ ``GNA_SW_FP32`` Executes the GNA-compiled graph on CPU but substitutes parameters and calculations from low precision to floating point (``FP32``).
======================== ==============================================================================================================================================
-@endsphinxdirective
+@endsphinxdirective
## Supported Configuration Parameters
@@ -313,14 +313,11 @@ The plugin supports the configuration parameters listed below. The parameter nam
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
| ``KEY_PERF_COUNT`` | ``YES``, ``NO`` | ``NO`` | Turns on performance counters reporting. |
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
- | ``KEY_GNA_LIB_N_THREADS`` | 1-127 integer number | 1 | Sets the number of GNA accelerator library worker threads used |
- | | | | for inference computation in software modes. |
- +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
-
+
The parameters are passed as ``std::map`` on ``InferenceEngine::Core::LoadNetwork`` or ``InferenceEngine::SetConfig``.
Normally, you do not need to select the execution target (``KEY_GNA_EXEC_TARGET``) and compilation target (``KEY_GNA_COMPILE_TARGET``). The default value for the execution target corresponds to available hardware, or latest hardware version supported by the plugin (i.e., GNA 3.0) if there is no GNA HW in the system. The compilation target is the same as the execution target by default. However, you may want to change the targets, for example, if you want to check how a model compiled for one generation would behave on the other generation (using the software emulation mode), or if you are willing to export a model for a specific version of GNA HW.
-
+
You can change the ``KEY_GNA_DEVICE_MODE`` parameter at run time using ``InferenceEngine::ExecutableNetwork::SetConfig``, which works for any value excluding ``GNA_SW_FP32``. This enables you to switch the execution between software emulation mode and hardware execution mode after the model is loaded.
.. tab:: Python
@@ -352,9 +349,6 @@ The plugin supports the configuration parameters listed below. The parameter nam
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
| ``PERF_COUNT`` | ``YES``, ``NO`` | ``NO`` | Turns on performance counters reporting. |
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
- | ``GNA_LIB_N_THREADS`` | 1-127 integer number | 1 | Sets the number of GNA accelerator library worker threads used |
- | | | | for inference computation in software modes. |
- +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
The parameters are passed as strings to `IECore.load_network `_.
@@ -362,7 +356,7 @@ The plugin supports the configuration parameters listed below. The parameter nam
You can change the ``GNA_DEVICE_MODE`` parameter at run time by sending a configuration dict to the `IECore.load_network `_ call, which works for any value excluding ``GNA_SW_FP32``. This enables you to switch the execution between software emulation mode and hardware execution mode after the model is loaded.
-@endsphinxdirective
+@endsphinxdirective
## How to Interpret Performance Counters
With the following methods, you can collect performance counters that provides various performance data about execution on GNA:
@@ -370,16 +364,16 @@ With the following methods, you can collect performance counters that provides v
@sphinxdirective
.. tab:: C++
- ``InferenceEngine::InferRequest::GetPerformanceCounts``
-
+ ``InferenceEngine::InferRequest::GetPerformanceCounts``
+
The returned map stores a counter description as a key, and a counter value in the ``realTime_uSec`` field of the ``InferenceEngineProfileInfo`` structure.
.. tab:: Python
- ``openvino.inference_engine.InferRequest.get_perf_counts``
-
- The returned map stores a counter description as a key, and a counter value in the ``real_time`` field.
+ ``openvino.inference_engine.InferRequest.get_perf_counts``
+
+ The returned map stores a counter description as a key, and a counter value in the ``real_time`` field.
@endsphinxdirective
@@ -402,25 +396,6 @@ Performance counters provided for the time being:
* Number of total cycles spent on scoring in hardware including compute and memory stall cycles
* Number of stall cycles spent in hardware
-## Multithreading Support in GNA Plugin
-
-The GNA plugin supports the following configuration parameters for multithreading management:
-
-@sphinxdirective
-.. tab:: C++
-
- ``KEY_GNA_LIB_N_THREADS``
-
-.. tab:: Python
-
- ``GNA_LIB_N_THREADS``
-
-@endsphinxdirective
-
-By default, the GNA plugin uses one worker thread for inference computations. This parameter allows you to create up to 127 threads for software modes.
-
-> **NOTE**: Multithreading mode does not guarantee the same computation order as the order of issuing. Additionally, in this case, software modes do not implement any serializations.
-
## Network Batch Size
Intel® GNA plugin supports the processing of context-windowed speech frames in batches of 1-8 frames in one
diff --git a/src/inference/include/ie/gna/gna_config.hpp b/src/inference/include/ie/gna/gna_config.hpp
index 963a0413271..1d9672af3cc 100644
--- a/src/inference/include/ie/gna/gna_config.hpp
+++ b/src/inference/include/ie/gna/gna_config.hpp
@@ -112,6 +112,7 @@ DECLARE_GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT);
* Note that multithreading mode does not guarantee the same computation order as order
* of issuing. Additionally, in this case, software modes do not implement any serializations.
*/
+INFERENCE_ENGINE_DEPRECATED("The config key GNA_LIB_N_THREADS is deprecated and will be removed in a future release")
DECLARE_GNA_CONFIG_KEY(LIB_N_THREADS);
} // namespace GNAConfigParams
@@ -131,6 +132,7 @@ namespace PluginConfigParams {
* It is passed to Core::SetConfig(), this option should be used with values:
* PluginConfigParams::YES or PluginConfigParams::NO
*/
+INFERENCE_ENGINE_DEPRECATED("The config key SINGLE_THREAD is deprecated and will be removed in a future release")
DECLARE_CONFIG_KEY(SINGLE_THREAD);
} // namespace PluginConfigParams
diff --git a/src/plugins/intel_gna/gna_device.cpp b/src/plugins/intel_gna/gna_device.cpp
index 381d2b87c8a..a462065bb18 100644
--- a/src/plugins/intel_gna/gna_device.cpp
+++ b/src/plugins/intel_gna/gna_device.cpp
@@ -441,7 +441,7 @@ void GNADeviceHelper::updateGnaDeviceVersion() {
checkGna2Status(status, "Gna2DeviceGetVersion");
}
-void GNADeviceHelper::open(uint8_t n_threads) {
+void GNADeviceHelper::open() {
std::unique_lock lockGnaCalls{ acrossPluginsSync };
updateGnaDeviceVersion();
const auto gnaExecTarget = parseTarget(executionTarget);
@@ -457,10 +457,6 @@ void GNADeviceHelper::open(uint8_t n_threads) {
const auto status = Gna2DeviceOpen(nGnaDeviceIndex);
checkGna2Status(status, "Gna2DeviceOpen");
}
-
- // TODO: GNA2: uncomment when scratchpad repaired
- // status = Gna2DeviceSetNumberOfThreads(nGnaDeviceIndex, n_threads);
- // checkGna2Status(status);
deviceOpened = true;
}
@@ -483,12 +479,6 @@ void GNADeviceHelper::close() {
deviceOpened = false;
}
-void GNADeviceHelper::setOMPThreads(uint8_t const n_threads) {
- std::unique_lock lockGnaCalls{ acrossPluginsSync };
- const auto status = Gna2DeviceSetNumberOfThreads(nGnaDeviceIndex, n_threads);
- checkGna2Status(status, "Gna2DeviceSetNumberOfThreads");
-}
-
void GNADeviceHelper::updateGnaPerfCounters() {
if (!isPerformanceMeasuring)
return;
diff --git a/src/plugins/intel_gna/gna_device.hpp b/src/plugins/intel_gna/gna_device.hpp
index 7d82d68d355..c473437d701 100644
--- a/src/plugins/intel_gna/gna_device.hpp
+++ b/src/plugins/intel_gna/gna_device.hpp
@@ -65,8 +65,6 @@ public:
explicit GNADeviceHelper(std::string executionTargetIn = "",
std::string compileTargetIn = "",
bool swExactModeIn = false,
- uint8_t lib_async_n_threads = 1,
- bool use_openmp = false,
bool isPerformanceMeasuring = false,
bool deviceEmbedded = false,
int deviceVersionParsed = 0) :
@@ -77,7 +75,7 @@ public:
nGnaDeviceIndex{selectGnaDevice()},
useDeviceEmbeddedExport(deviceEmbedded),
exportGeneration(static_cast(deviceVersionParsed)) {
- open(lib_async_n_threads);
+ open();
initGnaPerfCounters();
// check GNA Library version
@@ -88,11 +86,6 @@ public:
if (gnaLibVersion.rfind("3.0", 0) == 0) {
isGnaLibVersion3_0 = true;
}
-
- if (use_openmp) {
- uint8_t num_cores = std::thread::hardware_concurrency();
- setOMPThreads((num_cores != 0) ? num_cores : 1);
- }
}
GNADeviceHelper(const GNADeviceHelper&) = delete;
@@ -149,7 +142,7 @@ public:
std::string getEffectiveGnaCompileTarget() const;
private:
- void open(uint8_t const n_threads);
+ void open();
void close();
static std::string getGnaLibraryVersionPrivate();
@@ -167,7 +160,6 @@ public:
void createVirtualDevice(Gna2DeviceVersion devVersion, std::string purpose = "");
void updateGnaDeviceVersion();
- void setOMPThreads(uint8_t const n_threads);
void initGnaPerfCounters() {
std::unique_lock lockGnaCalls{ acrossPluginsSync };
diff --git a/src/plugins/intel_gna/gna_plugin.cpp b/src/plugins/intel_gna/gna_plugin.cpp
index c396aee3bbe..386437794e8 100644
--- a/src/plugins/intel_gna/gna_plugin.cpp
+++ b/src/plugins/intel_gna/gna_plugin.cpp
@@ -358,8 +358,6 @@ void GNAPlugin::InitGNADevice() {
gnadevice = std::make_shared(config.gnaExecTarget,
config.gnaCompileTarget,
config.swExactMode,
- gnaFlags->gna_lib_async_threads_num,
- gnaFlags->gna_openmp_multithreading,
gnaFlags->performance_counting,
!config.dumpXNNPath.empty(),
GetDeviceVersionFromString(config.dumpXNNGeneration));
diff --git a/src/plugins/intel_gna/gna_plugin_config.cpp b/src/plugins/intel_gna/gna_plugin_config.cpp
index 1210eea099d..5cf34b38cfd 100644
--- a/src/plugins/intel_gna/gna_plugin_config.cpp
+++ b/src/plugins/intel_gna/gna_plugin_config.cpp
@@ -29,6 +29,7 @@ static const std::set supportedTargets = {
""
};
+OPENVINO_SUPPRESS_DEPRECATED_START
void Config::UpdateFromMap(const std::map& config) {
for (auto&& item : config) {
auto key = item.first;
@@ -210,6 +211,7 @@ void Config::UpdateFromMap(const std::map& config) {
AdjustKeyMapValues();
}
+OPENVINO_SUPPRESS_DEPRECATED_END
void Config::AdjustKeyMapValues() {
std::lock_guard lockGuard{ mtx4keyConfigMap };
@@ -251,9 +253,11 @@ void Config::AdjustKeyMapValues() {
keyConfigMap[GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT)] = std::to_string(gnaFlags.pwlMaxErrorPercent);
keyConfigMap[CONFIG_KEY(PERF_COUNT)] =
gnaFlags.performance_counting ? PluginConfigParams::YES: PluginConfigParams::NO;
+OPENVINO_SUPPRESS_DEPRECATED_START
keyConfigMap[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(gnaFlags.gna_lib_async_threads_num);
keyConfigMap[CONFIG_KEY(SINGLE_THREAD)] =
gnaFlags.gna_openmp_multithreading ? PluginConfigParams::NO: PluginConfigParams::YES;
+OPENVINO_SUPPRESS_DEPRECATED_END
keyConfigMap[CONFIG_KEY(LOG_LEVEL)] = gnaFlags.log_level;
}
diff --git a/src/tests/functional/plugin/gna/shared_tests_instances/behavior/ov_infer_request/multithreading.cpp b/src/tests/functional/plugin/gna/shared_tests_instances/behavior/ov_infer_request/multithreading.cpp
index 3f632dab111..36e85a90dc6 100644
--- a/src/tests/functional/plugin/gna/shared_tests_instances/behavior/ov_infer_request/multithreading.cpp
+++ b/src/tests/functional/plugin/gna/shared_tests_instances/behavior/ov_infer_request/multithreading.cpp
@@ -7,10 +7,14 @@
using namespace ov::test::behavior;
namespace {
+OPENVINO_SUPPRESS_DEPRECATED_START
+
const std::vector> configs = {
{{GNA_CONFIG_KEY(LIB_N_THREADS), "3"}}
};
+OPENVINO_SUPPRESS_DEPRECATED_END
+
INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVInferRequestMultithreadingTests,
::testing::Combine(
::testing::Values(CommonTestUtils::DEVICE_GNA),
diff --git a/src/tests/unit/gna/gna_plugin_config_test.cpp b/src/tests/unit/gna/gna_plugin_config_test.cpp
index 542652f5ebc..8ffce728e02 100644
--- a/src/tests/unit/gna/gna_plugin_config_test.cpp
+++ b/src/tests/unit/gna/gna_plugin_config_test.cpp
@@ -11,6 +11,7 @@
using namespace InferenceEngine;
using namespace GNAPluginNS;
+IE_SUPPRESS_DEPRECATED_START
const std::map supportedConfigKeysWithDefaults = {
{GNA_CONFIG_KEY(SCALE_FACTOR), "1.000000"},
{GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_0"), "1.000000"},
@@ -28,6 +29,7 @@ const std::map supportedConfigKeysWithDefaults = {
{CONFIG_KEY(SINGLE_THREAD), CONFIG_VALUE(YES)},
{CONFIG_KEY(LOG_LEVEL), PluginConfigParams::LOG_NONE}
};
+IE_SUPPRESS_DEPRECATED_END
class GNAPluginConfigTest : public ::testing::Test {
protected:
@@ -159,6 +161,8 @@ TEST_F(GNAPluginConfigTest, GnaConfigPerfCountTest) {
config.gnaFlags.performance_counting);
}
+IE_SUPPRESS_DEPRECATED_START
+
TEST_F(GNAPluginConfigTest, GnaConfigLibNThreadsTest) {
SetAndCompare(GNA_CONFIG_KEY(LIB_N_THREADS), "2");
EXPECT_EQ(config.gnaFlags.gna_lib_async_threads_num, 2);
@@ -176,6 +180,8 @@ TEST_F(GNAPluginConfigTest, GnaConfigSingleThreadTest) {
true);
}
+IE_SUPPRESS_DEPRECATED_END
+
TEST_F(GNAPluginConfigTest, GnaConfigGnaExecTargetTest) {
SetAndCompare(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_2_0");
EXPECT_EQ(config.gnaExecTarget, "GNA_TARGET_2_0");
diff --git a/src/tests_deprecated/unit/engines/gna/configuration_test.cpp b/src/tests_deprecated/unit/engines/gna/configuration_test.cpp
index 7f24b1e9bfe..7be51a178c0 100644
--- a/src/tests_deprecated/unit/engines/gna/configuration_test.cpp
+++ b/src/tests_deprecated/unit/engines/gna/configuration_test.cpp
@@ -61,15 +61,6 @@ TEST_F(GNAConfigTest, canNOTMatchWith128AsyncThreads) {
.throws();
}
-TEST_F(GNAConfigTest, canMatchWithSingleMultipleOMPThreads) {
- assert_that()
- .onInferModel(GNATestIRs::Fc2DOutputModel())
- .inNotCompactMode()
- .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
- .enable_omp_multithreading()
- .gna().propagate_forward().called_without().pwl_inserted_into_nnet();
-}
-
TEST_F(GNAConfigTest, failToCreatePluginWithDifferentInputScaleFactors) {
assert_that().creating().gna_plugin()
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR))+"_0", 1000)
diff --git a/src/tests_deprecated/unit/engines/gna/gna_matcher.hpp b/src/tests_deprecated/unit/engines/gna/gna_matcher.hpp
index da05e64aa0a..5e546edbe47 100644
--- a/src/tests_deprecated/unit/engines/gna/gna_matcher.hpp
+++ b/src/tests_deprecated/unit/engines/gna/gna_matcher.hpp
@@ -183,12 +183,6 @@ class GNATestConfigurability : public GNATestBase{
_env.config[CONFIG_KEY(PERF_COUNT)] = InferenceEngine::PluginConfigParams::YES;
return *dynamic_cast(this);
}
-
- T & enable_omp_multithreading() {
- _env.is_setup_of_omp_theads_expected = true;
- _env.config[CONFIG_KEY(SINGLE_THREAD)] = InferenceEngine::PluginConfigParams::NO;
- return *dynamic_cast(this);
- }
};
/**