Deprecate GNA_LIB_N_THREADS and SINGLE_THREAD parameters (#9486)

2022-01-13 14:30:41 +03:00 · 2022-01-13 14:30:41 +03:00 · c06d4e7fb7
commit c06d4e7fb7
parent 2b996c32ad
10 changed files with 44 additions and 88 deletions
--- a/docs/IE_DG/supported_plugins/GNA.md
+++ b/docs/IE_DG/supported_plugins/GNA.md
@ -32,7 +32,7 @@ Devices with Intel® GNA support:
 * [Intel® Core™ Processors (formerly codenamed Cannon Lake)](https://ark.intel.com/content/www/us/en/ark/products/136863/intel-core-i3-8121u-processor-4m-cache-up-to-3-20-ghz.html)
 * [10th Generation Intel® Core™ Processors (formerly codenamed Ice Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/74979/ice-lake.html):
-	
+
 * [11th Generation Intel® Core™ Processors (formerly codenamed Tiger Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/88759/tiger-lake.html).
 * [12th Generation Intel® Core™ Processors (formerly codenamed Alder Lake)](https://ark.intel.com/content/www/us/en/ark/products/codename/147470/products-formerly-alder-lake.html).
@ -154,7 +154,7 @@ The following tables provide a more explicit representation of the Intel(R) GNA
 |1|2|1|1|1|1|128|240|240|240|240|240|240|240|
 |1|2|1|1|1|1|256|240|240|240|240|240|240|170|
 |1|2|1|1|1|1|384|240|240|240|240|240|170|113|
-|1|3|1|1|1|1|128|240|240|240|240|240|   |   |   
+|1|3|1|1|1|1|128|240|240|240|240|240|   |   |
 |1|3|1|1|1|1|256|240|240|240|240|240|   |   |
 |1|3|1|1|1|1|384|240|240|240|240|240|   |   |
 |2|1|1|1|1|1|128|192|192|192|192|192|192|192|
@ -166,7 +166,7 @@ The following tables provide a more explicit representation of the Intel(R) GNA
 |3|1|1|1|1|1|128|128|128|128|128|128|128|85|
 |3|1|1|1|1|1|256|128|128|128|128|128|128|85|
 |3|1|1|1|1|1|384|128|128|128|128|128|128|85|
-|3|3|1|1|1|1|128|130|130|130|130|87 |   |  |   
+|3|3|1|1|1|1|128|130|130|130|130|87 |   |  |
 |3|3|1|1|1|1|256|130|130|130|130|87 |   |  |
 |3|3|1|1|1|1|384|130|130|130|130|87 |   |  |
 |4|1|1|1|1|1|128|96|96|96|96|96|96|64|
@ -202,7 +202,7 @@ You can use the following options `KEY_GNA_EXEC_TARGET` and `KEY_GNA_COMPILE_TAR
 .. tab:: Python
-   ``GNA_EXEC_TARGET``,  ``GNA_COMPILE_TARGET`` 
+   ``GNA_EXEC_TARGET``,  ``GNA_COMPILE_TARGET``
@endsphinxdirective
@ -256,12 +256,12 @@ Starting with 2021.4 release of OpenVINO, GNA plugin users are encouraged to use
   ============================  ==============================================================================================================================================
   Mode                          Description
   ============================  ==============================================================================================================================================
-   ``KEY_GNA_AUTO``              Uses Intel® GNA if available, otherwise uses software execution mode on CPU. 
+   ``KEY_GNA_AUTO``              Uses Intel® GNA if available, otherwise uses software execution mode on CPU.
-   ``KEY_GNA_HW``                Uses Intel® GNA if available, otherwise raises an error. 
+   ``KEY_GNA_HW``                Uses Intel® GNA if available, otherwise raises an error.
-   ``KEY_GNA_SW``                *Deprecated*. Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA, but not in the bit-exact mode. 
+   ``KEY_GNA_SW``                *Deprecated*. Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA, but not in the bit-exact mode.
-   ``KEY_GNA_SW_EXACT``          Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA in the bit-exact mode. 
+   ``KEY_GNA_SW_EXACT``          Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA in the bit-exact mode.
-   ``KEY_GNA_HW_WITH_SW_FBACK``  Uses Intel® GNA if available, otherwise raises an error. If the hardware queue is not empty, automatically falls back to CPU in the bit-exact mode. 
+   ``KEY_GNA_HW_WITH_SW_FBACK``  Uses Intel® GNA if available, otherwise raises an error. If the hardware queue is not empty, automatically falls back to CPU in the bit-exact mode.
-   ``KEY_GNA_SW_FP32``           Executes the GNA-compiled graph on CPU but substitutes parameters and calculations from low precision to floating point (``FP32``). 
+   ``KEY_GNA_SW_FP32``           Executes the GNA-compiled graph on CPU but substitutes parameters and calculations from low precision to floating point (``FP32``).
   ============================  ==============================================================================================================================================
 .. tab:: Python
@ -269,15 +269,15 @@ Starting with 2021.4 release of OpenVINO, GNA plugin users are encouraged to use
   ========================  ==============================================================================================================================================
   Mode                      Description
   ========================  ==============================================================================================================================================
-   ``GNA_AUTO``              Uses Intel® GNA if available, otherwise uses software execution mode on CPU. 
+   ``GNA_AUTO``              Uses Intel® GNA if available, otherwise uses software execution mode on CPU.
-   ``GNA_HW``                Uses Intel® GNA if available, otherwise raises an error. 
+   ``GNA_HW``                Uses Intel® GNA if available, otherwise raises an error.
-   ``GNA_SW``                *Deprecated*. Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA, but not in the bit-exact mode. 
+   ``GNA_SW``                *Deprecated*. Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA, but not in the bit-exact mode.
-   ``GNA_SW_EXACT``          Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA in the bit-exact mode. 
+   ``GNA_SW_EXACT``          Executes the GNA-compiled graph on CPU performing calculations in the same precision as the Intel® GNA in the bit-exact mode.
-   ``GNA_HW_WITH_SW_FBACK``  Uses Intel® GNA if available, otherwise raises an error. If the hardware queue is not empty, automatically falls back to CPU in the bit-exact mode. 
+   ``GNA_HW_WITH_SW_FBACK``  Uses Intel® GNA if available, otherwise raises an error. If the hardware queue is not empty, automatically falls back to CPU in the bit-exact mode.
-   ``GNA_SW_FP32``           Executes the GNA-compiled graph on CPU but substitutes parameters and calculations from low precision to floating point (``FP32``). 
+   ``GNA_SW_FP32``           Executes the GNA-compiled graph on CPU but substitutes parameters and calculations from low precision to floating point (``FP32``).
   ========================  ==============================================================================================================================================
-@endsphinxdirective  
+@endsphinxdirective
 ## <a name="supported-configuration-parameters">Supported Configuration Parameters</a>
@ -313,14 +313,11 @@ The plugin supports the configuration parameters listed below. The parameter nam
   +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
   | ``KEY_PERF_COUNT``               | ``YES``, ``NO``         | ``NO``        | Turns on performance counters reporting.                        |
   +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
-   | ``KEY_GNA_LIB_N_THREADS``        | 1-127 integer number    | 1             | Sets the number of GNA accelerator library worker threads used  |
+
   |                                  |                         |               | for inference computation in software modes.                    |
   +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
   The parameters are passed as ``std::map<std::string, std::string>`` on ``InferenceEngine::Core::LoadNetwork`` or ``InferenceEngine::SetConfig``.
   Normally, you do not need to select the execution target (``KEY_GNA_EXEC_TARGET``) and compilation target (``KEY_GNA_COMPILE_TARGET``). The default value for the execution target corresponds to the available hardware, or to the latest hardware version supported by the plugin (i.e., GNA 3.0) if there is no GNA HW in the system. The compilation target is the same as the execution target by default. However, you may want to change the targets, for example, if you want to check how a model compiled for one generation would behave on another generation (using the software emulation mode), or if you want to export a model for a specific version of GNA HW.
-   
+
   You can change the ``KEY_GNA_DEVICE_MODE`` parameter at run time using ``InferenceEngine::ExecutableNetwork::SetConfig``, which works for any value excluding ``GNA_SW_FP32``. This enables you to switch the execution between software emulation mode and hardware execution mode after the model is loaded.
 .. tab:: Python
@ -352,9 +349,6 @@ The plugin supports the configuration parameters listed below. The parameter nam
   +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
   | ``PERF_COUNT``                   | ``YES``, ``NO``         | ``NO``        | Turns on performance counters reporting.                        |
   +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
   | ``GNA_LIB_N_THREADS``            | 1-127 integer number    | 1             | Sets the number of GNA accelerator library worker threads used  |
   |                                  |                         |               | for inference computation in software modes.                    |
   +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
   The parameters are passed as strings to `IECore.load_network <api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.load_network>`_.
@ -362,7 +356,7 @@ The plugin supports the configuration parameters listed below. The parameter nam
   You can change the ``GNA_DEVICE_MODE`` parameter at run time by sending a configuration dict to the `IECore.load_network <api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.load_network>`_ call, which works for any value excluding ``GNA_SW_FP32``. This enables you to switch the execution between software emulation mode and hardware execution mode after the model is loaded.
-@endsphinxdirective   
+@endsphinxdirective
 ## How to Interpret Performance Counters
 With the following methods, you can collect performance counters that provide various performance data about execution on GNA:
@ -370,16 +364,16 @@ With the following methods, you can collect performance counters that provides v
@sphinxdirective
 .. tab:: C++
-   ``InferenceEngine::InferRequest::GetPerformanceCounts`` 
+   ``InferenceEngine::InferRequest::GetPerformanceCounts``
-   
+
   The returned map stores a counter description as a key, and a counter value in the ``realTime_uSec`` field of the ``InferenceEngineProfileInfo`` structure.
 .. tab:: Python
-   ``openvino.inference_engine.InferRequest.get_perf_counts`` 
+   ``openvino.inference_engine.InferRequest.get_perf_counts``
-   
+
-   The returned map stores a counter description as a key, and a counter value in the ``real_time`` field. 
+   The returned map stores a counter description as a key, and a counter value in the ``real_time`` field.
@endsphinxdirective
@ -402,25 +396,6 @@ Performance counters provided for the time being:
 	* Number of total cycles spent on scoring in hardware including compute and memory stall cycles
 	* Number of stall cycles spent in hardware
 ## Multithreading Support in GNA Plugin
 The GNA plugin supports the following configuration parameters for multithreading management:
@sphinxdirective
 .. tab:: C++
   ``KEY_GNA_LIB_N_THREADS``
 .. tab:: Python
   ``GNA_LIB_N_THREADS``
@endsphinxdirective
 By default, the GNA plugin uses one worker thread for inference computations. This parameter allows you to create up to 127 threads for software modes.
 > **NOTE**: Multithreading mode does not guarantee the same computation order as the order of issuing. Additionally, in this case, software modes do not implement any serializations.
 ## Network Batch Size
 Intel® GNA plugin supports the processing of context-windowed speech frames in batches of 1-8 frames in one
--- a/src/inference/include/ie/gna/gna_config.hpp
+++ b/src/inference/include/ie/gna/gna_config.hpp
@ -112,6 +112,7 @@ DECLARE_GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT);
 * Note that multithreading mode does not guarantee the same computation order as the order
 * of issuing. Additionally, in this case, software modes do not implement any serialization.
 */
 INFERENCE_ENGINE_DEPRECATED("The config key will be removed")
 DECLARE_GNA_CONFIG_KEY(LIB_N_THREADS);
 }  // namespace GNAConfigParams
@ -131,6 +132,7 @@ namespace PluginConfigParams {
 * It is passed to Core::SetConfig(), this option should be used with values:
 * PluginConfigParams::YES or PluginConfigParams::NO
 */
 INFERENCE_ENGINE_DEPRECATED("The config key will be removed")
 DECLARE_CONFIG_KEY(SINGLE_THREAD);
 }  // namespace PluginConfigParams
--- a/src/plugins/intel_gna/gna_device.cpp
+++ b/src/plugins/intel_gna/gna_device.cpp
@ -441,7 +441,7 @@ void GNADeviceHelper::updateGnaDeviceVersion() {
    checkGna2Status(status, "Gna2DeviceGetVersion");
 }
-void GNADeviceHelper::open(uint8_t n_threads) {
+void GNADeviceHelper::open() {
    std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
    updateGnaDeviceVersion();
    const auto gnaExecTarget = parseTarget(executionTarget);
@ -457,10 +457,6 @@ void GNADeviceHelper::open(uint8_t n_threads) {
        const auto status = Gna2DeviceOpen(nGnaDeviceIndex);
        checkGna2Status(status, "Gna2DeviceOpen");
    }
    // TODO: GNA2: uncomment when scratchpad repaired
    // status = Gna2DeviceSetNumberOfThreads(nGnaDeviceIndex, n_threads);
    // checkGna2Status(status);
    deviceOpened = true;
 }
@ -483,12 +479,6 @@ void GNADeviceHelper::close() {
    deviceOpened = false;
 }
 /**
  * Passes a thread count to the GNA library for the device identified by nGnaDeviceIndex.
  *
  * @param n_threads value forwarded unchanged to Gna2DeviceSetNumberOfThreads
  */
 void GNADeviceHelper::setOMPThreads(uint8_t const n_threads) {
    // Hold acrossPluginsSync for the whole library call.
    std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
    const auto status = Gna2DeviceSetNumberOfThreads(nGnaDeviceIndex, n_threads);
    // The returned status is handed to checkGna2Status together with the API name
    // (presumably it reports a failure; its body is not visible here - TODO confirm).
    checkGna2Status(status, "Gna2DeviceSetNumberOfThreads");
 }
 void GNADeviceHelper::updateGnaPerfCounters() {
    if (!isPerformanceMeasuring)
        return;
--- a/src/plugins/intel_gna/gna_device.hpp
+++ b/src/plugins/intel_gna/gna_device.hpp
@ -65,8 +65,6 @@ public:
    explicit GNADeviceHelper(std::string executionTargetIn = "",
         std::string compileTargetIn = "",
         bool swExactModeIn = false,
         uint8_t lib_async_n_threads = 1,
         bool use_openmp = false,
         bool isPerformanceMeasuring = false,
         bool deviceEmbedded = false,
         int deviceVersionParsed = 0) :
@ -77,7 +75,7 @@ public:
         nGnaDeviceIndex{selectGnaDevice()},
         useDeviceEmbeddedExport(deviceEmbedded),
         exportGeneration(static_cast<Gna2DeviceVersion>(deviceVersionParsed)) {
-        open(lib_async_n_threads);
+        open();
        initGnaPerfCounters();
        // check GNA Library version
@ -88,11 +86,6 @@ public:
        if (gnaLibVersion.rfind("3.0", 0) == 0) {
            isGnaLibVersion3_0 = true;
        }
        if (use_openmp) {
            uint8_t num_cores = std::thread::hardware_concurrency();
            setOMPThreads((num_cores != 0) ? num_cores : 1);
        }
    }
    GNADeviceHelper(const GNADeviceHelper&) = delete;
@ -149,7 +142,7 @@ public:
    std::string getEffectiveGnaCompileTarget() const;
 private:
-    void open(uint8_t const n_threads);
+    void open();
    void close();
    static std::string getGnaLibraryVersionPrivate();
@ -167,7 +160,6 @@ public:
    void createVirtualDevice(Gna2DeviceVersion devVersion, std::string purpose = "");
    void updateGnaDeviceVersion();
    void setOMPThreads(uint8_t const n_threads);
    void initGnaPerfCounters() {
        std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
--- a/src/plugins/intel_gna/gna_plugin.cpp
+++ b/src/plugins/intel_gna/gna_plugin.cpp
@ -358,8 +358,6 @@ void GNAPlugin::InitGNADevice() {
    gnadevice = std::make_shared<GNADeviceHelper>(config.gnaExecTarget,
                config.gnaCompileTarget,
                config.swExactMode,
                gnaFlags->gna_lib_async_threads_num,
                gnaFlags->gna_openmp_multithreading,
                gnaFlags->performance_counting,
                !config.dumpXNNPath.empty(),
                GetDeviceVersionFromString(config.dumpXNNGeneration));
--- a/src/plugins/intel_gna/gna_plugin_config.cpp
+++ b/src/plugins/intel_gna/gna_plugin_config.cpp
@ -29,6 +29,7 @@ static const std::set<std::string> supportedTargets = {
    ""
 };
 OPENVINO_SUPPRESS_DEPRECATED_START
 void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
    for (auto&& item : config) {
        auto key = item.first;
@ -210,6 +211,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
    AdjustKeyMapValues();
 }
 OPENVINO_SUPPRESS_DEPRECATED_END
 void Config::AdjustKeyMapValues() {
    std::lock_guard<std::mutex> lockGuard{ mtx4keyConfigMap };
@ -251,9 +253,11 @@ void Config::AdjustKeyMapValues() {
    keyConfigMap[GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT)] = std::to_string(gnaFlags.pwlMaxErrorPercent);
    keyConfigMap[CONFIG_KEY(PERF_COUNT)] =
            gnaFlags.performance_counting ? PluginConfigParams::YES: PluginConfigParams::NO;
 OPENVINO_SUPPRESS_DEPRECATED_START
    keyConfigMap[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(gnaFlags.gna_lib_async_threads_num);
    keyConfigMap[CONFIG_KEY(SINGLE_THREAD)] =
            gnaFlags.gna_openmp_multithreading ? PluginConfigParams::NO: PluginConfigParams::YES;
 OPENVINO_SUPPRESS_DEPRECATED_END
    keyConfigMap[CONFIG_KEY(LOG_LEVEL)] = gnaFlags.log_level;
 }
--- a/src/tests/functional/plugin/gna/shared_tests_instances/behavior/ov_infer_request/multithreading.cpp
+++ b/src/tests/functional/plugin/gna/shared_tests_instances/behavior/ov_infer_request/multithreading.cpp
@ -7,10 +7,14 @@
 using namespace ov::test::behavior;
 namespace {
 // The LIB_N_THREADS key is marked deprecated, so its use here is wrapped in the
 // deprecation-suppression macros.
 OPENVINO_SUPPRESS_DEPRECATED_START
 // Plugin configurations for this test suite: a single entry that sets
 // GNA_CONFIG_KEY(LIB_N_THREADS) to "3".
 const std::vector<std::map<std::string, std::string>> configs = {
        {{GNA_CONFIG_KEY(LIB_N_THREADS), "3"}}
 };
 OPENVINO_SUPPRESS_DEPRECATED_END
 INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVInferRequestMultithreadingTests,
        ::testing::Combine(
                ::testing::Values(CommonTestUtils::DEVICE_GNA),
--- a/src/tests/unit/gna/gna_plugin_config_test.cpp
+++ b/src/tests/unit/gna/gna_plugin_config_test.cpp
@ -11,6 +11,7 @@
 using namespace InferenceEngine;
 using namespace GNAPluginNS;
 IE_SUPPRESS_DEPRECATED_START
 const std::map<std::string, std::string>  supportedConfigKeysWithDefaults = {
    {GNA_CONFIG_KEY(SCALE_FACTOR), "1.000000"},
    {GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_0"), "1.000000"},
@ -28,6 +29,7 @@ const std::map<std::string, std::string>  supportedConfigKeysWithDefaults = {
    {CONFIG_KEY(SINGLE_THREAD), CONFIG_VALUE(YES)},
    {CONFIG_KEY(LOG_LEVEL), PluginConfigParams::LOG_NONE}
 };
 IE_SUPPRESS_DEPRECATED_END
 class GNAPluginConfigTest : public ::testing::Test {
 protected:
@ -159,6 +161,8 @@ TEST_F(GNAPluginConfigTest, GnaConfigPerfCountTest) {
                    config.gnaFlags.performance_counting);
 }
 IE_SUPPRESS_DEPRECATED_START
 TEST_F(GNAPluginConfigTest, GnaConfigLibNThreadsTest) {
    SetAndCompare(GNA_CONFIG_KEY(LIB_N_THREADS), "2");
    EXPECT_EQ(config.gnaFlags.gna_lib_async_threads_num, 2);
@ -176,6 +180,8 @@ TEST_F(GNAPluginConfigTest, GnaConfigSingleThreadTest) {
                    true);
 }
 IE_SUPPRESS_DEPRECATED_END
 TEST_F(GNAPluginConfigTest, GnaConfigGnaExecTargetTest) {
    SetAndCompare(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_2_0");
    EXPECT_EQ(config.gnaExecTarget, "GNA_TARGET_2_0");
--- a/src/tests_deprecated/unit/engines/gna/configuration_test.cpp
+++ b/src/tests_deprecated/unit/engines/gna/configuration_test.cpp
@ -61,15 +61,6 @@ TEST_F(GNAConfigTest, canNOTMatchWith128AsyncThreads) {
        .throws();
 }
 // Builds a matcher expectation on GNATestIRs::Fc2DOutputModel() with not-compact mode,
 // SCALE_FACTOR set to 1.0f and enable_omp_multithreading() applied.
 // NOTE(review): going by the matcher names, the expectation reads as "propagate_forward
 // is called without a PWL inserted into the nnet"; the matcher implementation is not
 // visible here - confirm.
 TEST_F(GNAConfigTest, canMatchWithSingleMultipleOMPThreads) {
    assert_that()
        .onInferModel(GNATestIRs::Fc2DOutputModel())
        .inNotCompactMode()
        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
        .enable_omp_multithreading()
        .gna().propagate_forward().called_without().pwl_inserted_into_nnet();
 }
 TEST_F(GNAConfigTest, failToCreatePluginWithDifferentInputScaleFactors) {
    assert_that().creating().gna_plugin()
        .withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR))+"_0", 1000)
--- a/src/tests_deprecated/unit/engines/gna/gna_matcher.hpp
+++ b/src/tests_deprecated/unit/engines/gna/gna_matcher.hpp
@ -183,12 +183,6 @@ class GNATestConfigurability : public GNATestBase{
        _env.config[CONFIG_KEY(PERF_COUNT)] = InferenceEngine::PluginConfigParams::YES;
        return *dynamic_cast<T*>(this);
    }
    /**
     * Sets _env.is_setup_of_omp_theads_expected to true and sets the SINGLE_THREAD
     * config key to PluginConfigParams::NO in _env.config.
     *
     * @return this object as T&, so calls can be chained
     */
    T & enable_omp_multithreading() {
        _env.is_setup_of_omp_theads_expected = true;
        _env.config[CONFIG_KEY(SINGLE_THREAD)] = InferenceEngine::PluginConfigParams::NO;
        // Cast to the concrete builder type T so the fluent chain keeps that type.
        return *dynamic_cast<T*>(this);
    }
 };
 /**