Deprecate GNA_LIB_N_THREADS and SINGLE_THREAD parameters (#9486)

2022-01-13 14:30:41 +03:00 · 2022-01-13 14:30:41 +03:00 · c06d4e7fb7
commit c06d4e7fb7
parent 2b996c32ad
10 changed files with 44 additions and 88 deletions
--- a/docs/IE_DG/supported_plugins/GNA.md
+++ b/docs/IE_DG/supported_plugins/GNA.md
@@ -313,9 +313,6 @@ The plugin supports the configuration parameters listed below. The parameter nam
   +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
   | ``KEY_PERF_COUNT``               | ``YES``, ``NO``         | ``NO``        | Turns on performance counters reporting.                        |
   +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
-   | ``KEY_GNA_LIB_N_THREADS``        | 1-127 integer number    | 1             | Sets the number of GNA accelerator library worker threads used  |
-   |                                  |                         |               | for inference computation in software modes.                    |
-   +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+

   The parameters are passed as ``std::map<std::string, std::string>`` on ``InferenceEngine::Core::LoadNetwork`` or ``InferenceEngine::SetConfig``.

@@ -352,9 +349,6 @@ The plugin supports the configuration parameters listed below. The parameter nam
   +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
   | ``PERF_COUNT``                   | ``YES``, ``NO``         | ``NO``        | Turns on performance counters reporting.                        |
   +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
-   | ``GNA_LIB_N_THREADS``            | 1-127 integer number    | 1             | Sets the number of GNA accelerator library worker threads used  |
-   |                                  |                         |               | for inference computation in software modes.                    |
-   +----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+

   The parameters are passed as strings to `IECore.load_network <api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.load_network>`_.

@@ -402,25 +396,6 @@ Performance counters provided for the time being:
 	* Number of total cycles spent on scoring in hardware including compute and memory stall cycles
 	* Number of stall cycles spent in hardware

-## Multithreading Support in GNA Plugin
-
-The GNA plugin supports the following configuration parameters for multithreading management:
-
-@sphinxdirective
-.. tab:: C++
-
-   ``KEY_GNA_LIB_N_THREADS``
-
-.. tab:: Python
-
-   ``GNA_LIB_N_THREADS``
-
-@endsphinxdirective
-
-By default, the GNA plugin uses one worker thread for inference computations. This parameter allows you to create up to 127 threads for software modes.
-
-> **NOTE**: Multithreading mode does not guarantee the same computation order as the order of issuing. Additionally, in this case, software modes do not implement any serializations.
-
 ## Network Batch Size

 Intel® GNA plugin supports the processing of context-windowed speech frames in batches of 1-8 frames in one
--- a/src/inference/include/ie/gna/gna_config.hpp
+++ b/src/inference/include/ie/gna/gna_config.hpp
@@ -112,6 +112,7 @@ DECLARE_GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT);
 * Note that multithreading mode does not guarantee the same computation order as order
 * of issuing. Additionally, in this case, software modes do not implement any serializations.
 */
+INFERENCE_ENGINE_DEPRECATED("The config key will be removed")
 DECLARE_GNA_CONFIG_KEY(LIB_N_THREADS);
 }  // namespace GNAConfigParams

@@ -131,6 +132,7 @@ namespace PluginConfigParams {
 * It is passed to Core::SetConfig(), this option should be used with values:
 * PluginConfigParams::YES or PluginConfigParams::NO
 */
+INFERENCE_ENGINE_DEPRECATED("The config key will be removed")
 DECLARE_CONFIG_KEY(SINGLE_THREAD);

 }  // namespace PluginConfigParams
--- a/src/plugins/intel_gna/gna_device.cpp
+++ b/src/plugins/intel_gna/gna_device.cpp
@@ -441,7 +441,7 @@ void GNADeviceHelper::updateGnaDeviceVersion() {
    checkGna2Status(status, "Gna2DeviceGetVersion");
 }

-void GNADeviceHelper::open(uint8_t n_threads) {
+void GNADeviceHelper::open() {
    std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
    updateGnaDeviceVersion();
    const auto gnaExecTarget = parseTarget(executionTarget);
@@ -457,10 +457,6 @@ void GNADeviceHelper::open(uint8_t n_threads) {
        const auto status = Gna2DeviceOpen(nGnaDeviceIndex);
        checkGna2Status(status, "Gna2DeviceOpen");
    }
-
-    // TODO: GNA2: uncomment when scratchpad repaired
-    // status = Gna2DeviceSetNumberOfThreads(nGnaDeviceIndex, n_threads);
-    // checkGna2Status(status);
    deviceOpened = true;
 }

@@ -483,12 +479,6 @@ void GNADeviceHelper::close() {
    deviceOpened = false;
 }

-void GNADeviceHelper::setOMPThreads(uint8_t const n_threads) {
-    std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
-    const auto status = Gna2DeviceSetNumberOfThreads(nGnaDeviceIndex, n_threads);
-    checkGna2Status(status, "Gna2DeviceSetNumberOfThreads");
-}
-
 void GNADeviceHelper::updateGnaPerfCounters() {
    if (!isPerformanceMeasuring)
        return;
--- a/src/plugins/intel_gna/gna_device.hpp
+++ b/src/plugins/intel_gna/gna_device.hpp
@@ -65,8 +65,6 @@ public:
    explicit GNADeviceHelper(std::string executionTargetIn = "",
         std::string compileTargetIn = "",
         bool swExactModeIn = false,
-         uint8_t lib_async_n_threads = 1,
-         bool use_openmp = false,
         bool isPerformanceMeasuring = false,
         bool deviceEmbedded = false,
         int deviceVersionParsed = 0) :
@@ -77,7 +75,7 @@ public:
         nGnaDeviceIndex{selectGnaDevice()},
         useDeviceEmbeddedExport(deviceEmbedded),
         exportGeneration(static_cast<Gna2DeviceVersion>(deviceVersionParsed)) {
-        open(lib_async_n_threads);
+        open();
        initGnaPerfCounters();

        // check GNA Library version
@@ -88,11 +86,6 @@ public:
        if (gnaLibVersion.rfind("3.0", 0) == 0) {
            isGnaLibVersion3_0 = true;
        }
-
-        if (use_openmp) {
-            uint8_t num_cores = std::thread::hardware_concurrency();
-            setOMPThreads((num_cores != 0) ? num_cores : 1);
-        }
    }

    GNADeviceHelper(const GNADeviceHelper&) = delete;
@@ -149,7 +142,7 @@ public:
    std::string getEffectiveGnaCompileTarget() const;

 private:
-    void open(uint8_t const n_threads);
+    void open();

    void close();
    static std::string getGnaLibraryVersionPrivate();
@@ -167,7 +160,6 @@ public:

    void createVirtualDevice(Gna2DeviceVersion devVersion, std::string purpose = "");
    void updateGnaDeviceVersion();
-    void setOMPThreads(uint8_t const n_threads);

    void initGnaPerfCounters() {
        std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
--- a/src/plugins/intel_gna/gna_plugin.cpp
+++ b/src/plugins/intel_gna/gna_plugin.cpp
@@ -358,8 +358,6 @@ void GNAPlugin::InitGNADevice() {
    gnadevice = std::make_shared<GNADeviceHelper>(config.gnaExecTarget,
                config.gnaCompileTarget,
                config.swExactMode,
-                gnaFlags->gna_lib_async_threads_num,
-                gnaFlags->gna_openmp_multithreading,
                gnaFlags->performance_counting,
                !config.dumpXNNPath.empty(),
                GetDeviceVersionFromString(config.dumpXNNGeneration));
--- a/src/plugins/intel_gna/gna_plugin_config.cpp
+++ b/src/plugins/intel_gna/gna_plugin_config.cpp
@@ -29,6 +29,7 @@ static const std::set<std::string> supportedTargets = {
    ""
 };

+OPENVINO_SUPPRESS_DEPRECATED_START
 void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
    for (auto&& item : config) {
        auto key = item.first;
@@ -210,6 +211,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {

    AdjustKeyMapValues();
 }
+ OPENVINO_SUPPRESS_DEPRECATED_END

 void Config::AdjustKeyMapValues() {
    std::lock_guard<std::mutex> lockGuard{ mtx4keyConfigMap };
@@ -251,9 +253,11 @@ void Config::AdjustKeyMapValues() {
    keyConfigMap[GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT)] = std::to_string(gnaFlags.pwlMaxErrorPercent);
    keyConfigMap[CONFIG_KEY(PERF_COUNT)] =
            gnaFlags.performance_counting ? PluginConfigParams::YES: PluginConfigParams::NO;
+OPENVINO_SUPPRESS_DEPRECATED_START
    keyConfigMap[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(gnaFlags.gna_lib_async_threads_num);
    keyConfigMap[CONFIG_KEY(SINGLE_THREAD)] =
            gnaFlags.gna_openmp_multithreading ? PluginConfigParams::NO: PluginConfigParams::YES;
+OPENVINO_SUPPRESS_DEPRECATED_END
    keyConfigMap[CONFIG_KEY(LOG_LEVEL)] = gnaFlags.log_level;
 }

--- a/src/tests/functional/plugin/gna/shared_tests_instances/behavior/ov_infer_request/multithreading.cpp
+++ b/src/tests/functional/plugin/gna/shared_tests_instances/behavior/ov_infer_request/multithreading.cpp
@@ -7,10 +7,14 @@
 using namespace ov::test::behavior;
 namespace {

+OPENVINO_SUPPRESS_DEPRECATED_START
+
 const std::vector<std::map<std::string, std::string>> configs = {
        {{GNA_CONFIG_KEY(LIB_N_THREADS), "3"}}
 };

+OPENVINO_SUPPRESS_DEPRECATED_END
+
 INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVInferRequestMultithreadingTests,
        ::testing::Combine(
                ::testing::Values(CommonTestUtils::DEVICE_GNA),
--- a/src/tests/unit/gna/gna_plugin_config_test.cpp
+++ b/src/tests/unit/gna/gna_plugin_config_test.cpp
@@ -11,6 +11,7 @@
 using namespace InferenceEngine;
 using namespace GNAPluginNS;

+IE_SUPPRESS_DEPRECATED_START
 const std::map<std::string, std::string>  supportedConfigKeysWithDefaults = {
    {GNA_CONFIG_KEY(SCALE_FACTOR), "1.000000"},
    {GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_0"), "1.000000"},
@@ -28,6 +29,7 @@ const std::map<std::string, std::string>  supportedConfigKeysWithDefaults = {
    {CONFIG_KEY(SINGLE_THREAD), CONFIG_VALUE(YES)},
    {CONFIG_KEY(LOG_LEVEL), PluginConfigParams::LOG_NONE}
 };
+IE_SUPPRESS_DEPRECATED_END

 class GNAPluginConfigTest : public ::testing::Test {
 protected:
@@ -159,6 +161,8 @@ TEST_F(GNAPluginConfigTest, GnaConfigPerfCountTest) {
                    config.gnaFlags.performance_counting);
 }

+IE_SUPPRESS_DEPRECATED_START
+
 TEST_F(GNAPluginConfigTest, GnaConfigLibNThreadsTest) {
    SetAndCompare(GNA_CONFIG_KEY(LIB_N_THREADS), "2");
    EXPECT_EQ(config.gnaFlags.gna_lib_async_threads_num, 2);
@@ -176,6 +180,8 @@ TEST_F(GNAPluginConfigTest, GnaConfigSingleThreadTest) {
                    true);
 }

+IE_SUPPRESS_DEPRECATED_END
+
 TEST_F(GNAPluginConfigTest, GnaConfigGnaExecTargetTest) {
    SetAndCompare(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_2_0");
    EXPECT_EQ(config.gnaExecTarget, "GNA_TARGET_2_0");
--- a/src/tests_deprecated/unit/engines/gna/configuration_test.cpp
+++ b/src/tests_deprecated/unit/engines/gna/configuration_test.cpp
@@ -61,15 +61,6 @@ TEST_F(GNAConfigTest, canNOTMatchWith128AsyncThreads) {
        .throws();
 }

-TEST_F(GNAConfigTest, canMatchWithSingleMultipleOMPThreads) {
-    assert_that()
-        .onInferModel(GNATestIRs::Fc2DOutputModel())
-        .inNotCompactMode()
-        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .enable_omp_multithreading()
-        .gna().propagate_forward().called_without().pwl_inserted_into_nnet();
-}
-
 TEST_F(GNAConfigTest, failToCreatePluginWithDifferentInputScaleFactors) {
    assert_that().creating().gna_plugin()
        .withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR))+"_0", 1000)
--- a/src/tests_deprecated/unit/engines/gna/gna_matcher.hpp
+++ b/src/tests_deprecated/unit/engines/gna/gna_matcher.hpp
@@ -183,12 +183,6 @@ class GNATestConfigurability : public GNATestBase{
        _env.config[CONFIG_KEY(PERF_COUNT)] = InferenceEngine::PluginConfigParams::YES;
        return *dynamic_cast<T*>(this);
    }
-
-    T & enable_omp_multithreading() {
-        _env.is_setup_of_omp_theads_expected = true;
-        _env.config[CONFIG_KEY(SINGLE_THREAD)] = InferenceEngine::PluginConfigParams::NO;
-        return *dynamic_cast<T*>(this);
-    }
 };

 /**