Deprecate GNA_LIB_N_THREADS and SINGLE_THREAD parameters (#9486)
This commit is contained in:
parent
2b996c32ad
commit
c06d4e7fb7
@ -313,9 +313,6 @@ The plugin supports the configuration parameters listed below. The parameter nam
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``KEY_PERF_COUNT`` | ``YES``, ``NO`` | ``NO`` | Turns on performance counters reporting. |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``KEY_GNA_LIB_N_THREADS`` | 1-127 integer number | 1 | Sets the number of GNA accelerator library worker threads used |
|
||||
| | | | for inference computation in software modes. |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
|
||||
The parameters are passed as ``std::map<std::string, std::string>`` on ``InferenceEngine::Core::LoadNetwork`` or ``InferenceEngine::SetConfig``.
|
||||
|
||||
@ -352,9 +349,6 @@ The plugin supports the configuration parameters listed below. The parameter nam
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``PERF_COUNT`` | ``YES``, ``NO`` | ``NO`` | Turns on performance counters reporting. |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
| ``GNA_LIB_N_THREADS`` | 1-127 integer number | 1 | Sets the number of GNA accelerator library worker threads used |
|
||||
| | | | for inference computation in software modes. |
|
||||
+----------------------------------+-------------------------+---------------+-----------------------------------------------------------------+
|
||||
|
||||
The parameters are passed as strings to `IECore.load_network <api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.load_network>`_.
|
||||
|
||||
@ -402,25 +396,6 @@ Performance counters provided for the time being:
|
||||
* Total number of cycles spent on scoring in hardware, including compute and memory stall cycles
|
||||
* Number of stall cycles spent in hardware
|
||||
|
||||
## Multithreading Support in GNA Plugin
|
||||
|
||||
The GNA plugin supports the following configuration parameters for multithreading management:
|
||||
|
||||
@sphinxdirective
|
||||
.. tab:: C++
|
||||
|
||||
``KEY_GNA_LIB_N_THREADS``
|
||||
|
||||
.. tab:: Python
|
||||
|
||||
``GNA_LIB_N_THREADS``
|
||||
|
||||
@endsphinxdirective
|
||||
|
||||
By default, the GNA plugin uses one worker thread for inference computations. This parameter allows you to create up to 127 threads for software modes.
|
||||
|
||||
> **NOTE**: Multithreading mode does not guarantee that computations are performed in the order in which they were issued. Additionally, in this mode, software modes do not implement any serialization.
|
||||
|
||||
## Network Batch Size
|
||||
|
||||
Intel® GNA plugin supports the processing of context-windowed speech frames in batches of 1-8 frames in one
|
||||
|
@ -112,6 +112,7 @@ DECLARE_GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT);
|
||||
* Note that multithreading mode does not guarantee the same computation order as the order
|
||||
* of issuing. Additionally, in this case, software modes do not implement any serializations.
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("The config key will be removed")
|
||||
DECLARE_GNA_CONFIG_KEY(LIB_N_THREADS);
|
||||
} // namespace GNAConfigParams
|
||||
|
||||
@ -131,6 +132,7 @@ namespace PluginConfigParams {
|
||||
* It is passed to Core::SetConfig(), this option should be used with values:
|
||||
* PluginConfigParams::YES or PluginConfigParams::NO
|
||||
*/
|
||||
INFERENCE_ENGINE_DEPRECATED("The config key will be removed")
|
||||
DECLARE_CONFIG_KEY(SINGLE_THREAD);
|
||||
|
||||
} // namespace PluginConfigParams
|
||||
|
@ -441,7 +441,7 @@ void GNADeviceHelper::updateGnaDeviceVersion() {
|
||||
checkGna2Status(status, "Gna2DeviceGetVersion");
|
||||
}
|
||||
|
||||
void GNADeviceHelper::open(uint8_t n_threads) {
|
||||
void GNADeviceHelper::open() {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
updateGnaDeviceVersion();
|
||||
const auto gnaExecTarget = parseTarget(executionTarget);
|
||||
@ -457,10 +457,6 @@ void GNADeviceHelper::open(uint8_t n_threads) {
|
||||
const auto status = Gna2DeviceOpen(nGnaDeviceIndex);
|
||||
checkGna2Status(status, "Gna2DeviceOpen");
|
||||
}
|
||||
|
||||
// TODO: GNA2: uncomment when scratchpad repaired
|
||||
// status = Gna2DeviceSetNumberOfThreads(nGnaDeviceIndex, n_threads);
|
||||
// checkGna2Status(status);
|
||||
deviceOpened = true;
|
||||
}
|
||||
|
||||
@ -483,12 +479,6 @@ void GNADeviceHelper::close() {
|
||||
deviceOpened = false;
|
||||
}
|
||||
|
||||
// Requests n_threads worker threads from the GNA library for the device
// identified by nGnaDeviceIndex, by calling Gna2DeviceSetNumberOfThreads.
// The resulting status is handed to checkGna2Status together with the name of
// the API call (presumably so a failure can be reported against that call --
// confirm in checkGna2Status).
void GNADeviceHelper::setOMPThreads(uint8_t const n_threads) {
|
||||
// Hold acrossPluginsSync for the duration of the GNA library call.
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
const auto status = Gna2DeviceSetNumberOfThreads(nGnaDeviceIndex, n_threads);
|
||||
checkGna2Status(status, "Gna2DeviceSetNumberOfThreads");
|
||||
}
|
||||
|
||||
void GNADeviceHelper::updateGnaPerfCounters() {
|
||||
if (!isPerformanceMeasuring)
|
||||
return;
|
||||
|
@ -65,8 +65,6 @@ public:
|
||||
explicit GNADeviceHelper(std::string executionTargetIn = "",
|
||||
std::string compileTargetIn = "",
|
||||
bool swExactModeIn = false,
|
||||
uint8_t lib_async_n_threads = 1,
|
||||
bool use_openmp = false,
|
||||
bool isPerformanceMeasuring = false,
|
||||
bool deviceEmbedded = false,
|
||||
int deviceVersionParsed = 0) :
|
||||
@ -77,7 +75,7 @@ public:
|
||||
nGnaDeviceIndex{selectGnaDevice()},
|
||||
useDeviceEmbeddedExport(deviceEmbedded),
|
||||
exportGeneration(static_cast<Gna2DeviceVersion>(deviceVersionParsed)) {
|
||||
open(lib_async_n_threads);
|
||||
open();
|
||||
initGnaPerfCounters();
|
||||
|
||||
// check GNA Library version
|
||||
@ -88,11 +86,6 @@ public:
|
||||
if (gnaLibVersion.rfind("3.0", 0) == 0) {
|
||||
isGnaLibVersion3_0 = true;
|
||||
}
|
||||
|
||||
if (use_openmp) {
|
||||
uint8_t num_cores = std::thread::hardware_concurrency();
|
||||
setOMPThreads((num_cores != 0) ? num_cores : 1);
|
||||
}
|
||||
}
|
||||
|
||||
GNADeviceHelper(const GNADeviceHelper&) = delete;
|
||||
@ -149,7 +142,7 @@ public:
|
||||
std::string getEffectiveGnaCompileTarget() const;
|
||||
|
||||
private:
|
||||
void open(uint8_t const n_threads);
|
||||
void open();
|
||||
|
||||
void close();
|
||||
static std::string getGnaLibraryVersionPrivate();
|
||||
@ -167,7 +160,6 @@ public:
|
||||
|
||||
void createVirtualDevice(Gna2DeviceVersion devVersion, std::string purpose = "");
|
||||
void updateGnaDeviceVersion();
|
||||
void setOMPThreads(uint8_t const n_threads);
|
||||
|
||||
void initGnaPerfCounters() {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
|
@ -358,8 +358,6 @@ void GNAPlugin::InitGNADevice() {
|
||||
gnadevice = std::make_shared<GNADeviceHelper>(config.gnaExecTarget,
|
||||
config.gnaCompileTarget,
|
||||
config.swExactMode,
|
||||
gnaFlags->gna_lib_async_threads_num,
|
||||
gnaFlags->gna_openmp_multithreading,
|
||||
gnaFlags->performance_counting,
|
||||
!config.dumpXNNPath.empty(),
|
||||
GetDeviceVersionFromString(config.dumpXNNGeneration));
|
||||
|
@ -29,6 +29,7 @@ static const std::set<std::string> supportedTargets = {
|
||||
""
|
||||
};
|
||||
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
|
||||
for (auto&& item : config) {
|
||||
auto key = item.first;
|
||||
@ -210,6 +211,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
|
||||
|
||||
AdjustKeyMapValues();
|
||||
}
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
|
||||
void Config::AdjustKeyMapValues() {
|
||||
std::lock_guard<std::mutex> lockGuard{ mtx4keyConfigMap };
|
||||
@ -251,9 +253,11 @@ void Config::AdjustKeyMapValues() {
|
||||
keyConfigMap[GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT)] = std::to_string(gnaFlags.pwlMaxErrorPercent);
|
||||
keyConfigMap[CONFIG_KEY(PERF_COUNT)] =
|
||||
gnaFlags.performance_counting ? PluginConfigParams::YES: PluginConfigParams::NO;
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
keyConfigMap[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(gnaFlags.gna_lib_async_threads_num);
|
||||
keyConfigMap[CONFIG_KEY(SINGLE_THREAD)] =
|
||||
gnaFlags.gna_openmp_multithreading ? PluginConfigParams::NO: PluginConfigParams::YES;
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
keyConfigMap[CONFIG_KEY(LOG_LEVEL)] = gnaFlags.log_level;
|
||||
}
|
||||
|
||||
|
@ -7,10 +7,14 @@
|
||||
using namespace ov::test::behavior;
|
||||
namespace {
|
||||
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
|
||||
// List of plugin configurations for this test file: a single configuration
// that sets the GNA LIB_N_THREADS key to "3".
// NOTE(review): presumably consumed by the INSTANTIATE_TEST_SUITE_P call that
// follows; its full argument list is not visible here -- confirm.
const std::vector<std::map<std::string, std::string>> configs = {
|
||||
{{GNA_CONFIG_KEY(LIB_N_THREADS), "3"}}
|
||||
};
|
||||
|
||||
OPENVINO_SUPPRESS_DEPRECATED_END
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVInferRequestMultithreadingTests,
|
||||
::testing::Combine(
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
|
@ -11,6 +11,7 @@
|
||||
using namespace InferenceEngine;
|
||||
using namespace GNAPluginNS;
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
const std::map<std::string, std::string> supportedConfigKeysWithDefaults = {
|
||||
{GNA_CONFIG_KEY(SCALE_FACTOR), "1.000000"},
|
||||
{GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_0"), "1.000000"},
|
||||
@ -28,6 +29,7 @@ const std::map<std::string, std::string> supportedConfigKeysWithDefaults = {
|
||||
{CONFIG_KEY(SINGLE_THREAD), CONFIG_VALUE(YES)},
|
||||
{CONFIG_KEY(LOG_LEVEL), PluginConfigParams::LOG_NONE}
|
||||
};
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
class GNAPluginConfigTest : public ::testing::Test {
|
||||
protected:
|
||||
@ -159,6 +161,8 @@ TEST_F(GNAPluginConfigTest, GnaConfigPerfCountTest) {
|
||||
config.gnaFlags.performance_counting);
|
||||
}
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
|
||||
TEST_F(GNAPluginConfigTest, GnaConfigLibNThreadsTest) {
|
||||
SetAndCompare(GNA_CONFIG_KEY(LIB_N_THREADS), "2");
|
||||
EXPECT_EQ(config.gnaFlags.gna_lib_async_threads_num, 2);
|
||||
@ -176,6 +180,8 @@ TEST_F(GNAPluginConfigTest, GnaConfigSingleThreadTest) {
|
||||
true);
|
||||
}
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
TEST_F(GNAPluginConfigTest, GnaConfigGnaExecTargetTest) {
|
||||
SetAndCompare(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_2_0");
|
||||
EXPECT_EQ(config.gnaExecTarget, "GNA_TARGET_2_0");
|
||||
|
@ -61,15 +61,6 @@ TEST_F(GNAConfigTest, canNOTMatchWith128AsyncThreads) {
|
||||
.throws();
|
||||
}
|
||||
|
||||
// Builds a fluent expectation: infer GNATestIRs::Fc2DOutputModel() in
// non-compact mode with SCALE_FACTOR set to 1.0f and OMP multithreading
// enabled (via enable_omp_multithreading()), then check, per the matcher
// names, that GNA forward propagation is called without a PWL inserted into
// the nnet.
TEST_F(GNAConfigTest, canMatchWithSingleMultipleOMPThreads) {
|
||||
assert_that()
|
||||
.onInferModel(GNATestIRs::Fc2DOutputModel())
|
||||
.inNotCompactMode()
|
||||
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
|
||||
.enable_omp_multithreading()
|
||||
.gna().propagate_forward().called_without().pwl_inserted_into_nnet();
|
||||
}
|
||||
|
||||
TEST_F(GNAConfigTest, failToCreatePluginWithDifferentInputScaleFactors) {
|
||||
assert_that().creating().gna_plugin()
|
||||
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR))+"_0", 1000)
|
||||
|
@ -183,12 +183,6 @@ class GNATestConfigurability : public GNATestBase{
|
||||
_env.config[CONFIG_KEY(PERF_COUNT)] = InferenceEngine::PluginConfigParams::YES;
|
||||
return *dynamic_cast<T*>(this);
|
||||
}
|
||||
|
||||
// Fluent configuration step that turns on OMP multithreading for the test:
// flags in the test environment that OMP thread setup is expected, sets the
// SINGLE_THREAD config key to NO, and returns this object cast to T& so that
// calls can be chained.
T & enable_omp_multithreading() {
|
||||
// Field name is spelled "theads" in the environment struct; kept as-is.
_env.is_setup_of_omp_theads_expected = true;
|
||||
_env.config[CONFIG_KEY(SINGLE_THREAD)] = InferenceEngine::PluginConfigParams::NO;
|
||||
return *dynamic_cast<T*>(this);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user