[GNA] Introduce an option to invoke the QoS feature (#5827)

* [GNA] [WIP] Introduce an option to invoke the QoS feature 56759

* Apply remaining review (typos) for PR 5741

* Introduce HW_WITH_SW_FBACK

* Add unit test for HW_WITH_SW_FBACK

* Enable HW_WITH_SW_FBACK in speech_sample cpp

* Use perf counters to report the number of HW-delivered frames to the user (e.g. speech_sample)

* Update speech_sample.hpp

based on review

* Update gna_config.hpp

Describe special perf counter

* lint fix

* Apply review

  * Remove special performance counter
  * Add GNA frequency for 6/151 CPU family/model

* Update inference-engine/samples/speech_sample/main.cpp

Co-authored-by: Mikhail Ryzhov <mikhail.ryzhov@intel.com>

* Update main.cpp

Co-authored-by: Mikhail Ryzhov <mikhail.ryzhov@intel.com>
This commit is contained in:
Krzysztof Bruniecki 2021-06-18 11:42:18 +02:00 committed by GitHub
parent 70f9d8564e
commit 1048e6f69b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 66 additions and 56 deletions

View File

@ -65,6 +65,7 @@ DECLARE_GNA_CONFIG_KEY(DEVICE_MODE);
DECLARE_GNA_CONFIG_VALUE(AUTO);
DECLARE_GNA_CONFIG_VALUE(HW);
DECLARE_GNA_CONFIG_VALUE(HW_WITH_SW_FBACK);
DECLARE_GNA_CONFIG_VALUE(SW);
DECLARE_GNA_CONFIG_VALUE(SW_EXACT);
DECLARE_GNA_CONFIG_VALUE(SW_FP32);

View File

@ -236,7 +236,8 @@ float getGnaFrequencyMHz() {
const uint8_t cannon_lake_model = 102;
const uint8_t gemini_lake_model = 122;
const uint8_t ice_lake_model = 126;
const uint8_t next_model = 140;
const uint8_t tgl_model = 140;
const uint8_t next_model = 151;
native_cpuid(&eax, &ebx, &ecx, &edx);
family = (eax >> 8) & 0xF;
@ -254,6 +255,7 @@ float getGnaFrequencyMHz() {
switch (model) {
case cannon_lake_model:
case ice_lake_model:
case tgl_model:
case next_model:
return 400;
case gemini_lake_model:
@ -287,13 +289,14 @@ void printReferenceCompareResults(score_error_t const& totalError, size_t frames
/**
* @brief Print a report on the performance counts
* @param utterancePerfMap reference to a map to store performance counters
* @param callsNum frame index
* @param numberOfFrames number of frames
* @param stream output stream
* @param fullDeviceName full device name string
* @param numberOfFramesOnHw number of frames delivered to GNA HW
* @return none.
*/
void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& utterancePerfMap, size_t callsNum, std::ostream& stream,
std::string fullDeviceName) {
void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& utterancePerfMap, size_t numberOfFrames,
std::ostream& stream, std::string fullDeviceName, const uint64_t numberOfFramesOnHw) {
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
stream << std::endl << "Performance counts:" << std::endl;
stream << std::setw(10) << std::right << ""
@ -305,29 +308,29 @@ void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEn
stream << std::setw(46) << "(ms)";
stream << std::setw(24) << "(us per call)";
stream << std::endl;
for (const auto& it : utterancePerfMap) {
std::string const& counter_name = it.first;
float current_units = static_cast<float>(it.second.realTime_uSec);
float call_units = current_units / callsNum;
// if GNA HW counters
// get frequency of GNA module
float freq = getGnaFrequencyMHz();
current_units /= freq * 1000;
call_units /= freq;
for (const auto& it : utterancePerfMap) {
std::string const& counter_name = it.first;
float current_units_us = static_cast<float>(it.second.realTime_uSec) / freq;
float call_units_us = current_units_us / numberOfFrames;
if (FLAGS_d.find("GNA") != std::string::npos) {
stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1);
} else {
stream << std::setw(30) << std::left << counter_name;
}
stream << std::setw(16) << std::right << current_units;
stream << std::setw(21) << std::right << call_units;
stream << std::setw(16) << std::right << current_units_us / 1000;
stream << std::setw(21) << std::right << call_units_us;
stream << std::endl;
}
stream << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << fullDeviceName << std::endl;
std::cout << std::endl;
stream << "Number of frames delivered to GNA HW: " << numberOfFramesOnHw;
stream << "/" << numberOfFrames;
stream << std::endl;
#endif
}
@ -346,16 +349,20 @@ void getPerformanceCounters(InferenceEngine::InferRequest& request, std::map<std
}
/**
* @brief Summarize performance counts
* @brief Summarize performance counts and total number of frames executed on the GNA HW device
* @param perfCounters reference to a map to get performance counters
* @param totalPerfCounters reference to a map to save total performance counters
* @param totalRunsOnHw reference to a total number of frames computed on GNA HW
* @return none.
*/
void sumPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& perfCounters,
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& totalPerfCounters) {
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& totalPerfCounters, uint64_t& totalRunsOnHw) {
auto runOnHw = false;
for (const auto& pair : perfCounters) {
totalPerfCounters[pair.first].realTime_uSec += pair.second.realTime_uSec;
runOnHw |= pair.second.realTime_uSec > 0; // if realTime is above zero, that means that a primitive was executed on the device
}
totalRunsOnHw += runOnHw;
}
/**
@ -443,6 +450,7 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
"GPU",
"GNA_AUTO",
"GNA_HW",
"GNA_HW_WITH_SW_FBACK",
"GNA_SW_EXACT",
"GNA_SW",
"GNA_SW_FP32",
@ -829,6 +837,7 @@ int main(int argc, char* argv[]) {
/** Work with each utterance **/
for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) {
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> utterancePerfMap;
uint64_t totalNumberOfRunsOnHw = 0;
std::string uttName;
uint32_t numFrames(0), n(0);
std::vector<uint32_t> numFrameElementsInput;
@ -984,7 +993,7 @@ int main(int argc, char* argv[]) {
// retrieve new counters
getPerformanceCounters(inferRequest.inferRequest, callPerfMap);
// summarize retrieved counters with all previous
sumPerformanceCounters(callPerfMap, utterancePerfMap);
sumPerformanceCounters(callPerfMap, utterancePerfMap, totalNumberOfRunsOnHw);
}
}
// -----------------------------------------------------------------------------------------------------
@ -1092,7 +1101,7 @@ int main(int argc, char* argv[]) {
std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast<double>(numFrames) << " ms" << std::endl;
if (FLAGS_pc) {
// print performance results
printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d));
printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d), totalNumberOfRunsOnHw);
}
if (!FLAGS_r.empty()) {
// print statistical score error

View File

@ -21,10 +21,9 @@ static const char model_message[] = "Required. Path to an .xml file with a train
/// @brief message for assigning cnn calculation to device
static const char target_device_message[] = "Optional. Specify a target device to infer on. CPU, GPU, MYRIAD, GNA_AUTO, GNA_HW, "
"GNA_SW_FP32, "
"GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, "
"GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU"
" as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown "
"below. "
" as a secondary (e.g. HETERO:GNA,CPU) are supported. "
"The sample will look for a suitable plugin for device specified.";
/// @brief message for execution target

View File

@ -96,14 +96,12 @@ void GNADeviceHelper::setUpActiveList(const uint32_t requestConfigId, uint32_t l
const auto status = Gna2RequestConfigEnableActiveList(requestConfigId, layerIndex, num_active_indices, ptr_active_indices);
checkGna2Status(status, "Gna2RequestConfigEnableActiveList");
}
void GNADeviceHelper::propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) {
wait(propagate(requestConfigId, gna2AccelerationMode));
}
uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
uint32_t reqId{};
if (gna2AccelerationMode == Gna2AccelerationModeHardware &&
if ((gna2AccelerationMode == Gna2AccelerationModeHardware ||
gna2AccelerationMode == Gna2AccelerationModeHardwareWithSoftwareFallback) &&
detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation) {
gnawarn() << "GNA Device not detected, consider using other mode of acceleration";
}
@ -541,6 +539,8 @@ void GNADeviceHelper::updateGnaPerfCounters() {
#if GNA_LIB_VER == 2
instrumentationTotal[0] = instrumentationResults[0];
instrumentationTotal[1] = instrumentationResults[1];
instrumentationResults[0] = 0;
instrumentationResults[1] = 0;
#else
nGNAPerfResultsTotal.hw.stall = nGNAPerfResults.hw.stall;
nGNAPerfResultsTotal.hw.total = nGNAPerfResults.hw.total;

View File

@ -117,18 +117,12 @@ public:
uint8_t *alloc(uint32_t size_requested, uint32_t *size_granted);
#if GNA_LIB_VER == 1
void propagateSync(const intel_nnet_type_t *pNeuralNetwork,
const uint32_t *pActiveIndices,
uint32_t nActiveIndices,
intel_gna_proc_t nGNAProcType);
uint32_t propagate(const intel_nnet_type_t *pNeuralNetwork,
const uint32_t *pActiveIndices,
uint32_t nActiveIndices,
intel_gna_proc_t nGNAProcType);
#else
void setUpActiveList(unsigned req_config_id, uint32_t layerIndex, uint32_t* ptr_active_indices, uint32_t num_active_indices);
void propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode);
uint32_t propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode);
uint32_t createModel(Gna2Model& gnaModel) const;
void releaseModel(const uint32_t model_id);

View File

@ -23,6 +23,7 @@ static const caseless_unordered_map<std::string, uint32_t> supported_values = {
{GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE}
};
static const std::vector<std::string> supported_values_on_gna2 = {
GNAConfigParams::GNA_HW_WITH_SW_FBACK,
GNAConfigParams::GNA_GEN,
GNAConfigParams::GNA_GEN_EXACT,
GNAConfigParams::GNA_SSE,
@ -36,6 +37,7 @@ static const std::vector<std::string> supported_values_on_gna2 = {
static const caseless_unordered_map <std::string, std::pair<Gna2AccelerationMode, bool>> supported_values = {
{GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, false}},
{GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, false}},
{GNAConfigParams::GNA_HW_WITH_SW_FBACK, {Gna2AccelerationModeHardwareWithSoftwareFallback, false}},
{GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, false}},
{GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, true}},
{GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, false}},

View File

@ -24,7 +24,7 @@ const std::vector<std::vector<size_t >> kernels2D = {
{3, 3},
};
const std::vector<std::vector<size_t >> InvalidKernels2D = {
const std::vector<std::vector<size_t >> kernels2DInvalid = {
{1, 4},
{2, 3},
{3, 2},
@ -50,8 +50,8 @@ const std::vector<std::vector<size_t >> dilations2D = { {1, 1},
};
const std::vector<std::vector<size_t >> dilations2DInvalid = { {2, 2},
};
const std::vector<size_t> numOutCannels2D = { 32 };
const std::vector<size_t> numOutCannels2DInvalid = { 1, 7, 9, 400 };
const std::vector<size_t> numOutChannels2D = { 32 };
const std::vector<size_t> numOutChannels2DInvalid = { 1, 7, 9, 400 };
const std::vector<std::vector<size_t>> input2DNCHWFine = { { 1, 8, 20, 16 } };
@ -68,16 +68,16 @@ const auto conv2DParametersFine = ::testing::Combine(
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutCannels2D),
::testing::ValuesIn(numOutChannels2D),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const auto conv2DParametersInvalidKernel = ::testing::Combine(
::testing::ValuesIn(InvalidKernels2D),
::testing::ValuesIn(kernels2DInvalid),
::testing::ValuesIn(strides2D),
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutCannels2D),
::testing::ValuesIn(numOutChannels2D),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const auto conv2DParametersInvalidFilterNumber = ::testing::Combine(
@ -86,7 +86,7 @@ const auto conv2DParametersInvalidFilterNumber = ::testing::Combine(
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutCannels2DInvalid),
::testing::ValuesIn(numOutChannels2DInvalid),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const auto conv2DParametersInvalidPadding = ::testing::Combine(
@ -95,7 +95,7 @@ const auto conv2DParametersInvalidPadding = ::testing::Combine(
::testing::ValuesIn(padBegins2DInvalid),
::testing::ValuesIn(padEnds2DInvalid),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutCannels2D),
::testing::ValuesIn(numOutChannels2D),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const auto conv2DParametersInvalidStride = ::testing::Combine(
@ -104,7 +104,7 @@ const auto conv2DParametersInvalidStride = ::testing::Combine(
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2D),
::testing::ValuesIn(numOutCannels2D),
::testing::ValuesIn(numOutChannels2D),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
const auto conv2DParametersInvalidDilation = ::testing::Combine(
@ -113,7 +113,7 @@ const auto conv2DParametersInvalidDilation = ::testing::Combine(
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2DInvalid),
::testing::ValuesIn(numOutCannels2D),
::testing::ValuesIn(numOutChannels2D),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);
@ -142,7 +142,7 @@ protected:
}
};
#define GNA_NEG_INSTANTIATE(whats_wrong, sufix_params, sufix_input, error_message) \
#define GNA_NEG_INSTANTIATE(whats_wrong, suffix_params, suffix_input, error_message) \
struct GnaConv2DNegativeTest##whats_wrong : GnaConv2DNegativeTest { \
std::string expectedSubstring() override { \
return error_message; \
@ -153,13 +153,13 @@ TEST_P(GnaConv2DNegativeTest##whats_wrong, ThrowAsNotSupported) {
} \
INSTANTIATE_TEST_CASE_P(smoke_GnaConv2DNegativeTestInvalid##whats_wrong, GnaConv2DNegativeTest##whats_wrong, \
::testing::Combine( \
conv2DParameters##sufix_params, \
conv2DParameters##suffix_params, \
::testing::ValuesIn(netPrecisions), \
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), \
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), \
::testing::Values(InferenceEngine::Layout::ANY), \
::testing::Values(InferenceEngine::Layout::ANY), \
::testing::ValuesIn(input2DNCHW##sufix_input), \
::testing::ValuesIn(input2DNCHW##suffix_input), \
::testing::Values(CommonTestUtils::DEVICE_GNA)), \
GnaConv2DNegativeTest##whats_wrong::getTestCaseName);

View File

@ -107,6 +107,11 @@ TEST_F(GNAPluginConfigTest, GnaConfigDeviceModeTest) {
#else
EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeHardware);
EXPECT_EQ(config.swExactMode, false);
#endif
#if GNA_LIB_VER == 2
SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_HW_WITH_SW_FBACK);
EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeHardwareWithSoftwareFallback);
EXPECT_EQ(config.swExactMode, false);
#endif
SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW);
#if GNA_LIB_VER == 1