[GNA] Introduce an option to invoke the QoS feature (#5827)
* [GNA] [WIP] Introduce an option to invoke the QoS feature 56759 * Apply remaining review (typos) for PR 5741 * Introduce HW_WITH_SW_FBACK * Add unit test for HW_WITH_SW_FBACK * Enable HW_WITH_SW_FBACK in speech_sample cpp * Use perf counters to report number of HW delivered frames to the user (e.g. speech_sample) * Update speech_sample.hpp based on review * Update gna_config.hpp Describe special perf counter * lint fix * Apply review * Remove special performance counter * Add GNA frequency for 6/151 CPU family/model * Update inference-engine/samples/speech_sample/main.cpp Co-authored-by: Mikhail Ryzhov <mikhail.ryzhov@intel.com> * Update main.cpp Co-authored-by: Mikhail Ryzhov <mikhail.ryzhov@intel.com>
This commit is contained in:
parent
70f9d8564e
commit
1048e6f69b
@ -65,6 +65,7 @@ DECLARE_GNA_CONFIG_KEY(DEVICE_MODE);
|
||||
|
||||
DECLARE_GNA_CONFIG_VALUE(AUTO);
|
||||
DECLARE_GNA_CONFIG_VALUE(HW);
|
||||
DECLARE_GNA_CONFIG_VALUE(HW_WITH_SW_FBACK);
|
||||
DECLARE_GNA_CONFIG_VALUE(SW);
|
||||
DECLARE_GNA_CONFIG_VALUE(SW_EXACT);
|
||||
DECLARE_GNA_CONFIG_VALUE(SW_FP32);
|
||||
|
@ -236,7 +236,8 @@ float getGnaFrequencyMHz() {
|
||||
const uint8_t cannon_lake_model = 102;
|
||||
const uint8_t gemini_lake_model = 122;
|
||||
const uint8_t ice_lake_model = 126;
|
||||
const uint8_t next_model = 140;
|
||||
const uint8_t tgl_model = 140;
|
||||
const uint8_t next_model = 151;
|
||||
|
||||
native_cpuid(&eax, &ebx, &ecx, &edx);
|
||||
family = (eax >> 8) & 0xF;
|
||||
@ -254,6 +255,7 @@ float getGnaFrequencyMHz() {
|
||||
switch (model) {
|
||||
case cannon_lake_model:
|
||||
case ice_lake_model:
|
||||
case tgl_model:
|
||||
case next_model:
|
||||
return 400;
|
||||
case gemini_lake_model:
|
||||
@ -287,13 +289,14 @@ void printReferenceCompareResults(score_error_t const& totalError, size_t frames
|
||||
/**
|
||||
* @brief Print a report on the performance counts
|
||||
* @param utterancePerfMap reference to a map to store performance counters
|
||||
* @param callsNum frame index
|
||||
* @param numberOfFrames number of frames
|
||||
* @param stream output stream
|
||||
* @param fullDeviceName full device name string
|
||||
* @param numberOfFramesOnHw number of frames delivered to GNA HW
|
||||
* @return none.
|
||||
*/
|
||||
void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& utterancePerfMap, size_t callsNum, std::ostream& stream,
|
||||
std::string fullDeviceName) {
|
||||
void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& utterancePerfMap, size_t numberOfFrames,
|
||||
std::ostream& stream, std::string fullDeviceName, const uint64_t numberOfFramesOnHw) {
|
||||
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
|
||||
stream << std::endl << "Performance counts:" << std::endl;
|
||||
stream << std::setw(10) << std::right << ""
|
||||
@ -305,29 +308,29 @@ void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEn
|
||||
stream << std::setw(46) << "(ms)";
|
||||
stream << std::setw(24) << "(us per call)";
|
||||
stream << std::endl;
|
||||
|
||||
// if GNA HW counters
|
||||
// get frequency of GNA module
|
||||
float freq = getGnaFrequencyMHz();
|
||||
for (const auto& it : utterancePerfMap) {
|
||||
std::string const& counter_name = it.first;
|
||||
float current_units = static_cast<float>(it.second.realTime_uSec);
|
||||
float call_units = current_units / callsNum;
|
||||
// if GNA HW counters
|
||||
// get frequency of GNA module
|
||||
float freq = getGnaFrequencyMHz();
|
||||
current_units /= freq * 1000;
|
||||
call_units /= freq;
|
||||
float current_units_us = static_cast<float>(it.second.realTime_uSec) / freq;
|
||||
float call_units_us = current_units_us / numberOfFrames;
|
||||
if (FLAGS_d.find("GNA") != std::string::npos) {
|
||||
stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1);
|
||||
} else {
|
||||
stream << std::setw(30) << std::left << counter_name;
|
||||
}
|
||||
stream << std::setw(16) << std::right << current_units;
|
||||
stream << std::setw(21) << std::right << call_units;
|
||||
stream << std::setw(16) << std::right << current_units_us / 1000;
|
||||
stream << std::setw(21) << std::right << call_units_us;
|
||||
stream << std::endl;
|
||||
}
|
||||
stream << std::endl;
|
||||
std::cout << std::endl;
|
||||
std::cout << "Full device name: " << fullDeviceName << std::endl;
|
||||
std::cout << std::endl;
|
||||
stream << "Number of frames delivered to GNA HW: " << numberOfFramesOnHw;
|
||||
stream << "/" << numberOfFrames;
|
||||
stream << std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -346,16 +349,20 @@ void getPerformanceCounters(InferenceEngine::InferRequest& request, std::map<std
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Summarize performance counts
|
||||
* @brief Summarize performance counts and total number of frames executed on the GNA HW device
|
||||
* @param perfCounters reference to a map to get performance counters
|
||||
* @param totalPerfCounters reference to a map to save total performance counters
|
||||
* @param totalRunsOnHw reference to a total number of frames computed on GNA HW
|
||||
* @return none.
|
||||
*/
|
||||
void sumPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& perfCounters,
|
||||
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& totalPerfCounters) {
|
||||
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& totalPerfCounters, uint64_t& totalRunsOnHw) {
|
||||
auto runOnHw = false;
|
||||
for (const auto& pair : perfCounters) {
|
||||
totalPerfCounters[pair.first].realTime_uSec += pair.second.realTime_uSec;
|
||||
runOnHw |= pair.second.realTime_uSec > 0; // if realTime is above zero, that means that a primitive was executed on the device
|
||||
}
|
||||
totalRunsOnHw += runOnHw;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -443,6 +450,7 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
|
||||
"GPU",
|
||||
"GNA_AUTO",
|
||||
"GNA_HW",
|
||||
"GNA_HW_WITH_SW_FBACK",
|
||||
"GNA_SW_EXACT",
|
||||
"GNA_SW",
|
||||
"GNA_SW_FP32",
|
||||
@ -829,6 +837,7 @@ int main(int argc, char* argv[]) {
|
||||
/** Work with each utterance **/
|
||||
for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) {
|
||||
std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> utterancePerfMap;
|
||||
uint64_t totalNumberOfRunsOnHw = 0;
|
||||
std::string uttName;
|
||||
uint32_t numFrames(0), n(0);
|
||||
std::vector<uint32_t> numFrameElementsInput;
|
||||
@ -984,7 +993,7 @@ int main(int argc, char* argv[]) {
|
||||
// retrieve new counters
|
||||
getPerformanceCounters(inferRequest.inferRequest, callPerfMap);
|
||||
// summarize retrieved counters with all previous
|
||||
sumPerformanceCounters(callPerfMap, utterancePerfMap);
|
||||
sumPerformanceCounters(callPerfMap, utterancePerfMap, totalNumberOfRunsOnHw);
|
||||
}
|
||||
}
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
@ -1092,7 +1101,7 @@ int main(int argc, char* argv[]) {
|
||||
std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast<double>(numFrames) << " ms" << std::endl;
|
||||
if (FLAGS_pc) {
|
||||
// print performance results
|
||||
printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d));
|
||||
printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d), totalNumberOfRunsOnHw);
|
||||
}
|
||||
if (!FLAGS_r.empty()) {
|
||||
// print statistical score error
|
||||
|
@ -21,10 +21,9 @@ static const char model_message[] = "Required. Path to an .xml file with a train
|
||||
|
||||
/// @brief message for assigning cnn calculation to device
|
||||
static const char target_device_message[] = "Optional. Specify a target device to infer on. CPU, GPU, MYRIAD, GNA_AUTO, GNA_HW, "
|
||||
"GNA_SW_FP32, "
|
||||
"GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, "
|
||||
"GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU"
|
||||
" as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown "
|
||||
"below. "
|
||||
" as a secondary (e.g. HETERO:GNA,CPU) are supported. "
|
||||
"The sample will look for a suitable plugin for device specified.";
|
||||
|
||||
/// @brief message for execution target
|
||||
|
@ -96,14 +96,12 @@ void GNADeviceHelper::setUpActiveList(const uint32_t requestConfigId, uint32_t l
|
||||
const auto status = Gna2RequestConfigEnableActiveList(requestConfigId, layerIndex, num_active_indices, ptr_active_indices);
|
||||
checkGna2Status(status, "Gna2RequestConfigEnableActiveList");
|
||||
}
|
||||
void GNADeviceHelper::propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) {
|
||||
wait(propagate(requestConfigId, gna2AccelerationMode));
|
||||
}
|
||||
|
||||
uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
uint32_t reqId{};
|
||||
if (gna2AccelerationMode == Gna2AccelerationModeHardware &&
|
||||
if ((gna2AccelerationMode == Gna2AccelerationModeHardware ||
|
||||
gna2AccelerationMode == Gna2AccelerationModeHardwareWithSoftwareFallback) &&
|
||||
detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation) {
|
||||
gnawarn() << "GNA Device not detected, consider using other mode of acceleration";
|
||||
}
|
||||
@ -541,6 +539,8 @@ void GNADeviceHelper::updateGnaPerfCounters() {
|
||||
#if GNA_LIB_VER == 2
|
||||
instrumentationTotal[0] = instrumentationResults[0];
|
||||
instrumentationTotal[1] = instrumentationResults[1];
|
||||
instrumentationResults[0] = 0;
|
||||
instrumentationResults[1] = 0;
|
||||
#else
|
||||
nGNAPerfResultsTotal.hw.stall = nGNAPerfResults.hw.stall;
|
||||
nGNAPerfResultsTotal.hw.total = nGNAPerfResults.hw.total;
|
||||
|
@ -117,18 +117,12 @@ public:
|
||||
uint8_t *alloc(uint32_t size_requested, uint32_t *size_granted);
|
||||
|
||||
#if GNA_LIB_VER == 1
|
||||
void propagateSync(const intel_nnet_type_t *pNeuralNetwork,
|
||||
const uint32_t *pActiveIndices,
|
||||
uint32_t nActiveIndices,
|
||||
intel_gna_proc_t nGNAProcType);
|
||||
|
||||
uint32_t propagate(const intel_nnet_type_t *pNeuralNetwork,
|
||||
const uint32_t *pActiveIndices,
|
||||
uint32_t nActiveIndices,
|
||||
intel_gna_proc_t nGNAProcType);
|
||||
#else
|
||||
void setUpActiveList(unsigned req_config_id, uint32_t layerIndex, uint32_t* ptr_active_indices, uint32_t num_active_indices);
|
||||
void propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode);
|
||||
uint32_t propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode);
|
||||
uint32_t createModel(Gna2Model& gnaModel) const;
|
||||
void releaseModel(const uint32_t model_id);
|
||||
|
@ -23,6 +23,7 @@ static const caseless_unordered_map<std::string, uint32_t> supported_values = {
|
||||
{GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE}
|
||||
};
|
||||
static const std::vector<std::string> supported_values_on_gna2 = {
|
||||
GNAConfigParams::GNA_HW_WITH_SW_FBACK,
|
||||
GNAConfigParams::GNA_GEN,
|
||||
GNAConfigParams::GNA_GEN_EXACT,
|
||||
GNAConfigParams::GNA_SSE,
|
||||
@ -34,18 +35,19 @@ static const std::vector<std::string> supported_values_on_gna2 = {
|
||||
};
|
||||
#else
|
||||
static const caseless_unordered_map <std::string, std::pair<Gna2AccelerationMode, bool>> supported_values = {
|
||||
{GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, false}},
|
||||
{GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, false}},
|
||||
{GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, false}},
|
||||
{GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, true}},
|
||||
{GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, false}},
|
||||
{GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, true}},
|
||||
{GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, false}},
|
||||
{GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, true}},
|
||||
{GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, false}},
|
||||
{GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, true}},
|
||||
{GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, false}},
|
||||
{GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, true}},
|
||||
{GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, false}},
|
||||
{GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, false}},
|
||||
{GNAConfigParams::GNA_HW_WITH_SW_FBACK, {Gna2AccelerationModeHardwareWithSoftwareFallback, false}},
|
||||
{GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, false}},
|
||||
{GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, true}},
|
||||
{GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, false}},
|
||||
{GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, true}},
|
||||
{GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, false}},
|
||||
{GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, true}},
|
||||
{GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, false}},
|
||||
{GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, true}},
|
||||
{GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, false}},
|
||||
{GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, true}},
|
||||
};
|
||||
#endif
|
||||
|
||||
|
@ -24,7 +24,7 @@ const std::vector<std::vector<size_t >> kernels2D = {
|
||||
{3, 3},
|
||||
};
|
||||
|
||||
const std::vector<std::vector<size_t >> InvalidKernels2D = {
|
||||
const std::vector<std::vector<size_t >> kernels2DInvalid = {
|
||||
{1, 4},
|
||||
{2, 3},
|
||||
{3, 2},
|
||||
@ -50,8 +50,8 @@ const std::vector<std::vector<size_t >> dilations2D = { {1, 1},
|
||||
};
|
||||
const std::vector<std::vector<size_t >> dilations2DInvalid = { {2, 2},
|
||||
};
|
||||
const std::vector<size_t> numOutCannels2D = { 32 };
|
||||
const std::vector<size_t> numOutCannels2DInvalid = { 1, 7, 9, 400 };
|
||||
const std::vector<size_t> numOutChannels2D = { 32 };
|
||||
const std::vector<size_t> numOutChannels2DInvalid = { 1, 7, 9, 400 };
|
||||
|
||||
const std::vector<std::vector<size_t>> input2DNCHWFine = { { 1, 8, 20, 16 } };
|
||||
|
||||
@ -68,16 +68,16 @@ const auto conv2DParametersFine = ::testing::Combine(
|
||||
::testing::ValuesIn(padBegins2D),
|
||||
::testing::ValuesIn(padEnds2D),
|
||||
::testing::ValuesIn(dilations2D),
|
||||
::testing::ValuesIn(numOutCannels2D),
|
||||
::testing::ValuesIn(numOutChannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
const auto conv2DParametersInvalidKernel = ::testing::Combine(
|
||||
::testing::ValuesIn(InvalidKernels2D),
|
||||
::testing::ValuesIn(kernels2DInvalid),
|
||||
::testing::ValuesIn(strides2D),
|
||||
::testing::ValuesIn(padBegins2D),
|
||||
::testing::ValuesIn(padEnds2D),
|
||||
::testing::ValuesIn(dilations2D),
|
||||
::testing::ValuesIn(numOutCannels2D),
|
||||
::testing::ValuesIn(numOutChannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
const auto conv2DParametersInvalidFilterNumber = ::testing::Combine(
|
||||
@ -86,7 +86,7 @@ const auto conv2DParametersInvalidFilterNumber = ::testing::Combine(
|
||||
::testing::ValuesIn(padBegins2D),
|
||||
::testing::ValuesIn(padEnds2D),
|
||||
::testing::ValuesIn(dilations2D),
|
||||
::testing::ValuesIn(numOutCannels2DInvalid),
|
||||
::testing::ValuesIn(numOutChannels2DInvalid),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
const auto conv2DParametersInvalidPadding = ::testing::Combine(
|
||||
@ -95,7 +95,7 @@ const auto conv2DParametersInvalidPadding = ::testing::Combine(
|
||||
::testing::ValuesIn(padBegins2DInvalid),
|
||||
::testing::ValuesIn(padEnds2DInvalid),
|
||||
::testing::ValuesIn(dilations2D),
|
||||
::testing::ValuesIn(numOutCannels2D),
|
||||
::testing::ValuesIn(numOutChannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
const auto conv2DParametersInvalidStride = ::testing::Combine(
|
||||
@ -104,7 +104,7 @@ const auto conv2DParametersInvalidStride = ::testing::Combine(
|
||||
::testing::ValuesIn(padBegins2D),
|
||||
::testing::ValuesIn(padEnds2D),
|
||||
::testing::ValuesIn(dilations2D),
|
||||
::testing::ValuesIn(numOutCannels2D),
|
||||
::testing::ValuesIn(numOutChannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
const auto conv2DParametersInvalidDilation = ::testing::Combine(
|
||||
@ -113,7 +113,7 @@ const auto conv2DParametersInvalidDilation = ::testing::Combine(
|
||||
::testing::ValuesIn(padBegins2D),
|
||||
::testing::ValuesIn(padEnds2D),
|
||||
::testing::ValuesIn(dilations2DInvalid),
|
||||
::testing::ValuesIn(numOutCannels2D),
|
||||
::testing::ValuesIn(numOutChannels2D),
|
||||
::testing::Values(ngraph::op::PadType::EXPLICIT)
|
||||
);
|
||||
|
||||
@ -142,7 +142,7 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
#define GNA_NEG_INSTANTIATE(whats_wrong, sufix_params, sufix_input, error_message) \
|
||||
#define GNA_NEG_INSTANTIATE(whats_wrong, suffix_params, suffix_input, error_message) \
|
||||
struct GnaConv2DNegativeTest##whats_wrong : GnaConv2DNegativeTest { \
|
||||
std::string expectedSubstring() override { \
|
||||
return error_message; \
|
||||
@ -153,13 +153,13 @@ TEST_P(GnaConv2DNegativeTest##whats_wrong, ThrowAsNotSupported) {
|
||||
} \
|
||||
INSTANTIATE_TEST_CASE_P(smoke_GnaConv2DNegativeTestInvalid##whats_wrong, GnaConv2DNegativeTest##whats_wrong, \
|
||||
::testing::Combine( \
|
||||
conv2DParameters##sufix_params, \
|
||||
conv2DParameters##suffix_params, \
|
||||
::testing::ValuesIn(netPrecisions), \
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), \
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED), \
|
||||
::testing::Values(InferenceEngine::Layout::ANY), \
|
||||
::testing::Values(InferenceEngine::Layout::ANY), \
|
||||
::testing::ValuesIn(input2DNCHW##sufix_input), \
|
||||
::testing::ValuesIn(input2DNCHW##suffix_input), \
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA)), \
|
||||
GnaConv2DNegativeTest##whats_wrong::getTestCaseName);
|
||||
|
||||
|
@ -107,6 +107,11 @@ TEST_F(GNAPluginConfigTest, GnaConfigDeviceModeTest) {
|
||||
#else
|
||||
EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeHardware);
|
||||
EXPECT_EQ(config.swExactMode, false);
|
||||
#endif
|
||||
#if GNA_LIB_VER == 2
|
||||
SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_HW_WITH_SW_FBACK);
|
||||
EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeHardwareWithSoftwareFallback);
|
||||
EXPECT_EQ(config.swExactMode, false);
|
||||
#endif
|
||||
SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW);
|
||||
#if GNA_LIB_VER == 1
|
||||
|
Loading…
Reference in New Issue
Block a user