[GNA] Introduce an option to invoke the QoS feature (#5827)

* [GNA] [WIP] Introduce an option to invoke the QoS feature 56759 * Apply remaining review (typos) for PR 5741 * Introduce HW_WITH_SW_FBACK * Add unit test for HW_WITH_SW_FBACK * Enable HW_WITH_SW_FBACK in speech_sample cpp * Use perf counters to report number of HW delivered frames to the user (eg speech_sample) * Update speech_sample.hpp based on review * Update gna_config.hpp Describe special perf counter * lint fix * Apply review * Remove special performance counter * Add GNA frequency for 6/151 CPU family/model * Update inference-engine/samples/speech_sample/main.cpp Co-authored-by: Mikhail Ryzhov <mikhail.ryzhov@intel.com> * Update main.cpp Co-authored-by: Mikhail Ryzhov <mikhail.ryzhov@intel.com>
2021-06-18 11:42:18 +02:00 · 2021-06-18 11:42:18 +02:00 · 1048e6f69b
commit 1048e6f69b
parent 70f9d8564e
8 changed files with 66 additions and 56 deletions
--- a/inference-engine/include/gna/gna_config.hpp
+++ b/inference-engine/include/gna/gna_config.hpp
@ -65,6 +65,7 @@ DECLARE_GNA_CONFIG_KEY(DEVICE_MODE);

 DECLARE_GNA_CONFIG_VALUE(AUTO);
 DECLARE_GNA_CONFIG_VALUE(HW);
+DECLARE_GNA_CONFIG_VALUE(HW_WITH_SW_FBACK);
 DECLARE_GNA_CONFIG_VALUE(SW);
 DECLARE_GNA_CONFIG_VALUE(SW_EXACT);
 DECLARE_GNA_CONFIG_VALUE(SW_FP32);
--- a/inference-engine/samples/speech_sample/main.cpp
+++ b/inference-engine/samples/speech_sample/main.cpp
@ -236,7 +236,8 @@ float getGnaFrequencyMHz() {
    const uint8_t cannon_lake_model = 102;
    const uint8_t gemini_lake_model = 122;
    const uint8_t ice_lake_model = 126;
-    const uint8_t next_model = 140;
+    const uint8_t tgl_model = 140;
+    const uint8_t next_model = 151;

    native_cpuid(&eax, &ebx, &ecx, &edx);
    family = (eax >> 8) & 0xF;
@ -254,6 +255,7 @@ float getGnaFrequencyMHz() {
        switch (model) {
        case cannon_lake_model:
        case ice_lake_model:
+        case tgl_model:
        case next_model:
            return 400;
        case gemini_lake_model:
@ -287,13 +289,14 @@ void printReferenceCompareResults(score_error_t const& totalError, size_t frames
 /**
 * @brief Print a report on the performance counts
 * @param utterancePerfMap reference to a map to store performance counters
- * @param callsNum frame index
+ * @param numberOfFrames number of frames
 * @param stream output stream
 * @param fullDeviceName full device name string
+ * @param numberOfFramesOnHw number of frames delivered to GNA HW
 * @return none.
 */
-void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& utterancePerfMap, size_t callsNum, std::ostream& stream,
-                              std::string fullDeviceName) {
+void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& utterancePerfMap, size_t numberOfFrames,
+                              std::ostream& stream, std::string fullDeviceName, const uint64_t numberOfFramesOnHw) {
 #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
    stream << std::endl << "Performance counts:" << std::endl;
    stream << std::setw(10) << std::right << ""
@ -305,29 +308,29 @@ void printPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEn
    stream << std::setw(46) << "(ms)";
    stream << std::setw(24) << "(us per call)";
    stream << std::endl;
-
-    for (const auto& it : utterancePerfMap) {
-        std::string const& counter_name = it.first;
-        float current_units = static_cast<float>(it.second.realTime_uSec);
-        float call_units = current_units / callsNum;
    // if GNA HW counters
    // get frequency of GNA module
    float freq = getGnaFrequencyMHz();
-        current_units /= freq * 1000;
-        call_units /= freq;
+    for (const auto& it : utterancePerfMap) {
+        std::string const& counter_name = it.first;
+        float current_units_us = static_cast<float>(it.second.realTime_uSec) / freq;
+        float call_units_us = current_units_us / numberOfFrames;
        if (FLAGS_d.find("GNA") != std::string::npos) {
            stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1);
        } else {
            stream << std::setw(30) << std::left << counter_name;
        }
-        stream << std::setw(16) << std::right << current_units;
-        stream << std::setw(21) << std::right << call_units;
+        stream << std::setw(16) << std::right << current_units_us / 1000;
+        stream << std::setw(21) << std::right << call_units_us;
        stream << std::endl;
    }
    stream << std::endl;
    std::cout << std::endl;
    std::cout << "Full device name: " << fullDeviceName << std::endl;
    std::cout << std::endl;
+    stream << "Number of frames delivered to GNA HW: " << numberOfFramesOnHw;
+    stream << "/" << numberOfFrames;
+    stream << std::endl;
 #endif
 }

@ -346,16 +349,20 @@ void getPerformanceCounters(InferenceEngine::InferRequest& request, std::map<std
 }

 /**
- * @brief Summarize performance counts
+ * @brief Summarize performance counts and total number of frames executed on the GNA HW device
 * @param perfCounters reference to a map to get performance counters
 * @param totalPerfCounters reference to a map to save total performance counters
+ * @param totalRunsOnHw reference to a total number of frames computed on GNA HW
 * @return none.
 */
 void sumPerformanceCounters(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> const& perfCounters,
-                            std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& totalPerfCounters) {
+                            std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& totalPerfCounters, uint64_t& totalRunsOnHw) {
+    auto runOnHw = false;
    for (const auto& pair : perfCounters) {
        totalPerfCounters[pair.first].realTime_uSec += pair.second.realTime_uSec;
+        runOnHw |= pair.second.realTime_uSec > 0;  // if realTime is above zero, that means that a primitive was executed on the device
    }
+    totalRunsOnHw += runOnHw;
 }

 /**
@ -443,6 +450,7 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) {
                                                 "GPU",
                                                 "GNA_AUTO",
                                                 "GNA_HW",
+                                                 "GNA_HW_WITH_SW_FBACK",
                                                 "GNA_SW_EXACT",
                                                 "GNA_SW",
                                                 "GNA_SW_FP32",
@ -829,6 +837,7 @@ int main(int argc, char* argv[]) {
            /** Work with each utterance **/
            for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) {
                std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> utterancePerfMap;
+                uint64_t totalNumberOfRunsOnHw = 0;
                std::string uttName;
                uint32_t numFrames(0), n(0);
                std::vector<uint32_t> numFrameElementsInput;
@ -984,7 +993,7 @@ int main(int argc, char* argv[]) {
                                    // retrieve new counters
                                    getPerformanceCounters(inferRequest.inferRequest, callPerfMap);
                                    // summarize retrieved counters with all previous
-                                    sumPerformanceCounters(callPerfMap, utterancePerfMap);
+                                    sumPerformanceCounters(callPerfMap, utterancePerfMap, totalNumberOfRunsOnHw);
                                }
                            }
                            // -----------------------------------------------------------------------------------------------------
@ -1092,7 +1101,7 @@ int main(int argc, char* argv[]) {
                std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast<double>(numFrames) << " ms" << std::endl;
                if (FLAGS_pc) {
                    // print performance results
-                    printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d));
+                    printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d), totalNumberOfRunsOnHw);
                }
                if (!FLAGS_r.empty()) {
                    // print statistical score error
--- a/inference-engine/samples/speech_sample/speech_sample.hpp
+++ b/inference-engine/samples/speech_sample/speech_sample.hpp
@ -21,10 +21,9 @@ static const char model_message[] = "Required. Path to an .xml file with a train

 /// @brief message for assigning cnn calculation to device
 static const char target_device_message[] = "Optional. Specify a target device to infer on. CPU, GPU, MYRIAD, GNA_AUTO, GNA_HW, "
-                                            "GNA_SW_FP32, "
+                                            "GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, "
                                            "GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU"
-                                            " as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown "
-                                            "below. "
+                                            " as a secondary (e.g. HETERO:GNA,CPU) are supported. "
                                            "The sample will look for a suitable plugin for device specified.";

 /// @brief message for execution target
--- a/inference-engine/src/gna_plugin/gna_device.cpp
+++ b/inference-engine/src/gna_plugin/gna_device.cpp
@ -96,14 +96,12 @@ void GNADeviceHelper::setUpActiveList(const uint32_t requestConfigId, uint32_t l
    const auto status = Gna2RequestConfigEnableActiveList(requestConfigId, layerIndex, num_active_indices, ptr_active_indices);
    checkGna2Status(status, "Gna2RequestConfigEnableActiveList");
 }
-void GNADeviceHelper::propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) {
-    wait(propagate(requestConfigId, gna2AccelerationMode));
-}

 uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) {
    std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
    uint32_t reqId{};
-    if (gna2AccelerationMode == Gna2AccelerationModeHardware &&
+    if ((gna2AccelerationMode == Gna2AccelerationModeHardware ||
+         gna2AccelerationMode == Gna2AccelerationModeHardwareWithSoftwareFallback) &&
        detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation) {
        gnawarn() << "GNA Device not detected, consider using other mode of acceleration";
    }
@ -541,6 +539,8 @@ void GNADeviceHelper::updateGnaPerfCounters() {
 #if GNA_LIB_VER == 2
    instrumentationTotal[0] = instrumentationResults[0];
    instrumentationTotal[1] = instrumentationResults[1];
+    instrumentationResults[0] = 0;
+    instrumentationResults[1] = 0;
 #else
    nGNAPerfResultsTotal.hw.stall = nGNAPerfResults.hw.stall;
    nGNAPerfResultsTotal.hw.total = nGNAPerfResults.hw.total;
--- a/inference-engine/src/gna_plugin/gna_device.hpp
+++ b/inference-engine/src/gna_plugin/gna_device.hpp
@ -117,18 +117,12 @@ public:
    uint8_t *alloc(uint32_t size_requested, uint32_t *size_granted);

 #if GNA_LIB_VER == 1
-    void propagateSync(const intel_nnet_type_t *pNeuralNetwork,
-                       const uint32_t *pActiveIndices,
-                       uint32_t nActiveIndices,
-                       intel_gna_proc_t nGNAProcType);
-
    uint32_t propagate(const intel_nnet_type_t *pNeuralNetwork,
                       const uint32_t *pActiveIndices,
                       uint32_t nActiveIndices,
                       intel_gna_proc_t nGNAProcType);
 #else
    void setUpActiveList(unsigned req_config_id, uint32_t layerIndex, uint32_t* ptr_active_indices, uint32_t num_active_indices);
-    void propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode);
    uint32_t propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode);
    uint32_t createModel(Gna2Model& gnaModel) const;
    void releaseModel(const uint32_t model_id);
--- a/inference-engine/src/gna_plugin/gna_plugin_config.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin_config.cpp
@ -23,6 +23,7 @@ static const caseless_unordered_map<std::string, uint32_t> supported_values = {
        {GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE}
 };
 static const  std::vector<std::string> supported_values_on_gna2 = {
+        GNAConfigParams::GNA_HW_WITH_SW_FBACK,
        GNAConfigParams::GNA_GEN,
        GNAConfigParams::GNA_GEN_EXACT,
        GNAConfigParams::GNA_SSE,
@ -36,6 +37,7 @@ static const  std::vector<std::string> supported_values_on_gna2 = {
 static const caseless_unordered_map <std::string, std::pair<Gna2AccelerationMode, bool>> supported_values = {
                {GNAConfigParams::GNA_AUTO,             {Gna2AccelerationModeAuto,                         false}},
                {GNAConfigParams::GNA_HW,               {Gna2AccelerationModeHardware,                     false}},
+                {GNAConfigParams::GNA_HW_WITH_SW_FBACK, {Gna2AccelerationModeHardwareWithSoftwareFallback, false}},
                {GNAConfigParams::GNA_SW,               {Gna2AccelerationModeSoftware,                     false}},
                {GNAConfigParams::GNA_SW_EXACT,         {Gna2AccelerationModeSoftware,                     true}},
                {GNAConfigParams::GNA_GEN,              {Gna2AccelerationModeGeneric,                      false}},
--- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution_negative.cpp
+++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution_negative.cpp
@ -24,7 +24,7 @@ const std::vector<std::vector<size_t >> kernels2D = {
    {3, 3},
 };

-const std::vector<std::vector<size_t >> InvalidKernels2D = {
+const std::vector<std::vector<size_t >> kernels2DInvalid = {
    {1, 4},
    {2, 3},
    {3, 2},
@ -50,8 +50,8 @@ const std::vector<std::vector<size_t >> dilations2D = { {1, 1},
 };
 const std::vector<std::vector<size_t >> dilations2DInvalid = { {2, 2},
 };
-const std::vector<size_t> numOutCannels2D = { 32 };
-const std::vector<size_t> numOutCannels2DInvalid = { 1, 7, 9, 400 };
+const std::vector<size_t> numOutChannels2D = { 32 };
+const std::vector<size_t> numOutChannels2DInvalid = { 1, 7, 9, 400 };

 const std::vector<std::vector<size_t>> input2DNCHWFine = { { 1, 8, 20, 16 } };

@ -68,16 +68,16 @@ const auto conv2DParametersFine = ::testing::Combine(
    ::testing::ValuesIn(padBegins2D),
    ::testing::ValuesIn(padEnds2D),
    ::testing::ValuesIn(dilations2D),
-    ::testing::ValuesIn(numOutCannels2D),
+    ::testing::ValuesIn(numOutChannels2D),
    ::testing::Values(ngraph::op::PadType::EXPLICIT)
 );
 const auto conv2DParametersInvalidKernel = ::testing::Combine(
-    ::testing::ValuesIn(InvalidKernels2D),
+    ::testing::ValuesIn(kernels2DInvalid),
    ::testing::ValuesIn(strides2D),
    ::testing::ValuesIn(padBegins2D),
    ::testing::ValuesIn(padEnds2D),
    ::testing::ValuesIn(dilations2D),
-    ::testing::ValuesIn(numOutCannels2D),
+    ::testing::ValuesIn(numOutChannels2D),
    ::testing::Values(ngraph::op::PadType::EXPLICIT)
 );
 const auto conv2DParametersInvalidFilterNumber = ::testing::Combine(
@ -86,7 +86,7 @@ const auto conv2DParametersInvalidFilterNumber = ::testing::Combine(
    ::testing::ValuesIn(padBegins2D),
    ::testing::ValuesIn(padEnds2D),
    ::testing::ValuesIn(dilations2D),
-    ::testing::ValuesIn(numOutCannels2DInvalid),
+    ::testing::ValuesIn(numOutChannels2DInvalid),
    ::testing::Values(ngraph::op::PadType::EXPLICIT)
 );
 const auto conv2DParametersInvalidPadding = ::testing::Combine(
@ -95,7 +95,7 @@ const auto conv2DParametersInvalidPadding = ::testing::Combine(
    ::testing::ValuesIn(padBegins2DInvalid),
    ::testing::ValuesIn(padEnds2DInvalid),
    ::testing::ValuesIn(dilations2D),
-    ::testing::ValuesIn(numOutCannels2D),
+    ::testing::ValuesIn(numOutChannels2D),
    ::testing::Values(ngraph::op::PadType::EXPLICIT)
 );
 const auto conv2DParametersInvalidStride = ::testing::Combine(
@ -104,7 +104,7 @@ const auto conv2DParametersInvalidStride = ::testing::Combine(
    ::testing::ValuesIn(padBegins2D),
    ::testing::ValuesIn(padEnds2D),
    ::testing::ValuesIn(dilations2D),
-    ::testing::ValuesIn(numOutCannels2D),
+    ::testing::ValuesIn(numOutChannels2D),
    ::testing::Values(ngraph::op::PadType::EXPLICIT)
 );
 const auto conv2DParametersInvalidDilation = ::testing::Combine(
@ -113,7 +113,7 @@ const auto conv2DParametersInvalidDilation = ::testing::Combine(
    ::testing::ValuesIn(padBegins2D),
    ::testing::ValuesIn(padEnds2D),
    ::testing::ValuesIn(dilations2DInvalid),
-    ::testing::ValuesIn(numOutCannels2D),
+    ::testing::ValuesIn(numOutChannels2D),
    ::testing::Values(ngraph::op::PadType::EXPLICIT)
 );

@ -142,7 +142,7 @@ protected:
    }
 };

-#define GNA_NEG_INSTANTIATE(whats_wrong, sufix_params, sufix_input, error_message)                              \
+#define GNA_NEG_INSTANTIATE(whats_wrong, suffix_params, suffix_input, error_message)                            \
 struct GnaConv2DNegativeTest##whats_wrong : GnaConv2DNegativeTest {                                             \
    std::string expectedSubstring() override {                                                                  \
        return error_message;                                                                                   \
@ -153,13 +153,13 @@ TEST_P(GnaConv2DNegativeTest##whats_wrong, ThrowAsNotSupported) {
 }                                                                                                               \
 INSTANTIATE_TEST_CASE_P(smoke_GnaConv2DNegativeTestInvalid##whats_wrong, GnaConv2DNegativeTest##whats_wrong,    \
 ::testing::Combine(                                                                                             \
-    conv2DParameters##sufix_params,                                                                             \
+    conv2DParameters##suffix_params,                                                                            \
    ::testing::ValuesIn(netPrecisions),                                                                         \
    ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),                                                 \
    ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),                                                 \
    ::testing::Values(InferenceEngine::Layout::ANY),                                                            \
    ::testing::Values(InferenceEngine::Layout::ANY),                                                            \
-    ::testing::ValuesIn(input2DNCHW##sufix_input),                                                              \
+    ::testing::ValuesIn(input2DNCHW##suffix_input),                                                             \
    ::testing::Values(CommonTestUtils::DEVICE_GNA)),                                                            \
    GnaConv2DNegativeTest##whats_wrong::getTestCaseName);

--- a/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp
+++ b/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp
@ -107,6 +107,11 @@ TEST_F(GNAPluginConfigTest, GnaConfigDeviceModeTest) {
 #else
    EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeHardware);
    EXPECT_EQ(config.swExactMode, false);
+#endif
+#if GNA_LIB_VER == 2
+    SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_HW_WITH_SW_FBACK);
+    EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeHardwareWithSoftwareFallback);
+    EXPECT_EQ(config.swExactMode, false);
 #endif
    SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW);
 #if GNA_LIB_VER == 1