diff --git a/inference-engine/include/gna/gna_config.hpp b/inference-engine/include/gna/gna_config.hpp index 958227696a1..3433ab58887 100644 --- a/inference-engine/include/gna/gna_config.hpp +++ b/inference-engine/include/gna/gna_config.hpp @@ -65,6 +65,7 @@ DECLARE_GNA_CONFIG_KEY(DEVICE_MODE); DECLARE_GNA_CONFIG_VALUE(AUTO); DECLARE_GNA_CONFIG_VALUE(HW); +DECLARE_GNA_CONFIG_VALUE(HW_WITH_SW_FBACK); DECLARE_GNA_CONFIG_VALUE(SW); DECLARE_GNA_CONFIG_VALUE(SW_EXACT); DECLARE_GNA_CONFIG_VALUE(SW_FP32); diff --git a/inference-engine/samples/speech_sample/main.cpp b/inference-engine/samples/speech_sample/main.cpp index 2b9131774ad..57db61a8e9e 100644 --- a/inference-engine/samples/speech_sample/main.cpp +++ b/inference-engine/samples/speech_sample/main.cpp @@ -236,7 +236,8 @@ float getGnaFrequencyMHz() { const uint8_t cannon_lake_model = 102; const uint8_t gemini_lake_model = 122; const uint8_t ice_lake_model = 126; - const uint8_t next_model = 140; + const uint8_t tgl_model = 140; + const uint8_t next_model = 151; native_cpuid(&eax, &ebx, &ecx, &edx); family = (eax >> 8) & 0xF; @@ -254,6 +255,7 @@ float getGnaFrequencyMHz() { switch (model) { case cannon_lake_model: case ice_lake_model: + case tgl_model: case next_model: return 400; case gemini_lake_model: @@ -287,13 +289,14 @@ void printReferenceCompareResults(score_error_t const& totalError, size_t frames /** * @brief Print a report on the performance counts * @param utterancePerfMap reference to a map to store performance counters - * @param callsNum frame index + * @param numberOfFrames number of frames * @param stream output stream * @param fullDeviceName full device name string + * @param numberOfFramesOnHw number of frames delivered to GNA HW * @return none. */ -void printPerformanceCounters(std::map const& utterancePerfMap, size_t callsNum, std::ostream& stream, - std::string fullDeviceName) { +void printPerformanceCounters(std::map const& utterancePerfMap, size_t numberOfFrames, + std::ostream& stream, std::string fullDeviceName, const uint64_t numberOfFramesOnHw) { #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) stream << std::endl << "Performance counts:" << std::endl; stream << std::setw(10) << std::right << "" @@ -305,29 +308,29 @@ void printPerformanceCounters(std::map(it.second.realTime_uSec); - float call_units = current_units / callsNum; - // if GNA HW counters - // get frequency of GNA module - float freq = getGnaFrequencyMHz(); - current_units /= freq * 1000; - call_units /= freq; + float current_units_us = static_cast(it.second.realTime_uSec) / freq; + float call_units_us = current_units_us / numberOfFrames; if (FLAGS_d.find("GNA") != std::string::npos) { stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1); } else { stream << std::setw(30) << std::left << counter_name; } - stream << std::setw(16) << std::right << current_units; - stream << std::setw(21) << std::right << call_units; + stream << std::setw(16) << std::right << current_units_us / 1000; + stream << std::setw(21) << std::right << call_units_us; stream << std::endl; } stream << std::endl; std::cout << std::endl; std::cout << "Full device name: " << fullDeviceName << std::endl; std::cout << std::endl; + stream << "Number of frames delivered to GNA HW: " << numberOfFramesOnHw; + stream << "/" << numberOfFrames; + stream << std::endl; #endif } @@ -346,16 +349,20 @@ void getPerformanceCounters(InferenceEngine::InferRequest& request, std::map const& perfCounters, - std::map& totalPerfCounters) { + std::map& totalPerfCounters, uint64_t& totalRunsOnHw) { + auto runOnHw = false; for (const auto& pair : perfCounters) { totalPerfCounters[pair.first].realTime_uSec += pair.second.realTime_uSec; + runOnHw |= pair.second.realTime_uSec > 0; // if realTime is above zero, that means that a primitive was executed on the device } + totalRunsOnHw += runOnHw; } /** @@ -443,6 +450,7 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) { "GPU", "GNA_AUTO", "GNA_HW", + "GNA_HW_WITH_SW_FBACK", "GNA_SW_EXACT", "GNA_SW", "GNA_SW_FP32", @@ -829,6 +837,7 @@ int main(int argc, char* argv[]) { /** Work with each utterance **/ for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) { std::map utterancePerfMap; + uint64_t totalNumberOfRunsOnHw = 0; std::string uttName; uint32_t numFrames(0), n(0); std::vector numFrameElementsInput; @@ -984,7 +993,7 @@ int main(int argc, char* argv[]) { // retrieve new counters getPerformanceCounters(inferRequest.inferRequest, callPerfMap); // summarize retrieved counters with all previous - sumPerformanceCounters(callPerfMap, utterancePerfMap); + sumPerformanceCounters(callPerfMap, utterancePerfMap, totalNumberOfRunsOnHw); } } // ----------------------------------------------------------------------------------------------------- @@ -1092,7 +1101,7 @@ int main(int argc, char* argv[]) { std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast(numFrames) << " ms" << std::endl; if (FLAGS_pc) { // print performance results - printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d)); + printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d), totalNumberOfRunsOnHw); } if (!FLAGS_r.empty()) { // print statistical score error diff --git a/inference-engine/samples/speech_sample/speech_sample.hpp b/inference-engine/samples/speech_sample/speech_sample.hpp index cafe4db5c61..66d3b24a4c5 100644 --- a/inference-engine/samples/speech_sample/speech_sample.hpp +++ b/inference-engine/samples/speech_sample/speech_sample.hpp @@ -21,10 +21,9 @@ static const char model_message[] = "Required. Path to an .xml file with a train /// @brief message for assigning cnn calculation to device static const char target_device_message[] = "Optional. Specify a target device to infer on. CPU, GPU, MYRIAD, GNA_AUTO, GNA_HW, " - "GNA_SW_FP32, " + "GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, " "GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU" - " as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown " - "below. " + " as a secondary (e.g. HETERO:GNA,CPU) are supported. " "The sample will look for a suitable plugin for device specified."; /// @brief message for execution target diff --git a/inference-engine/src/gna_plugin/gna_device.cpp b/inference-engine/src/gna_plugin/gna_device.cpp index cbfc47f57aa..85a246ea34f 100644 --- a/inference-engine/src/gna_plugin/gna_device.cpp +++ b/inference-engine/src/gna_plugin/gna_device.cpp @@ -96,14 +96,12 @@ void GNADeviceHelper::setUpActiveList(const uint32_t requestConfigId, uint32_t l const auto status = Gna2RequestConfigEnableActiveList(requestConfigId, layerIndex, num_active_indices, ptr_active_indices); checkGna2Status(status, "Gna2RequestConfigEnableActiveList"); } -void GNADeviceHelper::propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) { - wait(propagate(requestConfigId, gna2AccelerationMode)); -} uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode) { std::unique_lock lockGnaCalls{ acrossPluginsSync }; uint32_t reqId{}; - if (gna2AccelerationMode == Gna2AccelerationModeHardware && + if ((gna2AccelerationMode == Gna2AccelerationModeHardware || + gna2AccelerationMode == Gna2AccelerationModeHardwareWithSoftwareFallback) && detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation) { gnawarn() << "GNA Device not detected, consider using other mode of acceleration"; } @@ -541,6 +539,8 @@ void GNADeviceHelper::updateGnaPerfCounters() { #if GNA_LIB_VER == 2 instrumentationTotal[0] = instrumentationResults[0]; instrumentationTotal[1] = instrumentationResults[1]; + instrumentationResults[0] = 0; + instrumentationResults[1] = 0; #else nGNAPerfResultsTotal.hw.stall = nGNAPerfResults.hw.stall; nGNAPerfResultsTotal.hw.total = nGNAPerfResults.hw.total; diff --git a/inference-engine/src/gna_plugin/gna_device.hpp b/inference-engine/src/gna_plugin/gna_device.hpp index e032e5532da..cae32c70b1d 100644 --- a/inference-engine/src/gna_plugin/gna_device.hpp +++ b/inference-engine/src/gna_plugin/gna_device.hpp @@ -117,18 +117,12 @@ public: uint8_t *alloc(uint32_t size_requested, uint32_t *size_granted); #if GNA_LIB_VER == 1 - void propagateSync(const intel_nnet_type_t *pNeuralNetwork, - const uint32_t *pActiveIndices, - uint32_t nActiveIndices, - intel_gna_proc_t nGNAProcType); - uint32_t propagate(const intel_nnet_type_t *pNeuralNetwork, const uint32_t *pActiveIndices, uint32_t nActiveIndices, intel_gna_proc_t nGNAProcType); #else void setUpActiveList(unsigned req_config_id, uint32_t layerIndex, uint32_t* ptr_active_indices, uint32_t num_active_indices); - void propagateSync(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode); uint32_t propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode); uint32_t createModel(Gna2Model& gnaModel) const; void releaseModel(const uint32_t model_id); diff --git a/inference-engine/src/gna_plugin/gna_plugin_config.cpp b/inference-engine/src/gna_plugin/gna_plugin_config.cpp index 2dcb05d6ab8..766e7d2d52c 100644 --- a/inference-engine/src/gna_plugin/gna_plugin_config.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin_config.cpp @@ -23,6 +23,7 @@ static const caseless_unordered_map supported_values = { {GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE} }; static const std::vector supported_values_on_gna2 = { + GNAConfigParams::GNA_HW_WITH_SW_FBACK, GNAConfigParams::GNA_GEN, GNAConfigParams::GNA_GEN_EXACT, GNAConfigParams::GNA_SSE, @@ -34,18 +35,19 @@ static const std::vector supported_values_on_gna2 = { }; #else static const caseless_unordered_map > supported_values = { - {GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, false}}, - {GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, false}}, - {GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, false}}, - {GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, true}}, - {GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, false}}, - {GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, true}}, - {GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, false}}, - {GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, true}}, - {GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, false}}, - {GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, true}}, - {GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, false}}, - {GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, true}}, + {GNAConfigParams::GNA_AUTO, {Gna2AccelerationModeAuto, false}}, + {GNAConfigParams::GNA_HW, {Gna2AccelerationModeHardware, false}}, + {GNAConfigParams::GNA_HW_WITH_SW_FBACK, {Gna2AccelerationModeHardwareWithSoftwareFallback, false}}, + {GNAConfigParams::GNA_SW, {Gna2AccelerationModeSoftware, false}}, + {GNAConfigParams::GNA_SW_EXACT, {Gna2AccelerationModeSoftware, true}}, + {GNAConfigParams::GNA_GEN, {Gna2AccelerationModeGeneric, false}}, + {GNAConfigParams::GNA_GEN_EXACT, {Gna2AccelerationModeGeneric, true}}, + {GNAConfigParams::GNA_SSE, {Gna2AccelerationModeSse4x2, false}}, + {GNAConfigParams::GNA_SSE_EXACT, {Gna2AccelerationModeSse4x2, true}}, + {GNAConfigParams::GNA_AVX1, {Gna2AccelerationModeAvx1, false}}, + {GNAConfigParams::GNA_AVX1_EXACT, {Gna2AccelerationModeAvx1, true}}, + {GNAConfigParams::GNA_AVX2, {Gna2AccelerationModeAvx2, false}}, + {GNAConfigParams::GNA_AVX2_EXACT, {Gna2AccelerationModeAvx2, true}}, }; #endif diff --git a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution_negative.cpp b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution_negative.cpp index e25236aafab..aa4975e602f 100644 --- a/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution_negative.cpp +++ b/inference-engine/tests/functional/plugin/gna/shared_tests_instances/single_layer_tests/convolution_negative.cpp @@ -24,7 +24,7 @@ const std::vector> kernels2D = { {3, 3}, }; -const std::vector> InvalidKernels2D = { +const std::vector> kernels2DInvalid = { {1, 4}, {2, 3}, {3, 2}, @@ -50,8 +50,8 @@ const std::vector> dilations2D = { {1, 1}, }; const std::vector> dilations2DInvalid = { {2, 2}, }; -const std::vector numOutCannels2D = { 32 }; -const std::vector numOutCannels2DInvalid = { 1, 7, 9, 400 }; +const std::vector numOutChannels2D = { 32 }; +const std::vector numOutChannels2DInvalid = { 1, 7, 9, 400 }; const std::vector> input2DNCHWFine = { { 1, 8, 20, 16 } }; @@ -68,16 +68,16 @@ const auto conv2DParametersFine = ::testing::Combine( ::testing::ValuesIn(padBegins2D), ::testing::ValuesIn(padEnds2D), ::testing::ValuesIn(dilations2D), - ::testing::ValuesIn(numOutCannels2D), + ::testing::ValuesIn(numOutChannels2D), ::testing::Values(ngraph::op::PadType::EXPLICIT) ); const auto conv2DParametersInvalidKernel = ::testing::Combine( - ::testing::ValuesIn(InvalidKernels2D), + ::testing::ValuesIn(kernels2DInvalid), ::testing::ValuesIn(strides2D), ::testing::ValuesIn(padBegins2D), ::testing::ValuesIn(padEnds2D), ::testing::ValuesIn(dilations2D), - ::testing::ValuesIn(numOutCannels2D), + ::testing::ValuesIn(numOutChannels2D), ::testing::Values(ngraph::op::PadType::EXPLICIT) ); const auto conv2DParametersInvalidFilterNumber = ::testing::Combine( @@ -86,7 +86,7 @@ const auto conv2DParametersInvalidFilterNumber = ::testing::Combine( ::testing::ValuesIn(padBegins2D), ::testing::ValuesIn(padEnds2D), ::testing::ValuesIn(dilations2D), - ::testing::ValuesIn(numOutCannels2DInvalid), + ::testing::ValuesIn(numOutChannels2DInvalid), ::testing::Values(ngraph::op::PadType::EXPLICIT) ); const auto conv2DParametersInvalidPadding = ::testing::Combine( @@ -95,7 +95,7 @@ const auto conv2DParametersInvalidPadding = ::testing::Combine( ::testing::ValuesIn(padBegins2DInvalid), ::testing::ValuesIn(padEnds2DInvalid), ::testing::ValuesIn(dilations2D), - ::testing::ValuesIn(numOutCannels2D), + ::testing::ValuesIn(numOutChannels2D), ::testing::Values(ngraph::op::PadType::EXPLICIT) ); const auto conv2DParametersInvalidStride = ::testing::Combine( @@ -104,7 +104,7 @@ const auto conv2DParametersInvalidStride = ::testing::Combine( ::testing::ValuesIn(padBegins2D), ::testing::ValuesIn(padEnds2D), ::testing::ValuesIn(dilations2D), - ::testing::ValuesIn(numOutCannels2D), + ::testing::ValuesIn(numOutChannels2D), ::testing::Values(ngraph::op::PadType::EXPLICIT) ); const auto conv2DParametersInvalidDilation = ::testing::Combine( @@ -113,7 +113,7 @@ const auto conv2DParametersInvalidDilation = ::testing::Combine( ::testing::ValuesIn(padBegins2D), ::testing::ValuesIn(padEnds2D), ::testing::ValuesIn(dilations2DInvalid), - ::testing::ValuesIn(numOutCannels2D), + ::testing::ValuesIn(numOutChannels2D), ::testing::Values(ngraph::op::PadType::EXPLICIT) ); @@ -142,7 +142,7 @@ protected: } }; -#define GNA_NEG_INSTANTIATE(whats_wrong, sufix_params, sufix_input, error_message) \ +#define GNA_NEG_INSTANTIATE(whats_wrong, suffix_params, suffix_input, error_message) \ struct GnaConv2DNegativeTest##whats_wrong : GnaConv2DNegativeTest { \ std::string expectedSubstring() override { \ return error_message; \ @@ -153,13 +153,13 @@ TEST_P(GnaConv2DNegativeTest##whats_wrong, ThrowAsNotSupported) { } \ INSTANTIATE_TEST_CASE_P(smoke_GnaConv2DNegativeTestInvalid##whats_wrong, GnaConv2DNegativeTest##whats_wrong, \ ::testing::Combine( \ - conv2DParameters##sufix_params, \ + conv2DParameters##suffix_params, \ ::testing::ValuesIn(netPrecisions), \ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), \ ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), \ ::testing::Values(InferenceEngine::Layout::ANY), \ ::testing::Values(InferenceEngine::Layout::ANY), \ - ::testing::ValuesIn(input2DNCHW##sufix_input), \ + ::testing::ValuesIn(input2DNCHW##suffix_input), \ ::testing::Values(CommonTestUtils::DEVICE_GNA)), \ GnaConv2DNegativeTest##whats_wrong::getTestCaseName); diff --git a/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp b/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp index bdfd50ba037..7fa12a42825 100644 --- a/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp +++ b/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp @@ -107,6 +107,11 @@ TEST_F(GNAPluginConfigTest, GnaConfigDeviceModeTest) { #else EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeHardware); EXPECT_EQ(config.swExactMode, false); +#endif +#if GNA_LIB_VER == 2 + SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_HW_WITH_SW_FBACK); + EXPECT_EQ(config.pluginGna2AccMode, Gna2AccelerationModeHardwareWithSoftwareFallback); + EXPECT_EQ(config.swExactMode, false); #endif SetAndCompare(GNA_CONFIG_KEY(DEVICE_MODE), GNAConfigParams::GNA_SW); #if GNA_LIB_VER == 1