From 0091d52c784dfd5f86249cfd7b5f5d8ba14787d1 Mon Sep 17 00:00:00 2001 From: Andrey Noskov Date: Thu, 17 Mar 2022 11:32:46 +0300 Subject: [PATCH] [GNA] Added SW_FP32 mode w/o SF for BasicLSTM (#10115) * [GNA] Added SW_FP32 mode w/o SF for BasicLSTM * deleted additional test; added sw_fp32 mode for existing test; changed reference output for new mode * [GNA] Fixed according to review * [GNA] Parametrized weights range * fixed after review Co-authored-by: Mikhail Ryzhov --- .../subgraph_tests/basic_lstm.cpp | 12 +++- .../include/subgraph_tests/basic_lstm.hpp | 10 +-- .../subgraph/basic_lstm.hpp | 7 +- .../src/subgraph/basic_lstm.cpp | 72 ++++--------------- 4 files changed, 30 insertions(+), 71 deletions(-) diff --git a/src/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/basic_lstm.cpp b/src/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/basic_lstm.cpp index f39ec9bf036..0916ba1a0fd 100644 --- a/src/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/basic_lstm.cpp +++ b/src/tests/functional/plugin/gna/shared_tests_instances/subgraph_tests/basic_lstm.cpp @@ -19,6 +19,10 @@ const std::vector> configs = { { {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, {"GNA_SCALE_FACTOR_0", "1638.4"} + }, + { + {"GNA_DEVICE_MODE", "GNA_SW_FP32"}, + {"GNA_SCALE_FACTOR_0", "1638.4"} } }; @@ -31,6 +35,8 @@ size_t small_num_cells = 10; size_t big_num_cells = 49; +std::pair weights_range = {0.f, 0.02f}; + const std::vector decompose = { false, true }; INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM, Basic_LSTM_S, @@ -40,7 +46,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM, Basic_LSTM_S, ::testing::ValuesIn(configs), ::testing::ValuesIn(size_params), ::testing::Values(small_num_cells), - ::testing::ValuesIn(decompose)), + ::testing::ValuesIn(decompose), + ::testing::Values(weights_range)), Basic_LSTM_S::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM_big_cells_num, Basic_LSTM_S, @@ -50,6 +57,7 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM_big_cells_num, Basic_LSTM_S, ::testing::ValuesIn(configs), ::testing::Values(size_params[0]), ::testing::Values(big_num_cells), - ::testing::ValuesIn(decompose)), + ::testing::ValuesIn(decompose), + ::testing::Values(weights_range)), Basic_LSTM_S::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/basic_lstm.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/basic_lstm.hpp index 2cc6fa495e9..4c61b565adc 100644 --- a/src/tests/functional/plugin/shared/include/subgraph_tests/basic_lstm.hpp +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/basic_lstm.hpp @@ -43,15 +43,9 @@ TEST_P(Basic_LSTM_S, CompareWithRefImpl_LowLatencyTransformation) { // Generate inputs GenerateInputs(); - - // Calculate References for the network before transformation passes - auto referenceOutputs = CalculateRefs(); - - // Apply LowLatency and UnrollTensorIterator transformations - ngraph::pass::Manager manager; - manager.register_pass(); // LowLatency enables UnrollTI - manager.run_passes(function); + functionRefs = ngraph::clone_function(*function); LoadNetwork(); + auto referenceOutputs = CalculateRefs(); auto states = inferRequest.QueryState(); for (auto& state : states) { auto name = state.GetName(); diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/basic_lstm.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/basic_lstm.hpp index ddfcfd4a99e..5272e588d9c 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/basic_lstm.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/basic_lstm.hpp @@ -21,7 +21,8 @@ typedef std::tuple< std::map, // Configuration std::pair, // Third dimenstion and hidden size size_t, // Number of Cells - bool // Decompose LSTMCell + bool, // Decompose LSTMCell + std::pair // Input and weights range > 
basicLstmParams; class Basic_LSTM_S : public testing::WithParamInterface, @@ -33,6 +34,7 @@ public: static std::shared_ptr GetNetwork(size_t thirdDimOut, size_t hiddenSize, size_t num_cells = 10, + std::pair weights_range = {0.f, 10.f}, const InferenceEngine::Precision& netPrecission = InferenceEngine::Precision::FP32, std::vector* hidden_memory_init_out = nullptr, std::vector* cell_memory_init_out = nullptr); @@ -43,10 +45,11 @@ protected: size_t hidden_size; size_t third_dim; + std::pair weights_range; std::vector hidden_memory_init; std::vector cell_memory_init; void SetUp() override; - std::vector>> CalculateRefs() override; + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override; }; } // namespace SubgraphTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/src/subgraph/basic_lstm.cpp b/src/tests/functional/shared_test_classes/src/subgraph/basic_lstm.cpp index 4c6a9391830..4f454b65cdb 100644 --- a/src/tests/functional/shared_test_classes/src/subgraph/basic_lstm.cpp +++ b/src/tests/functional/shared_test_classes/src/subgraph/basic_lstm.cpp @@ -17,7 +17,8 @@ std::string Basic_LSTM_S::getTestCaseName(const testing::TestParamInfo size_params; size_t num_cells; bool decompose; - std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose) = obj.param; + std::pair weights; + std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose, weights) = obj.param; std::ostringstream result; result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_"; @@ -39,12 +40,12 @@ void Basic_LSTM_S::SetUp() { std::pair size_params; size_t num_cells; bool decompose; - std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose) = this->GetParam(); + std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose, weights_range) = this->GetParam(); third_dim = size_params.first; hidden_size = size_params.second; outPrc = 
InferenceEngine::Precision::FP32; - function = GetNetwork(size_params.first, size_params.second, num_cells, netPrecision, &hidden_memory_init, &cell_memory_init); + function = GetNetwork(size_params.first, size_params.second, num_cells, weights_range, netPrecision, &hidden_memory_init, &cell_memory_init); if (decompose) { ngraph::pass::Manager manager; manager.register_pass(); @@ -55,6 +56,7 @@ void Basic_LSTM_S::SetUp() { std::shared_ptr Basic_LSTM_S::GetNetwork(size_t thirdDimOut, size_t hiddenSize, size_t num_cells, + std::pair weights_range, const InferenceEngine::Precision& netPrecission, std::vector* hidden_memory_init_out, std::vector* cell_memory_init_out) { @@ -70,8 +72,8 @@ std::shared_ptr Basic_LSTM_S::GetNetwork(size_t thirdDimOut, auto reshape1 = std::make_shared(params[0], pattern1, false); auto reshape1_shape = reshape1->output(0).get_shape(); - auto H_init = ngraph::builder::makeConstant(ngPrc, { batch_size, hiddenSize }, {}, true); - auto C_init = ngraph::builder::makeConstant(ngPrc, { batch_size, hiddenSize }, {}, true); + auto H_init = ngraph::builder::makeConstant(ngPrc, { batch_size, hiddenSize }, {}, true, weights_range.second, weights_range.first); + auto C_init = ngraph::builder::makeConstant(ngPrc, { batch_size, hiddenSize }, {}, true, weights_range.second, weights_range.first); if (hidden_memory_init_out != nullptr) { *hidden_memory_init_out = std::static_pointer_cast(H_init)->cast_vector(); } @@ -84,8 +86,9 @@ std::shared_ptr Basic_LSTM_S::GetNetwork(size_t thirdDimOut, C_t->set_friendly_name("cell_state_1"); //Body auto X = std::make_shared(ngPrc, ngraph::Shape{ batch_size, 1, reshape1_shape[2] }); - auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, reshape1_shape[2] }, {}, true); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, {}, true); + auto weightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, reshape1_shape[2] }, {}, true, 
weights_range.second, weights_range.first); + auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngPrc, { 4 * hiddenSize, hiddenSize }, {}, true, weights_range.second, + weights_range.first); //lstm [1, 10], [1, 118], [1, 118] -> [1, 118], [1, 118] outFormShapes1 = { batch_size, reshape1_shape[2] }; @@ -112,7 +115,7 @@ std::shared_ptr Basic_LSTM_S::GetNetwork(size_t thirdDimOut, auto out0 = tensor_iterator->get_iter_value(H_o, -1); const size_t output_size = 12; - auto fc1 = ngraph::builder::makeFullyConnected(out0, ngPrc, output_size, true, { hiddenSize, output_size }, { 1 }, { 1 }); + auto fc1 = ngraph::builder::makeFullyConnected(out0, ngPrc, output_size, true, { hiddenSize, output_size }, { weights_range.second }, { 0.f }); ngraph::ResultVector results{ std::make_shared(fc1) }; return std::make_shared(results, params, "Basic_LSTM_S"); @@ -146,60 +149,11 @@ void Basic_LSTM_S::Run() { const auto& actualOutputs = GetOutputs(); auto referenceOutputs = CalculateRefs(); - Compare(referenceOutputs, actualOutputs); } -std::vector>> Basic_LSTM_S::CalculateRefs() { - //For now TensorIterator is not implemented in ngraph interpreter so it is needed to validate with another reference - auto reference_model = ngraph::clone_function(*function); - ngraph::pass::Manager manager; - manager.register_pass(); - manager.run_passes(reference_model); - - auto refCnnNetwork = InferenceEngine::CNNNetwork{ reference_model }; - auto refExecutableNetwork = core->LoadNetwork(refCnnNetwork, targetDevice); - - auto refInferRequest = refExecutableNetwork.CreateInferRequest(); - std::vector refInfos; - for (const auto& input : refCnnNetwork.getInputsInfo()) { - const auto& info = input.second; - refInfos.push_back(info); - } - - for (std::size_t i = 0; i < inputs.size(); ++i) { - const auto& input = inputs[i]; - const auto& info = refInfos[i]; - - refInferRequest.SetBlob(info->name(), input); - } - - refInferRequest.Infer(); - - auto refOutputs = std::vector{}; - for (const auto& 
output : refCnnNetwork.getOutputsInfo()) { - const auto& name = output.first; - refOutputs.push_back(refInferRequest.GetBlob(name)); - } - - auto referenceOutputs = std::vector>>(refOutputs.size()); - for (std::size_t i = 0; i < refOutputs.size(); ++i) { - const auto& reference = refOutputs[i]; - const auto refSize = reference->byteSize(); - - referenceOutputs[i].first = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(reference->getTensorDesc().getPrecision()); - auto& expectedOutput = referenceOutputs[i].second; - expectedOutput.resize(refSize); - - auto refMemory = InferenceEngine::as(reference); - IE_ASSERT(refMemory); - const auto refLockedMemory = refMemory->wmap(); - const auto referenceBuffer = refLockedMemory.as(); - - std::copy(referenceBuffer, referenceBuffer + refSize, expectedOutput.data()); - } - - return referenceOutputs; +InferenceEngine::Blob::Ptr Basic_LSTM_S::GenerateInput(const InferenceEngine::InputInfo& info) const { + return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), weights_range.second, weights_range.first, 1); } } // namespace SubgraphTestsDefinitions