[GNA] Added SW_FP32 mode w/o SF for BasicLSTM (#10115)

* [GNA] Added SW_FP32 mode w/o SF for BasicLSTM

* deleted additional test
 added sw_fp32 mode for existing test
 changed reference output for new mode

* [GNA] Fixed according to review

* [GNA] Parametrized weights range

* fixed after review

Co-authored-by: Mikhail Ryzhov <mikhail.ryzhov@intel.com>
This commit is contained in:
Andrey Noskov 2022-03-17 11:32:46 +03:00 committed by GitHub
parent bfa0e3e1a4
commit 0091d52c78
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 30 additions and 71 deletions

View File

@ -19,6 +19,10 @@ const std::vector<std::map<std::string, std::string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"GNA_SCALE_FACTOR_0", "1638.4"}
},
{
{"GNA_DEVICE_MODE", "GNA_SW_FP32"},
{"GNA_SCALE_FACTOR_0", "1638.4"}
}
};
@ -31,6 +35,8 @@ size_t small_num_cells = 10;
size_t big_num_cells = 49;
std::pair<float, float> weights_range = {0.f, 0.02f};
const std::vector<bool> decompose = { false, true };
INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM, Basic_LSTM_S,
@ -40,7 +46,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM, Basic_LSTM_S,
::testing::ValuesIn(configs),
::testing::ValuesIn(size_params),
::testing::Values(small_num_cells),
::testing::ValuesIn(decompose)),
::testing::ValuesIn(decompose),
::testing::Values(weights_range)),
Basic_LSTM_S::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM_big_cells_num, Basic_LSTM_S,
@ -50,6 +57,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_BasicLSTM_big_cells_num, Basic_LSTM_S,
::testing::ValuesIn(configs),
::testing::Values(size_params[0]),
::testing::Values(big_num_cells),
::testing::ValuesIn(decompose)),
::testing::ValuesIn(decompose),
::testing::Values(weights_range)),
Basic_LSTM_S::getTestCaseName);
} // namespace

View File

@ -43,15 +43,9 @@ TEST_P(Basic_LSTM_S, CompareWithRefImpl_LowLatencyTransformation) {
// Generate inputs
GenerateInputs();
// Calculate References for the network before transformation passes
auto referenceOutputs = CalculateRefs();
// Apply LowLatency and UnrollTensorIterator transformations
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::LowLatency2>(); // LowLatency enables UnrollTI
manager.run_passes(function);
functionRefs = ngraph::clone_function(*function);
LoadNetwork();
auto referenceOutputs = CalculateRefs();
auto states = inferRequest.QueryState();
for (auto& state : states) {
auto name = state.GetName();

View File

@ -21,7 +21,8 @@ typedef std::tuple<
std::map<std::string, std::string>, // Configuration
std::pair<size_t, size_t>, // Third dimension and hidden size
size_t, // Number of Cells
bool // Decompose LSTMCell
bool, // Decompose LSTMCell
std::pair<float, float> // Input and weights range
> basicLstmParams;
class Basic_LSTM_S : public testing::WithParamInterface<basicLstmParams>,
@ -33,6 +34,7 @@ public:
static std::shared_ptr<ngraph::Function> GetNetwork(size_t thirdDimOut,
size_t hiddenSize,
size_t num_cells = 10,
std::pair<float, float> weights_range = {0.f, 10.f},
const InferenceEngine::Precision& netPrecission = InferenceEngine::Precision::FP32,
std::vector<float>* hidden_memory_init_out = nullptr,
std::vector<float>* cell_memory_init_out = nullptr);
@ -43,10 +45,11 @@ protected:
size_t hidden_size;
size_t third_dim;
std::pair<float, float> weights_range;
std::vector<float> hidden_memory_init;
std::vector<float> cell_memory_init;
void SetUp() override;
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> CalculateRefs() override;
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override;
};
} // namespace SubgraphTestsDefinitions

View File

@ -17,7 +17,8 @@ std::string Basic_LSTM_S::getTestCaseName(const testing::TestParamInfo<basicLstm
std::pair<size_t, size_t> size_params;
size_t num_cells;
bool decompose;
std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose) = obj.param;
std::pair<float, float> weights;
std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose, weights) = obj.param;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
@ -39,12 +40,12 @@ void Basic_LSTM_S::SetUp() {
std::pair<size_t, size_t> size_params;
size_t num_cells;
bool decompose;
std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose) = this->GetParam();
std::tie(netPrecision, targetDevice, configuration, size_params, num_cells, decompose, weights_range) = this->GetParam();
third_dim = size_params.first;
hidden_size = size_params.second;
outPrc = InferenceEngine::Precision::FP32;
function = GetNetwork(size_params.first, size_params.second, num_cells, netPrecision, &hidden_memory_init, &cell_memory_init);
function = GetNetwork(size_params.first, size_params.second, num_cells, weights_range, netPrecision, &hidden_memory_init, &cell_memory_init);
if (decompose) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
@ -55,6 +56,7 @@ void Basic_LSTM_S::SetUp() {
std::shared_ptr<ngraph::Function> Basic_LSTM_S::GetNetwork(size_t thirdDimOut,
size_t hiddenSize,
size_t num_cells,
std::pair<float, float> weights_range,
const InferenceEngine::Precision& netPrecission,
std::vector<float>* hidden_memory_init_out,
std::vector<float>* cell_memory_init_out) {
@ -70,8 +72,8 @@ std::shared_ptr<ngraph::Function> Basic_LSTM_S::GetNetwork(size_t thirdDimOut,
auto reshape1 = std::make_shared<ngraph::opset1::Reshape>(params[0], pattern1, false);
auto reshape1_shape = reshape1->output(0).get_shape();
auto H_init = ngraph::builder::makeConstant<float>(ngPrc, { batch_size, hiddenSize }, {}, true);
auto C_init = ngraph::builder::makeConstant<float>(ngPrc, { batch_size, hiddenSize }, {}, true);
auto H_init = ngraph::builder::makeConstant<float>(ngPrc, { batch_size, hiddenSize }, {}, true, weights_range.second, weights_range.first);
auto C_init = ngraph::builder::makeConstant<float>(ngPrc, { batch_size, hiddenSize }, {}, true, weights_range.second, weights_range.first);
if (hidden_memory_init_out != nullptr) {
*hidden_memory_init_out = std::static_pointer_cast<ngraph::opset1::Constant>(H_init)->cast_vector<float>();
}
@ -84,8 +86,9 @@ std::shared_ptr<ngraph::Function> Basic_LSTM_S::GetNetwork(size_t thirdDimOut,
C_t->set_friendly_name("cell_state_1");
//Body
auto X = std::make_shared<ngraph::opset1::Parameter>(ngPrc, ngraph::Shape{ batch_size, 1, reshape1_shape[2] });
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, reshape1_shape[2] }, {}, true);
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, {}, true);
auto weightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, reshape1_shape[2] }, {}, true, weights_range.second, weights_range.first);
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, { 4 * hiddenSize, hiddenSize }, {}, true, weights_range.second,
weights_range.first);
//lstm [1, 10], [1, 118], [1, 118] -> [1, 118], [1, 118]
outFormShapes1 = { batch_size, reshape1_shape[2] };
@ -112,7 +115,7 @@ std::shared_ptr<ngraph::Function> Basic_LSTM_S::GetNetwork(size_t thirdDimOut,
auto out0 = tensor_iterator->get_iter_value(H_o, -1);
const size_t output_size = 12;
auto fc1 = ngraph::builder::makeFullyConnected(out0, ngPrc, output_size, true, { hiddenSize, output_size }, { 1 }, { 1 });
auto fc1 = ngraph::builder::makeFullyConnected(out0, ngPrc, output_size, true, { hiddenSize, output_size }, { weights_range.second }, { 0.f });
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(fc1) };
return std::make_shared<ngraph::Function>(results, params, "Basic_LSTM_S");
@ -146,60 +149,11 @@ void Basic_LSTM_S::Run() {
const auto& actualOutputs = GetOutputs();
auto referenceOutputs = CalculateRefs();
Compare(referenceOutputs, actualOutputs);
}
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> Basic_LSTM_S::CalculateRefs() {
//For now TensorIterator is not implemented in ngraph interpreter so it is needed to validate with another reference
auto reference_model = ngraph::clone_function(*function);
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::UnrollTensorIterator>();
manager.run_passes(reference_model);
auto refCnnNetwork = InferenceEngine::CNNNetwork{ reference_model };
auto refExecutableNetwork = core->LoadNetwork(refCnnNetwork, targetDevice);
auto refInferRequest = refExecutableNetwork.CreateInferRequest();
std::vector<InferenceEngine::InputInfo::Ptr> refInfos;
for (const auto& input : refCnnNetwork.getInputsInfo()) {
const auto& info = input.second;
refInfos.push_back(info);
}
for (std::size_t i = 0; i < inputs.size(); ++i) {
const auto& input = inputs[i];
const auto& info = refInfos[i];
refInferRequest.SetBlob(info->name(), input);
}
refInferRequest.Infer();
auto refOutputs = std::vector<InferenceEngine::Blob::Ptr>{};
for (const auto& output : refCnnNetwork.getOutputsInfo()) {
const auto& name = output.first;
refOutputs.push_back(refInferRequest.GetBlob(name));
}
auto referenceOutputs = std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>>(refOutputs.size());
for (std::size_t i = 0; i < refOutputs.size(); ++i) {
const auto& reference = refOutputs[i];
const auto refSize = reference->byteSize();
referenceOutputs[i].first = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(reference->getTensorDesc().getPrecision());
auto& expectedOutput = referenceOutputs[i].second;
expectedOutput.resize(refSize);
auto refMemory = InferenceEngine::as<InferenceEngine::MemoryBlob>(reference);
IE_ASSERT(refMemory);
const auto refLockedMemory = refMemory->wmap();
const auto referenceBuffer = refLockedMemory.as<const std::uint8_t*>();
std::copy(referenceBuffer, referenceBuffer + refSize, expectedOutput.data());
}
return referenceOutputs;
// Fills the network input blob with random values bounded by the test's
// parametrized weights_range. Arguments passed as (range = weights_range.second,
// start_from = weights_range.first, resolution = 1) — presumably matching the
// (upTo, startFrom) order used for makeConstant in GetNetwork; TODO confirm
// against the FuncTestUtils::createAndFillBlob signature.
InferenceEngine::Blob::Ptr Basic_LSTM_S::GenerateInput(const InferenceEngine::InputInfo& info) const {
return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), weights_range.second, weights_range.first, 1);
}
} // namespace SubgraphTestsDefinitions