[GNA] MemoryStateImpl (#839)
* [GNA] fix query state for GNAMemoryState * [GNA] MemoryState implementation: Fix shared tests Fix smoke tests fix SetState Implement LastState Fix Reset * Move getPrecision() to GNAMemoryState Change Smoke Reset() test to check resetting one state * [GNA] add dequantize to getLastState() * code refactor Co-authored-by: Anna Alberska <anna.alberska@intel.com> Co-authored-by: kmagiers <kmagiers@intel.com>
This commit is contained in:
parent
cc23e6043a
commit
d9706da8d0
@ -1099,11 +1099,14 @@ Blob::Ptr GNAPlugin::GetInputBlob(const std::string& name, InferenceEngine::Prec
|
||||
}
|
||||
|
||||
std::vector<InferenceEngine::MemoryStateInternal::Ptr> GNAPlugin::QueryState() {
|
||||
if (graphCompiler.memory_connection.empty()) {
|
||||
return {};
|
||||
if (memoryStates.size() != graphCompiler.memory_connection.size()) {
|
||||
memoryStates.clear();
|
||||
for (auto& connection : graphCompiler.memory_connection) {
|
||||
auto state = std::make_shared<memory::GNAMemoryState>(connection.first, std::make_shared <GNAMemoryLayer>(connection.second));
|
||||
memoryStates.emplace_back(state);
|
||||
}
|
||||
}
|
||||
|
||||
return {std::make_shared<memory::GNAMemoryState>(shared_from_this())};
|
||||
return memoryStates;
|
||||
}
|
||||
|
||||
std::string GNAPlugin::GetName() const noexcept {
|
||||
|
@ -13,7 +13,7 @@
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
|
||||
#include <cpp_interfaces/interface/ie_imemory_state_internal.hpp>
|
||||
#include "cpp_interfaces/impl/ie_memory_state_internal.hpp"
|
||||
#include "descriptions/gna_flags.hpp"
|
||||
#include "descriptions/gna_input_desc.hpp"
|
||||
#include "descriptions/gna_output_desc.hpp"
|
||||
@ -83,6 +83,7 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
|
||||
|
||||
InferenceEngine::InputsDataMap inputsDataMap;
|
||||
InferenceEngine::OutputsDataMap outputsDataMap;
|
||||
std::vector<InferenceEngine::MemoryStateInternal::Ptr> memoryStates;
|
||||
|
||||
public:
|
||||
explicit GNAPlugin(const std::map<std::string, std::string>& configMap);
|
||||
|
@ -25,6 +25,12 @@ public:
|
||||
InferenceEngine::SizeVector getDims() const {
|
||||
return inputLayer->outData.front()->getDims();
|
||||
}
|
||||
/**
 * @brief Reset the gna memory: zero out the entire reserved region backing
 * this layer's state buffer.
 */
void Reset() {
    std::memset(gna_ptr, 0, reserved_size);
}
|
||||
|
||||
/**
|
||||
* @brief possible to store memory in different precision
|
||||
|
126
inference-engine/src/gna_plugin/memory/gna_memory_state.cpp
Normal file
126
inference-engine/src/gna_plugin/memory/gna_memory_state.cpp
Normal file
@ -0,0 +1,126 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "gna_memory_state.hpp"
|
||||
#include "frontend/quantized_layer_params.hpp"
|
||||
#include "layer_transform.hpp"
|
||||
#include "preprocessing.hpp"
|
||||
#include "ie_layouts.h"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
|
||||
namespace memory {
|
||||
|
||||
// Returns the identifier this memory state was registered under
// (the memory connection id supplied at construction).
std::string GNAMemoryState::GetName() const {
    return name;
}
|
||||
|
||||
// Resets the wrapped GNA memory layer (zeroes its state buffer).
void GNAMemoryState::Reset() {
    state->Reset();
}
|
||||
|
||||
// Determines the precision of the stored state. When the memory layer has an
// input layer attached, its declared precision wins; otherwise the precision
// is inferred from the element size of the raw state buffer.
InferenceEngine::Precision GNAMemoryState::getPrecision() const {
    const auto input = state->getInput();
    if (input) {
        return input->precision;
    }

    const auto element_size = state->elementSizeBytes();
    if (element_size == 4) {
        return InferenceEngine::Precision::FP32;
    }
    if (element_size == 2) {
        return InferenceEngine::Precision::I16;
    }

    THROW_GNA_EXCEPTION << "Incorrect state element size " << element_size <<
        " to determine precision for MemoryState " << name;
}
|
||||
|
||||
/**
 * @brief Overwrites the state buffer with the contents of @p newState.
 *
 * If the new blob has the same precision as the stored state it is copied
 * verbatim; an FP32 blob written into an I16 state is quantized with the
 * layer's scale factor. Any other precision pair throws.
 *
 * @param newState non-null blob whose size (expressed in the state's element
 *        size) must match the reserved state size.
 * @throws GNA exception on size or precision mismatch.
 */
void GNAMemoryState::SetState(InferenceEngine::Blob::Ptr newState) {
    IE_ASSERT(newState != nullptr);

    auto data_ptr = newState->cbuffer().as<void*>();
    IE_ASSERT(data_ptr != nullptr);
    auto data_size = newState->byteSize();
    auto data_elements = data_size / newState->element_size();

    // Ratio between the new blob's element size and the stored state's element
    // size. Guard against integer division by zero when the incoming elements
    // are smaller than the state's (e.g. I16 blob into an FP32 state) — the
    // original expression would have crashed here.
    auto element_size_ratio = newState->element_size() / state->elementSizeBytes();
    if (element_size_ratio == 0) {
        THROW_GNA_EXCEPTION << "Failed to SetState for MemoryState " << name
            << ". New state element size (" << newState->element_size()
            << ") is smaller than old state element size (" << state->elementSizeBytes() << ")";
    }

    // Incoming byte count expressed in the state's own precision.
    auto converted_size = data_size / element_size_ratio;
    if (ALIGN64(state->reserved_size) != ALIGN64(converted_size)) {
        // Report the size that was actually compared, not the element-size
        // ratio (the original message streamed the wrong value).
        THROW_GNA_EXCEPTION << "Failed to SetState. Sizes of new and old states do not match. ("
            << state->reserved_size << " != " << converted_size << ")";
    }

    InferenceEngine::Precision state_precision = getPrecision();
    auto new_state_precision = newState->getTensorDesc().getPrecision();

    // The caller handed us the buffer we already own — nothing to copy.
    if (state->gna_ptr == data_ptr) {
        return;
    }

    if (new_state_precision == state_precision) {
        std::memcpy(state->gna_ptr, data_ptr, data_size);
        return;
    }

    switch (state_precision) {
    case InferenceEngine::Precision::I16: {
        if (new_state_precision == InferenceEngine::Precision::FP32) {
            // Quantize the FP32 input down to I16 using the destination
            // scale factor of the attached input layer (1.0 if absent).
            auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(state->getInput());
            auto scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
            GNAPluginNS::ConvertToInt16(static_cast<int16_t*>(state->gna_ptr),
                                        newState->buffer().as<float*>(),
                                        1,
                                        data_elements,
                                        scale_factor);
        } else {
            THROW_GNA_EXCEPTION << "Failed to SetState for MemoryState " << name
                << ". If old state precision is I16 only I16 and FP32 are allowed as new state precisions."
                << " Old state: " << state_precision << " New state: " << new_state_precision;
        }
        break;
    }
    default:
        THROW_GNA_EXCEPTION << "Failed to SetState for MemoryState " << name
            << ". Incorrect new/old precision pair"
            << " Old state: " << state_precision << " New state: " << new_state_precision;
    }
}
|
||||
|
||||
/**
 * @brief Returns a copy of the current state as a freshly allocated blob.
 *
 * An I16 state with an attached input layer is dequantized to FP32 using the
 * layer's scale factor; any other state is returned as a raw copy in its own
 * precision.
 */
InferenceEngine::Blob::CPtr GNAMemoryState::GetLastState() const {
    auto elements = state->reserved_size / state->elementSizeBytes();
    InferenceEngine::Precision state_precision = getPrecision();

    if (state->getInput() && state_precision == InferenceEngine::Precision::I16) {
        // Dequantize: FP32 output = I16 value / destination scale factor.
        auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(state->getInput());
        auto scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;

        auto result_blob = make_blob_with_precision(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32,
                                                    InferenceEngine::SizeVector({ 1, elements }),
                                                    InferenceEngine::NC));

        result_blob->allocate();
        auto buffer = result_blob->buffer().as<float*>();
        auto new_gna_ptr = static_cast<int16_t*>(state->gna_ptr);

        // size_t index: 'elements' is unsigned, avoid signed/unsigned mix.
        for (size_t i = 0; i < elements; i++) {
            buffer[i] = new_gna_ptr[i] / scale_factor;
        }

        return result_blob;
    } else {
        auto result_blob = make_blob_with_precision(InferenceEngine::TensorDesc(state_precision,
                                                    InferenceEngine::SizeVector({ 1, elements }),
                                                    InferenceEngine::NC));
        result_blob->allocate();
        // BUG FIX: copy FROM the live GNA state INTO the freshly allocated
        // blob. The original had dest/src swapped, which returned an
        // uninitialized blob and overwrote the live state with garbage.
        std::memcpy(result_blob->buffer(), state->gna_ptr, state->reserved_size);

        return result_blob;
    }
}
|
||||
} // namespace memory
|
||||
} // namespace GNAPluginNS
|
@ -11,16 +11,27 @@
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace memory {
|
||||
class GNAMemoryState : public InferenceEngine::MemoryStateInternal {
|
||||
std::shared_ptr<GNAPlugin> plg;
|
||||
class GNAMemoryState : public InferenceEngine::IMemoryStateInternal {
|
||||
public:
|
||||
using Ptr = InferenceEngine::MemoryStateInternal::Ptr;
|
||||
GNAMemoryState(std::string name, std::shared_ptr<GNAMemoryLayer> state)
|
||||
: name(name), state(state) { IE_ASSERT(state != nullptr); }
|
||||
|
||||
explicit GNAMemoryState(std::shared_ptr<GNAPlugin> plg)
|
||||
: InferenceEngine::MemoryStateInternal("GNAResetState"), plg(plg) {}
|
||||
void Reset() override {
|
||||
plg->Reset();
|
||||
}
|
||||
void Reset() override;
|
||||
void SetState(InferenceEngine::Blob::Ptr newState) override;
|
||||
InferenceEngine::Blob::CPtr GetLastState() const override;
|
||||
std::string GetName() const override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<GNAMemoryLayer> state;
|
||||
std::string name;
|
||||
|
||||
/**
|
||||
* @brief Returns InferenceEngine::Precision of input of state depending of element size
|
||||
* InferenceEngine::Precision::FP32 if element size equals 4
|
||||
* InferenceEngine::Precision::I16 if element size equals 2
|
||||
* Exception otherwise
|
||||
*/
|
||||
InferenceEngine::Precision getPrecision() const;
|
||||
};
|
||||
} // namespace memory
|
||||
} // namespace GNAPluginNS
|
||||
|
@ -0,0 +1,22 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <common_test_utils/test_constants.hpp>
|
||||
#include "behavior/memory_states.hpp"
|
||||
#include "functional_test_utils/test_model/test_model.hpp"
|
||||
#include "functional_test_utils/plugin_cache.hpp"
|
||||
|
||||
// Builds the multi-memory-connection test network and reads it back through
// the Inference Engine core so the tests get a regular CNNNetwork.
InferenceEngine::CNNNetwork getNetwork() {
    const auto test_model = FuncTestUtils::TestModel::getModelWithMultipleMemoryConnections(InferenceEngine::Precision::FP32);
    auto core = PluginCache::get().ie();
    return core->ReadNetwork(test_model.model_xml_str, test_model.weights_blob);
}
|
||||
// Single GNA test case: the state ids listed here ("c_1-3", "r_1-3") must
// match the Memory layer ids used by getModelWithMultipleMemoryConnections().
std::vector<memoryStateParams> memoryStateTestCases = {
    memoryStateParams(getNetwork(), {"c_1-3", "r_1-3"}, CommonTestUtils::DEVICE_GNA)
};

INSTANTIATE_TEST_CASE_P(MemoryStateBasic, MemoryStateTest,
                        ::testing::ValuesIn(memoryStateTestCases),
                        MemoryStateTest::getTestCaseName);
|
@ -0,0 +1,28 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
|
||||
#include "common_test_utils/test_common.hpp"
|
||||
#include <ie_core.hpp>
|
||||
|
||||
typedef std::tuple<
        InferenceEngine::CNNNetwork, // CNNNetwork to work with
        std::vector<std::string>,    // Memory States to query
        std::string>                 // Target device name
        memoryStateParams;

// Parameterized fixture for exercising ExecutableNetwork::QueryState and the
// MemoryState Reset/SetState/GetLastState round trip on a target device.
class MemoryStateTest : public CommonTestUtils::TestsCommon,
                        public testing::WithParamInterface<memoryStateParams> {
protected:
    InferenceEngine::CNNNetwork net;       // network under test
    std::vector<std::string> statesToQuery; // state names expected from QueryState
    std::string deviceName;                 // target device (e.g. GNA)

    // NOTE(review): SetUp is not marked override here — presumably it still
    // shadows the base fixture hook; confirm against TestsCommon.
    void SetUp();
    // Adds the memory outputs and loads the network on deviceName.
    InferenceEngine::ExecutableNetwork PrepareNetwork();
public:
    static std::string getTestCaseName(const testing::TestParamInfo<memoryStateParams> &obj);
};
|
@ -0,0 +1,106 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
|
||||
#include <common_test_utils/common_utils.hpp>
|
||||
#include "behavior/memory_states.hpp"
|
||||
#include "functional_test_utils/plugin_cache.hpp"
|
||||
|
||||
// Derives the printable test-case name from the parameter tuple.
// Only the target device is encoded; the network and state list are ignored.
std::string MemoryStateTest::getTestCaseName(const testing::TestParamInfo<memoryStateParams> &obj) {
    InferenceEngine::CNNNetwork network;
    std::vector<std::string> stateNames;
    std::string device;
    std::tie(network, stateNames, device) = obj.param;

    std::ostringstream caseName;
    caseName << "targetDevice=" << device;
    return caseName.str();
}
|
||||
|
||||
// Unpacks the test parameters (network, expected state names, device name)
// into the fixture members.
void MemoryStateTest::SetUp() {
    std::tie(net, statesToQuery, deviceName) = GetParam();
}
|
||||
|
||||
// Marks both memory write layers as network outputs so their state is
// observable, then compiles the network for the target device.
InferenceEngine::ExecutableNetwork MemoryStateTest::PrepareNetwork() {
    net.addOutput("Memory_1");
    net.addOutput("Memory_2");
    auto ie = PluginCache::get().ie(deviceName);
    return ie->LoadNetwork(net, deviceName);
}
|
||||
|
||||
// QueryState must report exactly the two states of the test network, and
// every reported name must be one of the expected ids.
TEST_P(MemoryStateTest, smoke_MemoryState_QueryState) {
    auto network = PrepareNetwork();

    auto memory_states = network.QueryState();
    ASSERT_TRUE(memory_states.size() == 2) << "Incorrect number of MemoryStates";

    for (auto&& memory_state : memory_states) {
        const auto state_name = memory_state.GetName();
        const bool is_expected =
            std::find(statesToQuery.begin(), statesToQuery.end(), state_name) != statesToQuery.end();
        ASSERT_TRUE(is_expected)
            << "State " << state_name << "expected to be in memory states but it is not!";
    }
}
|
||||
|
||||
// Writes a constant FP32 blob into every state, then reads each state back
// and verifies the constant survived the SetState/GetLastState round trip.
TEST_P(MemoryStateTest, smoke_MemoryState_SetState) {
    auto executableNet = PrepareNetwork();
    const float new_state_val = 13.0f;

    // Phase 1: reset each state, then overwrite it with the constant.
    for (auto&& memory_state : executableNet.QueryState()) {
        memory_state.Reset();
        const auto element_count = memory_state.GetLastState()->size();

        std::vector<float> fill_values(element_count, new_state_val);
        auto fill_blob = InferenceEngine::make_shared_blob<float>(
            { InferenceEngine::Precision::FP32, {element_count}, InferenceEngine::C },
            fill_values.data(), fill_values.size());

        memory_state.SetState(fill_blob);
    }

    // Phase 2: re-query and check every element of every state.
    for (auto&& memory_state : executableNet.QueryState()) {
        auto read_back = memory_state.GetLastState();
        const auto read_back_size = read_back->size();
        const auto* read_back_data = read_back->cbuffer().as<float*>();
        ASSERT_TRUE(read_back_size != 0) << "State size should not be 0";

        for (size_t idx = 0; idx < read_back_size; idx++) {
            EXPECT_NEAR(new_state_val, read_back_data[idx], 1e-5);
        }
    }
}
|
||||
|
||||
// Fills every state with a known constant, resets ONLY the first state, and
// verifies the first reads back zeros while the others keep their value.
TEST_P(MemoryStateTest, smoke_MemoryState_Reset) {
    auto executableNet = PrepareNetwork();
    const float new_state_val = 13.0f;
    for (auto&& state : executableNet.QueryState()) {
        state.Reset();
        auto element_count = state.GetLastState()->size();

        std::vector<float> new_state_data(element_count, new_state_val);
        auto stateBlob = InferenceEngine::make_shared_blob<float>(
            { InferenceEngine::Precision::FP32, {element_count}, InferenceEngine::C },
            new_state_data.data(), new_state_data.size());

        state.SetState(stateBlob);
    }

    // Reset only the first state; the second must be untouched.
    executableNet.QueryState().front().Reset();

    auto states = executableNet.QueryState();
    for (size_t i = 0; i < states.size(); ++i) {
        auto lastState = states[i].GetLastState();
        auto last_state_size = lastState->size();
        auto last_state_data = lastState->cbuffer().as<float*>();

        ASSERT_TRUE(last_state_size != 0) << "State size should not be 0";

        if (i == 0) {
            // The reset state must be all zeros.
            for (size_t j = 0; j < last_state_size; ++j) {
                EXPECT_NEAR(0, last_state_data[j], 1e-5);
            }
        } else {
            // Compare against the named constant, not a duplicated 13.0f
            // literal (kept in sync with the value written above).
            for (size_t j = 0; j < last_state_size; ++j) {
                EXPECT_NEAR(new_state_val, last_state_data[j], 1e-5);
            }
        }
    }
}
|
@ -713,5 +713,63 @@ TestModel getModelWithMemory(InferenceEngine::Precision netPrc) {
|
||||
|
||||
return TestModel(serial, CommonTestUtils::getWeightsBlob(0));
|
||||
}
|
||||
// Builds an IR v6 test model with TWO memory connections:
//   id "r_1-3": Memory_1 (read, index 1) -> ... -> Memory_4 (write, index 0)
//   id "c_1-3": Memory_2 (read, index 1) -> ... -> Memory_3 (write, index 0)
// Topology: Memory_1 and Input_1 feed Eltwise_1 (mul); Eltwise_1 and Memory_2
// feed Eltwise_2 (mul), whose output is written back via Memory_3 and, after
// a sigmoid (Activation_1), via Memory_4. All tensors are { 1, 200 }.
TestModel getModelWithMultipleMemoryConnections(InferenceEngine::Precision netPrc) {
    CommonTestUtils::IRBuilder_v6 test_model_builder("model");

    // Read side of the "r_1-3" memory pair.
    auto Memory_1_layer =
        test_model_builder.AddLayer("Memory_1", "Memory", netPrc, { {"id", "r_1-3"},
                                                                    {"index", "1"},
                                                                    {"size", "2"} })
        .AddOutPort({ 1, 200 })
        .getLayer();
    auto Input_1_layer = test_model_builder.AddLayer("Input_1", "input", netPrc).AddOutPort({ 1, 200 }).getLayer();
    auto Eltwise_1_layer = test_model_builder.AddLayer("Eltwise_1", "Eltwise", netPrc, { {"operation", "mul"} })
        .AddInPort({ 1, 200 })
        .AddInPort({ 1, 200 })
        .AddOutPort({ 1, 200 })
        .getLayer();

    // Read side of the "c_1-3" memory pair.
    auto Memory_2_layer =
        test_model_builder.AddLayer("Memory_2", "Memory", netPrc, { {"id", "c_1-3"},
                                                                    {"index", "1"},
                                                                    {"size", "2"} })
        .AddOutPort({ 1, 200 })
        .getLayer();
    auto Eltwise_2_layer = test_model_builder.AddLayer("Eltwise_2", "Eltwise", netPrc, { {"operation", "mul"} })
        .AddInPort({ 1, 200 })
        .AddInPort({ 1, 200 })
        .AddOutPort({ 1, 200 })
        .getLayer();
    // Write side of the "c_1-3" memory pair (index 0 = assign).
    auto Memory_3_layer =
        test_model_builder.AddLayer("Memory_3", "Memory", netPrc, { {"id", "c_1-3"},
                                                                    {"index", "0"},
                                                                    {"size", "2"} })
        .AddInPort({ 1, 200 })
        .getLayer();

    auto Activation_1_layer =
        test_model_builder.AddLayer("Activation_1", "Activation", netPrc, { {"type", "sigmoid"} })
        .AddInPort({ 1, 200 })
        .AddOutPort({ 1, 200 })
        .getLayer();
    // Write side of the "r_1-3" memory pair.
    auto Memory_4_layer =
        test_model_builder.AddLayer("Memory_4", "Memory", netPrc, { {"id", "r_1-3"},
                                                                    {"index", "0"},
                                                                    {"size", "2"} })
        .AddInPort({ 1, 200 })
        .getLayer();

    // Wire the graph as described in the header comment.
    test_model_builder.AddEdge(Memory_1_layer.out(0), Eltwise_1_layer.in(0));
    test_model_builder.AddEdge(Input_1_layer.out(0), Eltwise_1_layer.in(1));
    test_model_builder.AddEdge(Eltwise_1_layer.out(0), Eltwise_2_layer.in(1));
    test_model_builder.AddEdge(Memory_2_layer.out(0), Eltwise_2_layer.in(0));
    test_model_builder.AddEdge(Eltwise_2_layer.out(0), Memory_3_layer.in(0));
    test_model_builder.AddEdge(Eltwise_2_layer.out(0), Activation_1_layer.in(0));
    test_model_builder.AddEdge(Activation_1_layer.out(0), Memory_4_layer.in(0));

    auto serial = test_model_builder.serialize();

    return TestModel(serial, CommonTestUtils::getWeightsBlob(0));
}
|
||||
} // namespace TestModel
|
||||
} // namespace FuncTestUtils
|
||||
} // namespace FuncTestUtils
|
||||
|
@ -43,8 +43,9 @@ const TestModel convReluNormPoolFcModelFP16 = getConvReluNormPoolFcModel(Inferen
|
||||
const TestModel convReluNormPoolFcModelQ78 = getConvReluNormPoolFcModel(InferenceEngine::Precision::Q78);
|
||||
|
||||
TestModel getModelWithMemory(InferenceEngine::Precision netPrc);
|
||||
TestModel getModelWithMultipleMemoryConnections(InferenceEngine::Precision netPrc);
|
||||
|
||||
const char incorrect_input_name[] = "incorrect_input_name";
|
||||
|
||||
} // namespace TestModel
|
||||
} // namespace FuncTestUtils
|
||||
} // namespace FuncTestUtils
|
||||
|
@ -210,7 +210,9 @@ void Regression::Matchers::CustomMatcher::matchCustom() {
|
||||
if (fetchResult.reset) {
|
||||
auto states = executableApi.QueryState();
|
||||
ASSERT_FALSE(states.empty());
|
||||
states.front().Reset();
|
||||
for(auto& state : states) {
|
||||
state.Reset();
|
||||
}
|
||||
// also store reset indicator for comparison routine
|
||||
auto &outputs = ctx.newOutputs();
|
||||
outputs["reset"] = nullptr;
|
||||
|
Loading…
Reference in New Issue
Block a user