[GNA] MemoryStateImpl (#839)
* [GNA] fix query state for GNAMemoryState * [GNA] MemoryState implementation: Fix shared tests Fix smoke tests fix SetState Implement LastState Fix Reset * Move getPrecision() to GNAMemoryState Change Smoke Reset() test to check resetting one state * [GNA] add dequantize to getLastState() * code refactor Co-authored-by: Anna Alberska <anna.alberska@intel.com> Co-authored-by: kmagiers <kmagiers@intel.com>
This commit is contained in:
parent
cc23e6043a
commit
d9706da8d0
@ -1099,11 +1099,14 @@ Blob::Ptr GNAPlugin::GetInputBlob(const std::string& name, InferenceEngine::Prec
|
||||
}
|
||||
|
||||
std::vector<InferenceEngine::MemoryStateInternal::Ptr> GNAPlugin::QueryState() {
|
||||
if (graphCompiler.memory_connection.empty()) {
|
||||
return {};
|
||||
if (memoryStates.size() != graphCompiler.memory_connection.size()) {
|
||||
memoryStates.clear();
|
||||
for (auto& connection : graphCompiler.memory_connection) {
|
||||
auto state = std::make_shared<memory::GNAMemoryState>(connection.first, std::make_shared <GNAMemoryLayer>(connection.second));
|
||||
memoryStates.emplace_back(state);
|
||||
}
|
||||
}
|
||||
|
||||
return {std::make_shared<memory::GNAMemoryState>(shared_from_this())};
|
||||
return memoryStates;
|
||||
}
|
||||
|
||||
std::string GNAPlugin::GetName() const noexcept {
|
||||
|
@ -13,7 +13,7 @@
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
#include <cpp_interfaces/interface/ie_iplugin_internal.hpp>
|
||||
#include <cpp_interfaces/interface/ie_imemory_state_internal.hpp>
|
||||
#include "cpp_interfaces/impl/ie_memory_state_internal.hpp"
|
||||
#include "descriptions/gna_flags.hpp"
|
||||
#include "descriptions/gna_input_desc.hpp"
|
||||
#include "descriptions/gna_output_desc.hpp"
|
||||
@ -83,6 +83,7 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
|
||||
|
||||
InferenceEngine::InputsDataMap inputsDataMap;
|
||||
InferenceEngine::OutputsDataMap outputsDataMap;
|
||||
std::vector<InferenceEngine::MemoryStateInternal::Ptr> memoryStates;
|
||||
|
||||
public:
|
||||
explicit GNAPlugin(const std::map<std::string, std::string>& configMap);
|
||||
|
@ -25,6 +25,12 @@ public:
|
||||
InferenceEngine::SizeVector getDims() const {
|
||||
return inputLayer->outData.front()->getDims();
|
||||
}
|
||||
/**
 * @brief Reset the gna memory: zero out the entire reserved region backing
 * this layer's state buffer.
 */
void Reset() {
    std::memset(gna_ptr, 0, reserved_size);
}
|
||||
|
||||
/**
|
||||
* @brief possible to store memory in different precision
|
||||
|
126
inference-engine/src/gna_plugin/memory/gna_memory_state.cpp
Normal file
126
inference-engine/src/gna_plugin/memory/gna_memory_state.cpp
Normal file
@ -0,0 +1,126 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "gna_memory_state.hpp"
|
||||
#include "frontend/quantized_layer_params.hpp"
|
||||
#include "layer_transform.hpp"
|
||||
#include "preprocessing.hpp"
|
||||
#include "ie_layouts.h"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
|
||||
namespace memory {
|
||||
|
||||
// Returns the identifier this memory state was registered under
// (the memory connection id supplied at construction).
std::string GNAMemoryState::GetName() const {
    return name;
}
|
||||
|
||||
// Resets the wrapped GNA memory layer (zeroes its state buffer).
void GNAMemoryState::Reset() {
    state->Reset();
}
|
||||
|
||||
// Determines the precision of the stored state. When the memory layer has an
// input layer attached, its declared precision wins; otherwise the precision
// is inferred from the element size of the raw state buffer.
InferenceEngine::Precision GNAMemoryState::getPrecision() const {
    const auto input = state->getInput();
    if (input) {
        return input->precision;
    }

    const auto element_size = state->elementSizeBytes();
    if (element_size == 4) {
        return InferenceEngine::Precision::FP32;
    }
    if (element_size == 2) {
        return InferenceEngine::Precision::I16;
    }

    THROW_GNA_EXCEPTION << "Incorrect state element size " << element_size <<
        " to determine precision for MemoryState " << name;
}
|
||||
|
||||
/**
 * @brief Overwrites the state buffer with the contents of @p newState.
 *
 * If the new blob has the same precision as the stored state it is copied
 * verbatim; an FP32 blob written into an I16 state is quantized with the
 * layer's scale factor. Any other precision pair throws.
 *
 * @param newState non-null blob whose size (expressed in the state's element
 *        size) must match the reserved state size.
 * @throws GNA exception on size or precision mismatch.
 */
void GNAMemoryState::SetState(InferenceEngine::Blob::Ptr newState) {
    IE_ASSERT(newState != nullptr);

    auto data_ptr = newState->cbuffer().as<void*>();
    IE_ASSERT(data_ptr != nullptr);
    auto data_size = newState->byteSize();
    auto data_elements = data_size / newState->element_size();

    // Ratio between the new blob's element size and the stored state's element
    // size. Guard against integer division by zero when the incoming elements
    // are smaller than the state's (e.g. I16 blob into an FP32 state) — the
    // original expression would have crashed here.
    auto element_size_ratio = newState->element_size() / state->elementSizeBytes();
    if (element_size_ratio == 0) {
        THROW_GNA_EXCEPTION << "Failed to SetState for MemoryState " << name
            << ". New state element size (" << newState->element_size()
            << ") is smaller than old state element size (" << state->elementSizeBytes() << ")";
    }

    // Incoming byte count expressed in the state's own precision.
    auto converted_size = data_size / element_size_ratio;
    if (ALIGN64(state->reserved_size) != ALIGN64(converted_size)) {
        // Report the size that was actually compared, not the element-size
        // ratio (the original message streamed the wrong value).
        THROW_GNA_EXCEPTION << "Failed to SetState. Sizes of new and old states do not match. ("
            << state->reserved_size << " != " << converted_size << ")";
    }

    InferenceEngine::Precision state_precision = getPrecision();
    auto new_state_precision = newState->getTensorDesc().getPrecision();

    // The caller handed us the buffer we already own — nothing to copy.
    if (state->gna_ptr == data_ptr) {
        return;
    }

    if (new_state_precision == state_precision) {
        std::memcpy(state->gna_ptr, data_ptr, data_size);
        return;
    }

    switch (state_precision) {
    case InferenceEngine::Precision::I16: {
        if (new_state_precision == InferenceEngine::Precision::FP32) {
            // Quantize the FP32 input down to I16 using the destination
            // scale factor of the attached input layer (1.0 if absent).
            auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(state->getInput());
            auto scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
            GNAPluginNS::ConvertToInt16(static_cast<int16_t*>(state->gna_ptr),
                                        newState->buffer().as<float*>(),
                                        1,
                                        data_elements,
                                        scale_factor);
        } else {
            THROW_GNA_EXCEPTION << "Failed to SetState for MemoryState " << name
                << ". If old state precision is I16 only I16 and FP32 are allowed as new state precisions."
                << " Old state: " << state_precision << " New state: " << new_state_precision;
        }
        break;
    }
    default:
        THROW_GNA_EXCEPTION << "Failed to SetState for MemoryState " << name
            << ". Incorrect new/old precision pair"
            << " Old state: " << state_precision << " New state: " << new_state_precision;
    }
}
|
||||
|
||||
/**
 * @brief Returns a copy of the current state as a freshly allocated blob.
 *
 * An I16 state with an attached input layer is dequantized to FP32 using the
 * layer's scale factor; any other state is returned as a raw copy in its own
 * precision.
 */
InferenceEngine::Blob::CPtr GNAMemoryState::GetLastState() const {
    auto elements = state->reserved_size / state->elementSizeBytes();
    InferenceEngine::Precision state_precision = getPrecision();

    if (state->getInput() && state_precision == InferenceEngine::Precision::I16) {
        // Dequantize: FP32 output = I16 value / destination scale factor.
        auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(state->getInput());
        auto scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;

        auto result_blob = make_blob_with_precision(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32,
                                                    InferenceEngine::SizeVector({ 1, elements }),
                                                    InferenceEngine::NC));

        result_blob->allocate();
        auto buffer = result_blob->buffer().as<float*>();
        auto new_gna_ptr = static_cast<int16_t*>(state->gna_ptr);

        // size_t index: 'elements' is unsigned, avoid signed/unsigned mix.
        for (size_t i = 0; i < elements; i++) {
            buffer[i] = new_gna_ptr[i] / scale_factor;
        }

        return result_blob;
    } else {
        auto result_blob = make_blob_with_precision(InferenceEngine::TensorDesc(state_precision,
                                                    InferenceEngine::SizeVector({ 1, elements }),
                                                    InferenceEngine::NC));
        result_blob->allocate();
        // BUG FIX: copy FROM the live GNA state INTO the freshly allocated
        // blob. The original had dest/src swapped, which returned an
        // uninitialized blob and overwrote the live state with garbage.
        std::memcpy(result_blob->buffer(), state->gna_ptr, state->reserved_size);

        return result_blob;
    }
}
|
||||
} // namespace memory
|
||||
} // namespace GNAPluginNS
|
@ -11,16 +11,27 @@
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace memory {
|
||||
class GNAMemoryState : public InferenceEngine::MemoryStateInternal {
|
||||
std::shared_ptr<GNAPlugin> plg;
|
||||
class GNAMemoryState : public InferenceEngine::IMemoryStateInternal {
|
||||
public:
|
||||
using Ptr = InferenceEngine::MemoryStateInternal::Ptr;
|
||||
GNAMemoryState(std::string name, std::shared_ptr<GNAMemoryLayer> state)
|
||||
: name(name), state(state) { IE_ASSERT(state != nullptr); }
|
||||
|
||||
explicit GNAMemoryState(std::shared_ptr<GNAPlugin> plg)
|
||||
: InferenceEngine::MemoryStateInternal("GNAResetState"), plg(plg) {}
|
||||
void Reset() override {
|
||||
plg->Reset();
|
||||
}
|
||||
void Reset() override;
|
||||
void SetState(InferenceEngine::Blob::Ptr newState) override;
|
||||
InferenceEngine::Blob::CPtr GetLastState() const override;
|
||||
std::string GetName() const override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<GNAMemoryLayer> state;
|
||||
std::string name;
|
||||
|
||||
/**
|
||||
* @brief Returns InferenceEngine::Precision of input of state depending of element size
|
||||
* InferenceEngine::Precision::FP32 if element size equals 4
|
||||
* InferenceEngine::Precision::I16 if element size equals 2
|
||||
* Exception otherwise
|
||||
*/
|
||||
InferenceEngine::Precision getPrecision() const;
|
||||
};
|
||||
} // namespace memory
|
||||
} // namespace GNAPluginNS
|
||||
|
@ -0,0 +1,22 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <common_test_utils/test_constants.hpp>
|
||||
#include "behavior/memory_states.hpp"
|
||||
#include "functional_test_utils/test_model/test_model.hpp"
|
||||
#include "functional_test_utils/plugin_cache.hpp"
|
||||
|
||||
// Builds the multi-memory-connection test network and reads it back through
// the Inference Engine core so the tests get a regular CNNNetwork.
InferenceEngine::CNNNetwork getNetwork() {
    const auto test_model = FuncTestUtils::TestModel::getModelWithMultipleMemoryConnections(InferenceEngine::Precision::FP32);
    auto core = PluginCache::get().ie();
    return core->ReadNetwork(test_model.model_xml_str, test_model.weights_blob);
}
|
||||
// Single GNA test case: the state ids listed here ("c_1-3", "r_1-3") must
// match the Memory layer ids used by getModelWithMultipleMemoryConnections().
std::vector<memoryStateParams> memoryStateTestCases = {
    memoryStateParams(getNetwork(), {"c_1-3", "r_1-3"}, CommonTestUtils::DEVICE_GNA)
};

INSTANTIATE_TEST_CASE_P(MemoryStateBasic, MemoryStateTest,
                        ::testing::ValuesIn(memoryStateTestCases),
                        MemoryStateTest::getTestCaseName);
|
@ -0,0 +1,28 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
|
||||
#include "common_test_utils/test_common.hpp"
|
||||
#include <ie_core.hpp>
|
||||
|
||||
typedef std::tuple<
        InferenceEngine::CNNNetwork, // CNNNetwork to work with
        std::vector<std::string>,    // Memory States to query
        std::string>                 // Target device name
        memoryStateParams;

// Parameterized fixture for exercising ExecutableNetwork::QueryState and the
// MemoryState Reset/SetState/GetLastState round trip on a target device.
class MemoryStateTest : public CommonTestUtils::TestsCommon,
                        public testing::WithParamInterface<memoryStateParams> {
protected:
    InferenceEngine::CNNNetwork net;       // network under test
    std::vector<std::string> statesToQuery; // state names expected from QueryState
    std::string deviceName;                 // target device (e.g. GNA)

    // NOTE(review): SetUp is not marked override here — presumably it still
    // shadows the base fixture hook; confirm against TestsCommon.
    void SetUp();
    // Adds the memory outputs and loads the network on deviceName.
    InferenceEngine::ExecutableNetwork PrepareNetwork();
public:
    static std::string getTestCaseName(const testing::TestParamInfo<memoryStateParams> &obj);
};
|
@ -0,0 +1,106 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
|
||||
#include <common_test_utils/common_utils.hpp>
|
||||
#include "behavior/memory_states.hpp"
|
||||
#include "functional_test_utils/plugin_cache.hpp"
|
||||
|
||||
// Derives the printable test-case name from the parameter tuple.
// Only the target device is encoded; the network and state list are ignored.
std::string MemoryStateTest::getTestCaseName(const testing::TestParamInfo<memoryStateParams> &obj) {
    InferenceEngine::CNNNetwork network;
    std::vector<std::string> stateNames;
    std::string device;
    std::tie(network, stateNames, device) = obj.param;

    std::ostringstream caseName;
    caseName << "targetDevice=" << device;
    return caseName.str();
}
|
||||
|
||||
// Unpacks the test parameters (network, expected state names, device name)
// into the fixture members.
void MemoryStateTest::SetUp() {
    std::tie(net, statesToQuery, deviceName) = GetParam();
}
|
||||
|
||||
// Marks both memory write layers as network outputs so their state is
// observable, then compiles the network for the target device.
InferenceEngine::ExecutableNetwork MemoryStateTest::PrepareNetwork() {
    net.addOutput("Memory_1");
    net.addOutput("Memory_2");
    auto ie = PluginCache::get().ie(deviceName);
    return ie->LoadNetwork(net, deviceName);
}
|
||||
|
||||
// QueryState must report exactly the two states of the test network, and
// every reported name must be one of the expected ids.
TEST_P(MemoryStateTest, smoke_MemoryState_QueryState) {
    auto network = PrepareNetwork();

    auto memory_states = network.QueryState();
    ASSERT_TRUE(memory_states.size() == 2) << "Incorrect number of MemoryStates";

    for (auto&& memory_state : memory_states) {
        const auto state_name = memory_state.GetName();
        const bool is_expected =
            std::find(statesToQuery.begin(), statesToQuery.end(), state_name) != statesToQuery.end();
        ASSERT_TRUE(is_expected)
            << "State " << state_name << "expected to be in memory states but it is not!";
    }
}
|
||||
|
||||
// Writes a constant FP32 blob into every state, then reads each state back
// and verifies the constant survived the SetState/GetLastState round trip.
TEST_P(MemoryStateTest, smoke_MemoryState_SetState) {
    auto executableNet = PrepareNetwork();
    const float new_state_val = 13.0f;

    // Phase 1: reset each state, then overwrite it with the constant.
    for (auto&& memory_state : executableNet.QueryState()) {
        memory_state.Reset();
        const auto element_count = memory_state.GetLastState()->size();

        std::vector<float> fill_values(element_count, new_state_val);
        auto fill_blob = InferenceEngine::make_shared_blob<float>(
            { InferenceEngine::Precision::FP32, {element_count}, InferenceEngine::C },
            fill_values.data(), fill_values.size());

        memory_state.SetState(fill_blob);
    }

    // Phase 2: re-query and check every element of every state.
    for (auto&& memory_state : executableNet.QueryState()) {
        auto read_back = memory_state.GetLastState();
        const auto read_back_size = read_back->size();
        const auto* read_back_data = read_back->cbuffer().as<float*>();
        ASSERT_TRUE(read_back_size != 0) << "State size should not be 0";

        for (size_t idx = 0; idx < read_back_size; idx++) {
            EXPECT_NEAR(new_state_val, read_back_data[idx], 1e-5);
        }
    }
}
|
||||
|
||||
// Fills every state with a known constant, resets ONLY the first state, and
// verifies the first reads back zeros while the others keep their value.
TEST_P(MemoryStateTest, smoke_MemoryState_Reset) {
    auto executableNet = PrepareNetwork();
    const float new_state_val = 13.0f;
    for (auto&& state : executableNet.QueryState()) {
        state.Reset();
        auto element_count = state.GetLastState()->size();

        std::vector<float> new_state_data(element_count, new_state_val);
        auto stateBlob = InferenceEngine::make_shared_blob<float>(
            { InferenceEngine::Precision::FP32, {element_count}, InferenceEngine::C },
            new_state_data.data(), new_state_data.size());

        state.SetState(stateBlob);
    }

    // Reset only the first state; the second must be untouched.
    executableNet.QueryState().front().Reset();

    auto states = executableNet.QueryState();
    for (size_t i = 0; i < states.size(); ++i) {
        auto lastState = states[i].GetLastState();
        auto last_state_size = lastState->size();
        auto last_state_data = lastState->cbuffer().as<float*>();

        ASSERT_TRUE(last_state_size != 0) << "State size should not be 0";

        if (i == 0) {
            // The reset state must be all zeros.
            for (size_t j = 0; j < last_state_size; ++j) {
                EXPECT_NEAR(0, last_state_data[j], 1e-5);
            }
        } else {
            // Compare against the named constant, not a duplicated 13.0f
            // literal (kept in sync with the value written above).
            for (size_t j = 0; j < last_state_size; ++j) {
                EXPECT_NEAR(new_state_val, last_state_data[j], 1e-5);
            }
        }
    }
}
|
@ -713,5 +713,63 @@ TestModel getModelWithMemory(InferenceEngine::Precision netPrc) {
|
||||
|
||||
return TestModel(serial, CommonTestUtils::getWeightsBlob(0));
|
||||
}
|
||||
// Builds an IR v6 test model with TWO memory connections:
//   id "r_1-3": Memory_1 (read, index 1) -> ... -> Memory_4 (write, index 0)
//   id "c_1-3": Memory_2 (read, index 1) -> ... -> Memory_3 (write, index 0)
// Topology: Memory_1 and Input_1 feed Eltwise_1 (mul); Eltwise_1 and Memory_2
// feed Eltwise_2 (mul), whose output is written back via Memory_3 and, after
// a sigmoid (Activation_1), via Memory_4. All tensors are { 1, 200 }.
TestModel getModelWithMultipleMemoryConnections(InferenceEngine::Precision netPrc) {
    CommonTestUtils::IRBuilder_v6 test_model_builder("model");

    // Read side of the "r_1-3" memory pair.
    auto Memory_1_layer =
        test_model_builder.AddLayer("Memory_1", "Memory", netPrc, { {"id", "r_1-3"},
                                                                    {"index", "1"},
                                                                    {"size", "2"} })
        .AddOutPort({ 1, 200 })
        .getLayer();
    auto Input_1_layer = test_model_builder.AddLayer("Input_1", "input", netPrc).AddOutPort({ 1, 200 }).getLayer();
    auto Eltwise_1_layer = test_model_builder.AddLayer("Eltwise_1", "Eltwise", netPrc, { {"operation", "mul"} })
        .AddInPort({ 1, 200 })
        .AddInPort({ 1, 200 })
        .AddOutPort({ 1, 200 })
        .getLayer();

    // Read side of the "c_1-3" memory pair.
    auto Memory_2_layer =
        test_model_builder.AddLayer("Memory_2", "Memory", netPrc, { {"id", "c_1-3"},
                                                                    {"index", "1"},
                                                                    {"size", "2"} })
        .AddOutPort({ 1, 200 })
        .getLayer();
    auto Eltwise_2_layer = test_model_builder.AddLayer("Eltwise_2", "Eltwise", netPrc, { {"operation", "mul"} })
        .AddInPort({ 1, 200 })
        .AddInPort({ 1, 200 })
        .AddOutPort({ 1, 200 })
        .getLayer();
    // Write side of the "c_1-3" memory pair (index 0 = assign).
    auto Memory_3_layer =
        test_model_builder.AddLayer("Memory_3", "Memory", netPrc, { {"id", "c_1-3"},
                                                                    {"index", "0"},
                                                                    {"size", "2"} })
        .AddInPort({ 1, 200 })
        .getLayer();

    auto Activation_1_layer =
        test_model_builder.AddLayer("Activation_1", "Activation", netPrc, { {"type", "sigmoid"} })
        .AddInPort({ 1, 200 })
        .AddOutPort({ 1, 200 })
        .getLayer();
    // Write side of the "r_1-3" memory pair.
    auto Memory_4_layer =
        test_model_builder.AddLayer("Memory_4", "Memory", netPrc, { {"id", "r_1-3"},
                                                                    {"index", "0"},
                                                                    {"size", "2"} })
        .AddInPort({ 1, 200 })
        .getLayer();

    // Wire the graph as described in the header comment.
    test_model_builder.AddEdge(Memory_1_layer.out(0), Eltwise_1_layer.in(0));
    test_model_builder.AddEdge(Input_1_layer.out(0), Eltwise_1_layer.in(1));
    test_model_builder.AddEdge(Eltwise_1_layer.out(0), Eltwise_2_layer.in(1));
    test_model_builder.AddEdge(Memory_2_layer.out(0), Eltwise_2_layer.in(0));
    test_model_builder.AddEdge(Eltwise_2_layer.out(0), Memory_3_layer.in(0));
    test_model_builder.AddEdge(Eltwise_2_layer.out(0), Activation_1_layer.in(0));
    test_model_builder.AddEdge(Activation_1_layer.out(0), Memory_4_layer.in(0));

    auto serial = test_model_builder.serialize();

    return TestModel(serial, CommonTestUtils::getWeightsBlob(0));
}
|
||||
} // namespace TestModel
|
||||
} // namespace FuncTestUtils
|
||||
} // namespace FuncTestUtils
|
||||
|
@ -43,8 +43,9 @@ const TestModel convReluNormPoolFcModelFP16 = getConvReluNormPoolFcModel(Inferen
|
||||
const TestModel convReluNormPoolFcModelQ78 = getConvReluNormPoolFcModel(InferenceEngine::Precision::Q78);
|
||||
|
||||
TestModel getModelWithMemory(InferenceEngine::Precision netPrc);
|
||||
TestModel getModelWithMultipleMemoryConnections(InferenceEngine::Precision netPrc);
|
||||
|
||||
const char incorrect_input_name[] = "incorrect_input_name";
|
||||
|
||||
} // namespace TestModel
|
||||
} // namespace FuncTestUtils
|
||||
} // namespace FuncTestUtils
|
||||
|
@ -210,7 +210,9 @@ void Regression::Matchers::CustomMatcher::matchCustom() {
|
||||
if (fetchResult.reset) {
|
||||
auto states = executableApi.QueryState();
|
||||
ASSERT_FALSE(states.empty());
|
||||
states.front().Reset();
|
||||
for(auto& state : states) {
|
||||
state.Reset();
|
||||
}
|
||||
// also store reset indicator for comparison routine
|
||||
auto &outputs = ctx.newOutputs();
|
||||
outputs["reset"] = nullptr;
|
||||
|
Loading…
Reference in New Issue
Block a user