[GPU] Assign-6 and ReadValue-6 (#11780)

* Add methods for access to variables information in Program class

* add ReadValue and Assign primitives

* ReadValue and Assign implementations

* Implementation of memory states allocation

* Add output existence check in primitive_inst to avoid crashes if output is set during execution

* Add memory states management functionality in network component

* Integration of memory states feature in inference request component

* Exclude constant path for read_value and assign nodes in cldnn transformations

* Improve memory states test to run on a single inference request

* unit tests for ReadValue and Assign

* single-layer test for ReadValue and Assign

* Add QueryState API implementation

* Add memory state test which covers dynamic batch case

Co-authored-by: Oleksii Khovan <okhovan@lohika.com>
This commit is contained in:
Yaroslav Torzuk
2022-07-12 04:45:53 +02:00
committed by GitHub
parent 60e31ad8c3
commit a250634b75
31 changed files with 1103 additions and 6 deletions

View File

@@ -54,7 +54,7 @@ protected:
/**
* @brief A default dtor
*/
~IVariableStateInternal() = default;
virtual ~IVariableStateInternal() = default;
std::string name;
Blob::Ptr state;

View File

@@ -52,6 +52,18 @@ class primitive_inst;
struct network {
public:
using ptr = std::shared_ptr<network>;
/// @brief Runtime storage for one stateful-network variable (ReadValue/Assign pair).
struct VariableState {
    using Ptr = std::shared_ptr<VariableState>;

    // Backing device memory holding the variable's value.
    cldnn::memory_ptr memory;
    // True once the variable has been written (by Assign or externally);
    // ReadValue produces zeros while this is false.
    bool is_set;

    /// @brief Wraps @p mem as an initially-unset state.
    /// NOTE(review): `mem` is taken by value and copied into the member;
    /// consider std::move — confirm <utility> availability in this header.
    VariableState(cldnn::memory_ptr mem = nullptr) :
        memory { mem }, is_set { false } {
    }
};
using variables_states_map = std::map<std::string, VariableState::Ptr>;
explicit network(program::ptr program, stream::ptr stream, bool is_internal = false, bool is_primary_stream = true);
network(engine& engine,
const topology& topo,
@@ -194,6 +206,12 @@ public:
return *_memory_pool;
}
/// Assigns memory state locations
void assign_variables_memories(variables_states_map &&variables_memories);
/// Returns memory state @p variable_id of stateful network
VariableState& get_variable_memory(const std::string &variable_id);
private:
using output_chains_map = std::map<primitive_id, std::vector<std::shared_ptr<primitive_inst>>>;
uint32_t net_id = 0;
@@ -209,6 +227,8 @@ private:
std::vector<std::shared_ptr<primitive_inst>> _outputs;
std::list<std::shared_ptr<primitive_inst>> _exec_order;
std::list<std::shared_ptr<primitive_inst>> _data_outputs;
variables_states_map _variables_states;
std::vector<std::shared_ptr<primitive_inst>> _variable_state_primitives;
std::unordered_map<primitive_id, event::ptr> _events;
output_chains_map _output_chains;

View File

@@ -87,6 +87,27 @@ inline cldnn::data_types DataTypeFromPrecision(ngraph::element::Type t) {
}
}
/// @brief Maps a cldnn data type to the matching InferenceEngine precision.
/// @param dt cldnn element data type
/// @return Corresponding IE precision
/// @throws ParameterMismatch for data types with no IE counterpart
inline InferenceEngine::Precision PrecisionFromDataType(cldnn::data_types dt) {
    switch (dt) {
    case cldnn::data_types::bin:
        return InferenceEngine::Precision::ePrecision::BIN;
    case cldnn::data_types::u8:
        return InferenceEngine::Precision::ePrecision::U8;
    case cldnn::data_types::i8:
        return InferenceEngine::Precision::ePrecision::I8;
    case cldnn::data_types::f16:
        return InferenceEngine::Precision::ePrecision::FP16;
    case cldnn::data_types::f32:
        return InferenceEngine::Precision::ePrecision::FP32;
    case cldnn::data_types::i32:
        return InferenceEngine::Precision::ePrecision::I32;
    case cldnn::data_types::i64:
        return InferenceEngine::Precision::ePrecision::I64;
    default:
        // Unsupported types are reported explicitly rather than mapped to UNSPECIFIED.
        IE_THROW(ParameterMismatch) << "The plugin does not support " << cldnn::data_type_traits::name(dt) << " data type";
    }
}
inline cldnn::format FormatFromLayout(InferenceEngine::Layout l) {
switch (l) {
// TODO: change 6d case once new layout added in IE

View File

@@ -14,6 +14,7 @@
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "ie_blob.h"
#include "cpp/ie_cnn_network.h"
@@ -38,6 +39,7 @@ public:
POSTPROC = 4
};
typedef std::shared_ptr<Graph> Ptr;
using variable_states_map = std::map<std::string, std::vector<cldnn::network::VariableState::Ptr>>;
Graph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
explicit Graph(std::shared_ptr<Graph> graph, uint16_t stream_id = 0);
@@ -55,6 +57,7 @@ public:
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
const InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_program->GetNetworkInputs(); }
const InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_program->GetNetworkOutputs(); }
variable_states_map AllocateVariablesMemories();
std::map<std::string, std::pair<int64_t, int64_t>> GetInputDynBatchDims() { return m_program->m_input_batch_dim; }
std::map<std::string, int64_t> GetOutputDynBatchDims() { return m_program->m_output_batch_dim; }
size_t GetNetworksCount() const { return m_networks.size(); }

View File

@@ -48,6 +48,7 @@ public:
void SetBlobs(const std::string& name, const std::vector<InferenceEngine::Blob::Ptr> &data) override;
void SetBatch(int batch = -1) override;
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> QueryState() override;
void SetGraph(std::shared_ptr<Graph> graph);
void EnableProfiling() { m_useProfiling = true; }
void EnableStreams() { m_useStreams = true; }
@@ -106,6 +107,7 @@ private:
std::map<cldnn::primitive_id, cldnn::network_output> internal_outputs;
std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> internal_outputs_dynamic;
Graph::variable_states_map variables_states_;
};
} // namespace intel_gpu

View File

@@ -209,6 +209,8 @@ REGISTER_FACTORY(v6, ExperimentalDetectronROIFeatureExtractor);
REGISTER_FACTORY(v6, ExperimentalDetectronTopKROIs)
REGISTER_FACTORY(v6, ExperimentalDetectronGenerateProposalsSingleImage);
REGISTER_FACTORY(v6, ExperimentalDetectronDetectionOutput);
REGISTER_FACTORY(v6, Assign);
REGISTER_FACTORY(v6, ReadValue);
// ------------------------------ Supported v7 ops ------------------------------ //
REGISTER_FACTORY(v7, DFT);

View File

@@ -10,6 +10,7 @@
#include <string>
#include <cstdint>
#include <mutex>
#include <set>
#include <cpp/ie_cnn_network.h>
#include <ngraph/ngraph.hpp>
@@ -150,6 +151,12 @@ public:
std::shared_ptr<cldnn::topology> GetTopology() const { return m_topology; }
using variables_state_info_map = std::map<std::string, std::set<cldnn::layout>>;
void AddVariableStateInfo(const std::string& variable_id, const cldnn::layout& layout);
const variables_state_info_map& GetVariablesStatesInfo() const { return m_variablesStateInfo; }
private:
static factories_map_t factories_map;
std::vector<std::shared_ptr<cldnn::program>> m_programs;
@@ -159,6 +166,7 @@ private:
std::shared_ptr<cldnn::topology> m_topology;
InferenceEngine::InputsDataMap m_networkInputs;
InferenceEngine::OutputsDataMap m_networkOutputs;
variables_state_info_map m_variablesStateInfo;
bool queryMode;

View File

@@ -0,0 +1,50 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cpp_interfaces/interface/ie_ivariable_state_internal.hpp>
#include "intel_gpu/plugin/graph.hpp"
#include <functional>
namespace ov {
namespace runtime {
namespace intel_gpu {
/// @brief IE-facing adapter exposing GPU-plugin memory states through the
///        IVariableStateInternal API. One instance may aggregate several
///        per-batch-chunk cldnn states (dynamic batch support).
class VariableState : public InferenceEngine::IVariableStateInternal {
public:
    /// @param name          Variable id as seen by the IE API
    /// @param states        Per-batch-chunk cldnn states backing this variable
    /// @param engine        Engine used for memory copies via its program stream
    /// @param currentBatch  Current dynamic batch, or -1 when batching is static
    VariableState(const std::string& name, const std::vector<cldnn::network::VariableState::Ptr>& states,
                  std::shared_ptr<cldnn::engine> engine, int currentBatch);

    /**
     * @brief Reset internal variable state for relevant infer request, to a value specified as
     * default for according `ReadValue` node
     */
    void Reset() override;

    /**
     * @brief Sets the new state for the next inference
     * @param newState A new state
     */
    void SetState(const InferenceEngine::Blob::Ptr &newState) override;

    /**
     * @brief Returns the value of the variable state.
     * @return The value of the variable state
     */
    InferenceEngine::Blob::CPtr GetState() const override;

protected:
    // Converts a cldnn layout to an IE shape, overriding the batch dim with currentBatch_.
    InferenceEngine::SizeVector AggregateShape(const cldnn::layout &layout);
    // Applies f to every underlying state selected by the current batch mask.
    void IterateOverStates(std::function<void(cldnn::network::VariableState&)> f) const;

private:
    int currentBatch_;                                      // -1 means "all states"
    std::vector<cldnn::network::VariableState::Ptr> states_;
    InferenceEngine::TensorDesc desc_;                      // precision/shape of the exposed blob
    std::shared_ptr<cldnn::engine> engine_;
};
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@@ -0,0 +1,43 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include "primitive.hpp"
#include "intel_gpu/runtime/memory.hpp"
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Sets an input value to the variable_id variable.
struct assign : public primitive_base<assign> {
    CLDNN_DECLARE_PRIMITIVE(assign)

    /// @brief Constructs Assign primitive.
    /// @param id This primitive id
    /// @param inputs Input parameters ids
    /// @param variable_id Variable id
    /// @param output_layout Memory layout
    assign(const primitive_id &id,
           const std::vector<primitive_id>& inputs,
           const std::string& variable_id,
           const layout& output_layout)
        : primitive_base(id, inputs, "", {}, optional_data_type{output_layout.data_type}),
          variable_id{variable_id},
          output_layout{output_layout} {}

    /// @brief Id of the state variable written by this primitive.
    std::string variable_id;
    /// @brief Expected layout of the variable's memory (checked at execution time).
    layout output_layout;
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@@ -0,0 +1,43 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <vector>
#include "primitive.hpp"
#include "intel_gpu/runtime/memory.hpp"
namespace cldnn {
/// @addtogroup cpp_api C++ API
/// @{
/// @addtogroup cpp_topology Network Topology
/// @{
/// @addtogroup cpp_primitives Primitives
/// @{
/// @brief Returns value of the variable_id variable.
struct read_value : public primitive_base<read_value> {
    CLDNN_DECLARE_PRIMITIVE(read_value)

    /// @brief Constructs ReadValue primitive.
    /// @param id This primitive id
    /// @param inputs Input parameters ids
    /// @param variable_id Variable id
    /// @param output_layout Memory layout
    read_value(const primitive_id& id,
               const std::vector<primitive_id>& inputs,
               const std::string& variable_id,
               const layout& output_layout)
        : primitive_base(id, inputs, "", {}, optional_data_type{output_layout.data_type}),
          variable_id{variable_id},
          output_layout{output_layout} {}

    /// @brief Id of the state variable read by this primitive.
    std::string variable_id;
    /// @brief Expected layout of the variable's memory (checked at execution time).
    layout output_layout;
};
/// @}
/// @}
/// @}
} // namespace cldnn

View File

@@ -0,0 +1,38 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <assign_inst.h>
#include "primitive_type_base.h"
#include <sstream>
#include <json_object.h>
#include <data_inst.h>
namespace cldnn {
// Returns the singleton type descriptor for the assign primitive
// (function-local static — constructed once, thread-safe since C++11).
primitive_type_id assign::type_id() {
    static primitive_type_base<assign> instance;
    return &instance;
}
// Constructs the runtime instance and registers its variable id via the
// memory_state::variable mixin. NOTE(review): the third parent argument
// (`false`) presumably suppresses output-memory allocation, since the output
// is later re-bound to the variable's memory — confirm against primitive_inst.
assign_inst::typed_primitive_inst(network& network, const assign_node& node) :
    parent{network, node, false},
    memory_state::variable{node.get_primitive()->variable_id} {
}
// The output layout is fixed at primitive-creation time; just forward it.
layout assign_inst::calc_output_layout(const assign_node& node) {
    const auto& primitive = node.get_primitive();
    return primitive->output_layout;
}
std::string assign_inst::to_string(const assign_node& node) {
auto node_info = node.desc_to_json();
json_composite assign_info;
assign_info.add("input id", node.input().id());
assign_info.add("variable id", node.get_primitive()->variable_id);
node_info->add("assign info", assign_info);
std::stringstream primitive_description;
node_info->dump(primitive_description);
return primitive_description.str();
}
} // namespace cldnn

View File

@@ -0,0 +1,53 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "assign_inst.h"
#include "impls/implementation_map.hpp"
#include "register.hpp"
namespace cldnn {
namespace cpu {
// CPU implementation of the Assign primitive: copies the input memory into
// the variable's backing memory and marks the variable as set.
struct assign_impl : public typed_primitive_impl<assign> {
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<assign_impl>(*this);
    }

    // Waits for all producer events, validates the layout and performs the copy.
    // Returns the event of the copy so consumers can synchronize on it.
    event::ptr execute_impl(const std::vector<event::ptr>& events, assign_inst& instance) override {
        const auto& arg = instance.argument;        // by reference: avoid copying the primitive descriptor
        const auto& variable_id = arg.variable_id;

        auto& variable = instance.get_network().get_variable_memory(variable_id);

        // The variable's memory must have been allocated with the layout the primitive expects.
        if (variable.memory->get_layout() != arg.output_layout) {
            CLDNN_ERROR_MESSAGE(instance.id(), "Layout mismatch");
        }

        auto& stream = instance.get_network().get_stream();

        // Make sure every producer has finished before reading the input memory.
        for (const auto& e : events) {              // const& avoids a shared_ptr refcount bump per event
            e->wait();
        }

        const auto ev_set_memory = variable.memory->copy_from(stream, instance.input_memory());
        variable.is_set = true;

        return ev_set_memory;
    }

    void init_kernels() override {}

public:
    static primitive_impl* create(assign_node const& arg) { return new assign_impl{}; }
};
namespace detail {
attach_assign_impl::attach_assign_impl() {
implementation_map<assign>::add(impl_types::cpu, assign_impl::create, {});
}
} // namespace detail
} // namespace cpu
} // namespace cldnn

View File

@@ -0,0 +1,53 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "read_value_inst.h"
#include "impls/implementation_map.hpp"
#include "register.hpp"
namespace cldnn {
namespace cpu {
// CPU implementation of the ReadValue primitive: exposes the variable's memory
// as the primitive's output, zero-filling it when the variable was never set.
struct read_value_impl : public typed_primitive_impl<read_value> {
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<read_value_impl>(*this);
    }

    // Waits for dependencies, validates the layout and, for an unset variable,
    // fills the output with zeros (the default ReadValue value).
    event::ptr execute_impl(const std::vector<event::ptr>& events, read_value_inst& instance) override {
        for (const auto& e : events) {              // const& avoids a shared_ptr refcount bump per event
            e->wait();
        }

        const auto& arg = instance.argument;        // by reference: avoid copying the primitive descriptor
        const auto& variable_id = arg.variable_id;

        auto& variable = instance.get_network().get_variable_memory(variable_id);

        // The variable's memory must have been allocated with the layout the primitive expects.
        if (variable.memory->get_layout() != arg.output_layout) {
            CLDNN_ERROR_MESSAGE(instance.id(), "Layout mismatch");
        }

        if (!variable.is_set) {
            // Variable was never assigned: ReadValue yields zeros.
            auto& stream = instance.get_network().get_stream();
            const auto ev_set_output = instance.output_memory().fill(stream, 0);
            return ev_set_output;
        }

        // Output already aliases the variable's memory: nothing to do.
        return instance.get_network().get_stream().create_user_event(true);
    }

    void init_kernels() override {}

public:
    static primitive_impl* create(read_value_node const& arg) { return new read_value_impl{}; }
};
namespace detail {
attach_read_value_impl::attach_read_value_impl() {
implementation_map<read_value>::add(impl_types::cpu, read_value_impl::create, {});
}
} // namespace detail
} // namespace cpu
} // namespace cldnn

View File

@@ -11,8 +11,10 @@ namespace cpu {
static detail::attach_##prim##_impl attach_##prim
void register_implementations() {
REGISTER_CPU(assign);
REGISTER_CPU(detection_output);
REGISTER_CPU(proposal);
REGISTER_CPU(read_value);
REGISTER_CPU(non_max_suppression);
}

View File

@@ -4,8 +4,10 @@
#pragma once
#include "intel_gpu/primitives/assign.hpp"
#include "intel_gpu/primitives/detection_output.hpp"
#include "intel_gpu/primitives/proposal.hpp"
#include "intel_gpu/primitives/read_value.hpp"
#include "intel_gpu/primitives/non_max_suppression.hpp"
namespace cldnn {
@@ -20,7 +22,9 @@ namespace detail {
attach_##prim##_impl(); \
}
REGISTER_CPU(assign);
REGISTER_CPU(proposal);
REGISTER_CPU(read_value);
REGISTER_CPU(non_max_suppression);
REGISTER_CPU(detection_output);

View File

@@ -0,0 +1,52 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "intel_gpu/primitives/assign.hpp"
#include "primitive_inst.h"
#include "intel_gpu/runtime/error_handler.hpp"
namespace cldnn {
namespace memory_state {
/// @brief Mixin that ties a primitive instance to a named state variable.
class variable {
public:
    /// @brief Associates this holder with the state variable @p variable_id.
    explicit variable(const std::string& variable_id) : id_{variable_id} {}

    /// @brief Id of the state variable this primitive reads or writes.
    const std::string& variable_id() const { return id_; }

private:
    std::string id_;
};
} // namespace memory_state
/// @brief Program-graph node for the assign primitive.
template<>
struct typed_program_node<assign> : public typed_program_node_base<assign> {
    using parent = typed_program_node_base<assign>;
public:
    using parent::parent;

    // Convenience accessor for the index-th dependency (the value being assigned).
    const program_node& input(std::size_t index = 0) const { return get_dependency(index); }
};

using assign_node = typed_program_node<assign>;
/// @brief Runtime instance of the assign primitive; carries the variable id
///        through the memory_state::variable mixin.
template<>
class typed_primitive_inst<assign> : public typed_primitive_inst_base<assign>, public memory_state::variable {
    using parent = typed_primitive_inst_base<assign>;

public:
    static layout calc_output_layout(const assign_node& node);
    static std::string to_string(const assign_node& node);

public:
    typed_primitive_inst(network& network, const assign_node& desc);
};

using assign_inst = typed_primitive_inst<assign>;
} // namespace cldnn

View File

@@ -0,0 +1,39 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "assign_inst.h"
#include "intel_gpu/primitives/read_value.hpp"
#include "primitive_inst.h"
#include "intel_gpu/runtime/error_handler.hpp"
namespace cldnn {
/// @brief Program-graph node for the read_value primitive.
template<>
struct typed_program_node<read_value> : public typed_program_node_base<read_value> {
    using parent = typed_program_node_base<read_value>;
public:
    using parent::parent;

    // Convenience accessor for the index-th dependency (init-value subgraph input).
    const program_node& input(std::size_t index = 0) const { return get_dependency(index); }
};

using read_value_node = typed_program_node<read_value>;
/// @brief Runtime instance of the read_value primitive; carries the variable id
///        through the memory_state::variable mixin.
template<>
class typed_primitive_inst<read_value> : public typed_primitive_inst_base<read_value>, public memory_state::variable {
    using parent = typed_primitive_inst_base<read_value>;

public:
    static layout calc_output_layout(const read_value_node& node);
    static std::string to_string(const read_value_node& node);

    typed_primitive_inst(network& network, const read_value_node& desc);
};

using read_value_inst = typed_primitive_inst<read_value>;
} // namespace cldnn

View File

@@ -17,6 +17,8 @@
#include "intel_gpu/graph/program.hpp"
#include "intel_gpu/graph/network.hpp"
#include "assign_inst.h"
#include "read_value_inst.h"
#include "to_string_utils.h"
#include "primitive_inst.h"

#include <utility>
@@ -876,6 +878,8 @@ void network::allocate_primitive_instance(program_node const& node) {
if (node.is_type<data>())
_data_outputs.push_back(inst);
}
if (std::dynamic_pointer_cast<assign_inst>(inst) || std::dynamic_pointer_cast<read_value_inst>(inst))
_variable_state_primitives.push_back(inst);
if (node.is_constant())
transfer_memory_to_device(inst, node);
}
@@ -915,4 +919,26 @@ memory::ptr network::get_memory_from_pool(const layout& layout,
return _memory_pool->get_memory(layout, id, get_id(), dependencies, type, reusable);
return _memory_pool->get_memory(layout, type);
}
// Looks up the state of @p variable_id; unknown ids are reported as an error.
network::VariableState& network::get_variable_memory(const std::string &variable_id) {
    const auto iter = _variables_states.find(variable_id);
    if (iter == _variables_states.end()) {
        CLDNN_ERROR_MESSAGE(variable_id, "Variable not found");
    }
    return *iter->second;
}
// Takes ownership of the provided state map and re-binds the output memory of
// every ReadValue/Assign instance to the memory of its corresponding state.
// @param variables_memories variable-id -> VariableState map (consumed).
void network::assign_variables_memories(variables_states_map &&variables_memories) {
    // The parameter is an rvalue reference: move it instead of copying the map.
    _variables_states = std::move(variables_memories);

    for (const auto& primitive : _variable_state_primitives) {   // const&: no shared_ptr copy per iteration
        if (const auto& memory_state_primitive = std::dynamic_pointer_cast<memory_state::variable>(primitive)) {
            auto it = _variables_states.find(memory_state_primitive->variable_id());
            if (it != _variables_states.end())
                primitive->set_output_memory(it->second->memory, false);
            else
                CLDNN_ERROR_MESSAGE(memory_state_primitive->variable_id(), "Memory state not found");
        }
    }
}
} // namespace cldnn

View File

@@ -120,7 +120,7 @@ void primitive_inst::check_memory_to_set(const memory& mem, const layout& layout
void primitive_inst::set_output_memory(memory::ptr mem_new, bool check) {
auto& eng = _network.get_engine();
// skip all the buzz if no action actually required
if (eng.is_the_same_buffer(*mem_new, *_output)) {
if (_output && eng.is_the_same_buffer(*mem_new, *_output)) {
return;
}

View File

@@ -587,7 +587,8 @@ void program::post_optimize_graph(bool is_internal) {
// mark if the node is constant assuming that all dependencies are marked properly
void program::mark_if_constant(program_node& node) {
if (node.get_dependencies().empty() || node.is_type<prior_box>()) {
if (node.get_dependencies().empty() || node.is_type<prior_box>() ||
node.is_type<assign>() || node.is_type<read_value>()) {
return;
}
node.constant = true;

View File

@@ -0,0 +1,40 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <read_value_inst.h>
#include "primitive_type_base.h"
#include <sstream>
#include <json_object.h>
#include <data_inst.h>
namespace cldnn {
// Returns the singleton type descriptor for the read_value primitive
// (function-local static — constructed once, thread-safe since C++11).
primitive_type_id read_value::type_id() {
    static primitive_type_base<read_value> instance;
    return &instance;
}
// Constructs the runtime instance and registers its variable id via the
// memory_state::variable mixin. NOTE(review): the third parent argument
// (`false`) presumably suppresses output-memory allocation, since the output
// is later re-bound to the variable's memory — confirm against primitive_inst.
read_value_inst::typed_primitive_inst(network& network, const read_value_node& node) :
    parent(network, node, false),
    memory_state::variable{node.get_primitive()->variable_id} {
}
// The output layout is fixed at primitive-creation time; just forward it.
layout read_value_inst::calc_output_layout(const read_value_node& node) {
    const auto& primitive = node.get_primitive();
    return primitive->output_layout;
}
std::string read_value_inst::to_string(const read_value_node& node) {
auto node_info = node.desc_to_json();
json_composite read_value_info;
read_value_info.add("input id", node.input().id());
read_value_info.add("variable id", node.get_primitive()->variable_id);
node_info->add("read_value info", read_value_info);
std::stringstream primitive_description;
node_info->dump(primitive_description);
return primitive_description.str();
}
} // namespace cldnn

View File

@@ -133,6 +133,23 @@ std::shared_ptr<cldnn::network> Graph::BuildNetwork(std::shared_ptr<cldnn::progr
return network;
}
// Allocates device memory for every state variable registered in the program.
// For each variable the known layouts are ordered by ascending batch size, so
// states_[i] later corresponds to the batch chunk used in dynamic batching.
// @return variable-id -> vector of per-layout VariableState objects.
Graph::variable_states_map Graph::AllocateVariablesMemories() {
    Graph::variable_states_map states {};
    const auto& memStatesInfo = m_program->GetVariablesStatesInfo();
    for (const auto& memStateInfo : memStatesInfo) {
        std::vector<cldnn::layout> orderedLayouts {memStateInfo.second.begin(), memStateInfo.second.end()};
        // Comparator takes const references: sort must not mutate its arguments.
        std::sort(orderedLayouts.begin(), orderedLayouts.end(),
                  [](const cldnn::layout& first, const cldnn::layout& second) {
            return first.size.batch[0] < second.size.batch[0];
        });
        std::vector<cldnn::network::VariableState::Ptr> memoryStates;
        memoryStates.reserve(orderedLayouts.size());
        for (const auto& layout : orderedLayouts)
            memoryStates.push_back(std::make_shared<cldnn::network::VariableState>(GetEngine()->allocate_memory(layout, false)));
        states.insert({memStateInfo.first, memoryStates });
    }
    return states;
}
std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& primitives_info,
bool filter_const_primitives) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::GetExecGraphInfoByPrimitivesInfo");

View File

@@ -12,6 +12,7 @@
#include "intel_gpu/plugin/remote_context.hpp"
#include "intel_gpu/plugin/compiled_model.hpp"
#include "intel_gpu/plugin/itt.hpp"
#include "intel_gpu/plugin/variable_state.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
#include "openvino/core/preprocess/input_tensor_info.hpp"
#include <ie_algorithm.hpp>
@@ -532,6 +533,7 @@ void InferRequest::SetGraph(std::shared_ptr<Graph> graph) {
} else {
allocate_inputs();
allocate_outputs();
variables_states_ = m_graph->AllocateVariablesMemories();
}
}
@@ -605,6 +607,7 @@ void InferRequest::SetBatch(int new_batch) {
batchOutputs[no.first] = out_buf;
}
variables_states_ = m_graph->AllocateVariablesMemories();
m_curBatch = new_batch;
}
@@ -744,6 +747,14 @@ void InferRequest::enqueue() {
}
}
cldnn::network::variables_states_map variables_states;
for (auto &variable_state_pair : variables_states_)
variables_states.insert({ variable_state_pair.first, variable_state_pair.second[0] });
auto networkPtr = m_graph->GetNetwork();
networkPtr->assign_variables_memories(std::move(variables_states));
for (auto& item : _outputs) {
std::string outputName = item.first;
Blob::Ptr& outputBlob = item.second;
@@ -751,7 +762,7 @@ void InferRequest::enqueue() {
}
internal_outputs.clear();
internal_outputs = m_graph->GetNetwork()->execute(dependencies);
internal_outputs = networkPtr->execute(dependencies);
// If dump layers path is set, only runs first inference.
GPU_DEBUG_GET_INSTANCE(debug_config);
@@ -826,7 +837,16 @@ void InferRequest::enqueue_dynamic() {
inputLayout.size.batch[0] = mask;
copy_input_data(m_graph->GetNetwork(nb), inputName, inputLayout, *inputBlob, &batchInputs[inputName][nb]);
}
internal_outputs_dynamic[nb] = m_graph->GetNetwork(nb)->execute();
cldnn::network::variables_states_map variables_states;
for (auto &variable_state_pair : variables_states_)
variables_states.insert({ variable_state_pair.first, variable_state_pair.second[nb] });
auto networkPtr = m_graph->GetNetwork(nb);
networkPtr->assign_variables_memories(std::move(variables_states));
internal_outputs_dynamic[nb] = networkPtr->execute();
}
}
}
@@ -1248,6 +1268,14 @@ InferenceEngine::Blob::Ptr InferRequest::create_device_blob(const InferenceEngin
}
}
// Wraps each allocated GPU memory state in an IE-facing VariableState adapter.
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> InferRequest::QueryState() {
    std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> result;
    result.reserve(variables_states_.size());
    for (const auto& entry : variables_states_) {
        result.push_back(std::make_shared<VariableState>(entry.first, entry.second, m_graph->GetEngine(), m_curBatch));
    }
    return result;
}
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@@ -0,0 +1,58 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/plugin/program.hpp"
#include "intel_gpu/plugin/common_utils.hpp"
#include "ngraph/op/assign.hpp"
#include "ngraph/op/read_value.hpp"
#include "intel_gpu/primitives/assign.hpp"
#include "intel_gpu/primitives/read_value.hpp"
namespace ov {
namespace runtime {
namespace intel_gpu {
namespace {
// Shared factory for ReadValue/Assign: derives the variable's layout from the
// op's single output, records it in the program's variable-state registry and
// adds the corresponding cldnn primitive.
template<typename T_PRIMITIVE>
void CreateVariableAccessPrimitive(Program &p, const std::shared_ptr<ngraph::op::Op> &op,
                                   const std::string &variable_id) {
    p.ValidateInputs(op, {1});   // both ops take exactly one input

    const auto output_data_type = DataTypeFromPrecision(op->get_output_element_type(0));
    const auto op_output_shape = op->get_output_shape(0);
    const auto output_format = DefaultFormatForDims(op_output_shape.size());
    const auto output_shape = tensor_from_dims(op_output_shape);

    const auto variable_layout = cldnn::layout{output_data_type,
                                               output_format,
                                               output_shape};

    auto input_primitives = p.GetInputPrimitiveIDs(op);
    // Register the layout so Graph::AllocateVariablesMemories can size the state.
    p.AddVariableStateInfo(variable_id, variable_layout);
    const auto prim = T_PRIMITIVE{layer_type_name_ID(op),
                                  input_primitives,
                                  variable_id,
                                  variable_layout};

    p.AddPrimitive(prim);
    p.AddPrimitiveToProfiler(op);
}
// ngraph -> cldnn conversion entry point for v6::ReadValue.
void CreateReadValueOp(Program& p, const std::shared_ptr<ngraph::op::v6::ReadValue>& op) {
    CreateVariableAccessPrimitive<cldnn::read_value>(p, op, op->get_variable_id());
}

// ngraph -> cldnn conversion entry point for v6::Assign.
void CreateAssignOp(Program& p, const std::shared_ptr<ngraph::op::v6::Assign>& op) {
    CreateVariableAccessPrimitive<cldnn::assign>(p, op, op->get_variable_id());
}
} // namespace
REGISTER_FACTORY_IMPL(v6, Assign);
REGISTER_FACTORY_IMPL(v6, ReadValue);
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@@ -488,6 +488,14 @@ void Program::InitProfileInfo(const std::string& layerName,
perfEntry.parentPrimitive = parentId;
}
// Registers @p layout as one of the known layouts of state variable
// @p variable_id (a variable accumulates one layout per batch configuration).
void Program::AddVariableStateInfo(const std::string& variable_id, const cldnn::layout& layout) {
    // map::operator[] default-constructs an empty set for a new id, so a single
    // lookup replaces the original find-then-insert double lookup.
    m_variablesStateInfo[variable_id].insert(layout);
}
// TODO: Does it make sense to add such method to ngraph core?
bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node) {
std::set<std::shared_ptr<ngraph::Node>> nodes_processed = {};

View File

@@ -0,0 +1,74 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <intel_gpu/plugin/variable_state.hpp>
#include <blob_factory.hpp>
namespace ov {
namespace runtime {
namespace intel_gpu {
// Builds the IE-facing adapter over the per-batch cldnn states.
// The exposed TensorDesc is derived from the first (smallest-batch) state's
// layout, with the batch dimension replaced by currentBatch (see AggregateShape).
// NOTE(review): states.front() is dereferenced unconditionally — assumes the
// vector is non-empty; confirm callers never pass an empty state list.
VariableState::VariableState(const std::string &name,
                             const std::vector<cldnn::network::VariableState::Ptr> &states,
                             std::shared_ptr<cldnn::engine> engine, int currentBatch) :
    InferenceEngine::IVariableStateInternal {name},
    currentBatch_ {currentBatch},
    states_ {states},
    desc_{
        PrecisionFromDataType(states.front()->memory->get_layout().data_type),
        AggregateShape(states.front()->memory->get_layout()),
        InferenceEngine::Layout::ANY
    },
    engine_ {std::move(engine)} {
}
// Marks every selected per-batch state as unset; ReadValue will then produce
// the default (zero-filled) value on the next inference.
void VariableState::Reset() {
    IterateOverStates([](cldnn::network::VariableState& state) {
        state.is_set = false;
    });
}
// Copies the user-provided blob into the underlying device states, advancing
// through the blob sequentially: each selected state consumes bytes_count()
// bytes. A barrier is enqueued afterwards so subsequent work sees the copies.
// NOTE(review): the dynamic_pointer_cast result is not null-checked — a
// non-MemoryBlob input would dereference null; confirm callers guarantee this.
void VariableState::SetState(const InferenceEngine::Blob::Ptr &newState) {
    auto lock = std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(newState)->rmap();
    auto data = lock.as<char*>();
    IterateOverStates([&data, this](cldnn::network::VariableState &state) {
        state.memory->copy_from(engine_->get_program_stream(), data);
        data += state.memory->get_layout().bytes_count();
        state.is_set = true;   // the state now holds a user-provided value
    });
    engine_->get_program_stream().enqueue_barrier();
}
// Copies the current device state(s) back into a newly allocated host blob of
// desc_, concatenating the selected per-batch states sequentially (mirror of
// the SetState layout).
InferenceEngine::Blob::CPtr VariableState::GetState() const {
    auto blob = make_blob_with_precision(desc_, InferenceEngine::CreateDefaultAllocator());
    blob->allocate();
    auto blobLock = std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(blob)->wmap();
    auto data = blobLock.as<char*>();
    IterateOverStates([&data, this](cldnn::network::VariableState &state) {
        // mem_lock maps the device memory for host reads on the program stream.
        cldnn::mem_lock<char, cldnn::mem_lock_type::read> lock { state.memory, engine_->get_program_stream() };
        std::copy(lock.begin(), lock.end(), data);
        data += state.memory->get_layout().bytes_count();
    });
    return blob;
}
// Converts a cldnn layout's dims to an IE shape; when a dynamic batch is
// active (currentBatch_ != -1) the leading dimension is overridden with it.
InferenceEngine::SizeVector VariableState::AggregateShape(const cldnn::layout &layout) {
    const auto& dims = layout.get_dims();
    InferenceEngine::SizeVector shape(dims.begin(), dims.end());
    if (currentBatch_ != -1)
        shape.front() = currentBatch_;
    return shape;
}
// Applies @p f to every per-batch state selected by the current batch mask:
// states_[i] serves the batch chunk of size (1 << i). A currentBatch_ of -1
// has all bits set, so every state is visited.
void VariableState::IterateOverStates(std::function<void(cldnn::network::VariableState&)> f) const {
    // size_t index: avoids the original signed/unsigned comparison with size().
    for (size_t i = 0; i < states_.size(); ++i) {
        const int batch = 1 << i;
        if (batch & currentBatch_)
            f(*states_[i]);
    }
}
} // namespace intel_gpu
} // namespace runtime
} // namespace ov

View File

@@ -0,0 +1,192 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/eltwise.hpp>
#include <intel_gpu/primitives/assign.hpp>
#include <intel_gpu/primitives/read_value.hpp>
using namespace cldnn;
using namespace ::tests;
// Test parameters: the variable's layout and the initial input values.
template<typename T>
struct VariableParams {
    cldnn::layout layout;
    std::vector<T> values;   // element count must match the layout
};
// Parameterized fixture: builds the graph
//   input -> read_value("v0") -> sum(input + state) -> assign("v0")
// and runs it several times. Because assign writes the sum back into v0 and
// read_value yields zeros on the first pass, the output of inference k must
// equal k * input — which is exactly what the assertions below check.
template<typename T>
struct variable_test : public ::testing::TestWithParam<VariableParams<T>> {
    void test() {
        const VariableParams<T> param = testing::TestWithParam<VariableParams<T>>::GetParam();

        auto& engine = get_test_engine();

        const auto variable_layout = param.layout;
        const auto input_data = engine.allocate_memory(variable_layout);
        set_values(input_data, param.values);

        topology topology;
        topology.add(input_layout("input", input_data->get_layout()));
        topology.add(read_value{"read_value", {"input"}, "v0", variable_layout});
        topology.add(eltwise{"sum", {"input", "read_value"}, eltwise_mode::sum, {}, variable_layout.data_type});
        topology.add(assign{"assign", {"sum"}, "v0", variable_layout});

        network network(engine, topology, build_options{}, false);
        // Bind freshly allocated (unset) memory to variable "v0".
        network.assign_variables_memories({ { "v0", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) } });
        network.set_input_data("input", input_data);

        constexpr size_t number_of_inferences = 5;
        for (size_t inference = 1; inference <= number_of_inferences; ++inference) {
            const auto outputs = network.execute();
            const auto output = outputs.at("assign").get_memory();
            const cldnn::mem_lock<T> output_ptr(output, get_test_stream());
            const auto output_count = output_ptr.size();
            ASSERT_EQ(output_count, param.values.size()) << "inference " << inference;

            // State accumulates the input each pass: expect inference * input.
            for (size_t i = 0; i < output_count; ++i) {
                ASSERT_EQ(output_ptr[i], inference * param.values[i]) << "inference " << inference;
            }
        }
    }
};
// Concrete fixture aliases for each element type exercised by the suite.
using variable_test_i32 = variable_test<int32_t>;
using variable_test_i64 = variable_test<int64_t>;
using variable_test_f32 = variable_test<float>;

TEST_P(variable_test_i32, variable_i32) {
    ASSERT_NO_FATAL_FAILURE(test());
}

TEST_P(variable_test_i64, variable_i64) {
    ASSERT_NO_FATAL_FAILURE(test());
}

TEST_P(variable_test_f32, variable_f32) {
    ASSERT_NO_FATAL_FAILURE(test());
}
// Shapes covered: single element (bfyx), small 1-D spatial (bfyx), and a
// 5-D bfzyx tensor, for each of i32 / i64 / f32.
INSTANTIATE_TEST_SUITE_P(
        basic,
        variable_test_i32,
        ::testing::Values(
                VariableParams<int32_t>{ {data_types::i32, format::bfyx, tensor{1}}, {333666} },
                VariableParams<int32_t>{ {data_types::i32, format::bfyx, tensor{1, 1, 1, 3}}, {444, 555, 666} },
                VariableParams<int32_t>{ {data_types::i32, format::bfzyx, tensor{1, 2, 3, 2}},
                        {1, 2, 3, 4, 5, 6, 6, 5, 4, 3, 2, 1} }
        )
);

INSTANTIATE_TEST_SUITE_P(
        basic,
        variable_test_i64,
        ::testing::Values(
                VariableParams<int64_t>{ {data_types::i64, format::bfyx, tensor{1}}, {333666L} },
                VariableParams<int64_t>{ {data_types::i64, format::bfyx, tensor{1, 1, 1, 3}}, {444L, 555L, 666L} },
                VariableParams<int64_t>{ {data_types::i64, format::bfzyx, tensor{1, 2, 3, 2}},
                        {1L, 2L, 3L, 4L, 5L, 6L, 6L, 5L, 4L, 3L, 2L, 1L} }
        )
);

INSTANTIATE_TEST_SUITE_P(
        basic,
        variable_test_f32,
        ::testing::Values(
                VariableParams<float>{ {data_types::f32, format::bfyx, tensor{1}}, {333666.f} },
                VariableParams<float>{ {data_types::f32, format::bfyx, tensor{1, 1, 1, 3}}, {44.4f, 55.5f, 66.6f} },
                VariableParams<float>{ {data_types::f32, format::bfzyx, tensor{1, 2, 3, 2}},
                        {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 6.f, 5.f, 4.f, 3.f, 2.f, 1.f} }
        )
);
// Assigning through a layout whose data type differs from the variable's
// declared layout must raise a "Layout mismatch" error at execution time.
TEST(variable_test_common, exception_on_wrong_layout) {
    auto& engine = get_test_engine();

    // Variable "v0" is declared with an i32 layout...
    const layout variable_layout{data_types::i32, format::bfyx, tensor{1}};
    const auto input_data = engine.allocate_memory(variable_layout);
    set_values(input_data, {333666});

    // ...but the assign input uses an f32 layout of the same shape.
    auto wrong_layout = variable_layout;
    wrong_layout.data_type = data_types::f32;
    const auto wrong_input_data = engine.allocate_memory(wrong_layout);
    // BUGFIX: initialize the f32 buffer that is actually fed to "assign".
    // The original wrote double literals into the i32 `input_data` buffer
    // and left `wrong_input_data` uninitialized.
    set_values(wrong_input_data, {333.666f});

    topology topology;
    topology.add(input_layout("input", input_data->get_layout()));
    topology.add(read_value{"read_value", {"input"}, "v0", variable_layout});
    topology.add(input_layout("wrong_input", wrong_input_data->get_layout()));
    topology.add(assign{"assign", {"wrong_input"}, "v0", wrong_layout});

    network network(engine, topology, build_options{}, false);
    network.assign_variables_memories(
        { { "v0", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) } });
    network.set_input_data("input", input_data);
    network.set_input_data("wrong_input", wrong_input_data);

    bool layout_mismatch_exception = false;
    try {
        network.execute();
    } catch (const std::exception& exc) {  // catch by const reference
        const std::string error = exc.what();
        layout_mismatch_exception = error.find("Layout mismatch") != std::string::npos;
    }
    ASSERT_TRUE(layout_mismatch_exception);
}
// Variables written by assign on one execute() must still hold their values
// on the next execute(): pass 1 stores the inputs into v1/v2, pass 2 reads
// them back, sums them into v_result and reads the result out.
TEST(variable_test_common, variables_are_preserved_across_inferences) {
    auto& engine = get_test_engine();
    const layout variable_layout{data_types::i32, format::bfyx, tensor{1}};

    constexpr auto value_1 = 333;
    constexpr auto value_2 = 666;
    const auto input_1 = engine.allocate_memory(variable_layout);
    set_values(input_1, {value_1});
    const auto input_2 = engine.allocate_memory(variable_layout);
    set_values(input_2, {value_2});

    // Dummy buffers only feed the read_value inputs; their contents (11, 22)
    // must never appear in the output because the variables are written first.
    const auto dummy1 = engine.allocate_memory(variable_layout);
    set_values(dummy1, {11});
    const auto dummy2 = engine.allocate_memory(variable_layout);
    set_values(dummy2, {22});

    topology topology;
    topology.add(input_layout("input_1", input_1->get_layout()));
    topology.add(assign{"assign_1", {"input_1"}, "v1", variable_layout});
    topology.add(input_layout("input_2", input_2->get_layout()));
    topology.add(assign{"assign_2", {"input_2"}, "v2", variable_layout});
    topology.add(data("dummy1", dummy1));
    topology.add(read_value{"read_value_1", {"dummy1"}, "v1", variable_layout});
    topology.add(read_value{"read_value_2", {"dummy1"}, "v2", variable_layout});
    topology.add(eltwise{"sum", {"read_value_1", "read_value_2"}, eltwise_mode::sum, {}, variable_layout.data_type});
    topology.add(assign{"assign_result", {"sum"}, "v_result", variable_layout});
    topology.add(data("dummy2", dummy2));
    topology.add(read_value{"read_result", {"dummy2"}, "v_result", variable_layout});

    network network{engine, topology, build_options{}, true};

    const auto make_state = [&]() {
        return std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout));
    };
    network.assign_variables_memories({
        { "v1", make_state() },
        { "v2", make_state() },
        { "v_result", make_state() },
    });
    network.set_input_data("input_1", input_1);
    network.set_input_data("input_2", input_2);

    // First execution populates the variables via assign; the second reads
    // them with read_value and produces the accumulated result.
    network.execute();
    const auto outputs = network.execute();

    const auto output = outputs.at("read_result").get_memory();
    const cldnn::mem_lock<int> output_ptr(output, get_test_stream());
    ASSERT_EQ(output_ptr[0], value_1 + value_2);
}

View File

@@ -0,0 +1,175 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph/opsets/opset8.hpp"
#include "ngraph_functions/subgraph_builders.hpp"
#include "openvino/runtime/core.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include <cpp/ie_cnn_network.h>
#include <ie_plugin_config.hpp>
#include "functional_test_utils/ov_plugin_cache.hpp"
#include "common_test_utils/common_utils.hpp"
#include <vector>
#include <gtest/gtest.h>
using namespace ngraph;
using namespace opset8;
using namespace ov::test;
// Test parameter tuple: (partial shape used to compile the network, concrete
// shape fed to the infer request, number of inference iterations, device name).
using MemoryDynamicBatchParams = std::tuple<
    ov::PartialShape,                           // Partial shape for network initialization
    ov::Shape,                                  // Actual shape to be passed to inference request
    int,                                        // Iterations number
    std::string>;                               // Device name
// Fixture for memory-state (ReadValue/Assign) tests with a dynamic batch:
// the model is compiled with a dynamic partial shape and inferred with a
// concrete shape, accumulating the input into a variable on every iteration.
class MemoryDynamicBatch : public ::testing::Test,
                           public ::testing::WithParamInterface<MemoryDynamicBatchParams> {
public:
    // Builds a human-readable test name from the parameter tuple.
    static std::string getTestCaseName(::testing::TestParamInfo<MemoryDynamicBatchParams> obj) {
        ov::PartialShape inputPartialShape;
        ov::Shape inputShape;
        int iterationsNum;
        std::string targetDevice;
        std::tie(inputPartialShape, inputShape, iterationsNum, targetDevice) = obj.param;
        std::ostringstream result;
        result << "IS=";
        result << CommonTestUtils::partialShape2str({ inputPartialShape }) << "_";
        result << "TS=";
        result << CommonTestUtils::partialShape2str({inputShape});
        result << ")_";
        result << "iterationsCount=" << iterationsNum << "_";
        result << "targetDevice=" << targetDevice;
        return result.str();
    }

    void SetUp() override {
        std::tie(inputPartialShape_, inputShape_, iterationsNum_, deviceName_) = GetParam();
        model_ = buildModel(precision_, inputPartialShape_);
        core_ = ov::test::utils::PluginCache::get().core();
    }

    // Param -> ReadValue("v0") -> Add(param) -> {Result, Assign("v0")}:
    // each inference adds the input to the accumulated variable state.
    static std::shared_ptr<ov::Model> buildModel(ElementType precision, const ov::PartialShape& shape) {
        auto param = builder::makeDynamicParams(precision, { shape });
        const VariableInfo variable_info { shape, precision, "v0" };
        auto variable = std::make_shared<Variable>(variable_info);
        auto read_value = std::make_shared<ReadValue>(param.at(0), variable);
        auto add = std::make_shared<Add>(read_value, param.at(0));
        auto assign = std::make_shared<Assign>(add, variable);
        auto res = std::make_shared<Result>(add);
        return std::make_shared<ov::Model>(ResultVector { res }, SinkVector { assign }, param,
                                           "MemoryDynamicBatchTest");
    }

    // Fills a buffer of shape_size(shape) elements with 0, 1, 2, ...
    static std::vector<int> generateInput(const ov::Shape& shape) {
        const auto len = ov::shape_size(shape);
        std::vector<int> result {};
        result.reserve(len);
        // size_t counter: the original `int i < len` compared signed against
        // the size_t returned by shape_size (sign-compare warning / overflow).
        for (size_t i = 0; i < len; i++)
            result.push_back(static_cast<int>(i));
        return result;
    }

    // Expected output after `iterations` accumulating passes: input * iterations.
    static std::vector<int> calculateReference(const std::vector<int>& input, int iterations) {
        std::vector<int> reference {};
        reference.reserve(input.size());
        std::transform(input.begin(), input.end(), std::back_inserter(reference), [iterations](const int &i) {
            return i * iterations;
        });
        return reference;
    }

protected:
    ov::PartialShape inputPartialShape_;
    ov::Shape inputShape_;
    int iterationsNum_;
    std::string deviceName_;
    std::shared_ptr<ov::Model> model_;
    std::shared_ptr<ov::Core> core_;
    std::vector<int> input_;
    ElementType precision_ { ElementType::i32 };
};
// N consecutive inferences on one request accumulate the input N times.
TEST_P(MemoryDynamicBatch, MultipleInferencesOnTheSameInferRequest) {
    // Use the device test parameter; the original hard-coded DEVICE_GPU and
    // silently ignored `deviceName_`.
    auto compiledModel = core_->compile_model(model_, deviceName_, { });
    auto inferRequest = compiledModel.create_infer_request();
    input_ = generateInput(inputShape_);
    ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.set_input_tensor(inputTensor);
    for (int i = 0; i < iterationsNum_; i++)
        inferRequest.infer();
    auto output = inferRequest.get_output_tensor(0);
    std::vector<int> reference = calculateReference(input_, iterationsNum_);
    std::vector<int> actual(output.data<int>(), output.data<int>() + output.get_size());
    // Size guard: the parallel iteration below would read past the end of
    // `reference` if the output were unexpectedly larger.
    ASSERT_EQ(actual.size(), reference.size());
    for (auto actualIt = actual.begin(), referenceIt = reference.begin(); actualIt < actual.end();
         actualIt++, referenceIt++)
        EXPECT_EQ(*actualIt, *referenceIt);
}
// reset() between two inferences must drop the accumulated state, so the
// second inference behaves like the first (reference for 1 iteration).
TEST_P(MemoryDynamicBatch, ResetVariableState) {
    // Use the device test parameter; the original hard-coded DEVICE_GPU and
    // silently ignored `deviceName_`.
    auto compiledModel = core_->compile_model(model_, deviceName_, { });
    auto inferRequest = compiledModel.create_infer_request();
    input_ = generateInput(inputShape_);
    ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.set_input_tensor(inputTensor);
    inferRequest.infer();
    inferRequest.query_state().front().reset();
    inferRequest.infer();
    auto output = inferRequest.get_output_tensor(0);
    std::vector<int> reference = calculateReference(input_, 1);
    std::vector<int> actual(output.data<int>(), output.data<int>() + output.get_size());
    // Size guard: the parallel iteration below would read past the end of
    // `reference` if the output were unexpectedly larger.
    ASSERT_EQ(actual.size(), reference.size());
    for (auto actualIt = actual.begin(), referenceIt = reference.begin(); actualIt < actual.end();
         actualIt++, referenceIt++)
        EXPECT_EQ(*actualIt, *referenceIt);
}
// After N inferences, get_state() must expose the accumulated variable value.
TEST_P(MemoryDynamicBatch, GetVariableState) {
    // Use the device test parameter; the original hard-coded DEVICE_GPU and
    // silently ignored `deviceName_`.
    auto compiledModel = core_->compile_model(model_, deviceName_, { });
    auto inferRequest = compiledModel.create_infer_request();
    input_ = generateInput(inputShape_);
    ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.set_input_tensor(inputTensor);
    for (int i = 0; i < iterationsNum_; i++)
        inferRequest.infer();
    auto blob = inferRequest.query_state().front().get_state();
    std::vector<int> reference = calculateReference(input_, iterationsNum_);
    std::vector<int> actual(blob.data<int>(), blob.data<int>() + blob.get_size());
    // Size guard: the parallel iteration below would read past the end of
    // `reference` if the state were unexpectedly larger.
    ASSERT_EQ(actual.size(), reference.size());
    for (auto actualIt = actual.begin(), referenceIt = reference.begin(); actualIt < actual.end();
         actualIt++, referenceIt++)
        EXPECT_EQ(*actualIt, *referenceIt);
}
// Pre-loading the state with the input via set_state() adds one extra
// accumulation, so N inferences yield the reference for N + 1 iterations.
TEST_P(MemoryDynamicBatch, SetVariableState) {
    // Use the device test parameter; the original hard-coded DEVICE_GPU and
    // silently ignored `deviceName_`.
    auto compiledModel = core_->compile_model(model_, deviceName_, { });
    auto inferRequest = compiledModel.create_infer_request();
    input_ = generateInput(inputShape_);
    ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.set_input_tensor(inputTensor);
    ov::Tensor state = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.query_state().front().set_state(state);
    for (int i = 0; i < iterationsNum_; i++)
        inferRequest.infer();
    auto output = inferRequest.get_output_tensor(0);
    std::vector<int> reference = calculateReference(input_, iterationsNum_ + 1);
    std::vector<int> actual(output.data<int>(), output.data<int>() + output.get_size());
    // Size guard: the parallel iteration below would read past the end of
    // `reference` if the output were unexpectedly larger.
    ASSERT_EQ(actual.size(), reference.size());
    for (auto actualIt = actual.begin(), referenceIt = reference.begin(); actualIt < actual.end();
         actualIt++, referenceIt++)
        EXPECT_EQ(*actualIt, *referenceIt);
}
ov::PartialShape networkPartialShape { {1, 19}, 4, 20, 20 };
std::vector<ov::Shape> inputShapes { { 7, 4, 20, 20 }, { 19, 4, 20, 20 } };
std::vector<int> iterationsNum { 3, 7 };
INSTANTIATE_TEST_SUITE_P(smoke_MemoryDynamicBatch, MemoryDynamicBatch,
::testing::Combine(
::testing::Values(networkPartialShape),
::testing::ValuesIn(inputShapes),
::testing::ValuesIn(iterationsNum),
::testing::Values(CommonTestUtils::DEVICE_GPU)),
MemoryDynamicBatch::getTestCaseName);

View File

@@ -0,0 +1,36 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/memory.h"
using namespace LayerTestsDefinitions;
namespace {
// Variable shapes covered: single element, 1-D, 3-D and 4-D.
const std::vector<InferenceEngine::SizeVector> inShapes = {
    {1},
    {3},
    {3, 3, 3},
    {2, 3, 4, 5},
};

const std::vector<InferenceEngine::Precision> inputPrecisions = {
    InferenceEngine::Precision::I32,
    InferenceEngine::Precision::FP32,
};

// Number of consecutive inferences run on the same request per test.
const std::vector<int64_t> iterationCount {1, 3, 10};

INSTANTIATE_TEST_SUITE_P(smoke_MemoryTest, MemoryTest,
        ::testing::Combine(
                ::testing::Values(ngraph::helpers::MemoryTransformation::NONE),
                ::testing::ValuesIn(iterationCount),
                ::testing::ValuesIn(inShapes),
                ::testing::ValuesIn(inputPrecisions),
                ::testing::Values(CommonTestUtils::DEVICE_GPU)),
        MemoryTest::getTestCaseName);
} // namespace

View File

@@ -28,6 +28,7 @@ public:
protected:
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> CalculateRefs() override;
void SetUp() override;
void Infer() override;
private:
void CreateTIFunc();
void CreateCommonFunc();

View File

@@ -82,6 +82,7 @@ namespace LayerTestsDefinitions {
ConfigureNetwork();
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
}
inferRequest = executableNetwork.CreateInferRequest();
GenerateInputs();
for (int64_t i = 0; i < iteration_count; ++i) {
Infer();
@@ -101,6 +102,11 @@ namespace LayerTestsDefinitions {
}
}
// Runs one inference on the pre-created request: configure, then execute.
// NOTE(review): ConfigureInferRequest() presumably (re)binds the generated
// input blobs before each call — confirm against LayerTestsCommon.
void MemoryTest::Infer() {
    ConfigureInferRequest();
    inferRequest.Infer();
}
std::vector<std::pair<element::Type, std::vector<std::uint8_t>>> MemoryTest::CalculateRefs() {
using namespace ngraph;
function->validate_nodes_and_infer_types();
@@ -177,7 +183,9 @@ namespace LayerTestsDefinitions {
void MemoryTest::CreateCommonFunc() {
auto param = builder::makeParams(ngPrc, {inputShape});
auto variable = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"});
const auto variable_info = targetDevice == CommonTestUtils::DEVICE_GPU ?
VariableInfo{Shape{inputShape}, ngPrc, "v0"} : VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"};
auto variable = std::make_shared<Variable>(variable_info);
auto read_value = std::make_shared<ReadValue>(param.at(0), variable);
auto add = std::make_shared<Add>(read_value, param.at(0));
auto assign = std::make_shared<Assign>(add, variable);