[GPU] Assign-6 and ReadValue-6 (#11780)
* Add methods for access to varables information in Program class * add ReadValue and Assign primitives * ReadValue and Assign implementations * Implementation of memory states allocation * Add output existance check in primitive_inst to avoid crashes if output is set during execution * Add memory states management functionality in network component * Integration of memory states feature in inference request component * Exclude constant path for read_value and assign nodes in cldnn transformations * Improve memory states test to run on a single inference request * unit tests for ReadValue and Assign * single-layer test for ReadValue and Assign * Add QueryState API implementation * Add memory state test which covers dynamic batch case Co-authored-by: Oleksii Khovan <okhovan@lohika.com>
This commit is contained in:
@@ -54,7 +54,7 @@ protected:
|
||||
/**
|
||||
* @brief A default dtor
|
||||
*/
|
||||
~IVariableStateInternal() = default;
|
||||
virtual ~IVariableStateInternal() = default;
|
||||
|
||||
std::string name;
|
||||
Blob::Ptr state;
|
||||
|
||||
@@ -52,6 +52,18 @@ class primitive_inst;
|
||||
struct network {
|
||||
public:
|
||||
using ptr = std::shared_ptr<network>;
|
||||
|
||||
struct VariableState {
|
||||
using Ptr = std::shared_ptr<VariableState>;
|
||||
|
||||
cldnn::memory_ptr memory;
|
||||
bool is_set;
|
||||
VariableState(cldnn::memory_ptr mem = nullptr) :
|
||||
memory { mem }, is_set { false } {
|
||||
}
|
||||
};
|
||||
using variables_states_map = std::map<std::string, VariableState::Ptr>;
|
||||
|
||||
explicit network(program::ptr program, stream::ptr stream, bool is_internal = false, bool is_primary_stream = true);
|
||||
network(engine& engine,
|
||||
const topology& topo,
|
||||
@@ -194,6 +206,12 @@ public:
|
||||
return *_memory_pool;
|
||||
}
|
||||
|
||||
/// Assigns memory state locations
|
||||
void assign_variables_memories(variables_states_map &&variables_memories);
|
||||
|
||||
/// Returns memory state @p variable_id of stateful network
|
||||
VariableState& get_variable_memory(const std::string &variable_id);
|
||||
|
||||
private:
|
||||
using output_chains_map = std::map<primitive_id, std::vector<std::shared_ptr<primitive_inst>>>;
|
||||
uint32_t net_id = 0;
|
||||
@@ -209,6 +227,8 @@ private:
|
||||
std::vector<std::shared_ptr<primitive_inst>> _outputs;
|
||||
std::list<std::shared_ptr<primitive_inst>> _exec_order;
|
||||
std::list<std::shared_ptr<primitive_inst>> _data_outputs;
|
||||
variables_states_map _variables_states;
|
||||
std::vector<std::shared_ptr<primitive_inst>> _variable_state_primitives;
|
||||
|
||||
std::unordered_map<primitive_id, event::ptr> _events;
|
||||
output_chains_map _output_chains;
|
||||
|
||||
@@ -87,6 +87,27 @@ inline cldnn::data_types DataTypeFromPrecision(ngraph::element::Type t) {
|
||||
}
|
||||
}
|
||||
|
||||
inline InferenceEngine::Precision PrecisionFromDataType(cldnn::data_types dt) {
|
||||
switch (dt) {
|
||||
case cldnn::data_types::bin:
|
||||
return InferenceEngine::Precision::ePrecision::BIN;
|
||||
case cldnn::data_types::u8:
|
||||
return InferenceEngine::Precision::ePrecision::U8;
|
||||
case cldnn::data_types::i8:
|
||||
return InferenceEngine::Precision::ePrecision::I8;
|
||||
case cldnn::data_types::f16:
|
||||
return InferenceEngine::Precision::ePrecision::FP16;
|
||||
case cldnn::data_types::f32:
|
||||
return InferenceEngine::Precision::ePrecision::FP32;
|
||||
case cldnn::data_types::i32:
|
||||
return InferenceEngine::Precision::ePrecision::I32;
|
||||
case cldnn::data_types::i64:
|
||||
return InferenceEngine::Precision::ePrecision::I64;
|
||||
default:
|
||||
IE_THROW(ParameterMismatch) << "The plugin does not support " << cldnn::data_type_traits::name(dt) << " data type";
|
||||
}
|
||||
}
|
||||
|
||||
inline cldnn::format FormatFromLayout(InferenceEngine::Layout l) {
|
||||
switch (l) {
|
||||
// TODO: change 6d case once new layout added in IE
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "ie_blob.h"
|
||||
#include "cpp/ie_cnn_network.h"
|
||||
|
||||
@@ -38,6 +39,7 @@ public:
|
||||
POSTPROC = 4
|
||||
};
|
||||
typedef std::shared_ptr<Graph> Ptr;
|
||||
using variable_states_map = std::map<std::string, std::vector<cldnn::network::VariableState::Ptr>>;
|
||||
|
||||
Graph(InferenceEngine::CNNNetwork& network, InferenceEngine::gpu::ClContext::Ptr context, Config config, uint16_t stream_id = 0);
|
||||
explicit Graph(std::shared_ptr<Graph> graph, uint16_t stream_id = 0);
|
||||
@@ -55,6 +57,7 @@ public:
|
||||
const std::map<std::string, cldnn::layout>& GetInputLayouts() const { return m_program->GetInputLayouts(); }
|
||||
const InferenceEngine::InputsDataMap GetNetworkInputs() const { return m_program->GetNetworkInputs(); }
|
||||
const InferenceEngine::OutputsDataMap GetNetworkOutputs() const { return m_program->GetNetworkOutputs(); }
|
||||
variable_states_map AllocateVariablesMemories();
|
||||
std::map<std::string, std::pair<int64_t, int64_t>> GetInputDynBatchDims() { return m_program->m_input_batch_dim; }
|
||||
std::map<std::string, int64_t> GetOutputDynBatchDims() { return m_program->m_output_batch_dim; }
|
||||
size_t GetNetworksCount() const { return m_networks.size(); }
|
||||
|
||||
@@ -48,6 +48,7 @@ public:
|
||||
void SetBlobs(const std::string& name, const std::vector<InferenceEngine::Blob::Ptr> &data) override;
|
||||
|
||||
void SetBatch(int batch = -1) override;
|
||||
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> QueryState() override;
|
||||
void SetGraph(std::shared_ptr<Graph> graph);
|
||||
void EnableProfiling() { m_useProfiling = true; }
|
||||
void EnableStreams() { m_useStreams = true; }
|
||||
@@ -106,6 +107,7 @@ private:
|
||||
|
||||
std::map<cldnn::primitive_id, cldnn::network_output> internal_outputs;
|
||||
std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> internal_outputs_dynamic;
|
||||
Graph::variable_states_map variables_states_;
|
||||
};
|
||||
|
||||
} // namespace intel_gpu
|
||||
|
||||
@@ -209,6 +209,8 @@ REGISTER_FACTORY(v6, ExperimentalDetectronROIFeatureExtractor);
|
||||
REGISTER_FACTORY(v6, ExperimentalDetectronTopKROIs)
|
||||
REGISTER_FACTORY(v6, ExperimentalDetectronGenerateProposalsSingleImage);
|
||||
REGISTER_FACTORY(v6, ExperimentalDetectronDetectionOutput);
|
||||
REGISTER_FACTORY(v6, Assign);
|
||||
REGISTER_FACTORY(v6, ReadValue);
|
||||
|
||||
// ------------------------------ Supported v7 ops ------------------------------ //
|
||||
REGISTER_FACTORY(v7, DFT);
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
|
||||
#include <cpp/ie_cnn_network.h>
|
||||
#include <ngraph/ngraph.hpp>
|
||||
@@ -150,6 +151,12 @@ public:
|
||||
|
||||
std::shared_ptr<cldnn::topology> GetTopology() const { return m_topology; }
|
||||
|
||||
using variables_state_info_map = std::map<std::string, std::set<cldnn::layout>>;
|
||||
|
||||
void AddVariableStateInfo(const std::string& variable_id, const cldnn::layout& layout);
|
||||
|
||||
const variables_state_info_map& GetVariablesStatesInfo() const { return m_variablesStateInfo; }
|
||||
|
||||
private:
|
||||
static factories_map_t factories_map;
|
||||
std::vector<std::shared_ptr<cldnn::program>> m_programs;
|
||||
@@ -159,6 +166,7 @@ private:
|
||||
std::shared_ptr<cldnn::topology> m_topology;
|
||||
InferenceEngine::InputsDataMap m_networkInputs;
|
||||
InferenceEngine::OutputsDataMap m_networkOutputs;
|
||||
variables_state_info_map m_variablesStateInfo;
|
||||
|
||||
bool queryMode;
|
||||
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include <cpp_interfaces/interface/ie_ivariable_state_internal.hpp>
|
||||
#include "intel_gpu/plugin/graph.hpp"
|
||||
#include <functional>
|
||||
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
class VariableState : public InferenceEngine::IVariableStateInternal {
|
||||
public:
|
||||
VariableState(const std::string& name, const std::vector<cldnn::network::VariableState::Ptr>& states,
|
||||
std::shared_ptr<cldnn::engine> engine, int currentBatch);
|
||||
|
||||
/**
|
||||
* @brief Reset internal variable state for relevant infer request, to a value specified as
|
||||
* default for according `ReadValue` node
|
||||
*/
|
||||
void Reset() override;
|
||||
|
||||
/**
|
||||
* @brief Sets the new state for the next inference
|
||||
* @param newState A new state
|
||||
*/
|
||||
void SetState(const InferenceEngine::Blob::Ptr &newState) override;
|
||||
|
||||
/**
|
||||
* @brief Returns the value of the variable state.
|
||||
* @return The value of the variable state
|
||||
*/
|
||||
InferenceEngine::Blob::CPtr GetState() const override;
|
||||
|
||||
protected:
|
||||
InferenceEngine::SizeVector AggregateShape(const cldnn::layout &layout);
|
||||
void IterateOverStates(std::function<void(cldnn::network::VariableState&)> f) const;
|
||||
|
||||
private:
|
||||
int currentBatch_;
|
||||
std::vector<cldnn::network::VariableState::Ptr> states_;
|
||||
InferenceEngine::TensorDesc desc_;
|
||||
std::shared_ptr<cldnn::engine> engine_;
|
||||
};
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
@@ -0,0 +1,43 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "primitive.hpp"
|
||||
#include "intel_gpu/runtime/memory.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Sets an input value to the variable_id variable.
|
||||
struct assign : public primitive_base<assign> {
|
||||
CLDNN_DECLARE_PRIMITIVE(assign)
|
||||
|
||||
/// @brief Constructs Assign primitive.
|
||||
/// @param id This primitive id
|
||||
/// @param inputs Input parameters ids
|
||||
/// @param variable_id Variable id
|
||||
/// @param output_layout Memory layout
|
||||
assign(const primitive_id &id,
|
||||
const std::vector<primitive_id>& inputs,
|
||||
const std::string& variable_id,
|
||||
const layout& output_layout)
|
||||
: primitive_base(id, inputs, "", {}, optional_data_type{output_layout.data_type}),
|
||||
variable_id{variable_id},
|
||||
output_layout{output_layout} {}
|
||||
|
||||
std::string variable_id;
|
||||
layout output_layout;
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
||||
@@ -0,0 +1,43 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "primitive.hpp"
|
||||
#include "intel_gpu/runtime/memory.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
/// @addtogroup cpp_api C++ API
|
||||
/// @{
|
||||
/// @addtogroup cpp_topology Network Topology
|
||||
/// @{
|
||||
/// @addtogroup cpp_primitives Primitives
|
||||
/// @{
|
||||
|
||||
/// @brief Returns value of the variable_id variable.
|
||||
struct read_value : public primitive_base<read_value> {
|
||||
CLDNN_DECLARE_PRIMITIVE(read_value)
|
||||
|
||||
/// @brief Constructs ReadValue primitive.
|
||||
/// @param id This primitive id
|
||||
/// @param inputs Input parameters ids
|
||||
/// @param variable_id Variable id
|
||||
/// @param output_layout Memory layout
|
||||
read_value(const primitive_id& id,
|
||||
const std::vector<primitive_id>& inputs,
|
||||
const std::string& variable_id,
|
||||
const layout& output_layout)
|
||||
: primitive_base(id, inputs, "", {}, optional_data_type{output_layout.data_type}),
|
||||
variable_id{variable_id},
|
||||
output_layout{output_layout} {}
|
||||
|
||||
std::string variable_id;
|
||||
layout output_layout;
|
||||
};
|
||||
/// @}
|
||||
/// @}
|
||||
/// @}
|
||||
} // namespace cldnn
|
||||
38
src/plugins/intel_gpu/src/graph/assign.cpp
Normal file
38
src/plugins/intel_gpu/src/graph/assign.cpp
Normal file
@@ -0,0 +1,38 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <assign_inst.h>
|
||||
#include "primitive_type_base.h"
|
||||
#include <sstream>
|
||||
#include <json_object.h>
|
||||
#include <data_inst.h>
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
primitive_type_id assign::type_id() {
|
||||
static primitive_type_base<assign> instance;
|
||||
return &instance;
|
||||
}
|
||||
|
||||
assign_inst::typed_primitive_inst(network& network, const assign_node& node) :
|
||||
parent{network, node, false},
|
||||
memory_state::variable{node.get_primitive()->variable_id} {
|
||||
}
|
||||
|
||||
layout assign_inst::calc_output_layout(const assign_node& node) {
|
||||
return node.get_primitive()->output_layout;
|
||||
}
|
||||
|
||||
std::string assign_inst::to_string(const assign_node& node) {
|
||||
auto node_info = node.desc_to_json();
|
||||
json_composite assign_info;
|
||||
assign_info.add("input id", node.input().id());
|
||||
assign_info.add("variable id", node.get_primitive()->variable_id);
|
||||
node_info->add("assign info", assign_info);
|
||||
std::stringstream primitive_description;
|
||||
node_info->dump(primitive_description);
|
||||
return primitive_description.str();
|
||||
}
|
||||
|
||||
} // namespace cldnn
|
||||
53
src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp
Normal file
53
src/plugins/intel_gpu/src/graph/impls/cpu/assign.cpp
Normal file
@@ -0,0 +1,53 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "assign_inst.h"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "register.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace cpu {
|
||||
|
||||
struct assign_impl : public typed_primitive_impl<assign> {
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<assign_impl>(*this);
|
||||
}
|
||||
|
||||
event::ptr execute_impl(const std::vector<event::ptr>& events, assign_inst& instance) override {
|
||||
const auto arg = instance.argument;
|
||||
const auto variable_id = arg.variable_id;
|
||||
auto& variable = instance.get_network().get_variable_memory(variable_id);
|
||||
|
||||
if (variable.memory->get_layout() != arg.output_layout) {
|
||||
CLDNN_ERROR_MESSAGE(instance.id(), "Layout mismatch");
|
||||
}
|
||||
|
||||
auto& stream = instance.get_network().get_stream();
|
||||
|
||||
for (auto e : events) {
|
||||
e->wait();
|
||||
}
|
||||
|
||||
const auto ev_set_memory = variable.memory->copy_from(stream, instance.input_memory());
|
||||
variable.is_set = true;
|
||||
|
||||
return ev_set_memory;
|
||||
}
|
||||
|
||||
void init_kernels() override {}
|
||||
|
||||
public:
|
||||
static primitive_impl* create(assign_node const& arg) { return new assign_impl{}; }
|
||||
};
|
||||
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_assign_impl::attach_assign_impl() {
|
||||
implementation_map<assign>::add(impl_types::cpu, assign_impl::create, {});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace cpu
|
||||
} // namespace cldnn
|
||||
53
src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp
Normal file
53
src/plugins/intel_gpu/src/graph/impls/cpu/read_value.cpp
Normal file
@@ -0,0 +1,53 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "read_value_inst.h"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "register.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace cpu {
|
||||
|
||||
struct read_value_impl : public typed_primitive_impl<read_value> {
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<read_value_impl>(*this);
|
||||
}
|
||||
|
||||
event::ptr execute_impl(const std::vector<event::ptr>& events, read_value_inst& instance) override {
|
||||
for (auto e : events) {
|
||||
e->wait();
|
||||
}
|
||||
const auto arg = instance.argument;
|
||||
const auto variable_id = arg.variable_id;
|
||||
|
||||
auto& variable = instance.get_network().get_variable_memory(variable_id);
|
||||
|
||||
if (variable.memory->get_layout() != arg.output_layout) {
|
||||
CLDNN_ERROR_MESSAGE(instance.id(), "Layout mismatch");
|
||||
}
|
||||
|
||||
if (!variable.is_set) {
|
||||
auto &stream = instance.get_network().get_stream();
|
||||
const auto ev_set_output = instance.output_memory().fill(stream, 0);
|
||||
return ev_set_output;
|
||||
}
|
||||
|
||||
return instance.get_network().get_stream().create_user_event(true);
|
||||
}
|
||||
|
||||
void init_kernels() override {}
|
||||
|
||||
public:
|
||||
static primitive_impl* create(read_value_node const& arg) { return new read_value_impl{}; }
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_read_value_impl::attach_read_value_impl() {
|
||||
implementation_map<read_value>::add(impl_types::cpu, read_value_impl::create, {});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace cpu
|
||||
} // namespace cldnn
|
||||
@@ -11,8 +11,10 @@ namespace cpu {
|
||||
static detail::attach_##prim##_impl attach_##prim
|
||||
|
||||
void register_implementations() {
|
||||
REGISTER_CPU(assign);
|
||||
REGISTER_CPU(detection_output);
|
||||
REGISTER_CPU(proposal);
|
||||
REGISTER_CPU(read_value);
|
||||
REGISTER_CPU(non_max_suppression);
|
||||
}
|
||||
|
||||
|
||||
@@ -4,8 +4,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "intel_gpu/primitives/assign.hpp"
|
||||
#include "intel_gpu/primitives/detection_output.hpp"
|
||||
#include "intel_gpu/primitives/proposal.hpp"
|
||||
#include "intel_gpu/primitives/read_value.hpp"
|
||||
#include "intel_gpu/primitives/non_max_suppression.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
@@ -20,7 +22,9 @@ namespace detail {
|
||||
attach_##prim##_impl(); \
|
||||
}
|
||||
|
||||
REGISTER_CPU(assign);
|
||||
REGISTER_CPU(proposal);
|
||||
REGISTER_CPU(read_value);
|
||||
REGISTER_CPU(non_max_suppression);
|
||||
REGISTER_CPU(detection_output);
|
||||
|
||||
|
||||
52
src/plugins/intel_gpu/src/graph/include/assign_inst.h
Normal file
52
src/plugins/intel_gpu/src/graph/include/assign_inst.h
Normal file
@@ -0,0 +1,52 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "intel_gpu/primitives/assign.hpp"
|
||||
#include "primitive_inst.h"
|
||||
#include "intel_gpu/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace memory_state {
|
||||
|
||||
class variable {
|
||||
public:
|
||||
explicit variable(const std::string& variable_id) : variable_id_ {variable_id} {}
|
||||
|
||||
const std::string& variable_id() const { return variable_id_; }
|
||||
|
||||
private:
|
||||
std::string variable_id_;
|
||||
};
|
||||
|
||||
} // namespace memory_state
|
||||
|
||||
template<>
|
||||
struct typed_program_node<assign> : public typed_program_node_base<assign> {
|
||||
using parent = typed_program_node_base<assign>;
|
||||
public:
|
||||
using parent::parent;
|
||||
|
||||
const program_node& input(std::size_t index = 0) const { return get_dependency(index); }
|
||||
};
|
||||
|
||||
using assign_node = typed_program_node<assign>;
|
||||
|
||||
template<>
|
||||
class typed_primitive_inst<assign> : public typed_primitive_inst_base<assign>, public memory_state::variable {
|
||||
using parent = typed_primitive_inst_base<assign>;
|
||||
|
||||
public:
|
||||
static layout calc_output_layout(const assign_node& node);
|
||||
|
||||
static std::string to_string(const assign_node& node);
|
||||
|
||||
public:
|
||||
typed_primitive_inst(network& network, const assign_node& desc);
|
||||
};
|
||||
|
||||
using assign_inst = typed_primitive_inst<assign>;
|
||||
|
||||
} // namespace cldnn
|
||||
39
src/plugins/intel_gpu/src/graph/include/read_value_inst.h
Normal file
39
src/plugins/intel_gpu/src/graph/include/read_value_inst.h
Normal file
@@ -0,0 +1,39 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "assign_inst.h"
|
||||
#include "intel_gpu/primitives/read_value.hpp"
|
||||
#include "primitive_inst.h"
|
||||
#include "intel_gpu/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
template<>
|
||||
struct typed_program_node<read_value> : public typed_program_node_base<read_value> {
|
||||
using parent = typed_program_node_base<read_value>;
|
||||
public:
|
||||
using parent::parent;
|
||||
|
||||
const program_node& input(std::size_t index = 0) const { return get_dependency(index); }
|
||||
};
|
||||
|
||||
using read_value_node = typed_program_node<read_value>;
|
||||
|
||||
template<>
|
||||
class typed_primitive_inst<read_value> : public typed_primitive_inst_base<read_value>, public memory_state::variable {
|
||||
using parent = typed_primitive_inst_base<read_value>;
|
||||
|
||||
public:
|
||||
static layout calc_output_layout(const read_value_node& node);
|
||||
|
||||
static std::string to_string(const read_value_node& node);
|
||||
|
||||
typed_primitive_inst(network& network, const read_value_node& desc);
|
||||
};
|
||||
|
||||
using read_value_inst = typed_primitive_inst<read_value>;
|
||||
|
||||
} // namespace cldnn
|
||||
@@ -17,6 +17,8 @@
|
||||
|
||||
#include "intel_gpu/graph/program.hpp"
|
||||
#include "intel_gpu/graph/network.hpp"
|
||||
#include "assign_inst.h"
|
||||
#include "read_value_inst.h"
|
||||
|
||||
#include "to_string_utils.h"
|
||||
#include "primitive_inst.h"
|
||||
@@ -876,6 +878,8 @@ void network::allocate_primitive_instance(program_node const& node) {
|
||||
if (node.is_type<data>())
|
||||
_data_outputs.push_back(inst);
|
||||
}
|
||||
if (std::dynamic_pointer_cast<assign_inst>(inst) || std::dynamic_pointer_cast<read_value_inst>(inst))
|
||||
_variable_state_primitives.push_back(inst);
|
||||
if (node.is_constant())
|
||||
transfer_memory_to_device(inst, node);
|
||||
}
|
||||
@@ -915,4 +919,26 @@ memory::ptr network::get_memory_from_pool(const layout& layout,
|
||||
return _memory_pool->get_memory(layout, id, get_id(), dependencies, type, reusable);
|
||||
return _memory_pool->get_memory(layout, type);
|
||||
}
|
||||
|
||||
network::VariableState& network::get_variable_memory(const std::string &variable_id) {
|
||||
auto it = _variables_states.find(variable_id);
|
||||
if (it == _variables_states.end()) {
|
||||
CLDNN_ERROR_MESSAGE(variable_id, "Variable not found");
|
||||
}
|
||||
return *it->second;
|
||||
}
|
||||
|
||||
void network::assign_variables_memories(variables_states_map &&variables_memories) {
|
||||
_variables_states = variables_memories;
|
||||
for (auto primitive : _variable_state_primitives) {
|
||||
if (const auto& memory_state_primitive = std::dynamic_pointer_cast<memory_state::variable>(primitive)) {
|
||||
auto it = _variables_states.find(memory_state_primitive->variable_id());
|
||||
if (it != _variables_states.end())
|
||||
primitive->set_output_memory(it->second->memory, false);
|
||||
else
|
||||
CLDNN_ERROR_MESSAGE(memory_state_primitive->variable_id(), "Memory state not found");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace cldnn
|
||||
|
||||
@@ -120,7 +120,7 @@ void primitive_inst::check_memory_to_set(const memory& mem, const layout& layout
|
||||
void primitive_inst::set_output_memory(memory::ptr mem_new, bool check) {
|
||||
auto& eng = _network.get_engine();
|
||||
// skip all the buzz if no action actually required
|
||||
if (eng.is_the_same_buffer(*mem_new, *_output)) {
|
||||
if (_output && eng.is_the_same_buffer(*mem_new, *_output)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -587,7 +587,8 @@ void program::post_optimize_graph(bool is_internal) {
|
||||
|
||||
// mark if the node is constant assuming that all dependencies are marked properly
|
||||
void program::mark_if_constant(program_node& node) {
|
||||
if (node.get_dependencies().empty() || node.is_type<prior_box>()) {
|
||||
if (node.get_dependencies().empty() || node.is_type<prior_box>() ||
|
||||
node.is_type<assign>() || node.is_type<read_value>()) {
|
||||
return;
|
||||
}
|
||||
node.constant = true;
|
||||
|
||||
40
src/plugins/intel_gpu/src/graph/read_value.cpp
Normal file
40
src/plugins/intel_gpu/src/graph/read_value.cpp
Normal file
@@ -0,0 +1,40 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <read_value_inst.h>
|
||||
#include "primitive_type_base.h"
|
||||
#include <sstream>
|
||||
#include <json_object.h>
|
||||
#include <data_inst.h>
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
primitive_type_id read_value::type_id() {
|
||||
static primitive_type_base<read_value> instance;
|
||||
return &instance;
|
||||
}
|
||||
|
||||
read_value_inst::typed_primitive_inst(network& network, const read_value_node& node) :
|
||||
parent(network, node, false),
|
||||
memory_state::variable{node.get_primitive()->variable_id} {
|
||||
}
|
||||
|
||||
layout read_value_inst::calc_output_layout(const read_value_node& node) {
|
||||
return node.get_primitive()->output_layout;
|
||||
}
|
||||
|
||||
std::string read_value_inst::to_string(const read_value_node& node) {
|
||||
auto node_info = node.desc_to_json();
|
||||
|
||||
json_composite read_value_info;
|
||||
read_value_info.add("input id", node.input().id());
|
||||
read_value_info.add("variable id", node.get_primitive()->variable_id);
|
||||
node_info->add("read_value info", read_value_info);
|
||||
|
||||
std::stringstream primitive_description;
|
||||
node_info->dump(primitive_description);
|
||||
return primitive_description.str();
|
||||
}
|
||||
|
||||
} // namespace cldnn
|
||||
@@ -133,6 +133,23 @@ std::shared_ptr<cldnn::network> Graph::BuildNetwork(std::shared_ptr<cldnn::progr
|
||||
return network;
|
||||
}
|
||||
|
||||
Graph::variable_states_map Graph::AllocateVariablesMemories() {
|
||||
Graph::variable_states_map states {};
|
||||
const auto& memStatesInfo = m_program->GetVariablesStatesInfo();
|
||||
for (const auto& memStateInfo : memStatesInfo) {
|
||||
std::vector<cldnn::layout> orderedLayouts {memStateInfo.second.begin(), memStateInfo.second.end()};
|
||||
std::sort(orderedLayouts.begin(), orderedLayouts.end(), [](cldnn::layout& first, cldnn::layout& second) {
|
||||
return first.size.batch[0] < second.size.batch[0];
|
||||
});
|
||||
std::vector<cldnn::network::VariableState::Ptr> memoryStates;
|
||||
memoryStates.reserve(orderedLayouts.size());
|
||||
for (const auto& layout : orderedLayouts)
|
||||
memoryStates.push_back(std::make_shared<cldnn::network::VariableState>(GetEngine()->allocate_memory(layout, false)));
|
||||
states.insert({memStateInfo.first, memoryStates });
|
||||
}
|
||||
return states;
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfoByPrimitivesInfo(std::vector<cldnn::primitive_info>& primitives_info,
|
||||
bool filter_const_primitives) {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Graph::GetExecGraphInfoByPrimitivesInfo");
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "intel_gpu/plugin/remote_context.hpp"
|
||||
#include "intel_gpu/plugin/compiled_model.hpp"
|
||||
#include "intel_gpu/plugin/itt.hpp"
|
||||
#include "intel_gpu/plugin/variable_state.hpp"
|
||||
#include "intel_gpu/runtime/debug_configuration.hpp"
|
||||
#include "openvino/core/preprocess/input_tensor_info.hpp"
|
||||
#include <ie_algorithm.hpp>
|
||||
@@ -532,6 +533,7 @@ void InferRequest::SetGraph(std::shared_ptr<Graph> graph) {
|
||||
} else {
|
||||
allocate_inputs();
|
||||
allocate_outputs();
|
||||
variables_states_ = m_graph->AllocateVariablesMemories();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -605,6 +607,7 @@ void InferRequest::SetBatch(int new_batch) {
|
||||
|
||||
batchOutputs[no.first] = out_buf;
|
||||
}
|
||||
variables_states_ = m_graph->AllocateVariablesMemories();
|
||||
|
||||
m_curBatch = new_batch;
|
||||
}
|
||||
@@ -744,6 +747,14 @@ void InferRequest::enqueue() {
|
||||
}
|
||||
}
|
||||
|
||||
cldnn::network::variables_states_map variables_states;
|
||||
for (auto &variable_state_pair : variables_states_)
|
||||
variables_states.insert({ variable_state_pair.first, variable_state_pair.second[0] });
|
||||
|
||||
auto networkPtr = m_graph->GetNetwork();
|
||||
|
||||
networkPtr->assign_variables_memories(std::move(variables_states));
|
||||
|
||||
for (auto& item : _outputs) {
|
||||
std::string outputName = item.first;
|
||||
Blob::Ptr& outputBlob = item.second;
|
||||
@@ -751,7 +762,7 @@ void InferRequest::enqueue() {
|
||||
}
|
||||
|
||||
internal_outputs.clear();
|
||||
internal_outputs = m_graph->GetNetwork()->execute(dependencies);
|
||||
internal_outputs = networkPtr->execute(dependencies);
|
||||
|
||||
// If dump layers path is set, only runs first inference.
|
||||
GPU_DEBUG_GET_INSTANCE(debug_config);
|
||||
@@ -826,7 +837,16 @@ void InferRequest::enqueue_dynamic() {
|
||||
inputLayout.size.batch[0] = mask;
|
||||
copy_input_data(m_graph->GetNetwork(nb), inputName, inputLayout, *inputBlob, &batchInputs[inputName][nb]);
|
||||
}
|
||||
internal_outputs_dynamic[nb] = m_graph->GetNetwork(nb)->execute();
|
||||
|
||||
cldnn::network::variables_states_map variables_states;
|
||||
for (auto &variable_state_pair : variables_states_)
|
||||
variables_states.insert({ variable_state_pair.first, variable_state_pair.second[nb] });
|
||||
|
||||
auto networkPtr = m_graph->GetNetwork(nb);
|
||||
|
||||
networkPtr->assign_variables_memories(std::move(variables_states));
|
||||
|
||||
internal_outputs_dynamic[nb] = networkPtr->execute();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1248,6 +1268,14 @@ InferenceEngine::Blob::Ptr InferRequest::create_device_blob(const InferenceEngin
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> InferRequest::QueryState() {
|
||||
std::vector<std::shared_ptr<InferenceEngine::IVariableStateInternal>> ret{};
|
||||
ret.reserve(variables_states_.size());
|
||||
for (const auto& pair : variables_states_)
|
||||
ret.push_back(std::make_shared<VariableState>(pair.first, pair.second, m_graph->GetEngine(), m_curBatch));
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
|
||||
58
src/plugins/intel_gpu/src/plugin/ops/variable.cpp
Normal file
58
src/plugins/intel_gpu/src/plugin/ops/variable.cpp
Normal file
@@ -0,0 +1,58 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "intel_gpu/plugin/program.hpp"
|
||||
#include "intel_gpu/plugin/common_utils.hpp"
|
||||
#include "ngraph/op/assign.hpp"
|
||||
#include "ngraph/op/read_value.hpp"
|
||||
#include "intel_gpu/primitives/assign.hpp"
|
||||
#include "intel_gpu/primitives/read_value.hpp"
|
||||
|
||||
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
namespace {
|
||||
template<typename T_PRIMITIVE>
|
||||
void CreateVariableAccessPrimitive(Program &p, const std::shared_ptr<ngraph::op::Op> &op,
|
||||
const std::string &variable_id) {
|
||||
p.ValidateInputs(op, {1});
|
||||
|
||||
const auto output_data_type = DataTypeFromPrecision(op->get_output_element_type(0));
|
||||
const auto op_output_shape = op->get_output_shape(0);
|
||||
const auto output_format = DefaultFormatForDims(op_output_shape.size());
|
||||
const auto output_shape = tensor_from_dims(op_output_shape);
|
||||
|
||||
const auto variable_layout = cldnn::layout{output_data_type,
|
||||
output_format,
|
||||
output_shape};
|
||||
|
||||
auto input_primitives = p.GetInputPrimitiveIDs(op);
|
||||
p.AddVariableStateInfo(variable_id, variable_layout);
|
||||
const auto prim = T_PRIMITIVE{layer_type_name_ID(op),
|
||||
input_primitives,
|
||||
variable_id,
|
||||
variable_layout};
|
||||
|
||||
p.AddPrimitive(prim);
|
||||
p.AddPrimitiveToProfiler(op);
|
||||
}
|
||||
|
||||
// ReadValue-6 maps onto the cldnn read_value primitive keyed by its variable id.
void CreateReadValueOp(Program& p, const std::shared_ptr<ngraph::op::v6::ReadValue>& op) {
    CreateVariableAccessPrimitive<cldnn::read_value>(p, op, op->get_variable_id());
}
|
||||
|
||||
// Assign-6 maps onto the cldnn assign primitive keyed by its variable id.
void CreateAssignOp(Program& p, const std::shared_ptr<ngraph::op::v6::Assign>& op) {
    CreateVariableAccessPrimitive<cldnn::assign>(p, op, op->get_variable_id());
}
|
||||
|
||||
} // namespace
|
||||
|
||||
REGISTER_FACTORY_IMPL(v6, Assign);
|
||||
REGISTER_FACTORY_IMPL(v6, ReadValue);
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
@@ -488,6 +488,14 @@ void Program::InitProfileInfo(const std::string& layerName,
|
||||
perfEntry.parentPrimitive = parentId;
|
||||
}
|
||||
|
||||
void Program::AddVariableStateInfo(const std::string& variable_id, const cldnn::layout& layout) {
    // Record a layout candidate for the given variable. operator[] creates an
    // empty layout set on first use, so the find/insert-else branches of the
    // original collapse into a single insert with identical effect.
    m_variablesStateInfo[variable_id].insert(layout);
}
|
||||
|
||||
// TODO: Does it make sense to add such method to ngraph core?
|
||||
bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node) {
|
||||
std::set<std::shared_ptr<ngraph::Node>> nodes_processed = {};
|
||||
|
||||
74
src/plugins/intel_gpu/src/plugin/variable_state.cpp
Normal file
74
src/plugins/intel_gpu/src/plugin/variable_state.cpp
Normal file
@@ -0,0 +1,74 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
#include <intel_gpu/plugin/variable_state.hpp>

#include <blob_factory.hpp>

#include <stdexcept>
|
||||
|
||||
namespace ov {
|
||||
namespace runtime {
|
||||
namespace intel_gpu {
|
||||
|
||||
VariableState::VariableState(const std::string &name,
|
||||
const std::vector<cldnn::network::VariableState::Ptr> &states,
|
||||
std::shared_ptr<cldnn::engine> engine, int currentBatch) :
|
||||
InferenceEngine::IVariableStateInternal {name},
|
||||
currentBatch_ {currentBatch},
|
||||
states_ {states},
|
||||
desc_{
|
||||
PrecisionFromDataType(states.front()->memory->get_layout().data_type),
|
||||
AggregateShape(states.front()->memory->get_layout()),
|
||||
InferenceEngine::Layout::ANY
|
||||
},
|
||||
engine_ {std::move(engine)} {
|
||||
}
|
||||
|
||||
void VariableState::Reset() {
    // Clear the "set" flag on every state participating in the current batch;
    // the lambda needs no captures, the original's `this` capture was unused.
    IterateOverStates([](cldnn::network::VariableState& state) {
        state.is_set = false;
    });
}
|
||||
|
||||
// Copies the user-provided host blob into every device-side state buffer that
// participates in the current batch configuration, marking each as set.
// @param newState host blob holding the concatenated state data.
// @throws std::runtime_error if the blob is not a MemoryBlob.
void VariableState::SetState(const InferenceEngine::Blob::Ptr &newState) {
    // Fix: the original dereferenced the dynamic_pointer_cast result without a
    // null check, crashing on a non-MemoryBlob input; fail explicitly instead.
    auto memory_blob = std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(newState);
    if (!memory_blob)
        throw std::runtime_error("VariableState::SetState expects a MemoryBlob");
    auto lock = memory_blob->rmap();
    auto data = lock.as<char*>();
    IterateOverStates([&data, this](cldnn::network::VariableState &state) {
        // Each sub-state consumes its own chunk of the flat host buffer.
        state.memory->copy_from(engine_->get_program_stream(), data);
        data += state.memory->get_layout().bytes_count();
        state.is_set = true;
    });
    // Ensure the copies complete before later enqueued work reads the states.
    engine_->get_program_stream().enqueue_barrier();
}
|
||||
|
||||
InferenceEngine::Blob::CPtr VariableState::GetState() const {
    // Allocate a host blob matching desc_ and fill it by concatenating the
    // contents of each participating device-side state buffer.
    auto blob = make_blob_with_precision(desc_, InferenceEngine::CreateDefaultAllocator());
    blob->allocate();
    auto write_lock = std::dynamic_pointer_cast<InferenceEngine::MemoryBlob>(blob)->wmap();
    auto dst = write_lock.as<char*>();
    IterateOverStates([&dst, this](cldnn::network::VariableState& state) {
        const cldnn::mem_lock<char, cldnn::mem_lock_type::read> src{state.memory, engine_->get_program_stream()};
        std::copy(src.begin(), src.end(), dst);
        dst += state.memory->get_layout().bytes_count();
    });
    return blob;
}
|
||||
|
||||
InferenceEngine::SizeVector VariableState::AggregateShape(const cldnn::layout &layout) {
    // Convert the cldnn layout dims into an IE shape; when dynamic batch is
    // active (currentBatch_ != -1) the effective batch overrides dim 0.
    const auto& dims = layout.get_dims();
    InferenceEngine::SizeVector shape(dims.begin(), dims.end());
    if (currentBatch_ != -1)
        shape.front() = currentBatch_;
    return shape;
}
|
||||
|
||||
void VariableState::IterateOverStates(std::function<void(cldnn::network::VariableState&)> f) const {
|
||||
for (int i = 0; i < states_.size(); i++) {
|
||||
auto batch = 1 << i;
|
||||
if (batch & currentBatch_)
|
||||
f(*states_[i]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace intel_gpu
|
||||
} // namespace runtime
|
||||
} // namespace ov
|
||||
|
||||
192
src/plugins/intel_gpu/tests/test_cases/variable.cpp
Normal file
192
src/plugins/intel_gpu/tests/test_cases/variable.cpp
Normal file
@@ -0,0 +1,192 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "test_utils.h"
|
||||
|
||||
#include <intel_gpu/primitives/input_layout.hpp>
|
||||
#include <intel_gpu/primitives/eltwise.hpp>
|
||||
#include <intel_gpu/primitives/assign.hpp>
|
||||
#include <intel_gpu/primitives/read_value.hpp>
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
template<typename T>
|
||||
struct VariableParams {
|
||||
cldnn::layout layout;
|
||||
std::vector<T> values;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct variable_test : public ::testing::TestWithParam<VariableParams<T>> {
|
||||
void test() {
|
||||
const VariableParams<T> param = testing::TestWithParam<VariableParams<T>>::GetParam();
|
||||
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
const auto variable_layout = param.layout;
|
||||
const auto input_data = engine.allocate_memory(variable_layout);
|
||||
set_values(input_data, param.values);
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", input_data->get_layout()));
|
||||
topology.add(read_value{"read_value", {"input"}, "v0", variable_layout});
|
||||
topology.add(eltwise{"sum", {"input", "read_value"}, eltwise_mode::sum, {}, variable_layout.data_type});
|
||||
topology.add(assign{"assign", {"sum"}, "v0", variable_layout});
|
||||
|
||||
network network(engine, topology, build_options{}, false);
|
||||
network.assign_variables_memories({ { "v0", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) } });
|
||||
network.set_input_data("input", input_data);
|
||||
|
||||
constexpr size_t number_of_inferences = 5;
|
||||
for (size_t inference = 1; inference <= number_of_inferences; ++inference) {
|
||||
const auto outputs = network.execute();
|
||||
const auto output = outputs.at("assign").get_memory();
|
||||
const cldnn::mem_lock<T> output_ptr(output, get_test_stream());
|
||||
const auto output_count = output_ptr.size();
|
||||
ASSERT_EQ(output_count, param.values.size()) << "inference " << inference;
|
||||
|
||||
for (size_t i = 0; i < output_count; ++i) {
|
||||
ASSERT_EQ(output_ptr[i], inference * param.values[i]) << "inference " << inference;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
using variable_test_i32 = variable_test<int32_t>;
|
||||
using variable_test_i64 = variable_test<int64_t>;
|
||||
using variable_test_f32 = variable_test<float>;
|
||||
|
||||
TEST_P(variable_test_i32, variable_i32) {
|
||||
ASSERT_NO_FATAL_FAILURE(test());
|
||||
}
|
||||
|
||||
TEST_P(variable_test_i64, variable_i64) {
|
||||
ASSERT_NO_FATAL_FAILURE(test());
|
||||
}
|
||||
|
||||
TEST_P(variable_test_f32, variable_f32) {
|
||||
ASSERT_NO_FATAL_FAILURE(test());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
basic,
|
||||
variable_test_i32,
|
||||
::testing::Values(
|
||||
VariableParams<int32_t>{ {data_types::i32, format::bfyx, tensor{1}}, {333666} },
|
||||
VariableParams<int32_t>{ {data_types::i32, format::bfyx, tensor{1, 1, 1, 3}}, {444, 555, 666} },
|
||||
VariableParams<int32_t>{ {data_types::i32, format::bfzyx, tensor{1, 2, 3, 2}},
|
||||
{1, 2, 3, 4, 5, 6, 6, 5, 4, 3, 2, 1} }
|
||||
)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
basic,
|
||||
variable_test_i64,
|
||||
::testing::Values(
|
||||
VariableParams<int64_t>{ {data_types::i64, format::bfyx, tensor{1}}, {333666L} },
|
||||
VariableParams<int64_t>{ {data_types::i64, format::bfyx, tensor{1, 1, 1, 3}}, {444L, 555L, 666L} },
|
||||
VariableParams<int64_t>{ {data_types::i64, format::bfzyx, tensor{1, 2, 3, 2}},
|
||||
{1L, 2L, 3L, 4L, 5L, 6L, 6L, 5L, 4L, 3L, 2L, 1L} }
|
||||
)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
basic,
|
||||
variable_test_f32,
|
||||
::testing::Values(
|
||||
VariableParams<float>{ {data_types::f32, format::bfyx, tensor{1}}, {333666.f} },
|
||||
VariableParams<float>{ {data_types::f32, format::bfyx, tensor{1, 1, 1, 3}}, {44.4f, 55.5f, 66.6f} },
|
||||
VariableParams<float>{ {data_types::f32, format::bfzyx, tensor{1, 2, 3, 2}},
|
||||
{1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 6.f, 5.f, 4.f, 3.f, 2.f, 1.f} }
|
||||
)
|
||||
);
|
||||
|
||||
|
||||
// Verifies that executing an assign whose layout disagrees with the
// variable's registered layout raises a "Layout mismatch" error.
TEST(variable_test_common, exception_on_wrong_layout) {

    auto& engine = get_test_engine();

    const layout variable_layout{data_types::i32, format::bfyx, tensor{1}};
    const auto input_data = engine.allocate_memory(variable_layout);
    set_values(input_data, {333666});

    auto wrong_layout = variable_layout;
    wrong_layout.data_type = data_types::f32;
    const auto wrong_input_data = engine.allocate_memory(wrong_layout);
    // Fix: the original wrote {333.666} (doubles) into input_data — the i32
    // buffer — and left wrong_input_data uninitialized. The f32 values belong
    // to the wrongly-typed input.
    set_values(wrong_input_data, {333.666f});

    topology topology;
    topology.add(input_layout("input", input_data->get_layout()));
    topology.add(read_value{"read_value", {"input"}, "v0", variable_layout});
    topology.add(input_layout("wrong_input", wrong_input_data->get_layout()));
    topology.add(assign{"assign", {"wrong_input"}, "v0", wrong_layout});

    network network(engine, topology, build_options{}, false);
    network.assign_variables_memories({ { "v0", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) } });
    network.set_input_data("input", input_data);
    network.set_input_data("wrong_input", wrong_input_data);

    bool layout_mismatch_exception = false;
    try {
        network.execute();
    } catch(std::exception& exc) {
        const std::string error = exc.what();
        layout_mismatch_exception = error.find("Layout mismatch") != std::string::npos;
    }
    ASSERT_TRUE(layout_mismatch_exception);
}
|
||||
|
||||
// Verifies that variable contents written by assigns on one execution are
// visible to read_values on the next execution of the same network.
TEST(variable_test_common, variables_are_preserved_across_inferences) {

    auto& engine = get_test_engine();

    const layout variable_layout{data_types::i32, format::bfyx, tensor{1}};

    const auto input_1 = engine.allocate_memory(variable_layout);
    constexpr auto value_1 = 333;
    set_values(input_1, {value_1});

    const auto input_2 = engine.allocate_memory(variable_layout);
    constexpr auto value_2 = 666;
    set_values(input_2, {value_2});

    // The dummies only feed the read_value primitives; their contents must
    // never reach the result because the variables are filled by the assigns.
    const auto dummy1 = engine.allocate_memory(variable_layout);
    set_values(dummy1, {11});
    const auto dummy2 = engine.allocate_memory(variable_layout);
    set_values(dummy2, {22});

    topology topology;
    topology.add(input_layout("input_1", input_1->get_layout()));
    topology.add(assign{"assign_1", {"input_1"}, "v1", variable_layout});

    topology.add(input_layout("input_2", input_2->get_layout()));
    topology.add(assign{"assign_2", {"input_2"}, "v2", variable_layout});

    topology.add(data("dummy1", dummy1));
    topology.add(read_value{"read_value_1", {"dummy1"}, "v1", variable_layout});
    topology.add(read_value{"read_value_2", {"dummy1"}, "v2", variable_layout});

    topology.add(eltwise{"sum", {"read_value_1", "read_value_2"}, eltwise_mode::sum, {}, variable_layout.data_type});
    topology.add(assign{"assign_result", {"sum"}, "v_result", variable_layout});

    topology.add(data("dummy2", dummy2));
    topology.add(read_value{"read_result", {"dummy2"}, "v_result", variable_layout});

    network network{engine, topology, build_options{}, true};
    network.assign_variables_memories({
            { "v1", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) },
            { "v2", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) },
            { "v_result", std::make_shared<network::VariableState>(engine.allocate_memory(variable_layout)) }
    });
    network.set_input_data("input_1", input_1);
    network.set_input_data("input_2", input_2);

    // First run stores the inputs via the assigns; the second run reads the
    // preserved variables back through the read_value chain.
    network.execute();
    const auto outputs = network.execute();
    const auto output = outputs.at("read_result").get_memory();
    const cldnn::mem_lock<int> output_ptr(output, get_test_stream());
    ASSERT_EQ(output_ptr[0], value_1 + value_2);
}
|
||||
175
src/tests/functional/plugin/gpu/behavior/memory_dyn_batch.cpp
Normal file
175
src/tests/functional/plugin/gpu/behavior/memory_dyn_batch.cpp
Normal file
@@ -0,0 +1,175 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "ngraph/opsets/opset8.hpp"
|
||||
#include "ngraph_functions/subgraph_builders.hpp"
|
||||
#include "openvino/runtime/core.hpp"
|
||||
#include "shared_test_classes/base/ov_subgraph.hpp"
|
||||
#include <cpp/ie_cnn_network.h>
|
||||
#include <ie_plugin_config.hpp>
|
||||
#include "functional_test_utils/ov_plugin_cache.hpp"
|
||||
#include "common_test_utils/common_utils.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
using namespace ngraph;
|
||||
using namespace opset8;
|
||||
using namespace ov::test;
|
||||
|
||||
|
||||
using MemoryDynamicBatchParams = std::tuple<
|
||||
ov::PartialShape, // Partial shape for network initialization
|
||||
ov::Shape, // Actual shape to be passed to inference request
|
||||
int, // Iterations number
|
||||
std::string>; // Device name
|
||||
|
||||
class MemoryDynamicBatch : public ::testing::Test,
|
||||
public ::testing::WithParamInterface<MemoryDynamicBatchParams> {
|
||||
public:
|
||||
static std::string getTestCaseName(::testing::TestParamInfo<MemoryDynamicBatchParams> obj) {
|
||||
ov::PartialShape inputPartialShape;
|
||||
ov::Shape inputShape;
|
||||
int iterationsNum;
|
||||
std::string targetDevice;
|
||||
std::tie(inputPartialShape, inputShape, iterationsNum, targetDevice) = obj.param;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "IS=";
|
||||
result << CommonTestUtils::partialShape2str({ inputPartialShape }) << "_";
|
||||
result << "TS=";
|
||||
result << CommonTestUtils::partialShape2str({inputShape});
|
||||
result << ")_";
|
||||
result << "iterationsCount=" << iterationsNum << "_";
|
||||
result << "targetDevice=" << targetDevice;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
void SetUp() override {
|
||||
std::tie(inputPartialShape_, inputShape_, iterationsNum_, deviceName_) = GetParam();
|
||||
model_ = buildModel(precision_, inputPartialShape_);
|
||||
core_ = ov::test::utils::PluginCache::get().core();
|
||||
}
|
||||
|
||||
static std::shared_ptr<ov::Model> buildModel(ElementType precision, const ov::PartialShape& shape) {
|
||||
auto param = builder::makeDynamicParams(precision, { shape });
|
||||
const VariableInfo variable_info { shape, precision, "v0" };
|
||||
auto variable = std::make_shared<Variable>(variable_info);
|
||||
auto read_value = std::make_shared<ReadValue>(param.at(0), variable);
|
||||
auto add = std::make_shared<Add>(read_value, param.at(0));
|
||||
auto assign = std::make_shared<Assign>(add, variable);
|
||||
auto res = std::make_shared<Result>(add);
|
||||
return std::make_shared<ov::Model>(ResultVector { res }, SinkVector { assign }, param,
|
||||
"MemoryDynamicBatchTest");
|
||||
}
|
||||
|
||||
static std::vector<int> generateInput(const ov::Shape& shape) {
|
||||
auto len = ov::shape_size(shape);
|
||||
std::vector<int> result {};
|
||||
result.reserve(len);
|
||||
for (int i = 0; i < len; i++)
|
||||
result.push_back(i);
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::vector<int> calculateReference(const std::vector<int>& input, int iterations) {
|
||||
std::vector<int> reference {};
|
||||
reference.reserve(input.size());
|
||||
std::transform(input.begin(), input.end(), std::back_inserter(reference), [iterations](const int &i) {
|
||||
return i * iterations;
|
||||
});
|
||||
return reference;
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
ov::PartialShape inputPartialShape_;
|
||||
ov::Shape inputShape_;
|
||||
int iterationsNum_;
|
||||
std::string deviceName_;
|
||||
std::shared_ptr<ov::Model> model_;
|
||||
std::shared_ptr<ov::Core> core_;
|
||||
std::vector<int> input_;
|
||||
ElementType precision_ { ElementType::i32 };
|
||||
};
|
||||
|
||||
// N accumulating inferences on one request must yield N * input.
TEST_P(MemoryDynamicBatch, MultipleInferencesOnTheSameInferRequest) {
    auto compiledModel = core_->compile_model(model_, CommonTestUtils::DEVICE_GPU, { });
    auto inferRequest = compiledModel.create_infer_request();
    input_ = generateInput(inputShape_);
    ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.set_input_tensor(inputTensor);
    for (int i = 0; i < iterationsNum_; i++)
        inferRequest.infer();
    auto output = inferRequest.get_output_tensor(0);
    const std::vector<int> reference = calculateReference(input_, iterationsNum_);
    const std::vector<int> actual(output.data<int>(), output.data<int>() + output.get_size());
    for (size_t i = 0; i < actual.size(); ++i)
        EXPECT_EQ(actual[i], reference[i]);
}
|
||||
|
||||
// Resetting the state between two inferences must discard the accumulation,
// so the second inference behaves like the first (reference factor 1).
TEST_P(MemoryDynamicBatch, ResetVariableState) {
    auto compiledModel = core_->compile_model(model_, CommonTestUtils::DEVICE_GPU, { });
    auto inferRequest = compiledModel.create_infer_request();
    input_ = generateInput(inputShape_);
    ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.set_input_tensor(inputTensor);
    inferRequest.infer();
    inferRequest.query_state().front().reset();
    inferRequest.infer();
    auto output = inferRequest.get_output_tensor(0);
    const std::vector<int> reference = calculateReference(input_, 1);
    const std::vector<int> actual(output.data<int>(), output.data<int>() + output.get_size());
    for (size_t i = 0; i < actual.size(); ++i)
        EXPECT_EQ(actual[i], reference[i]);
}
|
||||
|
||||
// After N inferences the queried state must hold the accumulated values.
TEST_P(MemoryDynamicBatch, GetVariableState) {
    auto compiledModel = core_->compile_model(model_, CommonTestUtils::DEVICE_GPU, { });
    auto inferRequest = compiledModel.create_infer_request();
    input_ = generateInput(inputShape_);
    ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.set_input_tensor(inputTensor);
    for (int i = 0; i < iterationsNum_; i++)
        inferRequest.infer();
    auto blob = inferRequest.query_state().front().get_state();
    const std::vector<int> reference = calculateReference(input_, iterationsNum_);
    const std::vector<int> actual(blob.data<int>(), blob.data<int>() + blob.get_size());
    for (size_t i = 0; i < actual.size(); ++i)
        EXPECT_EQ(actual[i], reference[i]);
}
|
||||
|
||||
// Pre-seeding the state with the input adds one extra accumulation step,
// so N inferences must yield (N + 1) * input.
TEST_P(MemoryDynamicBatch, SetVariableState) {
    auto compiledModel = core_->compile_model(model_, CommonTestUtils::DEVICE_GPU, { });
    auto inferRequest = compiledModel.create_infer_request();
    input_ = generateInput(inputShape_);
    ov::Tensor inputTensor = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.set_input_tensor(inputTensor);
    ov::Tensor state = ov::Tensor(precision_, inputShape_, input_.data());
    inferRequest.query_state().front().set_state(state);
    for (int i = 0; i < iterationsNum_; i++)
        inferRequest.infer();
    auto output = inferRequest.get_output_tensor(0);
    const std::vector<int> reference = calculateReference(input_, iterationsNum_ + 1);
    const std::vector<int> actual(output.data<int>(), output.data<int>() + output.get_size());
    for (size_t i = 0; i < actual.size(); ++i)
        EXPECT_EQ(actual[i], reference[i]);
}
|
||||
|
||||
ov::PartialShape networkPartialShape { {1, 19}, 4, 20, 20 };
|
||||
std::vector<ov::Shape> inputShapes { { 7, 4, 20, 20 }, { 19, 4, 20, 20 } };
|
||||
std::vector<int> iterationsNum { 3, 7 };
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_MemoryDynamicBatch, MemoryDynamicBatch,
|
||||
::testing::Combine(
|
||||
::testing::Values(networkPartialShape),
|
||||
::testing::ValuesIn(inputShapes),
|
||||
::testing::ValuesIn(iterationsNum),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU)),
|
||||
MemoryDynamicBatch::getTestCaseName);
|
||||
@@ -0,0 +1,36 @@
|
||||
// Copyright (C) 2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "single_layer_tests/memory.h"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
|
||||
const std::vector<InferenceEngine::SizeVector> inShapes = {
|
||||
{1},
|
||||
{3},
|
||||
{3, 3, 3},
|
||||
{2, 3, 4, 5},
|
||||
};
|
||||
|
||||
const std::vector<InferenceEngine::Precision> inputPrecisions = {
|
||||
InferenceEngine::Precision::I32,
|
||||
InferenceEngine::Precision::FP32,
|
||||
};
|
||||
|
||||
const std::vector<int64_t> iterationCount {1, 3, 10};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_MemoryTest, MemoryTest,
|
||||
::testing::Combine(
|
||||
::testing::Values(ngraph::helpers::MemoryTransformation::NONE),
|
||||
::testing::ValuesIn(iterationCount),
|
||||
::testing::ValuesIn(inShapes),
|
||||
::testing::ValuesIn(inputPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GPU)),
|
||||
MemoryTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
@@ -28,6 +28,7 @@ public:
|
||||
protected:
|
||||
std::vector<std::pair<ngraph::element::Type, std::vector<std::uint8_t>>> CalculateRefs() override;
|
||||
void SetUp() override;
|
||||
void Infer() override;
|
||||
private:
|
||||
void CreateTIFunc();
|
||||
void CreateCommonFunc();
|
||||
|
||||
@@ -82,6 +82,7 @@ namespace LayerTestsDefinitions {
|
||||
ConfigureNetwork();
|
||||
executableNetwork = core->LoadNetwork(cnnNetwork, targetDevice, configuration);
|
||||
}
|
||||
inferRequest = executableNetwork.CreateInferRequest();
|
||||
GenerateInputs();
|
||||
for (int64_t i = 0; i < iteration_count; ++i) {
|
||||
Infer();
|
||||
@@ -101,6 +102,11 @@ namespace LayerTestsDefinitions {
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryTest::Infer() {
    // Re-apply the configured input blobs before each iteration, then run
    // the request synchronously.
    ConfigureInferRequest();
    inferRequest.Infer();
}
|
||||
|
||||
std::vector<std::pair<element::Type, std::vector<std::uint8_t>>> MemoryTest::CalculateRefs() {
|
||||
using namespace ngraph;
|
||||
function->validate_nodes_and_infer_types();
|
||||
@@ -177,7 +183,9 @@ namespace LayerTestsDefinitions {
|
||||
|
||||
void MemoryTest::CreateCommonFunc() {
|
||||
auto param = builder::makeParams(ngPrc, {inputShape});
|
||||
auto variable = std::make_shared<Variable>(VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"});
|
||||
const auto variable_info = targetDevice == CommonTestUtils::DEVICE_GPU ?
|
||||
VariableInfo{Shape{inputShape}, ngPrc, "v0"} : VariableInfo{PartialShape::dynamic(), element::dynamic, "v0"};
|
||||
auto variable = std::make_shared<Variable>(variable_info);
|
||||
auto read_value = std::make_shared<ReadValue>(param.at(0), variable);
|
||||
auto add = std::make_shared<Add>(read_value, param.at(0));
|
||||
auto assign = std::make_shared<Assign>(add, variable);
|
||||
|
||||
Reference in New Issue
Block a user