From ec644b9a732d73ab266007bfb30d32d3f3be9e49 Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Wed, 11 Oct 2023 10:47:33 +0400 Subject: [PATCH] [GPU] Fix device tensors reallocation in case of host user's tensors (#20306) --- .../intel_gpu/plugin/remote_tensor.hpp | 2 +- .../intel_gpu/src/plugin/remote_tensor.cpp | 2 +- .../src/plugin/sync_infer_request.cpp | 25 +++++++++++++++++-- .../functional/behavior/infer_request.cpp | 23 +++++++++++++++++ 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp index 939c7b89784..74a07bbcbf3 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp @@ -52,6 +52,7 @@ public: bool is_allocated() const noexcept; bool is_surface() const noexcept; + bool is_shared() const noexcept; cldnn::memory::ptr get_memory() const; cldnn::memory::ptr get_original_memory() const; @@ -74,7 +75,6 @@ private: uint32_t m_plane; size_t m_hash = 0; - bool is_shared() const; bool supports_caching() const; void update_strides(); void init_properties(); diff --git a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp index cd164940027..a7c68cd8f81 100644 --- a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp +++ b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp @@ -169,7 +169,7 @@ const std::string& RemoteTensorImpl::get_device_name() const { return m_context->get_device_name(); } -bool RemoteTensorImpl::is_shared() const { +bool RemoteTensorImpl::is_shared() const noexcept { return m_mem_type == TensorType::BT_BUF_SHARED || m_mem_type == TensorType::BT_USM_SHARED || m_mem_type == TensorType::BT_IMG_SHARED || diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 5e564f3b9a3..6e9e8bbf353 
100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -270,10 +270,31 @@ void SyncInferRequest::set_tensor(const ov::Output& port, const bool is_input = ov::op::util::is_parameter(port.get_node()); + auto update_tensors_maps = [](const std::string& name, + std::unordered_map& user_tensors, + std::unordered_map& plugin_tensors, + const ov::SoPtr& tensor) { + auto current_tensor_owner = user_tensors[name].owner; + auto is_same_tensor = user_tensors[name].ptr == tensor._ptr; + + // Keep PLUGIN as a tensor owner if current user's tensor owner is PLUGIN and underlying tensor pointer is not changed + auto new_tensor_owner = current_tensor_owner == TensorOwner::PLUGIN && is_same_tensor ? TensorOwner::PLUGIN + : TensorOwner::USER; + + user_tensors[name] = { tensor._ptr, new_tensor_owner }; + + // We need to properly handle PLUGIN -> USER ownership change to prevent invalid PLUGIN's usm_host buffer sharing, + // so remove plugin's tensor to reallocate it in prepare_input() method + if (current_tensor_owner == TensorOwner::PLUGIN && new_tensor_owner == TensorOwner::USER) { + if (plugin_tensors.count(name) && std::dynamic_pointer_cast(plugin_tensors[name].ptr)->is_shared()) + plugin_tensors.erase(plugin_tensors.find(name)); + } + }; + if (is_input) { - m_user_inputs[name] = { tensor._ptr, TensorOwner::USER }; + update_tensors_maps(name, m_user_inputs, m_plugin_inputs, tensor); } else { - m_user_outputs[name] = { tensor._ptr, TensorOwner::USER }; + update_tensors_maps(name, m_user_outputs, m_plugin_outputs, tensor); } ov::ISyncInferRequest::set_tensor(port, tensor); diff --git a/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp b/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp index 1d64aa74232..af0229d5e81 100644 --- a/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp +++ b/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp @@ -199,3
+199,26 @@ TEST(TensorTest, smoke_canSetTensorForDynamicInput) { ASSERT_NO_THROW(inf_req.set_input_tensor(t2)); ASSERT_NO_THROW(inf_req.infer()); } + +TEST(TensorTest, smoke_canReallocateDeviceInputForHostTensor) { + auto ov = ov::Core(); + using namespace ov::preprocess; + auto p = PrePostProcessor(ngraph::builder::subgraph::makeSplitMultiConvConcat()); + p.input().tensor().set_element_type(ov::element::i8); + p.input().preprocess().convert_element_type(ov::element::f32); + auto function = p.build(); + + auto compiled_model = ov.compile_model(function, ov::test::utils::DEVICE_GPU); + auto inf_req = compiled_model.create_infer_request(); + + auto input = function->input(); + ov::Tensor host_tensor(input.get_element_type(), input.get_shape()); + + // Infer with pre-allocated input tensor + ASSERT_NO_THROW(inf_req.infer()); + + // Infer with host_tensor + ASSERT_NO_THROW(inf_req.set_input_tensor(host_tensor)); + ASSERT_NO_THROW(inf_req.infer()); +} +