[GPU] Fix device tensors reallocation in case of host user's tensors (#20306)

This commit is contained in:
Sergey Shlyapnikov 2023-10-11 10:47:33 +04:00 committed by GitHub
parent ba5878ed2f
commit ec644b9a73
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 48 additions and 4 deletions

View File

@ -52,6 +52,7 @@ public:
bool is_allocated() const noexcept;
bool is_surface() const noexcept;
bool is_shared() const noexcept;
cldnn::memory::ptr get_memory() const;
cldnn::memory::ptr get_original_memory() const;
@ -74,7 +75,6 @@ private:
uint32_t m_plane;
size_t m_hash = 0;
bool is_shared() const;
bool supports_caching() const;
void update_strides();
void init_properties();

View File

@ -169,7 +169,7 @@ const std::string& RemoteTensorImpl::get_device_name() const {
return m_context->get_device_name();
}
bool RemoteTensorImpl::is_shared() const {
bool RemoteTensorImpl::is_shared() const noexcept {
return m_mem_type == TensorType::BT_BUF_SHARED ||
m_mem_type == TensorType::BT_USM_SHARED ||
m_mem_type == TensorType::BT_IMG_SHARED ||

View File

@ -270,10 +270,31 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const
bool is_input = ov::op::util::is_parameter(port.get_node());
auto update_tensors_maps = [](const std::string& name,
std::unordered_map<std::string, ov::intel_gpu::TensorWrapper>& user_tensors,
std::unordered_map<std::string, ov::intel_gpu::TensorWrapper>& plugin_tensors,
const ov::SoPtr<ov::ITensor>& tensor) {
auto current_tensor_owner = user_tensors[name].owner;
auto is_same_tensor = user_tensors[name].ptr == tensor._ptr;
// Keep PLUGIN as a tensor owner if current user's tensor owner is PLUGIN and underlying tensor pointer is not changed
auto new_tensor_owner = current_tensor_owner == TensorOwner::PLUGIN && is_same_tensor ? TensorOwner::PLUGIN
: TensorOwner::USER;
user_tensors[name] = { tensor._ptr, new_tensor_owner };
// We need to properly handle PLUGIN -> USER ownership change to prevent invalid PLUGIN's usm_host buffer sharing,
// so remove plugin's tensor to reallocate it in prepare_input() method
if (current_tensor_owner == TensorOwner::PLUGIN && new_tensor_owner == TensorOwner::USER) {
if (plugin_tensors.count(name) && std::dynamic_pointer_cast<RemoteTensorImpl>(plugin_tensors[name].ptr)->is_shared())
plugin_tensors.erase(plugin_tensors.find(name));
}
};
if (is_input) {
m_user_inputs[name] = { tensor._ptr, TensorOwner::USER };
update_tensors_maps(name, m_user_inputs, m_plugin_inputs, tensor);
} else {
m_user_outputs[name] = { tensor._ptr, TensorOwner::USER };
update_tensors_maps(name, m_user_outputs, m_plugin_outputs, tensor);
}
ov::ISyncInferRequest::set_tensor(port, tensor);

View File

@ -199,3 +199,26 @@ TEST(TensorTest, smoke_canSetTensorForDynamicInput) {
ASSERT_NO_THROW(inf_req.set_input_tensor(t2));
ASSERT_NO_THROW(inf_req.infer());
}
// Checks that the GPU plugin can safely reallocate its internally owned device-side
// input buffer when the user later supplies a plain host tensor via set_input_tensor():
// the first infer() runs on the plugin-allocated input, the second on the user's host tensor.
TEST(TensorTest, smoke_canReallocateDeviceInputForHostTensor) {
    auto core = ov::Core();
    using namespace ov::preprocess;

    // Build a model whose input is i8 on the tensor side and converted to f32 in preprocessing.
    auto ppp = PrePostProcessor(ngraph::builder::subgraph::makeSplitMultiConvConcat());
    ppp.input().tensor().set_element_type(ov::element::i8);
    ppp.input().preprocess().convert_element_type(ov::element::f32);
    auto model = ppp.build();

    auto compiled = core.compile_model(model, ov::test::utils::DEVICE_GPU);
    auto request = compiled.create_infer_request();

    // Plain host-memory tensor matching the model input's type and shape.
    auto model_input = model->input();
    ov::Tensor user_host_tensor(model_input.get_element_type(), model_input.get_shape());

    // First inference uses the input tensor pre-allocated by the plugin.
    ASSERT_NO_THROW(request.infer());

    // Switching to the user's host tensor must not throw — the plugin has to
    // reallocate its device-side buffer instead of reusing the shared one.
    ASSERT_NO_THROW(request.set_input_tensor(user_host_tensor));
    ASSERT_NO_THROW(request.infer());
}