From ec644b9a732d73ab266007bfb30d32d3f3be9e49 Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Wed, 11 Oct 2023 10:47:33 +0400 Subject: [PATCH] [GPU] Fix device tensors reallocation in case of host user's tensors (#20306) --- .../intel_gpu/plugin/remote_tensor.hpp | 2 +- .../intel_gpu/src/plugin/remote_tensor.cpp | 2 +- .../src/plugin/sync_infer_request.cpp | 25 +++++++++++++++++-- .../functional/behavior/infer_request.cpp | 23 +++++++++++++++++ 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp index 939c7b89784..74a07bbcbf3 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp @@ -52,6 +52,7 @@ public: bool is_allocated() const noexcept; bool is_surface() const noexcept; + bool is_shared() const noexcept; cldnn::memory::ptr get_memory() const; cldnn::memory::ptr get_original_memory() const; @@ -74,7 +75,6 @@ private: uint32_t m_plane; size_t m_hash = 0; - bool is_shared() const; bool supports_caching() const; void update_strides(); void init_properties(); diff --git a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp index cd164940027..a7c68cd8f81 100644 --- a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp +++ b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp @@ -169,7 +169,7 @@ const std::string& RemoteTensorImpl::get_device_name() const { return m_context->get_device_name(); } -bool RemoteTensorImpl::is_shared() const { +bool RemoteTensorImpl::is_shared() const noexcept { return m_mem_type == TensorType::BT_BUF_SHARED || m_mem_type == TensorType::BT_USM_SHARED || m_mem_type == TensorType::BT_IMG_SHARED || diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 5e564f3b9a3..6e9e8bbf353 
100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -270,10 +270,31 @@ void SyncInferRequest::set_tensor(const ov::Output& port, const bool is_input = ov::op::util::is_parameter(port.get_node()); + auto update_tensors_maps = [](const std::string& name, + std::unordered_map& user_tensors, + std::unordered_map& plugin_tensors, + const ov::SoPtr& tensor) { + auto current_tensor_owner = user_tensors[name].owner; + auto is_same_tensor = user_tensors[name].ptr == tensor._ptr; + + // Keep PLUGIN as a tensor owner if current user's tensor owner is PLUGIN and underlying tensor pointer is not changed + auto new_tensor_owner = current_tensor_owner == TensorOwner::PLUGIN && is_same_tensor ? TensorOwner::PLUGIN + : TensorOwner::USER; + + user_tensors[name] = { tensor._ptr, new_tensor_owner }; + + // We need to properly handle PLUGIN -> USER ownership change to prevent invalid PLUGIN's usm_host buffer sharing, + // so remove plugin's tensor to reallocate it in prepare_input() method + if (current_tensor_owner == TensorOwner::PLUGIN && new_tensor_owner == TensorOwner::USER) { + if (plugin_tensors.count(name) && std::dynamic_pointer_cast(plugin_tensors[name].ptr)->is_shared()) + plugin_tensors.erase(plugin_tensors.find(name)); + } + }; + if (is_input) { - m_user_inputs[name] = { tensor._ptr, TensorOwner::USER }; + update_tensors_maps(name, m_user_inputs, m_plugin_inputs, tensor); } else { - m_user_outputs[name] = { tensor._ptr, TensorOwner::USER }; + update_tensors_maps(name, m_user_outputs, m_plugin_outputs, tensor); } ov::ISyncInferRequest::set_tensor(port, tensor); diff --git a/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp b/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp index 1d64aa74232..af0229d5e81 100644 --- a/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp +++ b/src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp @@ -199,3
+199,26 @@ TEST(TensorTest, smoke_canSetTensorForDynamicInput) { ASSERT_NO_THROW(inf_req.set_input_tensor(t2)); ASSERT_NO_THROW(inf_req.infer()); } + +TEST(TensorTest, smoke_canReallocateDeviceInputForHostTensor) { + auto ov = ov::Core(); + using namespace ov::preprocess; + auto p = PrePostProcessor(ngraph::builder::subgraph::makeSplitMultiConvConcat()); + p.input().tensor().set_element_type(ov::element::i8); + p.input().preprocess().convert_element_type(ov::element::f32); + auto function = p.build(); + + auto compiled_model = ov.compile_model(function, ov::test::utils::DEVICE_GPU); + auto inf_req = compiled_model.create_infer_request(); + + auto input = function->input(); + ov::Tensor host_tensor(input.get_element_type(), input.get_shape()); + + // Infer with pre-allocated input tensor + ASSERT_NO_THROW(inf_req.infer()); + + // Infer with host_tensor + ASSERT_NO_THROW(inf_req.set_input_tensor(host_tensor)); + ASSERT_NO_THROW(inf_req.infer()); +} +