[GPU] Fix device tensors reallocation in case of host user's tensors (#20306)

This commit is contained in:
Sergey Shlyapnikov 2023-10-11 10:47:33 +04:00 committed by GitHub
parent ba5878ed2f
commit ec644b9a73
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 48 additions and 4 deletions

View File

@ -52,6 +52,7 @@ public:
bool is_allocated() const noexcept;
bool is_surface() const noexcept;
bool is_shared() const noexcept;
cldnn::memory::ptr get_memory() const;
cldnn::memory::ptr get_original_memory() const;
@ -74,7 +75,6 @@ private:
uint32_t m_plane;
size_t m_hash = 0;
bool is_shared() const;
bool supports_caching() const;
void update_strides();
void init_properties();

View File

@ -169,7 +169,7 @@ const std::string& RemoteTensorImpl::get_device_name() const {
return m_context->get_device_name();
}
bool RemoteTensorImpl::is_shared() const {
bool RemoteTensorImpl::is_shared() const noexcept {
return m_mem_type == TensorType::BT_BUF_SHARED ||
m_mem_type == TensorType::BT_USM_SHARED ||
m_mem_type == TensorType::BT_IMG_SHARED ||

View File

@ -270,10 +270,31 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const
bool is_input = ov::op::util::is_parameter(port.get_node());
auto update_tensors_maps = [](const std::string& name,
std::unordered_map<std::string, ov::intel_gpu::TensorWrapper>& user_tensors,
std::unordered_map<std::string, ov::intel_gpu::TensorWrapper>& plugin_tensors,
const ov::SoPtr<ov::ITensor>& tensor) {
auto current_tensor_owner = user_tensors[name].owner;
auto is_same_tensor = user_tensors[name].ptr == tensor._ptr;
// Keep PLUGIN as a tensor owner if current user's tensor owner is PLUGIN and underlying tensor pointer is not changed
auto new_tensor_owner = current_tensor_owner == TensorOwner::PLUGIN && is_same_tensor ? TensorOwner::PLUGIN
: TensorOwner::USER;
user_tensors[name] = { tensor._ptr, new_tensor_owner };
// We need to properly handle PLUGIN -> USER ownership change to prevent invalid PLUGIN's usm_host buffer sharing,
// so remove plugin's tensor to reallocate it in prepare_input() method
if (current_tensor_owner == TensorOwner::PLUGIN && new_tensor_owner == TensorOwner::USER) {
if (plugin_tensors.count(name) && std::dynamic_pointer_cast<RemoteTensorImpl>(plugin_tensors[name].ptr)->is_shared())
plugin_tensors.erase(plugin_tensors.find(name));
}
};
if (is_input) {
m_user_inputs[name] = { tensor._ptr, TensorOwner::USER };
update_tensors_maps(name, m_user_inputs, m_plugin_inputs, tensor);
} else {
m_user_outputs[name] = { tensor._ptr, TensorOwner::USER };
update_tensors_maps(name, m_user_outputs, m_plugin_outputs, tensor);
}
ov::ISyncInferRequest::set_tensor(port, tensor);

View File

@ -199,3 +199,26 @@ TEST(TensorTest, smoke_canSetTensorForDynamicInput) {
ASSERT_NO_THROW(inf_req.set_input_tensor(t2));
ASSERT_NO_THROW(inf_req.infer());
}
// Checks that the GPU plugin can safely reallocate its internally owned device-side
// input buffer when the user later supplies a plain host tensor via set_input_tensor():
// the first infer() runs on the plugin-allocated input, the second on the user's host tensor.
TEST(TensorTest, smoke_canReallocateDeviceInputForHostTensor) {
    auto core = ov::Core();
    using namespace ov::preprocess;

    // Build a model whose input is i8 on the tensor side and converted to f32 in preprocessing.
    auto ppp = PrePostProcessor(ngraph::builder::subgraph::makeSplitMultiConvConcat());
    ppp.input().tensor().set_element_type(ov::element::i8);
    ppp.input().preprocess().convert_element_type(ov::element::f32);
    auto model = ppp.build();

    auto compiled = core.compile_model(model, ov::test::utils::DEVICE_GPU);
    auto request = compiled.create_infer_request();

    // Plain host-memory tensor matching the model input's type and shape.
    auto model_input = model->input();
    ov::Tensor user_host_tensor(model_input.get_element_type(), model_input.get_shape());

    // First inference uses the input tensor pre-allocated by the plugin.
    ASSERT_NO_THROW(request.infer());

    // Switching to the user's host tensor must not throw — the plugin has to
    // reallocate its device-side buffer instead of reusing the shared one.
    ASSERT_NO_THROW(request.set_input_tensor(user_host_tensor));
    ASSERT_NO_THROW(request.infer());
}