From 3a7acbb5cc6cbbbe758a6762a583693493d343a6 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Fri, 15 Dec 2023 13:57:49 +0400
Subject: [PATCH] [GPU] Allow precision conversion on get_state() call (#21658)

---
 .../include/intel_gpu/plugin/common_utils.hpp |   4 +
 .../intel_gpu/plugin/variable_state.hpp       |   9 +-
 .../intel_gpu/src/plugin/common_utils.cpp     | 152 ++++++++++++++++++
 .../src/plugin/sync_infer_request.cpp         |  96 -----------
 .../intel_gpu/src/plugin/variable_state.cpp   |  17 +-
 .../intel_gpu/tests/unit/CMakeLists.txt       |   1 +
 6 files changed, 176 insertions(+), 103 deletions(-)
 create mode 100644 src/plugins/intel_gpu/src/plugin/common_utils.cpp

diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp
index b69ada3946f..0e8b92e5d63 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp
@@ -7,6 +7,7 @@
 #include 
 #include 
 #include "intel_gpu/runtime/layout.hpp"
+#include "intel_gpu/runtime/memory.hpp"
 #include "intel_gpu/runtime/shape_predictor.hpp"
 #include "openvino/core/layout.hpp"
 #include "openvino/core/type/element_type.hpp"
@@ -102,6 +103,9 @@ inline void ForceExit() {
     std::_Exit(-1);
 }
 
+void convert_and_copy(const cldnn::memory::ptr src, ov::ITensor const* dst, const cldnn::stream& stream);
+void convert_and_copy(const ov::ITensor* src, ov::ITensor const* dst, const cldnn::stream& stream);
+
 }  // namespace intel_gpu
 
 inline std::ostream& operator<<(std::ostream& os, const ov::AnyMap& params) {
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp
index 68604a1889f..ffce17acb2a 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/variable_state.hpp
@@ -3,6 +3,7 @@
 //
 
 #pragma once
+#include "openvino/core/type/element_type.hpp"
 #include "openvino/runtime/ivariable_state.hpp"
 #include "intel_gpu/runtime/layout.hpp"
 #include "intel_gpu/runtime/shape_predictor.hpp"
@@ -15,10 +16,14 @@ namespace intel_gpu {
 class RemoteContextImpl;
 
 struct VariableStateInfo {
-    VariableStateInfo(const std::string& id, const cldnn::layout& layout) : m_id(id), m_layout(layout) {}
+    VariableStateInfo(const std::string& id, const cldnn::layout& layout, ov::element::Type_t user_specified_type = ov::element::undefined)
+        : m_id(id)
+        , m_layout(layout)
+        , m_user_specified_type(user_specified_type) {}
 
     std::string m_id;
     cldnn::layout m_layout;
+    ov::element::Type m_user_specified_type;
 };
 
 class VariableState : public ov::IVariableState {
@@ -38,6 +43,7 @@ public:
 
 private:
     cldnn::layout m_layout;
+    ov::element::Type m_user_specified_type;
    std::shared_ptr<RemoteContextImpl> m_context;
    std::shared_ptr<cldnn::ShapePredictor> m_shape_predictor;
     bool m_is_set = false;
@@ -45,6 +51,7 @@ private:
     size_t actual_size = 0;
 
     void update_device_buffer();
+    ov::element::Type get_user_specified_type() const;
 };
 
 using VariablesMap = std::unordered_map<std::string, VariableState::Ptr>;
diff --git a/src/plugins/intel_gpu/src/plugin/common_utils.cpp b/src/plugins/intel_gpu/src/plugin/common_utils.cpp
new file mode 100644
index 00000000000..0375aa495f2
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/common_utils.cpp
@@ -0,0 +1,152 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "intel_gpu/plugin/common_utils.hpp"
+#include "intel_gpu/plugin/remote_tensor.hpp"
+#include "intel_gpu/runtime/layout.hpp"
+#include "intel_gpu/runtime/memory.hpp"
+#include "intel_gpu/runtime/memory_caps.hpp"
+
+#include "openvino/core/type/element_type.hpp"
+#include "openvino/runtime/tensor.hpp"
+#include "openvino/op/util/op_types.hpp"
+
+#include <cstring>
+#include <memory>
+
+namespace {
+
+template <typename src_t, typename dst_t>
+void convert_and_copy_no_pad(const src_t* src, dst_t* dst, size_t size) {
+    OPENVINO_ASSERT(src && dst, "[GPU] Src or Dst ptr is null");
+    for (size_t i = 0; i < size; i++)
+        dst[i] = static_cast<dst_t>(src[i]);
+}
+
+template <typename src_t, typename dst_t>
+void convert_and_copy_padded_source(const src_t* src, dst_t* dst, cldnn::layout layout) {
+    cldnn::tensor size = layout.get_tensor();
+    for (int64_t b = 0; b < size.batch[0]; b++) {
+        for (int64_t f = 0; f < size.feature[0]; f++) {
+            for (int64_t w = 0; w < size.spatial[3]; w++) {
+                for (int64_t z = 0; z < size.spatial[2]; z++) {
+                    for (int64_t y = 0; y < size.spatial[1]; y++) {
+                        for (int64_t x = 0; x < size.spatial[0]; x++) {
+                            *dst++ = static_cast<dst_t>(src[layout.get_linear_offset(cldnn::tensor(b, f, x, y, z, w))]);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+void convert_and_copy(const void* src_ptr, ov::element::Type src_et, void* dst_ptr, ov::element::Type dst_et, size_t size, cldnn::layout layout) {
+    if (size == 0)
+        return;
+
+    if (src_et == dst_et && !layout.data_padding) {
+        std::memcpy(dst_ptr, src_ptr, size * src_et.size());
+        return;
+    }
+
+    #define CASE(s_et, d_et, s_type, d_type)                                                                                        \
+    if (src_et == s_et && dst_et == d_et) {                                                                                         \
+        if (static_cast<bool>(layout.data_padding)) {                                                                               \
+            return convert_and_copy_padded_source(static_cast<const s_type*>(src_ptr), static_cast<d_type*>(dst_ptr), layout);      \
+        } else {                                                                                                                    \
+            return convert_and_copy_no_pad(static_cast<const s_type*>(src_ptr), static_cast<d_type*>(dst_ptr), size);               \
+        }                                                                                                                           \
+    }
+
+    // For unsupported inputs
+    CASE(ov::element::f64, ov::element::f32, double, float);
+    CASE(ov::element::i16, ov::element::f32, int16_t, float);
+    CASE(ov::element::u16, ov::element::f32, uint16_t, float);
+    CASE(ov::element::u64, ov::element::i32, uint64_t, int32_t);
+    CASE(ov::element::i64, ov::element::i32, int64_t, int32_t);
+    CASE(ov::element::u32, ov::element::i32, uint32_t, int32_t);
+
+    // For unsupported outputs
+    CASE(ov::element::f32, ov::element::f64, float, double);
+    CASE(ov::element::i32, ov::element::i64, int32_t, int64_t);
+    CASE(ov::element::i32, ov::element::u64, int32_t, uint64_t);
+    CASE(ov::element::i32, ov::element::u32, int32_t, uint32_t);
+    CASE(ov::element::f32, ov::element::i16, float, int16_t);
+    CASE(ov::element::f32, ov::element::u16, float, uint16_t);
+
+    // TODO: Need instances below?
+    CASE(ov::element::u32, ov::element::i64, uint32_t, int64_t);
+    CASE(ov::element::u32, ov::element::u64, uint32_t, uint64_t);
+
+    // For state conversions
+    CASE(ov::element::f32, ov::element::f32, float, float);
+    CASE(ov::element::f16, ov::element::f16, ov::float16, ov::float16);
+    CASE(ov::element::f32, ov::element::f16, float, ov::float16);
+    CASE(ov::element::f16, ov::element::f32, ov::float16, float);
+
+    OPENVINO_THROW("[GPU] Unsupported element types combination for copy: ", src_et, " -> ", dst_et);
+}
+
+}  // namespace
+
+namespace ov {
+namespace intel_gpu {
+
+void convert_and_copy(const cldnn::memory::ptr src, ov::ITensor const* dst, const cldnn::stream& stream) {
+    auto src_et = src->get_layout().data_type;
+    auto dst_et = dst->get_element_type();
+
+    size_t size = ov::shape_size(dst->get_shape());
+
+    cldnn::mem_lock<uint8_t> src_lock(src, stream);
+    std::unique_ptr<cldnn::mem_lock<uint8_t>> dst_lock = nullptr;
+
+    const void* src_ptr = src_lock.data();
+    void* dst_ptr = nullptr;
+
+    if (auto remote = dynamic_cast<const RemoteTensorImpl*>(dst)) {
+        auto mem = remote->get_original_memory();
+        dst_lock.reset(new cldnn::mem_lock<uint8_t>(mem, stream));
+        dst_ptr = dst_lock->data();
+    } else {
+        dst_ptr = dst->data();
+    }
+
+    return ::convert_and_copy(src_ptr, src_et, dst_ptr, dst_et, size, src->get_layout());
+}
+
+void convert_and_copy(const ov::ITensor* src, ov::ITensor const* dst, const cldnn::stream& stream) {
+    auto src_et = src->get_element_type();
+    auto dst_et = dst->get_element_type();
+
+    size_t size = ov::shape_size(dst->get_shape());
+
+    const void* src_ptr = nullptr;
+    void* dst_ptr = nullptr;
+
+    std::unique_ptr<cldnn::mem_lock<uint8_t>> src_lock = nullptr;
+    std::unique_ptr<cldnn::mem_lock<uint8_t>> dst_lock = nullptr;
+
+    if (auto remote = dynamic_cast<const RemoteTensorImpl*>(src)) {
+        auto mem = remote->get_original_memory();
+        src_lock.reset(new cldnn::mem_lock<uint8_t>(mem, stream));
+        src_ptr = src_lock->data();
+    } else {
+        src_ptr = src->data();
+    }
+
+    if (auto remote = dynamic_cast<const RemoteTensorImpl*>(dst)) {
+        auto mem = remote->get_original_memory();
+        dst_lock.reset(new cldnn::mem_lock<uint8_t>(mem, stream));
+        dst_ptr = dst_lock->data();
+    } else {
+        dst_ptr = dst->data();
+    }
+
+    return ::convert_and_copy(src_ptr, src_et, dst_ptr, dst_et, size, cldnn::layout({}, ov::element::undefined, cldnn::format::bfyx, cldnn::padding()));
+}
+
+}  // namespace intel_gpu
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
index fde0b40085f..eb84b99f042 100644
--- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
+++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
@@ -62,106 +62,10 @@ inline std::string get_port_name(const ov::Output<const ov::Node>& port, const b
     return name;
 }
 
-template <typename src_t, typename dst_t>
-void convert_any_copy(const src_t* src, dst_t* dst, size_t size) {
-    OPENVINO_ASSERT(src && dst, "[GPU] Src or Dst ptr is null");
-    for (size_t i = 0; i < size; i++)
-        dst[i] = static_cast<dst_t>(src[i]);
-}
-
-void convert_and_copy(const void* src_ptr, ov::element::Type src_et, void* dst_ptr, ov::element::Type dst_et, size_t size) {
-    if (size == 0)
-        return;
-
-    if (src_et == dst_et) {
-        std::memcpy(dst_ptr, src_ptr, size * src_et.size());
-        return;
-    }
-
-    #define CASE(s_et, d_et, s_type, d_type) \
-    if (src_et == s_et && dst_et == d_et) return convert_any_copy(static_cast<const s_type*>(src_ptr), static_cast<d_type*>(dst_ptr), size)
-
-    // For unsupported inputs
-    CASE(ov::element::f64, ov::element::f32, double, float);
-    CASE(ov::element::i16, ov::element::f32, int16_t, float);
-    CASE(ov::element::u16, ov::element::f32, uint16_t, float);
-    CASE(ov::element::u64, ov::element::i32, uint64_t, int32_t);
-    CASE(ov::element::i64, ov::element::i32, int64_t, int32_t);
-    CASE(ov::element::u32, ov::element::i32, uint32_t, int32_t);
-
-    // For unsupported outputs
-    CASE(ov::element::f32, ov::element::f64, float, double);
-    CASE(ov::element::i32, ov::element::i64, int32_t, int64_t);
-    CASE(ov::element::i32, ov::element::u64, int32_t, uint64_t);
-    CASE(ov::element::i32, ov::element::u32, int32_t, uint32_t);
-    CASE(ov::element::f32, ov::element::i16, float, int16_t);
-    CASE(ov::element::f32, ov::element::u16, float, uint16_t);
-
-    // TODO: Need instances below?
-    CASE(ov::element::u32, ov::element::i64, uint32_t, int64_t);
-    CASE(ov::element::u32, ov::element::u64, uint32_t, uint64_t);
-
-    OPENVINO_THROW("[GPU] Unsupported element types combination for copy: ", src_et, " -> ", dst_et);
-}
-
 bool is_convert_required(ov::element::Type src_et, ov::element::Type dst_et) {
     return src_et != dst_et && !(dst_et == ov::element::boolean && src_et == ov::element::u8);
 }
 
-void convert_and_copy(const cldnn::memory::ptr src, ov::ITensor const* dst, const cldnn::stream& stream) {
-    auto src_et = src->get_layout().data_type;
-    auto dst_et = dst->get_element_type();
-
-    size_t size = ov::shape_size(dst->get_shape());
-
-    cldnn::mem_lock<uint8_t> src_lock(src, stream);
-    std::unique_ptr<cldnn::mem_lock<uint8_t>> dst_lock = nullptr;
-
-    const void* src_ptr = src_lock.data();
-    void* dst_ptr = nullptr;
-
-    if (auto remote = dynamic_cast<const RemoteTensorImpl*>(dst)) {
-        auto mem = remote->get_original_memory();
-        dst_lock.reset(new cldnn::mem_lock<uint8_t>(mem, stream));
-        dst_ptr = dst_lock->data();
-    } else {
-        dst_ptr = dst->data();
-    }
-
-    return convert_and_copy(src_ptr, src_et, dst_ptr, dst_et, size);
-}
-
-void convert_and_copy(const ov::ITensor* src, ov::ITensor const* dst, const cldnn::stream& stream) {
-    auto src_et = src->get_element_type();
-    auto dst_et = dst->get_element_type();
-
-    size_t size = ov::shape_size(dst->get_shape());
-
-    const void* src_ptr = nullptr;
-    void* dst_ptr = nullptr;
-
-    std::unique_ptr<cldnn::mem_lock<uint8_t>> src_lock = nullptr;
-    std::unique_ptr<cldnn::mem_lock<uint8_t>> dst_lock = nullptr;
-
-    if (auto remote = dynamic_cast<const RemoteTensorImpl*>(src)) {
-        auto mem = remote->get_original_memory();
-        src_lock.reset(new cldnn::mem_lock<uint8_t>(mem, stream));
-        src_ptr = src_lock->data();
-    } else {
-        src_ptr = src->data();
-    }
-
-    if (auto remote = dynamic_cast<const RemoteTensorImpl*>(dst)) {
-        auto mem = remote->get_original_memory();
-        dst_lock.reset(new cldnn::mem_lock<uint8_t>(mem, stream));
-        dst_ptr = dst_lock->data();
-    } else {
-        dst_ptr = dst->data();
-    }
-
-    return convert_and_copy(src_ptr, src_et, dst_ptr, dst_et, size);
-}
-
 bool same_host_mem(cldnn::memory::cptr memory, const uint8_t* host_ptr) {
     const uint8_t* device_ptr = memory->get_allocation_type() == cldnn::allocation_type::usm_host ?
                                 static_cast<uint8_t*>(memory->get_internal_params().mem) : nullptr;
diff --git a/src/plugins/intel_gpu/src/plugin/variable_state.cpp b/src/plugins/intel_gpu/src/plugin/variable_state.cpp
index cdd551b5ca8..78c3479eeb7 100644
--- a/src/plugins/intel_gpu/src/plugin/variable_state.cpp
+++ b/src/plugins/intel_gpu/src/plugin/variable_state.cpp
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+#include "openvino/core/type/element_type.hpp"
 #include "openvino/runtime/make_tensor.hpp"
 #include "intel_gpu/plugin/remote_context.hpp"
 #include "intel_gpu/plugin/common_utils.hpp"
@@ -19,9 +20,9 @@ namespace intel_gpu {
 
 VariableState::VariableState(const VariableStateInfo& info, RemoteContextImpl::Ptr context, std::shared_ptr<cldnn::ShapePredictor> shape_predictor)
     : ov::IVariableState {info.m_id}
     , m_layout(info.m_layout)
+    , m_user_specified_type(info.m_user_specified_type)
     , m_context(context)
     , m_shape_predictor(shape_predictor) {
-    m_state = m_context->create_host_tensor(m_layout.data_type, get_tensor_shape(m_layout.get_partial_shape()));
     update_device_buffer();
 }
 
@@ -80,12 +81,16 @@ void VariableState::update_device_buffer() {
     m_memory = m_context->get_engine().reinterpret_buffer(*m_memory, m_layout);
 }
 
-ov::SoPtr<ov::ITensor> VariableState::get_state() const {
-    const bool blocking = true;
-    m_state->set_shape(m_memory->get_layout().get_shape());
-    m_memory->copy_to(m_context->get_engine().get_service_stream(), m_state->data(), blocking);
+ov::element::Type VariableState::get_user_specified_type() const {
+    return m_user_specified_type != ov::element::undefined ? m_user_specified_type : ov::element::Type(m_layout.data_type);
+}
 
-    return m_state;
+ov::SoPtr<ov::ITensor> VariableState::get_state() const {
+    auto tensor = m_context->create_host_tensor(get_user_specified_type(), m_memory->get_layout().get_shape());
+
+    convert_and_copy(m_memory, tensor._ptr.get(), m_context->get_engine().get_service_stream());
+
+    return tensor;
 }
 
 }  // namespace intel_gpu
diff --git a/src/plugins/intel_gpu/tests/unit/CMakeLists.txt b/src/plugins/intel_gpu/tests/unit/CMakeLists.txt
index 887594590de..458280bbeae 100644
--- a/src/plugins/intel_gpu/tests/unit/CMakeLists.txt
+++ b/src/plugins/intel_gpu/tests/unit/CMakeLists.txt
@@ -24,6 +24,7 @@ file(GLOB_RECURSE SOURCES_MAIN
   "${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/remote_context.cpp"
   "${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp"
   "${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp"
+  "${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/common_utils.cpp"
 )
 
 if (NOT ENABLE_ONEDNN_FOR_GPU)
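
For reviewers trying the change out: the user-visible effect is that ov::VariableState::get_state() on GPU now returns a host tensor in the precision requested for the state, converting from the plugin's internal storage precision via convert_and_copy() instead of copying raw bytes. Below is a minimal sketch of how this surfaces through the public OpenVINO 2.0 API; the helper name print_states, the "GPU" target, and the f16-storage/f32-model pairing are illustrative assumptions, not taken from this patch:

    #include <iostream>
    #include <openvino/openvino.hpp>

    // Illustrative helper (not part of this patch): runs one inference and
    // dumps every variable state of a stateful model compiled for GPU.
    void print_states(ov::CompiledModel& compiled) {
        ov::InferRequest request = compiled.create_infer_request();
        request.infer();
        for (auto&& state : request.query_state()) {
            ov::Tensor t = state.get_state();
            // With this patch applied, the returned tensor's element type is
            // the user-specified state precision (e.g. f32) even when the
            // plugin stores the state internally in another type (e.g. f16);
            // get_state() converts during the copy rather than exposing the
            // internal storage type.
            std::cout << state.get_name() << ": " << t.get_element_type()
                      << " " << t.get_shape() << std::endl;
        }
    }

This assumes `compiled` came from ov::Core::compile_model(model, "GPU") for a model containing ReadValue/Assign pairs; before the change, the returned tensor was always created with the internal layout's data type.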