[GPU] Allow precision conversion on get_state() call (#21658)

Vladimir Paramuzov 2023-12-15 13:57:49 +04:00 committed by GitHub
parent 7e8a491276
commit 3a7acbb5cc
6 changed files with 176 additions and 103 deletions
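With this change, `VariableState::get_state()` returns a host tensor in the element type the state variable was declared with, converting from whatever precision the GPU plugin keeps internally (for example f16 when the model runs in half precision). Before the change, the returned tensor always used `m_layout.data_type`, i.e. the internal precision. A minimal sketch of how this surfaces through the public OpenVINO API follows; the model path is a placeholder, and the conversion only kicks in when the declared and internal precisions actually differ:

```cpp
#include <openvino/openvino.hpp>
#include <iostream>

int main() {
    ov::Core core;
    // "stateful_model.xml" stands in for any model with ReadValue/Assign states.
    auto compiled = core.compile_model("stateful_model.xml", "GPU",
                                       ov::hint::inference_precision(ov::element::f16));
    auto request = compiled.create_infer_request();

    request.infer();

    // get_state() copies the internal (possibly f16) buffer into a host tensor,
    // converting to the element type the variable was declared with in the model.
    for (auto& state : request.query_state()) {
        ov::Tensor value = state.get_state();
        std::cout << state.get_name() << ": " << value.get_element_type()
                  << " " << value.get_shape() << std::endl;
    }
    return 0;
}
```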

View File

@@ -7,6 +7,7 @@
 #include <ostream>
 #include <tuple>
 #include "intel_gpu/runtime/layout.hpp"
+#include "intel_gpu/runtime/memory.hpp"
 #include "intel_gpu/runtime/shape_predictor.hpp"
 #include "openvino/core/layout.hpp"
 #include "openvino/core/type/element_type.hpp"
@@ -102,6 +103,9 @@ inline void ForceExit() {
     std::_Exit(-1);
 }

+void convert_and_copy(const cldnn::memory::ptr src, ov::ITensor const* dst, const cldnn::stream& stream);
+void convert_and_copy(const ov::ITensor* src, ov::ITensor const* dst, const cldnn::stream& stream);
+
 }  // namespace intel_gpu

 inline std::ostream& operator<<(std::ostream& os, const ov::AnyMap& params) {

View File

@@ -3,6 +3,7 @@
 //

 #pragma once
+#include "openvino/core/type/element_type.hpp"
 #include "openvino/runtime/ivariable_state.hpp"
 #include "intel_gpu/runtime/layout.hpp"
 #include "intel_gpu/runtime/shape_predictor.hpp"
@@ -15,10 +16,14 @@ namespace intel_gpu {
 class RemoteContextImpl;

 struct VariableStateInfo {
-    VariableStateInfo(const std::string& id, const cldnn::layout& layout) : m_id(id), m_layout(layout) {}
+    VariableStateInfo(const std::string& id, const cldnn::layout& layout, ov::element::Type_t user_specified_type = ov::element::undefined)
+        : m_id(id)
+        , m_layout(layout)
+        , m_user_specified_type(user_specified_type) {}

     std::string m_id;
     cldnn::layout m_layout;
+    ov::element::Type m_user_specified_type;
 };

 class VariableState : public ov::IVariableState {
@@ -38,6 +43,7 @@ public:
 private:
     cldnn::layout m_layout;
+    ov::element::Type m_user_specified_type;
     std::shared_ptr<RemoteContextImpl> m_context;
     std::shared_ptr<cldnn::ShapePredictor> m_shape_predictor;
     bool m_is_set = false;
@@ -45,6 +51,7 @@ private:
     size_t actual_size = 0;

     void update_device_buffer();
+    ov::element::Type get_user_specified_type() const;
 };

 using VariablesMap = std::unordered_map<std::string, VariableState::Ptr>;

View File

@@ -0,0 +1,152 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "intel_gpu/plugin/common_utils.hpp"
+#include "intel_gpu/plugin/remote_tensor.hpp"
+#include "intel_gpu/runtime/layout.hpp"
+#include "intel_gpu/runtime/memory.hpp"
+#include "intel_gpu/runtime/memory_caps.hpp"
+#include "openvino/core/type/element_type.hpp"
+#include "openvino/runtime/tensor.hpp"
+#include "openvino/op/util/op_types.hpp"
+
+#include <algorithm>
+#include <memory>
+
+namespace {
+
+template <typename src_t, typename dst_t>
+void convert_and_copy_no_pad(const src_t* src, dst_t* dst, size_t size) {
+    OPENVINO_ASSERT(src && dst, "[GPU] Src or Dst ptr is null");
+    for (size_t i = 0; i < size; i++)
+        dst[i] = static_cast<dst_t>(src[i]);
+}
+
+template <typename src_t, typename dst_t>
+void convert_and_copy_padded_source(const src_t* src, dst_t* dst, cldnn::layout layout) {
+    cldnn::tensor size = layout.get_tensor();
+    for (int64_t b = 0; b < size.batch[0]; b++) {
+        for (int64_t f = 0; f < size.feature[0]; f++) {
+            for (int64_t w = 0; w < size.spatial[3]; w++) {
+                for (int64_t z = 0; z < size.spatial[2]; z++) {
+                    for (int64_t y = 0; y < size.spatial[1]; y++) {
+                        for (int64_t x = 0; x < size.spatial[0]; x++) {
+                            *dst++ = static_cast<dst_t>(src[layout.get_linear_offset(cldnn::tensor(b, f, x, y, z, w))]);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+void convert_and_copy(const void* src_ptr, ov::element::Type src_et, void* dst_ptr, ov::element::Type dst_et, size_t size, cldnn::layout layout) {
+    if (size == 0)
+        return;
+
+    if (src_et == dst_et && !layout.data_padding) {
+        std::memcpy(dst_ptr, src_ptr, size * src_et.size());
+        return;
+    }
+
+#define CASE(s_et, d_et, s_type, d_type)                                                                                        \
+    if (src_et == s_et && dst_et == d_et) {                                                                                     \
+        if (static_cast<bool>(layout.data_padding)) {                                                                           \
+            return convert_and_copy_padded_source(static_cast<const s_type*>(src_ptr), static_cast<d_type*>(dst_ptr), layout);  \
+        } else {                                                                                                                 \
+            return convert_and_copy_no_pad(static_cast<const s_type*>(src_ptr), static_cast<d_type*>(dst_ptr), size);           \
+        }                                                                                                                        \
+    }
+
+    // For unsupported inputs
+    CASE(ov::element::f64, ov::element::f32, double, float);
+    CASE(ov::element::i16, ov::element::f32, int16_t, float);
+    CASE(ov::element::u16, ov::element::f32, uint16_t, float);
+    CASE(ov::element::u64, ov::element::i32, uint64_t, int32_t);
+    CASE(ov::element::i64, ov::element::i32, int64_t, int32_t);
+    CASE(ov::element::u32, ov::element::i32, uint32_t, int32_t);
+
+    // For unsupported outputs
+    CASE(ov::element::f32, ov::element::f64, float, double);
+    CASE(ov::element::i32, ov::element::i64, int32_t, int64_t);
+    CASE(ov::element::i32, ov::element::u64, int32_t, uint64_t);
+    CASE(ov::element::i32, ov::element::u32, int32_t, uint32_t);
+    CASE(ov::element::f32, ov::element::i16, float, int16_t);
+    CASE(ov::element::f32, ov::element::u16, float, uint16_t);
+
+    // TODO: Need instances below?
+    CASE(ov::element::u32, ov::element::i64, uint32_t, int64_t);
+    CASE(ov::element::u32, ov::element::u64, uint32_t, uint64_t);
+
+    // For state conversions
+    CASE(ov::element::f32, ov::element::f32, float, float);
+    CASE(ov::element::f16, ov::element::f16, ov::float16, ov::float16);
+    CASE(ov::element::f32, ov::element::f16, float, ov::float16);
+    CASE(ov::element::f16, ov::element::f32, ov::float16, float);
+
+    OPENVINO_THROW("[GPU] Unsupported element types combination for copy: ", src_et, " -> ", dst_et);
+}
+
+}  // namespace
+
+namespace ov {
+namespace intel_gpu {
+
+void convert_and_copy(const cldnn::memory::ptr src, ov::ITensor const* dst, const cldnn::stream& stream) {
+    auto src_et = src->get_layout().data_type;
+    auto dst_et = dst->get_element_type();
+
+    size_t size = ov::shape_size(dst->get_shape());
+
+    cldnn::mem_lock<uint8_t, cldnn::mem_lock_type::read> src_lock(src, stream);
+    std::unique_ptr<cldnn::mem_lock<uint8_t>> dst_lock = nullptr;
+
+    const void* src_ptr = src_lock.data();
+    void* dst_ptr = nullptr;
+
+    if (auto remote = dynamic_cast<const ov::intel_gpu::RemoteTensorImpl*>(dst)) {
+        auto mem = remote->get_original_memory();
+        dst_lock.reset(new cldnn::mem_lock<uint8_t>(mem, stream));
+        dst_ptr = dst_lock->data();
+    } else {
+        dst_ptr = dst->data();
+    }
+
+    return ::convert_and_copy(src_ptr, src_et, dst_ptr, dst_et, size, src->get_layout());
+}
+
+void convert_and_copy(const ov::ITensor* src, ov::ITensor const* dst, const cldnn::stream& stream) {
+    auto src_et = src->get_element_type();
+    auto dst_et = dst->get_element_type();
+
+    size_t size = ov::shape_size(dst->get_shape());
+
+    const void* src_ptr = nullptr;
+    void* dst_ptr = nullptr;
+
+    std::unique_ptr<cldnn::mem_lock<uint8_t, cldnn::mem_lock_type::read>> src_lock = nullptr;
+    std::unique_ptr<cldnn::mem_lock<uint8_t>> dst_lock = nullptr;
+
+    if (auto remote = dynamic_cast<const ov::intel_gpu::RemoteTensorImpl*>(src)) {
+        auto mem = remote->get_original_memory();
+        src_lock.reset(new cldnn::mem_lock<uint8_t, cldnn::mem_lock_type::read>(mem, stream));
+        src_ptr = src_lock->data();
+    } else {
+        src_ptr = src->data();
+    }
+
+    if (auto remote = dynamic_cast<const ov::intel_gpu::RemoteTensorImpl*>(dst)) {
+        auto mem = remote->get_original_memory();
+        dst_lock.reset(new cldnn::mem_lock<uint8_t>(mem, stream));
+        dst_ptr = dst_lock->data();
+    } else {
+        dst_ptr = dst->data();
+    }
+
+    return ::convert_and_copy(src_ptr, src_et, dst_ptr, dst_et, size, cldnn::layout({}, ov::element::undefined, cldnn::format::bfyx, cldnn::padding()));
+}
+
+}  // namespace intel_gpu
+}  // namespace ov
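The host-side fallback in the new helper is an element-wise static_cast loop, selected per (src, dst) type pair by the CASE macro, plus a separate path that walks a padded layout via get_linear_offset. Below is a standalone sketch of the no-padding case using plain ov::Tensor host buffers; the tensor names and shapes are illustrative, and the real helper additionally handles cldnn memory locking and padded layouts. The f32 to f16 pair used here corresponds to the CASE(ov::element::f32, ov::element::f16, float, ov::float16) branch above.

```cpp
#include <openvino/runtime/tensor.hpp>
#include <openvino/core/type/element_type.hpp>
#include <openvino/core/type/float16.hpp>

// Minimal host-side analogue of convert_and_copy_no_pad: cast every element
// from the source type to the destination type. Assumes both tensors are
// dense (no padding) and hold the same number of elements.
template <typename SrcT, typename DstT>
void cast_copy(const ov::Tensor& src, ov::Tensor& dst) {
    const auto* s = src.data<SrcT>();
    auto* d = dst.data<DstT>();
    for (size_t i = 0; i < src.get_size(); ++i)
        d[i] = static_cast<DstT>(s[i]);
}

int main() {
    ov::Shape shape{2, 3};
    ov::Tensor src(ov::element::f32, shape);
    ov::Tensor dst(ov::element::f16, shape);

    // Fill the f32 source with some data.
    auto* data = src.data<float>();
    for (size_t i = 0; i < src.get_size(); ++i)
        data[i] = 0.5f * static_cast<float>(i);

    // f32 -> f16 element-wise conversion.
    cast_copy<float, ov::float16>(src, dst);
    return 0;
}
```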

View File

@@ -62,106 +62,10 @@ inline std::string get_port_name(const ov::Output<const ov::Node>& port, const b
     return name;
 }

-template <typename src_t, typename dst_t>
-void convert_any_copy(const src_t* src, dst_t* dst, size_t size) {
-    OPENVINO_ASSERT(src && dst, "[GPU] Src or Dst ptr is null");
-    for (size_t i = 0; i < size; i++)
-        dst[i] = static_cast<dst_t>(src[i]);
-}
-
-void convert_and_copy(const void* src_ptr, ov::element::Type src_et, void* dst_ptr, ov::element::Type dst_et, size_t size) {
-    if (size == 0)
-        return;
-
-    if (src_et == dst_et) {
-        std::memcpy(dst_ptr, src_ptr, size * src_et.size());
-        return;
-    }
-
-#define CASE(s_et, d_et, s_type, d_type) \
-    if (src_et == s_et && dst_et == d_et) return convert_any_copy(static_cast<const s_type*>(src_ptr), static_cast<d_type*>(dst_ptr), size)
-
-    // For unsupported inputs
-    CASE(ov::element::f64, ov::element::f32, double, float);
-    CASE(ov::element::i16, ov::element::f32, int16_t, float);
-    CASE(ov::element::u16, ov::element::f32, uint16_t, float);
-    CASE(ov::element::u64, ov::element::i32, uint64_t, int32_t);
-    CASE(ov::element::i64, ov::element::i32, int64_t, int32_t);
-    CASE(ov::element::u32, ov::element::i32, uint32_t, int32_t);
-
-    // For unsupported outputs
-    CASE(ov::element::f32, ov::element::f64, float, double);
-    CASE(ov::element::i32, ov::element::i64, int32_t, int64_t);
-    CASE(ov::element::i32, ov::element::u64, int32_t, uint64_t);
-    CASE(ov::element::i32, ov::element::u32, int32_t, uint32_t);
-    CASE(ov::element::f32, ov::element::i16, float, int16_t);
-    CASE(ov::element::f32, ov::element::u16, float, uint16_t);
-
-    // TODO: Need instances below?
-    CASE(ov::element::u32, ov::element::i64, uint32_t, int64_t);
-    CASE(ov::element::u32, ov::element::u64, uint32_t, uint64_t);
-
-    OPENVINO_THROW("[GPU] Unsupported element types combination for copy: ", src_et, " -> ", dst_et);
-}
-
 bool is_convert_required(ov::element::Type src_et, ov::element::Type dst_et) {
     return src_et != dst_et && !(dst_et == ov::element::boolean && src_et == ov::element::u8);
 }

-void convert_and_copy(const cldnn::memory::ptr src, ov::ITensor const* dst, const cldnn::stream& stream) {
-    auto src_et = src->get_layout().data_type;
-    auto dst_et = dst->get_element_type();
-
-    size_t size = ov::shape_size(dst->get_shape());
-
-    cldnn::mem_lock<uint8_t> src_lock(src, stream);
-    std::unique_ptr<cldnn::mem_lock<uint8_t>> dst_lock = nullptr;
-
-    const void* src_ptr = src_lock.data();
-    void* dst_ptr = nullptr;
-
-    if (auto remote = dynamic_cast<const ov::intel_gpu::RemoteTensorImpl*>(dst)) {
-        auto mem = remote->get_original_memory();
-        dst_lock.reset(new cldnn::mem_lock<uint8_t>(mem, stream));
-        dst_ptr = dst_lock->data();
-    } else {
-        dst_ptr = dst->data();
-    }
-
-    return convert_and_copy(src_ptr, src_et, dst_ptr, dst_et, size);
-}
-
-void convert_and_copy(const ov::ITensor* src, ov::ITensor const* dst, const cldnn::stream& stream) {
-    auto src_et = src->get_element_type();
-    auto dst_et = dst->get_element_type();
-
-    size_t size = ov::shape_size(dst->get_shape());
-
-    const void* src_ptr = nullptr;
-    void* dst_ptr = nullptr;
-
-    std::unique_ptr<cldnn::mem_lock<uint8_t>> src_lock = nullptr;
-    std::unique_ptr<cldnn::mem_lock<uint8_t>> dst_lock = nullptr;
-
-    if (auto remote = dynamic_cast<const ov::intel_gpu::RemoteTensorImpl*>(src)) {
-        auto mem = remote->get_original_memory();
-        src_lock.reset(new cldnn::mem_lock<uint8_t>(mem, stream));
-        src_ptr = src_lock->data();
-    } else {
-        src_ptr = src->data();
-    }
-
-    if (auto remote = dynamic_cast<const ov::intel_gpu::RemoteTensorImpl*>(dst)) {
-        auto mem = remote->get_original_memory();
-        dst_lock.reset(new cldnn::mem_lock<uint8_t>(mem, stream));
-        dst_ptr = dst_lock->data();
-    } else {
-        dst_ptr = dst->data();
-    }
-
-    return convert_and_copy(src_ptr, src_et, dst_ptr, dst_et, size);
-}
-
 bool same_host_mem(cldnn::memory::cptr memory, const uint8_t* host_ptr) {
     const uint8_t* device_ptr = memory->get_allocation_type() == cldnn::allocation_type::usm_host ?
                                     static_cast<uint8_t*>(memory->get_internal_params().mem) : nullptr;

View File

@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 //

+#include "openvino/core/type/element_type.hpp"
 #include "openvino/runtime/make_tensor.hpp"
 #include "intel_gpu/plugin/remote_context.hpp"
 #include "intel_gpu/plugin/common_utils.hpp"
@@ -19,9 +20,9 @@ namespace intel_gpu {
 VariableState::VariableState(const VariableStateInfo& info, RemoteContextImpl::Ptr context, std::shared_ptr<cldnn::ShapePredictor> shape_predictor)
     : ov::IVariableState {info.m_id}
     , m_layout(info.m_layout)
+    , m_user_specified_type(info.m_user_specified_type)
     , m_context(context)
     , m_shape_predictor(shape_predictor) {
-    m_state = m_context->create_host_tensor(m_layout.data_type, get_tensor_shape(m_layout.get_partial_shape()));
     update_device_buffer();
 }
@@ -80,12 +81,16 @@ void VariableState::update_device_buffer() {
     m_memory = m_context->get_engine().reinterpret_buffer(*m_memory, m_layout);
 }

-ov::SoPtr<ov::ITensor> VariableState::get_state() const {
-    const bool blocking = true;
-    m_state->set_shape(m_memory->get_layout().get_shape());
-    m_memory->copy_to(m_context->get_engine().get_service_stream(), m_state->data(), blocking);
-    return m_state;
+ov::element::Type VariableState::get_user_specified_type() const {
+    return m_user_specified_type != ov::element::undefined ? m_user_specified_type : ov::element::Type(m_layout.data_type);
+}
+
+ov::SoPtr<ov::ITensor> VariableState::get_state() const {
+    auto tensor = m_context->create_host_tensor(get_user_specified_type(), m_memory->get_layout().get_shape());
+    convert_and_copy(m_memory, tensor._ptr.get(), m_context->get_engine().get_service_stream());
+    return tensor;
 }

 }  // namespace intel_gpu

View File

@@ -24,6 +24,7 @@ file(GLOB_RECURSE SOURCES_MAIN
     "${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/remote_context.cpp"
     "${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp"
     "${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp"
+    "${CMAKE_HOME_DIRECTORY}/src/plugins/intel_gpu/src/plugin/common_utils.cpp"
   )

 if (NOT ENABLE_ONEDNN_FOR_GPU)