[GPU] added is_dynamic methods to program_node and primitive_inst. Minor refactoring (#12322)

This commit is contained in:
Vladimir Paramuzov 2022-08-01 09:54:20 +04:00 committed by GitHub
parent 88bbad4a5d
commit 075b833a7a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 111 additions and 72 deletions

View File

@ -430,6 +430,10 @@ private:
tensor size;
};
// Streams a human-readable description of the layout; delegates to layout::to_string().
inline ::std::ostream& operator<<(::std::ostream& os, const layout& p) {
    os << p.to_string();
    return os;
}
class optional_layout {
public:
optional_layout() {}

View File

@ -165,4 +165,41 @@ struct surfaces_lock {
static std::unique_ptr<surfaces_lock> create(engine_types engine_type, std::vector<memory::ptr> mem, const stream& stream);
};
// Reads the contents of @p mem into a host-side std::vector, converting every
// element to T with static_cast.
//
// @param mem    GPU memory object to read; only i32 and i64 element types are supported.
// @param stream Stream used to map non-host-visible memory via mem_lock.
// @return       Vector of mem->count() converted elements.
// @throws ov::Exception if the buffer's data type is neither i32 nor i64.
template<typename T>
inline std::vector<T> read_vector(cldnn::memory::ptr mem, cldnn::stream& stream) {
    std::vector<T> out_vecs;
    if (mem->get_allocation_type() == allocation_type::usm_host || mem->get_allocation_type() == allocation_type::usm_shared) {
        // Host-visible USM memory: the raw pointer can be dereferenced directly, no lock needed.
        switch (mem->get_layout().data_type) {
            case data_types::i32: {
                auto p_mem = reinterpret_cast<int32_t*>(mem->buffer_ptr());
                for (size_t i = 0; i < mem->count(); i++) {
                    out_vecs.push_back(static_cast<T>(p_mem[i]));
                }
                break;
            }
            case data_types::i64: {
                auto p_mem = reinterpret_cast<int64_t*>(mem->buffer_ptr());
                for (size_t i = 0; i < mem->count(); i++) {
                    out_vecs.push_back(static_cast<T>(p_mem[i]));
                }
                break;
            }
            default: throw ov::Exception("[GPU] read_vector: unsupported data type");
        }
    } else {
        // Device memory: must be mapped with mem_lock before it can be read on host.
        switch (mem->get_layout().data_type) {
            case data_types::i32: {
                mem_lock<int32_t, mem_lock_type::read> lock{mem, stream};
                out_vecs.assign(lock.begin(), lock.end());
                break;  // BUGFIX: break was missing — control fell through to the default throw
            }
            case data_types::i64: {
                mem_lock<int64_t, mem_lock_type::read> lock{mem, stream};
                out_vecs.assign(lock.begin(), lock.end());
                break;  // BUGFIX: break was missing — control fell through to the default throw
            }
            default: throw ov::Exception("[GPU] read_vector: unsupported data type");
        }
    }
    return out_vecs;
}
} // namespace cldnn

View File

@ -36,19 +36,7 @@ public:
for (size_t i = 1; i < arg.get_dependencies().size(); ++i) {
auto& input = arg.get_dependency(i).as<data>();
auto mem = input.get_attached_memory_ptr();
std::vector<int32_t> sizes;
if (input.get_output_layout().data_type == cldnn::data_types::i64) {
mem_lock<int64_t> lock{mem, arg.get_program().get_stream()};
int64_t* data = lock.data();
std::vector<int64_t> sizes_i64 = std::vector<int64_t>(data, data + input.get_output_layout().count());
sizes.resize(sizes_i64.size());
for (size_t j = 0; j < sizes.size(); j++)
sizes[j] = static_cast<int32_t>(sizes_i64[j]);
} else {
mem_lock<int32_t> lock{mem, arg.get_program().get_stream()};
int32_t* data = lock.data();
sizes = std::vector<int32_t>(data, data + input.get_output_layout().count());
}
std::vector<int32_t> sizes = read_vector<int32_t>(mem, arg.get_program().get_stream());
pad_vector_to_size(sizes, dims_num, i != 1); // for "begin" completion used 0 value, for other - 1
params.striding_params.push_back(sizes);
}

View File

@ -155,8 +155,8 @@ int64_t get_f_offset(cldnn::layout&& l, dnnl::memory::desc&& desc) {
auto f_padding = l.data_padding.lower_size().feature[0];
if (f_padding != 0) {
offset = f_padding;
for (size_t i = 0; i < l.get_tensor().spatial.size(); ++i) {
offset *= l.get_tensor().spatial[i];
for (size_t i = 0; i < l.get_spatial_rank(); ++i) {
offset *= l.spatial(i);
}
}

View File

@ -142,6 +142,10 @@ public:
return _mem_allocated;
}
bool is_dynamic() const {
return _node.is_dynamic();
}
void allocate_internal_buffers();
static memory::ptr allocate_output(engine& engine, memory_pool& pool,
const program_node& _node, uint32_t net_id, bool is_internal);
@ -274,6 +278,9 @@ protected:
private:
bool do_allocate_memory(typed_node const& typ_node) {
if (typ_node.is_dynamic())
return false;
if (typ_node.template have_user_with_type<concatenation>() && typ_node.get_users().size() == 1 &&
typ_node.get_users().front()->can_be_optimized()) { // check if the only user is concat
return false;

View File

@ -231,6 +231,8 @@ public:
// @p invalidate_users_if_changed is set to true returns whether output layout has changed
bool recalc_output_layout(bool invalidate_users_if_changed = true);
bool is_dynamic() const;
bool is_padded() { return static_cast<bool>(get_output_layout().data_padding); }
bool is_padded() const { return static_cast<bool>(get_output_layout().data_padding); }

View File

@ -286,9 +286,9 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool,
// For outputs, cpu prim we want to have lockable alloc type
// Also if the successor of a node is an cpu, then memory needs to be lockable.
auto use_lockable_memory = is_output_buffer(_node) || _node.get_selected_impl()->is_cpu() || is_any_user_cpu(_node.get_users()) ||
bool is_cpu = _node.get_selected_impl() ? _node.get_selected_impl()->is_cpu() : false;
auto use_lockable_memory = is_output_buffer(_node) || is_cpu || is_any_user_cpu(_node.get_users()) ||
!_engine.supports_allocation(allocation_type::usm_device);
GPU_DEBUG_GET_INSTANCE(debug_config);
const auto& lockable_mem_type = _engine.get_lockable_preffered_memory_allocation_type(layout.format.is_image_2d());
const auto& alloc_type = use_lockable_memory ? lockable_mem_type

View File

@ -268,6 +268,15 @@ bool program_node::recalc_output_layout(bool invalidate_users_if_changed) {
return set_output_layout(new_layout, invalidate_users_if_changed);
}
bool program_node::is_dynamic() const {
for (auto& input : get_dependencies()) {
if (input->get_output_layout().is_dynamic())
return true;
}
return get_output_layout().is_dynamic();
}
bool program_node::has_padded_dependency() {
return std::any_of(get_dependencies().begin(), get_dependencies().end(), [](program_node* node) {
return node->is_padded();

View File

@ -140,8 +140,15 @@ std::vector<size_t> layout::get_dims_order() const {
}
// Builds a multi-line, human-readable dump of the layout:
// data type, format, shape, and lower/upper paddings.
// TODO: Extend with format/data-type info
std::string layout::to_string() const {
    std::stringstream desc;
    auto field = [&desc](const char* key, const std::string& value) {
        desc << "\t" << key << "=" << value << ";\n";
    };
    desc << "\n{\n";
    field("data_type", data_type_traits::name(data_type));
    field("format", format.to_string());
    field("shape", size.to_string());
    field("pad_l", data_padding.lower_size().to_string());
    field("pad_u", data_padding.upper_size().to_string());
    desc << "}";
    return desc.str();
}
size_t layout::count() const {

View File

@ -5,6 +5,8 @@
#include "ocl_wrapper.hpp"
#include "openvino/core/except.hpp"
#include <vector>
namespace cldnn {
@ -21,7 +23,7 @@ typedef CL_API_ENTRY cl_command_queue(CL_API_CALL* pfn_clCreateCommandQueueWithP
using ocl_queue_type = cl::CommandQueue;
using ocl_kernel_type = cl::KernelIntel;
class ocl_error : public std::runtime_error {
class ocl_error : public ov::Exception {
public:
explicit ocl_error(cl::Error const& err);
};

View File

@ -37,18 +37,16 @@ namespace cldnn {
namespace ocl {
ocl_error::ocl_error(cl::Error const& err)
: std::runtime_error(err.what() + std::string(", error code: ") + std::to_string(err.err())) {}
: ov::Exception("[GPU] " + std::string(err.what()) + std::string(", error code: ") + std::to_string(err.err())) {}
ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type,
const engine_configuration& conf, const InferenceEngine::ITaskExecutor::Ptr task_executor)
: engine(dev, conf, task_executor) {
if (runtime_type != runtime_types::ocl) {
IE_THROW() << "Invalid runtime type specified for OCL engine. Only OCL runtime is supported";
}
OPENVINO_ASSERT(runtime_type == runtime_types::ocl, "[GPU] Invalid runtime type specified for OCL engine. Only OCL runtime is supported");
auto casted = dynamic_cast<ocl_device*>(dev.get());
if (!casted)
IE_THROW() << "[CLDNN] Invalid device type passed to ocl engine";
throw ov::Exception("[GPU] Invalid device type passed to ocl engine");
casted->get_device().getInfo(CL_DEVICE_EXTENSIONS, &_extensions);
_usm_helper.reset(new cl::UsmHelper(get_cl_context(), get_cl_device(), use_unified_shared_memory()));
@ -62,7 +60,7 @@ ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type,
#ifdef ENABLE_ONEDNN_FOR_GPU
dnnl::engine& ocl_engine::get_onednn_engine() const {
if (!_onednn_engine)
IE_THROW() << "[GPU] onednn engine is nullptr";
throw ov::Exception("[GPU] onednn engine is nullptr");
return *_onednn_engine;
}
#endif
@ -70,14 +68,14 @@ dnnl::engine& ocl_engine::get_onednn_engine() const {
const cl::Context& ocl_engine::get_cl_context() const {
auto cl_device = std::dynamic_pointer_cast<ocl_device>(_device);
if (!cl_device)
IE_THROW() << "Invalid device type for ocl_engine";
throw ov::Exception("[GPU] Invalid device type for ocl_engine");
return cl_device->get_context();
}
const cl::Device& ocl_engine::get_cl_device() const {
auto cl_device = std::dynamic_pointer_cast<ocl_device>(_device);
if (!cl_device)
IE_THROW() << "Invalid device type for ocl_engine";
throw ov::Exception("[GPU] Invalid device type for ocl_engine");
return cl_device->get_device();
}
@ -86,28 +84,21 @@ const cl::UsmHelper& ocl_engine::get_usm_helper() const {
}
memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type type, bool reset) {
if (layout.bytes_count() > get_device_info().max_alloc_mem_size) {
std::stringstream ss;
ss << "Exceeded max size of memory object allocation: "
<< "Requested " << layout.bytes_count() << " bytes "
<< "but max alloc size is " << get_device_info().max_alloc_mem_size << " bytes";
IE_THROW() << ss.str();
}
OPENVINO_ASSERT(!layout.is_dynamic(), "[GPU] Can't allocate memory for dynamic layout");
OPENVINO_ASSERT(layout.bytes_count() <= get_device_info().max_alloc_mem_size,
"[GPU] Exceeded max size of memory object allocation: ",
"Requested ", layout.bytes_count(), " bytes "
"but max alloc size is ", get_device_info().max_alloc_mem_size, " bytes");
auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host);
if (layout.bytes_count() + used_mem > get_max_memory_size()) {
std::stringstream ss;
ss << "Exceeded max size of memory allocation: "
<< "Required " << layout.bytes_count() + used_mem << " bytes "
<< "but memory size is " << get_max_memory_size() << " bytes";
IE_THROW() << ss.str();
}
OPENVINO_ASSERT(layout.bytes_count() + used_mem <= get_max_memory_size(),
"[GPU] Exceeded max size of memory allocation: ",
"Required ", (layout.bytes_count() + used_mem), " bytes "
"but memory size is ", get_max_memory_size(), " bytes");
if (type != allocation_type::cl_mem && !supports_allocation(type)) {
std::ostringstream type_str;
type_str << type;
IE_THROW() << "Unsupported allocation type " + type_str.str();
}
OPENVINO_ASSERT(supports_allocation(type) || type == allocation_type::cl_mem,
"[GPU] Unsupported allocation type: ", type);
try {
memory::ptr res = nullptr;
@ -130,24 +121,18 @@ memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type ty
case CL_OUT_OF_RESOURCES:
case CL_OUT_OF_HOST_MEMORY:
case CL_INVALID_BUFFER_SIZE:
IE_THROW() << "out of GPU resources";
throw ov::Exception("[GPU] out of GPU resources");
default:
IE_THROW() << "GPU buffer allocation failed";
throw ov::Exception("[GPU] buffer allocation failed");
}
}
}
memory::ptr ocl_engine::reinterpret_buffer(const memory& memory, const layout& new_layout) {
if (memory.get_engine() != this)
IE_THROW() << "trying to reinterpret buffer allocated by a different engine";
if (new_layout.format.is_image() && !memory.get_layout().format.is_image())
IE_THROW() << "trying to reinterpret non-image buffer as image : " << memory.get_layout().format.to_string()
<< " --> " << new_layout.format.to_string();
if (!new_layout.format.is_image() && memory.get_layout().format.is_image())
IE_THROW() << "trying to reinterpret image buffer as non-image buffer : "
<< memory.get_layout().format.to_string() << " --> " << new_layout.format.to_string();
OPENVINO_ASSERT(memory.get_engine() == this, "[GPU] trying to reinterpret buffer allocated by a different engine");
OPENVINO_ASSERT(new_layout.format.is_image() == memory.get_layout().format.is_image(),
"[GPU] trying to reinterpret between image and non-image layouts. Current: ",
memory.get_layout().format.to_string(), " Target: ", new_layout.format.to_string());
try {
if (new_layout.format.is_image_2d()) {
@ -184,22 +169,20 @@ memory::ptr ocl_engine::reinterpret_handle(const layout& new_layout, shared_mem_
cl::Buffer buf(static_cast<cl_mem>(params.mem), true);
auto actual_mem_size = buf.getInfo<CL_MEM_SIZE>();
auto requested_mem_size = new_layout.bytes_count();
if (actual_mem_size < requested_mem_size) {
IE_THROW() << "[GPU] shared buffer has smaller size (" << std::to_string(actual_mem_size) <<
") than specified layout (" << std::to_string(requested_mem_size) << ")";
}
OPENVINO_ASSERT(actual_mem_size >= requested_mem_size,
"[GPU] shared buffer has smaller size (", actual_mem_size,
") than specified layout (", requested_mem_size, ")");
return std::make_shared<ocl::gpu_buffer>(this, new_layout, buf);
} else if (params.mem_type == shared_mem_type::shared_mem_usm) {
cl::UsmMemory usm_buffer(get_usm_helper(), params.mem);
auto actual_mem_size = get_usm_helper().get_usm_allocation_size(usm_buffer.get());
auto requested_mem_size = new_layout.bytes_count();
if (actual_mem_size < requested_mem_size) {
IE_THROW() << "[GPU] shared USM buffer has smaller size (" << std::to_string(actual_mem_size)
<< ") than specified layout (" << std::to_string(requested_mem_size) << ")";
}
OPENVINO_ASSERT(actual_mem_size >= requested_mem_size,
"[GPU] shared USM buffer has smaller size (", actual_mem_size,
") than specified layout (", requested_mem_size, ")");
return std::make_shared<ocl::gpu_usm>(this, new_layout, usm_buffer);
} else {
IE_THROW() << "unknown shared object fromat or type";
throw ov::Exception("[GPU] unknown shared object fromat or type");
}
}
catch (const cl::Error& clErr) {
@ -208,9 +191,9 @@ memory::ptr ocl_engine::reinterpret_handle(const layout& new_layout, shared_mem_
case CL_OUT_OF_RESOURCES:
case CL_OUT_OF_HOST_MEMORY:
case CL_INVALID_BUFFER_SIZE:
IE_THROW() << "out of GPU resources";
throw ov::Exception("[GPU] out of GPU resources");
default:
IE_THROW() << "GPU buffer allocation failed";
throw ov::Exception("[GPU] buffer allocation failed");
}
}
}