diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp index 84f55979934..6302a314d19 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp @@ -430,6 +430,10 @@ private: tensor size; }; +inline ::std::ostream& operator<<(::std::ostream& os, const layout& p) { + return os << p.to_string(); +} + class optional_layout { public: optional_layout() {} diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp index 9ecf82d3893..af106273fd2 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory.hpp @@ -165,4 +165,41 @@ struct surfaces_lock { static std::unique_ptr create(engine_types engine_type, std::vector mem, const stream& stream); }; +template +inline std::vector read_vector(cldnn::memory::ptr mem, cldnn::stream& stream) { + std::vector out_vecs; + if (mem->get_allocation_type() == allocation_type::usm_host || mem->get_allocation_type() == allocation_type::usm_shared) { + switch (mem->get_layout().data_type) { + case data_types::i32: { + auto p_mem = reinterpret_cast(mem->buffer_ptr()); + for (size_t i = 0; i < mem->count(); i++) { + out_vecs.push_back(static_cast(p_mem[i])); + } + break; + } + case data_types::i64: { + auto p_mem = reinterpret_cast(mem->buffer_ptr()); + for (size_t i = 0; i < mem->count(); i++) { + out_vecs.push_back(static_cast(p_mem[i])); + } + break; + } + default: throw ov::Exception("[GPU] read_vector: unsupported data type"); + } + } else { + switch (mem->get_layout().data_type) { + case data_types::i32: { + mem_lock lock{mem, stream}; + out_vecs = std::vector(lock.begin(), lock.end()); + break; + } + case data_types::i64: { + mem_lock lock{mem, stream}; + out_vecs = std::vector(lock.begin(), lock.end()); + break; + } + default:
throw ov::Exception("[GPU] read_vector: unsupported data type"); + } + } + return out_vecs; +} + } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp index 1a80adaa11e..299234e52a7 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/strided_slice.cpp @@ -36,19 +36,7 @@ public: for (size_t i = 1; i < arg.get_dependencies().size(); ++i) { auto& input = arg.get_dependency(i).as(); auto mem = input.get_attached_memory_ptr(); - std::vector sizes; - if (input.get_output_layout().data_type == cldnn::data_types::i64) { - mem_lock lock{mem, arg.get_program().get_stream()}; - int64_t* data = lock.data(); - std::vector sizes_i64 = std::vector(data, data + input.get_output_layout().count()); - sizes.resize(sizes_i64.size()); - for (size_t j = 0; j < sizes.size(); j++) - sizes[j] = static_cast(sizes_i64[j]); - } else { - mem_lock lock{mem, arg.get_program().get_stream()}; - int32_t* data = lock.data(); - sizes = std::vector(data, data + input.get_output_layout().count()); - } + std::vector sizes = read_vector(mem, arg.get_program().get_stream()); pad_vector_to_size(sizes, dims_num, i != 1); // for "begin" completion used 0 value, for other - 1 params.striding_params.push_back(sizes); } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp index facec7952f4..421717d2db0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp @@ -155,8 +155,8 @@ int64_t get_f_offset(cldnn::layout&& l, dnnl::memory::desc&& desc) { auto f_padding = l.data_padding.lower_size().feature[0]; if (f_padding != 0) { offset = f_padding; - for (size_t i = 0; i < l.get_tensor().spatial.size(); ++i) { - offset *= l.get_tensor().spatial[i]; + for (size_t i = 0; i < l.get_spatial_rank(); ++i) { + 
offset *= l.spatial(i); } } diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h index 5a3e60869db..0a3566f4155 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h @@ -142,6 +142,10 @@ public: return _mem_allocated; } + bool is_dynamic() const { + return _node.is_dynamic(); + } + void allocate_internal_buffers(); static memory::ptr allocate_output(engine& engine, memory_pool& pool, const program_node& _node, uint32_t net_id, bool is_internal); @@ -274,6 +278,9 @@ protected: private: bool do_allocate_memory(typed_node const& typ_node) { + if (typ_node.is_dynamic()) + return false; + if (typ_node.template have_user_with_type() && typ_node.get_users().size() == 1 && typ_node.get_users().front()->can_be_optimized()) { // check if the only user is concat return false; diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index 64f13cd0c6d..78c097ec202 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -231,6 +231,8 @@ public: // @p invalidate_users_if_changed is set to true returns whether output layout has changed bool recalc_output_layout(bool invalidate_users_if_changed = true); + bool is_dynamic() const; + bool is_padded() { return static_cast(get_output_layout().data_padding); } bool is_padded() const { return static_cast(get_output_layout().data_padding); } diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 55bb74a43ba..a3880606f70 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -286,9 +286,9 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool, // For outputs, cpu prim we want to have 
lockable alloc type // Also if the successor of a node is an cpu, then memory needs to be lockable. - auto use_lockable_memory = is_output_buffer(_node) || _node.get_selected_impl()->is_cpu() || is_any_user_cpu(_node.get_users()) || + bool is_cpu = _node.get_selected_impl() ? _node.get_selected_impl()->is_cpu() : false; + auto use_lockable_memory = is_output_buffer(_node) || is_cpu || is_any_user_cpu(_node.get_users()) || !_engine.supports_allocation(allocation_type::usm_device); - GPU_DEBUG_GET_INSTANCE(debug_config); const auto& lockable_mem_type = _engine.get_lockable_preffered_memory_allocation_type(layout.format.is_image_2d()); const auto& alloc_type = use_lockable_memory ? lockable_mem_type diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 33b67c2290e..27069f5342c 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -268,6 +268,15 @@ bool program_node::recalc_output_layout(bool invalidate_users_if_changed) { return set_output_layout(new_layout, invalidate_users_if_changed); } +bool program_node::is_dynamic() const { + for (auto& input : get_dependencies()) { + if (input->get_output_layout().is_dynamic()) + return true; + } + + return get_output_layout().is_dynamic(); +} + bool program_node::has_padded_dependency() { return std::any_of(get_dependencies().begin(), get_dependencies().end(), [](program_node* node) { return node->is_padded(); diff --git a/src/plugins/intel_gpu/src/runtime/layout.cpp b/src/plugins/intel_gpu/src/runtime/layout.cpp index 3ee5947a19b..e23d5c1813b 100644 --- a/src/plugins/intel_gpu/src/runtime/layout.cpp +++ b/src/plugins/intel_gpu/src/runtime/layout.cpp @@ -140,8 +140,15 @@ std::vector layout::get_dims_order() const { } std::string layout::to_string() const { - // TODO: Extend with format/data-type info - return format.to_string() + size.to_string(); + std::stringstream s; + s << "\n{\n" + << 
"\tdata_type=" << data_type_traits::name(data_type) << ";\n" + << "\tformat=" << format.to_string() << ";\n" + << "\tshape=" << size.to_string() << ";\n" + << "\tpad_l=" << data_padding.lower_size().to_string() << ";\n" + << "\tpad_u=" << data_padding.upper_size().to_string() << ";\n" + << "}"; + return s.str(); } size_t layout::count() const { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_common.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_common.hpp index 34d0bebc2f8..482426094c7 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_common.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_common.hpp @@ -5,6 +5,8 @@ #include "ocl_wrapper.hpp" +#include "openvino/core/except.hpp" + #include namespace cldnn { @@ -21,7 +23,7 @@ typedef CL_API_ENTRY cl_command_queue(CL_API_CALL* pfn_clCreateCommandQueueWithP using ocl_queue_type = cl::CommandQueue; using ocl_kernel_type = cl::KernelIntel; -class ocl_error : public std::runtime_error { +class ocl_error : public ov::Exception { public: explicit ocl_error(cl::Error const& err); }; diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp index e1bc3a1791d..d19683be1c1 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_engine.cpp @@ -37,18 +37,16 @@ namespace cldnn { namespace ocl { ocl_error::ocl_error(cl::Error const& err) - : std::runtime_error(err.what() + std::string(", error code: ") + std::to_string(err.err())) {} + : ov::Exception("[GPU] " + std::string(err.what()) + std::string(", error code: ") + std::to_string(err.err())) {} ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type, const engine_configuration& conf, const InferenceEngine::ITaskExecutor::Ptr task_executor) : engine(dev, conf, task_executor) { - if (runtime_type != runtime_types::ocl) { - IE_THROW() << "Invalid runtime type specified for OCL engine. 
Only OCL runtime is supported"; - } + OPENVINO_ASSERT(runtime_type == runtime_types::ocl, "[GPU] Invalid runtime type specified for OCL engine. Only OCL runtime is supported"); auto casted = dynamic_cast(dev.get()); if (!casted) - IE_THROW() << "[CLDNN] Invalid device type passed to ocl engine"; + throw ov::Exception("[GPU] Invalid device type passed to ocl engine"); casted->get_device().getInfo(CL_DEVICE_EXTENSIONS, &_extensions); _usm_helper.reset(new cl::UsmHelper(get_cl_context(), get_cl_device(), use_unified_shared_memory())); @@ -62,7 +60,7 @@ ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type, #ifdef ENABLE_ONEDNN_FOR_GPU dnnl::engine& ocl_engine::get_onednn_engine() const { if (!_onednn_engine) - IE_THROW() << "[GPU] onednn engine is nullptr"; + throw ov::Exception("[GPU] onednn engine is nullptr"); return *_onednn_engine; } #endif @@ -70,14 +68,14 @@ dnnl::engine& ocl_engine::get_onednn_engine() const { const cl::Context& ocl_engine::get_cl_context() const { auto cl_device = std::dynamic_pointer_cast(_device); if (!cl_device) - IE_THROW() << "Invalid device type for ocl_engine"; + throw ov::Exception("[GPU] Invalid device type for ocl_engine"); return cl_device->get_context(); } const cl::Device& ocl_engine::get_cl_device() const { auto cl_device = std::dynamic_pointer_cast(_device); if (!cl_device) - IE_THROW() << "Invalid device type for ocl_engine"; + throw ov::Exception("[GPU] Invalid device type for ocl_engine"); return cl_device->get_device(); } @@ -86,28 +84,21 @@ const cl::UsmHelper& ocl_engine::get_usm_helper() const { } memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type type, bool reset) { - if (layout.bytes_count() > get_device_info().max_alloc_mem_size) { - std::stringstream ss; - ss << "Exceeded max size of memory object allocation: " - << "Requested " << layout.bytes_count() << " bytes " - << "but max alloc size is " << get_device_info().max_alloc_mem_size << " bytes"; - IE_THROW() << ss.str(); 
- } + OPENVINO_ASSERT(!layout.is_dynamic(), "[GPU] Can't allocate memory for dynamic layout"); + + OPENVINO_ASSERT(layout.bytes_count() <= get_device_info().max_alloc_mem_size, + "[GPU] Exceeded max size of memory object allocation: ", + "Requested ", layout.bytes_count(), " bytes " + "but max alloc size is ", get_device_info().max_alloc_mem_size, " bytes"); auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host); - if (layout.bytes_count() + used_mem > get_max_memory_size()) { - std::stringstream ss; - ss << "Exceeded max size of memory allocation: " - << "Required " << layout.bytes_count() + used_mem << " bytes " - << "but memory size is " << get_max_memory_size() << " bytes"; - IE_THROW() << ss.str(); - } + OPENVINO_ASSERT(layout.bytes_count() + used_mem <= get_max_memory_size(), + "[GPU] Exceeded max size of memory allocation: ", + "Required ", (layout.bytes_count() + used_mem), " bytes " + "but memory size is ", get_max_memory_size(), " bytes"); - if (type != allocation_type::cl_mem && !supports_allocation(type)) { - std::ostringstream type_str; - type_str << type; - IE_THROW() << "Unsupported allocation type " + type_str.str(); - } + OPENVINO_ASSERT(supports_allocation(type) || type == allocation_type::cl_mem, + "[GPU] Unsupported allocation type: ", type); try { memory::ptr res = nullptr; @@ -130,24 +121,18 @@ memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type ty case CL_OUT_OF_RESOURCES: case CL_OUT_OF_HOST_MEMORY: case CL_INVALID_BUFFER_SIZE: - IE_THROW() << "out of GPU resources"; + throw ov::Exception("[GPU] out of GPU resources"); default: - IE_THROW() << "GPU buffer allocation failed"; + throw ov::Exception("[GPU] buffer allocation failed"); } } } memory::ptr ocl_engine::reinterpret_buffer(const memory& memory, const layout& new_layout) { - if (memory.get_engine() != this) - IE_THROW() << "trying to reinterpret buffer allocated by a different engine"; - - if 
(new_layout.format.is_image() && !memory.get_layout().format.is_image()) - IE_THROW() << "trying to reinterpret non-image buffer as image : " << memory.get_layout().format.to_string() - << " --> " << new_layout.format.to_string(); - - if (!new_layout.format.is_image() && memory.get_layout().format.is_image()) - IE_THROW() << "trying to reinterpret image buffer as non-image buffer : " - << memory.get_layout().format.to_string() << " --> " << new_layout.format.to_string(); + OPENVINO_ASSERT(memory.get_engine() == this, "[GPU] trying to reinterpret buffer allocated by a different engine"); + OPENVINO_ASSERT(new_layout.format.is_image() == memory.get_layout().format.is_image(), + "[GPU] trying to reinterpret between image and non-image layouts. Current: ", + memory.get_layout().format.to_string(), " Target: ", new_layout.format.to_string()); try { if (new_layout.format.is_image_2d()) { @@ -184,22 +169,20 @@ memory::ptr ocl_engine::reinterpret_handle(const layout& new_layout, shared_mem_ cl::Buffer buf(static_cast(params.mem), true); auto actual_mem_size = buf.getInfo(); auto requested_mem_size = new_layout.bytes_count(); - if (actual_mem_size < requested_mem_size) { - IE_THROW() << "[GPU] shared buffer has smaller size (" << std::to_string(actual_mem_size) << - ") than specified layout (" << std::to_string(requested_mem_size) << ")"; - } + OPENVINO_ASSERT(actual_mem_size >= requested_mem_size, + "[GPU] shared buffer has smaller size (", actual_mem_size, + ") than specified layout (", requested_mem_size, ")"); return std::make_shared(this, new_layout, buf); } else if (params.mem_type == shared_mem_type::shared_mem_usm) { cl::UsmMemory usm_buffer(get_usm_helper(), params.mem); auto actual_mem_size = get_usm_helper().get_usm_allocation_size(usm_buffer.get()); auto requested_mem_size = new_layout.bytes_count(); - if (actual_mem_size < requested_mem_size) { - IE_THROW() << "[GPU] shared USM buffer has smaller size (" << std::to_string(actual_mem_size) - << ") than specified 
layout (" << std::to_string(requested_mem_size) << ")"; - } + OPENVINO_ASSERT(actual_mem_size >= requested_mem_size, + "[GPU] shared USM buffer has smaller size (", actual_mem_size, + ") than specified layout (", requested_mem_size, ")"); return std::make_shared(this, new_layout, usm_buffer); } else { - IE_THROW() << "unknown shared object fromat or type"; + throw ov::Exception("[GPU] unknown shared object format or type"); } } catch (const cl::Error& clErr) { @@ -208,9 +191,9 @@ memory::ptr ocl_engine::reinterpret_handle(const layout& new_layout, shared_mem_ case CL_OUT_OF_RESOURCES: case CL_OUT_OF_HOST_MEMORY: case CL_INVALID_BUFFER_SIZE: - IE_THROW() << "out of GPU resources"; + throw ov::Exception("[GPU] out of GPU resources"); default: - IE_THROW() << "GPU buffer allocation failed"; + throw ov::Exception("[GPU] buffer allocation failed"); } } } @@ -253,12 +236,12 @@ stream& ocl_engine::get_program_stream() const { } std::shared_ptr ocl_engine::create(const device::ptr device, runtime_types runtime_type, - const engine_configuration& configuration, const InferenceEngine::ITaskExecutor::Ptr task_executor) { + const engine_configuration& configuration, const InferenceEngine::ITaskExecutor::Ptr task_executor) { return std::make_shared(device, runtime_type, configuration, task_executor); } std::shared_ptr create_ocl_engine(const device::ptr device, runtime_types runtime_type, - const engine_configuration& configuration, const InferenceEngine::ITaskExecutor::Ptr task_executor) { + const engine_configuration& configuration, const InferenceEngine::ITaskExecutor::Ptr task_executor) { return ocl_engine::create(device, runtime_type, configuration, task_executor); }