[GPU] added is_dynamic methods to program_node and primitive_inst. Minor refactoring (#12322)

This commit is contained in:
Vladimir Paramuzov 2022-08-01 09:54:20 +04:00 committed by GitHub
parent 88bbad4a5d
commit 075b833a7a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 111 additions and 72 deletions

View File

@ -430,6 +430,10 @@ private:
tensor size;
};
// Streams a human-readable description of the layout; delegates to layout::to_string().
inline ::std::ostream& operator<<(::std::ostream& os, const layout& p) {
    os << p.to_string();
    return os;
}
class optional_layout {
public:
optional_layout() {}

View File

@ -165,4 +165,41 @@ struct surfaces_lock {
static std::unique_ptr<surfaces_lock> create(engine_types engine_type, std::vector<memory::ptr> mem, const stream& stream);
};
// Reads the contents of @p mem into a host-side std::vector, converting every
// element to T with static_cast.
//
// @param mem    GPU memory object to read; only i32 and i64 element types are supported.
// @param stream Stream used to map non-host-visible memory via mem_lock.
// @return       Vector of mem->count() converted elements.
// @throws ov::Exception if the buffer's data type is neither i32 nor i64.
template<typename T>
inline std::vector<T> read_vector(cldnn::memory::ptr mem, cldnn::stream& stream) {
    std::vector<T> out_vecs;
    if (mem->get_allocation_type() == allocation_type::usm_host || mem->get_allocation_type() == allocation_type::usm_shared) {
        // Host-visible USM memory: the raw pointer can be dereferenced directly, no lock needed.
        switch (mem->get_layout().data_type) {
            case data_types::i32: {
                auto p_mem = reinterpret_cast<int32_t*>(mem->buffer_ptr());
                for (size_t i = 0; i < mem->count(); i++) {
                    out_vecs.push_back(static_cast<T>(p_mem[i]));
                }
                break;
            }
            case data_types::i64: {
                auto p_mem = reinterpret_cast<int64_t*>(mem->buffer_ptr());
                for (size_t i = 0; i < mem->count(); i++) {
                    out_vecs.push_back(static_cast<T>(p_mem[i]));
                }
                break;
            }
            default: throw ov::Exception("[GPU] read_vector: unsupported data type");
        }
    } else {
        // Device memory: must be mapped with mem_lock before it can be read on host.
        switch (mem->get_layout().data_type) {
            case data_types::i32: {
                mem_lock<int32_t, mem_lock_type::read> lock{mem, stream};
                out_vecs.assign(lock.begin(), lock.end());
                break;  // BUGFIX: break was missing — control fell through to the default throw
            }
            case data_types::i64: {
                mem_lock<int64_t, mem_lock_type::read> lock{mem, stream};
                out_vecs.assign(lock.begin(), lock.end());
                break;  // BUGFIX: break was missing — control fell through to the default throw
            }
            default: throw ov::Exception("[GPU] read_vector: unsupported data type");
        }
    }
    return out_vecs;
}
} // namespace cldnn

View File

@ -36,19 +36,7 @@ public:
for (size_t i = 1; i < arg.get_dependencies().size(); ++i) {
auto& input = arg.get_dependency(i).as<data>();
auto mem = input.get_attached_memory_ptr();
std::vector<int32_t> sizes;
if (input.get_output_layout().data_type == cldnn::data_types::i64) {
mem_lock<int64_t> lock{mem, arg.get_program().get_stream()};
int64_t* data = lock.data();
std::vector<int64_t> sizes_i64 = std::vector<int64_t>(data, data + input.get_output_layout().count());
sizes.resize(sizes_i64.size());
for (size_t j = 0; j < sizes.size(); j++)
sizes[j] = static_cast<int32_t>(sizes_i64[j]);
} else {
mem_lock<int32_t> lock{mem, arg.get_program().get_stream()};
int32_t* data = lock.data();
sizes = std::vector<int32_t>(data, data + input.get_output_layout().count());
}
std::vector<int32_t> sizes = read_vector<int32_t>(mem, arg.get_program().get_stream());
pad_vector_to_size(sizes, dims_num, i != 1); // for "begin" completion used 0 value, for other - 1
params.striding_params.push_back(sizes);
}

View File

@ -155,8 +155,8 @@ int64_t get_f_offset(cldnn::layout&& l, dnnl::memory::desc&& desc) {
auto f_padding = l.data_padding.lower_size().feature[0];
if (f_padding != 0) {
offset = f_padding;
for (size_t i = 0; i < l.get_tensor().spatial.size(); ++i) {
offset *= l.get_tensor().spatial[i];
for (size_t i = 0; i < l.get_spatial_rank(); ++i) {
offset *= l.spatial(i);
}
}

View File

@ -142,6 +142,10 @@ public:
return _mem_allocated;
}
bool is_dynamic() const {
return _node.is_dynamic();
}
void allocate_internal_buffers();
static memory::ptr allocate_output(engine& engine, memory_pool& pool,
const program_node& _node, uint32_t net_id, bool is_internal);
@ -274,6 +278,9 @@ protected:
private:
bool do_allocate_memory(typed_node const& typ_node) {
if (typ_node.is_dynamic())
return false;
if (typ_node.template have_user_with_type<concatenation>() && typ_node.get_users().size() == 1 &&
typ_node.get_users().front()->can_be_optimized()) { // check if the only user is concat
return false;

View File

@ -231,6 +231,8 @@ public:
// @p invalidate_users_if_changed is set to true returns whether output layout has changed
bool recalc_output_layout(bool invalidate_users_if_changed = true);
bool is_dynamic() const;
bool is_padded() { return static_cast<bool>(get_output_layout().data_padding); }
bool is_padded() const { return static_cast<bool>(get_output_layout().data_padding); }

View File

@ -286,9 +286,9 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool,
// For outputs, cpu prim we want to have lockable alloc type
// Also if the successor of a node is an cpu, then memory needs to be lockable.
auto use_lockable_memory = is_output_buffer(_node) || _node.get_selected_impl()->is_cpu() || is_any_user_cpu(_node.get_users()) ||
bool is_cpu = _node.get_selected_impl() ? _node.get_selected_impl()->is_cpu() : false;
auto use_lockable_memory = is_output_buffer(_node) || is_cpu || is_any_user_cpu(_node.get_users()) ||
!_engine.supports_allocation(allocation_type::usm_device);
GPU_DEBUG_GET_INSTANCE(debug_config);
const auto& lockable_mem_type = _engine.get_lockable_preffered_memory_allocation_type(layout.format.is_image_2d());
const auto& alloc_type = use_lockable_memory ? lockable_mem_type

View File

@ -268,6 +268,15 @@ bool program_node::recalc_output_layout(bool invalidate_users_if_changed) {
return set_output_layout(new_layout, invalidate_users_if_changed);
}
bool program_node::is_dynamic() const {
for (auto& input : get_dependencies()) {
if (input->get_output_layout().is_dynamic())
return true;
}
return get_output_layout().is_dynamic();
}
bool program_node::has_padded_dependency() {
return std::any_of(get_dependencies().begin(), get_dependencies().end(), [](program_node* node) {
return node->is_padded();

View File

@ -140,8 +140,15 @@ std::vector<size_t> layout::get_dims_order() const {
}
// Builds a multi-line, human-readable dump of the layout:
// data type, format, shape, and lower/upper paddings.
// TODO: Extend with format/data-type info
std::string layout::to_string() const {
    std::stringstream desc;
    auto field = [&desc](const char* key, const std::string& value) {
        desc << "\t" << key << "=" << value << ";\n";
    };
    desc << "\n{\n";
    field("data_type", data_type_traits::name(data_type));
    field("format", format.to_string());
    field("shape", size.to_string());
    field("pad_l", data_padding.lower_size().to_string());
    field("pad_u", data_padding.upper_size().to_string());
    desc << "}";
    return desc.str();
}
size_t layout::count() const {

View File

@ -5,6 +5,8 @@
#include "ocl_wrapper.hpp"
#include "openvino/core/except.hpp"
#include <vector>
namespace cldnn {
@ -21,7 +23,7 @@ typedef CL_API_ENTRY cl_command_queue(CL_API_CALL* pfn_clCreateCommandQueueWithP
using ocl_queue_type = cl::CommandQueue;
using ocl_kernel_type = cl::KernelIntel;
class ocl_error : public std::runtime_error {
class ocl_error : public ov::Exception {
public:
explicit ocl_error(cl::Error const& err);
};

View File

@ -37,18 +37,16 @@ namespace cldnn {
namespace ocl {
ocl_error::ocl_error(cl::Error const& err)
: std::runtime_error(err.what() + std::string(", error code: ") + std::to_string(err.err())) {}
: ov::Exception("[GPU] " + std::string(err.what()) + std::string(", error code: ") + std::to_string(err.err())) {}
ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type,
const engine_configuration& conf, const InferenceEngine::ITaskExecutor::Ptr task_executor)
: engine(dev, conf, task_executor) {
if (runtime_type != runtime_types::ocl) {
IE_THROW() << "Invalid runtime type specified for OCL engine. Only OCL runtime is supported";
}
OPENVINO_ASSERT(runtime_type == runtime_types::ocl, "[GPU] Invalid runtime type specified for OCL engine. Only OCL runtime is supported");
auto casted = dynamic_cast<ocl_device*>(dev.get());
if (!casted)
IE_THROW() << "[CLDNN] Invalid device type passed to ocl engine";
throw ov::Exception("[GPU] Invalid device type passed to ocl engine");
casted->get_device().getInfo(CL_DEVICE_EXTENSIONS, &_extensions);
_usm_helper.reset(new cl::UsmHelper(get_cl_context(), get_cl_device(), use_unified_shared_memory()));
@ -62,7 +60,7 @@ ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type,
#ifdef ENABLE_ONEDNN_FOR_GPU
dnnl::engine& ocl_engine::get_onednn_engine() const {
if (!_onednn_engine)
IE_THROW() << "[GPU] onednn engine is nullptr";
throw ov::Exception("[GPU] onednn engine is nullptr");
return *_onednn_engine;
}
#endif
@ -70,14 +68,14 @@ dnnl::engine& ocl_engine::get_onednn_engine() const {
const cl::Context& ocl_engine::get_cl_context() const {
auto cl_device = std::dynamic_pointer_cast<ocl_device>(_device);
if (!cl_device)
IE_THROW() << "Invalid device type for ocl_engine";
throw ov::Exception("[GPU] Invalid device type for ocl_engine");
return cl_device->get_context();
}
const cl::Device& ocl_engine::get_cl_device() const {
auto cl_device = std::dynamic_pointer_cast<ocl_device>(_device);
if (!cl_device)
IE_THROW() << "Invalid device type for ocl_engine";
throw ov::Exception("[GPU] Invalid device type for ocl_engine");
return cl_device->get_device();
}
@ -86,28 +84,21 @@ const cl::UsmHelper& ocl_engine::get_usm_helper() const {
}
memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type type, bool reset) {
if (layout.bytes_count() > get_device_info().max_alloc_mem_size) {
std::stringstream ss;
ss << "Exceeded max size of memory object allocation: "
<< "Requested " << layout.bytes_count() << " bytes "
<< "but max alloc size is " << get_device_info().max_alloc_mem_size << " bytes";
IE_THROW() << ss.str();
}
OPENVINO_ASSERT(!layout.is_dynamic(), "[GPU] Can't allocate memory for dynamic layout");
OPENVINO_ASSERT(layout.bytes_count() <= get_device_info().max_alloc_mem_size,
"[GPU] Exceeded max size of memory object allocation: ",
"Requested ", layout.bytes_count(), " bytes "
"but max alloc size is ", get_device_info().max_alloc_mem_size, " bytes");
auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host);
if (layout.bytes_count() + used_mem > get_max_memory_size()) {
std::stringstream ss;
ss << "Exceeded max size of memory allocation: "
<< "Required " << layout.bytes_count() + used_mem << " bytes "
<< "but memory size is " << get_max_memory_size() << " bytes";
IE_THROW() << ss.str();
}
OPENVINO_ASSERT(layout.bytes_count() + used_mem <= get_max_memory_size(),
"[GPU] Exceeded max size of memory allocation: ",
"Required ", (layout.bytes_count() + used_mem), " bytes "
"but memory size is ", get_max_memory_size(), " bytes");
if (type != allocation_type::cl_mem && !supports_allocation(type)) {
std::ostringstream type_str;
type_str << type;
IE_THROW() << "Unsupported allocation type " + type_str.str();
}
OPENVINO_ASSERT(supports_allocation(type) || type == allocation_type::cl_mem,
"[GPU] Unsupported allocation type: ", type);
try {
memory::ptr res = nullptr;
@ -130,24 +121,18 @@ memory::ptr ocl_engine::allocate_memory(const layout& layout, allocation_type ty
case CL_OUT_OF_RESOURCES:
case CL_OUT_OF_HOST_MEMORY:
case CL_INVALID_BUFFER_SIZE:
IE_THROW() << "out of GPU resources";
throw ov::Exception("[GPU] out of GPU resources");
default:
IE_THROW() << "GPU buffer allocation failed";
throw ov::Exception("[GPU] buffer allocation failed");
}
}
}
memory::ptr ocl_engine::reinterpret_buffer(const memory& memory, const layout& new_layout) {
if (memory.get_engine() != this)
IE_THROW() << "trying to reinterpret buffer allocated by a different engine";
if (new_layout.format.is_image() && !memory.get_layout().format.is_image())
IE_THROW() << "trying to reinterpret non-image buffer as image : " << memory.get_layout().format.to_string()
<< " --> " << new_layout.format.to_string();
if (!new_layout.format.is_image() && memory.get_layout().format.is_image())
IE_THROW() << "trying to reinterpret image buffer as non-image buffer : "
<< memory.get_layout().format.to_string() << " --> " << new_layout.format.to_string();
OPENVINO_ASSERT(memory.get_engine() == this, "[GPU] trying to reinterpret buffer allocated by a different engine");
OPENVINO_ASSERT(new_layout.format.is_image() == memory.get_layout().format.is_image(),
"[GPU] trying to reinterpret between image and non-image layouts. Current: ",
memory.get_layout().format.to_string(), " Target: ", new_layout.format.to_string());
try {
if (new_layout.format.is_image_2d()) {
@ -184,22 +169,20 @@ memory::ptr ocl_engine::reinterpret_handle(const layout& new_layout, shared_mem_
cl::Buffer buf(static_cast<cl_mem>(params.mem), true);
auto actual_mem_size = buf.getInfo<CL_MEM_SIZE>();
auto requested_mem_size = new_layout.bytes_count();
if (actual_mem_size < requested_mem_size) {
IE_THROW() << "[GPU] shared buffer has smaller size (" << std::to_string(actual_mem_size) <<
") than specified layout (" << std::to_string(requested_mem_size) << ")";
}
OPENVINO_ASSERT(actual_mem_size >= requested_mem_size,
"[GPU] shared buffer has smaller size (", actual_mem_size,
") than specified layout (", requested_mem_size, ")");
return std::make_shared<ocl::gpu_buffer>(this, new_layout, buf);
} else if (params.mem_type == shared_mem_type::shared_mem_usm) {
cl::UsmMemory usm_buffer(get_usm_helper(), params.mem);
auto actual_mem_size = get_usm_helper().get_usm_allocation_size(usm_buffer.get());
auto requested_mem_size = new_layout.bytes_count();
if (actual_mem_size < requested_mem_size) {
IE_THROW() << "[GPU] shared USM buffer has smaller size (" << std::to_string(actual_mem_size)
<< ") than specified layout (" << std::to_string(requested_mem_size) << ")";
}
OPENVINO_ASSERT(actual_mem_size >= requested_mem_size,
"[GPU] shared USM buffer has smaller size (", actual_mem_size,
") than specified layout (", requested_mem_size, ")");
return std::make_shared<ocl::gpu_usm>(this, new_layout, usm_buffer);
} else {
IE_THROW() << "unknown shared object fromat or type";
throw ov::Exception("[GPU] unknown shared object fromat or type");
}
}
catch (const cl::Error& clErr) {
@ -208,9 +191,9 @@ memory::ptr ocl_engine::reinterpret_handle(const layout& new_layout, shared_mem_
case CL_OUT_OF_RESOURCES:
case CL_OUT_OF_HOST_MEMORY:
case CL_INVALID_BUFFER_SIZE:
IE_THROW() << "out of GPU resources";
throw ov::Exception("[GPU] out of GPU resources");
default:
IE_THROW() << "GPU buffer allocation failed";
throw ov::Exception("[GPU] buffer allocation failed");
}
}
}