diff --git a/src/plugins/intel_gpu/CMakeLists.txt b/src/plugins/intel_gpu/CMakeLists.txt index 5acf5d20fc8..5b8badf6c60 100644 --- a/src/plugins/intel_gpu/CMakeLists.txt +++ b/src/plugins/intel_gpu/CMakeLists.txt @@ -25,6 +25,10 @@ if(ENABLE_GPU_DEBUG_CAPS) add_definitions(-DGPU_DEBUG_CONFIG=1) endif() +set(INTEL_GPU_TARGET_OCL_VERSION "200" CACHE STRING "Target version of OpenCL which should be used by GPU plugin") + +add_definitions(-DCL_TARGET_OPENCL_VERSION=${INTEL_GPU_TARGET_OCL_VERSION}) + set(MAIN_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp index cd707ea98e3..d6940667fb6 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp @@ -85,6 +85,9 @@ public: /// Returns preferred allocation type which can be mapped to host ptr allocation_type get_lockable_preferred_memory_allocation_type(bool is_image_layout = false) const; + /// Returns preferred device allocation type which may be not lockable + allocation_type get_preferred_memory_allocation_type(bool is_image_layout = false) const; + /// Checks if the current engine supports speicied allocation @p type bool supports_allocation(allocation_type type) const; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 7c89637be43..9de819e7a81 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -510,7 +510,7 @@ void primitive_inst::allocate_internal_buffers(void) { auto total_device_mem_size = std::accumulate(inst_deps.begin(), inst_deps.end(), size_t(0), device_mem_acc); for (const auto& output : _outputs) { - if (output->get_allocation_type() == 
allocation_type::usm_device) total_device_mem_size += output->size(); } @@ -530,10 +530,13 @@ void primitive_inst::allocate_internal_buffers(void) { GPU_DEBUG_IF(debug_config->verbose >= 2) { GPU_DEBUG_COUT << "[" << _node->id() << ": internal buf]" << std::endl; } - if (input_device_mem && (available_device_mem_size - (int64_t)layout.bytes_count() >= 0)) - _intermediates_memory.push_back(engine.allocate_memory(layout, allocation_type::usm_device)); - else - _intermediates_memory.push_back(engine.allocate_memory(layout, allocation_type::usm_host)); + auto alloc_type = allocation_type::unknown; + if (input_device_mem && (available_device_mem_size - (int64_t)layout.bytes_count() >= 0)) { + alloc_type = engine.get_preferred_memory_allocation_type(); + } else { + alloc_type = engine.get_lockable_preferred_memory_allocation_type(); + } + _intermediates_memory.push_back(engine.allocate_memory(layout, alloc_type)); } } @@ -598,7 +601,8 @@ event::ptr primitive_inst::update_weights() { } _impl_params->reordered_weights = engine.reinterpret_buffer(*_impl_params->reordered_weights, expected_layout); } else { - _impl_params->reordered_weights = engine.allocate_memory(expected_layout, allocation_type::usm_device); + auto alloc_type = engine.get_preferred_memory_allocation_type(); + _impl_params->reordered_weights = engine.allocate_memory(expected_layout, alloc_type); } kernel_arguments_data args; diff --git a/src/plugins/intel_gpu/src/runtime/engine.cpp b/src/plugins/intel_gpu/src/runtime/engine.cpp index 9cb5bfb71fa..f3e7304bd1b 100644 --- a/src/plugins/intel_gpu/src/runtime/engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/engine.cpp @@ -111,7 +111,21 @@ allocation_type engine::get_lockable_preferred_memory_allocation_type(bool is_im if (support_usm_host) return allocation_type::usm_host; - throw std::runtime_error("[clDNN internal error] Could not find proper allocation type!"); + OPENVINO_ASSERT(false, "[GPU] Couldn't find proper allocation type in 
get_lockable_preferred_memory_allocation_type method"); +} + +allocation_type engine::get_preferred_memory_allocation_type(bool is_image_layout) const { + if (!use_unified_shared_memory() || is_image_layout) + return get_default_allocation_type(); + + if (supports_allocation(allocation_type::usm_device)) + return allocation_type::usm_device; + + // Fallback to host allocations in case if device ones are not supported for some reason + if (supports_allocation(allocation_type::usm_host)) + return allocation_type::usm_host; + + OPENVINO_ASSERT(false, "[GPU] Couldn't find proper allocation type in get_preferred_memory_allocation_type method"); } memory::ptr engine::attach_memory(const layout& layout, void* ptr) { diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.cpp index ee22d2fe5c6..51b80fca3a1 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.cpp @@ -16,6 +16,7 @@ command_queues_builder::command_queues_builder() _priority_mode(priority_mode_types::disabled), _throttle_mode(throttle_mode_types::disabled) {} +#if CL_TARGET_OPENCL_VERSION >= 200 std::vector<cl_queue_properties> command_queues_builder::get_properties(const cl::Device& device, uint16_t stream_id) { std::vector<cl_queue_properties> properties; @@ -75,6 +76,14 @@ std::vector<cl_queue_properties> command_queues_builder::get_properties(const cl return properties; } +#else +cl_command_queue_properties command_queues_builder::get_properties(const cl::Device& device, uint16_t stream_id) { + cl_command_queue_properties cl_queue_properties = ((_profiling ? CL_QUEUE_PROFILING_ENABLE : 0) | (_out_of_order ? 
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0)); + + return cl_queue_properties; +} +#endif ocl_queue_type command_queues_builder::build(const cl::Context& context, const cl::Device& device) { ocl_queue_type queue; @@ -82,9 +91,11 @@ ocl_queue_type command_queues_builder::build(const cl::Context& context, const c static std::atomic<uint16_t> stream_id{0}; auto properties = get_properties(device, stream_id++); - +#if CL_TARGET_OPENCL_VERSION >= 200 queue = clCreateCommandQueueWithProperties(context.get(), device.get(), properties.data(), &error_code); - +#else + queue = clCreateCommandQueue(context.get(), device.get(), properties, &error_code); +#endif if (error_code != CL_SUCCESS) { CLDNN_ERROR_MESSAGE("Command queues builders", "clCreateCommandQueueWithPropertiesINTEL error " + std::to_string(error_code)); diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.hpp index 8bc5b983911..30e74761b5c 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_command_queues_builder.hpp @@ -26,8 +26,11 @@ private: bool _supports_queue_families; priority_mode_types _priority_mode; throttle_mode_types _throttle_mode; - +#if CL_TARGET_OPENCL_VERSION >= 200 std::vector<cl_queue_properties> get_properties(const cl::Device& device, uint16_t stream_id = 0); +#else + cl_command_queue_properties get_properties(const cl::Device& device, uint16_t stream_id = 0); +#endif }; } // namespace ocl diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_common.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_common.hpp index 482426094c7..4e034eb8e06 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_common.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_common.hpp @@ -12,14 +12,6 @@ namespace cldnn { namespace ocl { -typedef cl::vector<cl::vector<unsigned char>> kernels_binaries_vector; -typedef cl::vector<kernels_binaries_vector> kernels_binaries_container; -typedef CL_API_ENTRY 
cl_command_queue(CL_API_CALL* pfn_clCreateCommandQueueWithPropertiesINTEL)( - cl_context context, - cl_device_id device, - const cl_queue_properties* properties, - cl_int* errcodeRet); - using ocl_queue_type = cl::CommandQueue; using ocl_kernel_type = cl::KernelIntel; diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device_detector.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device_detector.cpp index 3f6673b90e4..497a7aeefd6 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device_detector.cpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device_detector.cpp @@ -25,6 +25,17 @@ static const char create_device_error_msg[] = "[GPU] No supported OCL devices found or unexpected error happened during devices query.\n" "[GPU] Please check OpenVINO documentation for GPU drivers setup guide.\n"; +std::vector<std::string> split(const std::string& s, char delim) { + std::vector<std::string> result; + std::stringstream ss(s); + std::string item; + + while (getline(ss, item, delim)) { + result.push_back(item); + } + return result; +} + bool does_device_match_config(bool out_of_order, const cl::Device& device) { if (device.getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU) { return false; } @@ -44,6 +55,32 @@ bool does_device_match_config(bool out_of_order, const cl::Device& device) { } } + int32_t ocl_major = -1; + int32_t ocl_minor = -1; + // Spec says that the format of this string is OpenCL<space><major_version.minor_version><space><vendor-specific information> + auto ocl_version_string = device.getInfo<CL_DEVICE_VERSION>(); + auto tokens = split(ocl_version_string, ' '); + + if (tokens.size() > 1) { + auto version_string = tokens[1]; + auto version_tokens = split(version_string, '.'); + if (version_tokens.size() == 2) { + ocl_major = std::stoi(version_tokens[0]); + ocl_minor = std::stoi(version_tokens[1]); + } + } + + if (ocl_major != -1 && ocl_minor != -1) { + int32_t ocl_version = ocl_major*100 + ocl_minor*10; +#if CL_TARGET_OPENCL_VERSION >= 200 + int32_t min_ocl_version = 200; +#else + int32_t min_ocl_version = 120; +#endif + if (ocl_version < min_ocl_version) + return false; + } + 
return true; } diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_wrapper.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_wrapper.hpp index ec9e41af182..d6399a93fa4 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_wrapper.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_wrapper.hpp @@ -15,9 +15,20 @@ // we want exceptions #define CL_HPP_ENABLE_EXCEPTIONS + +#ifndef CL_TARGET_OPENCL_VERSION +#define CL_TARGET_OPENCL_VERSION 200 +#endif + +#ifndef CL_HPP_TARGET_OPENCL_VERSION +#define CL_HPP_TARGET_OPENCL_VERSION CL_TARGET_OPENCL_VERSION +#endif +#ifndef CL_HPP_MINIMUM_OPENCL_VERSION +#define CL_HPP_MINIMUM_OPENCL_VERSION CL_TARGET_OPENCL_VERSION +#endif +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION >= 200 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS -#define CL_HPP_MINIMUM_OPENCL_VERSION 200 -#define CL_HPP_TARGET_OPENCL_VERSION 200 +#endif // Check for compiler and change specific diagnostics. #if defined __INTEL_COMPILER @@ -59,11 +70,13 @@ #include "ocl_ext.hpp" +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION >= 200 namespace cl { namespace detail { CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) } } +#endif // Restore specific diagnostics. 
#if defined __INTEL_COMPILER diff --git a/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp b/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp index 16d2bd54cf1..32b5d301b58 100644 --- a/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp @@ -190,7 +190,7 @@ TEST(cl_mem_check, check_input) { image_desc.image_slice_pitch = 0; image_desc.num_mip_levels = 0; image_desc.num_samples = 0; - image_desc.mem_object = NULL; + image_desc.buffer = NULL; cl_mem img = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL, &image_format, &image_desc, NULL, &err); @@ -198,7 +198,7 @@ TEST(cl_mem_check, check_input) { image_desc.image_width = 0; image_desc.image_height = 0; - image_desc.mem_object = img; + image_desc.buffer = img; image_desc.image_depth = 0; image_format.image_channel_order = CL_R;