[GPU] Ability to compile OV with OCL 1.2 (#13706)

This commit is contained in:
Vladimir Paramuzov 2022-11-04 12:31:01 +04:00 committed by GitHub
parent 8789fcda04
commit 8b93e3f2ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 103 additions and 22 deletions

View File

@ -25,6 +25,10 @@ if(ENABLE_GPU_DEBUG_CAPS)
add_definitions(-DGPU_DEBUG_CONFIG=1) add_definitions(-DGPU_DEBUG_CONFIG=1)
endif() endif()
# OpenCL version the GPU plugin is compiled against, in CL_TARGET_OPENCL_VERSION
# notation (e.g. 200 for OpenCL 2.0, 120 for OpenCL 1.2). Exposed as a cache entry
# so it can be overridden from the command line with -DINTEL_GPU_TARGET_OCL_VERSION=<value>.
# Note: the cache signature is set(<var> <value> CACHE <type> <docstring>).
set(INTEL_GPU_TARGET_OCL_VERSION "200" CACHE STRING "Target version of OpenCL which should be used by GPU plugin")
# Forward the selected version to every translation unit of the plugin.
add_definitions(-DCL_TARGET_OPENCL_VERSION=${INTEL_GPU_TARGET_OCL_VERSION})
set(MAIN_DIR "${CMAKE_CURRENT_SOURCE_DIR}") set(MAIN_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
set(INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include") set(INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")

View File

@ -85,6 +85,9 @@ public:
/// Returns preferred allocation type which can be mapped to host ptr /// Returns preferred allocation type which can be mapped to host ptr
allocation_type get_lockable_preferred_memory_allocation_type(bool is_image_layout = false) const; allocation_type get_lockable_preferred_memory_allocation_type(bool is_image_layout = false) const;
/// Returns preferred device allocation type which may not be lockable
allocation_type get_preferred_memory_allocation_type(bool is_image_layout = false) const;
/// Checks if the current engine supports specified allocation @p type /// Checks if the current engine supports specified allocation @p type
bool supports_allocation(allocation_type type) const; bool supports_allocation(allocation_type type) const;

View File

@ -510,7 +510,7 @@ void primitive_inst::allocate_internal_buffers(void) {
auto total_device_mem_size = std::accumulate(inst_deps.begin(), inst_deps.end(), size_t(0), device_mem_acc); auto total_device_mem_size = std::accumulate(inst_deps.begin(), inst_deps.end(), size_t(0), device_mem_acc);
for (const auto& output : _outputs) { for (const auto& output : _outputs) {
if (output->get_allocation_type() == allocation_type::usm_device) if (output->get_allocation_type() == allocation_type::usm_device)
total_device_mem_size += output->size(); total_device_mem_size += output->size();
} }
@ -530,10 +530,13 @@ void primitive_inst::allocate_internal_buffers(void) {
GPU_DEBUG_IF(debug_config->verbose >= 2) { GPU_DEBUG_IF(debug_config->verbose >= 2) {
GPU_DEBUG_COUT << "[" << _node->id() << ": internal buf]" << std::endl; GPU_DEBUG_COUT << "[" << _node->id() << ": internal buf]" << std::endl;
} }
if (input_device_mem && (available_device_mem_size - (int64_t)layout.bytes_count() >= 0)) auto alloc_type = allocation_type::unknown;
_intermediates_memory.push_back(engine.allocate_memory(layout, allocation_type::usm_device)); if (input_device_mem && (available_device_mem_size - (int64_t)layout.bytes_count() >= 0)) {
else alloc_type = engine.get_preferred_memory_allocation_type();
_intermediates_memory.push_back(engine.allocate_memory(layout, allocation_type::usm_host)); } else {
alloc_type = engine.get_lockable_preferred_memory_allocation_type();
}
_intermediates_memory.push_back(engine.allocate_memory(layout, alloc_type));
} }
} }
@ -598,7 +601,8 @@ event::ptr primitive_inst::update_weights() {
} }
_impl_params->reordered_weights = engine.reinterpret_buffer(*_impl_params->reordered_weights, expected_layout); _impl_params->reordered_weights = engine.reinterpret_buffer(*_impl_params->reordered_weights, expected_layout);
} else { } else {
_impl_params->reordered_weights = engine.allocate_memory(expected_layout, allocation_type::usm_device); auto alloc_type = engine.get_preferred_memory_allocation_type();
_impl_params->reordered_weights = engine.allocate_memory(expected_layout, alloc_type);
} }
kernel_arguments_data args; kernel_arguments_data args;

View File

@ -111,7 +111,21 @@ allocation_type engine::get_lockable_preferred_memory_allocation_type(bool is_im
if (support_usm_host) if (support_usm_host)
return allocation_type::usm_host; return allocation_type::usm_host;
throw std::runtime_error("[clDNN internal error] Could not find proper allocation type!"); OPENVINO_ASSERT(false, "[GPU] Couldn't find proper allocation type in get_lockable_preferred_memory_allocation_type method");
}
// Selects the allocation type to use for a buffer, without requiring it to be lockable.
//
// @param is_image_layout  when true, the default allocation type is returned regardless of USM.
// @return get_default_allocation_type() if USM is not in use or the layout is an image;
//         otherwise usm_device when supported, else usm_host when supported.
// If neither USM type is supported, OPENVINO_ASSERT(false, ...) is triggered.
allocation_type engine::get_preferred_memory_allocation_type(bool is_image_layout) const {
// Non-USM engines and image layouts always use the engine's default type
if (!use_unified_shared_memory() || is_image_layout)
return get_default_allocation_type();
// Device memory is the first choice
if (supports_allocation(allocation_type::usm_device))
return allocation_type::usm_device;
// Fall back to host allocations if device allocations are not supported for some reason
if (supports_allocation(allocation_type::usm_host))
return allocation_type::usm_host;
// No usable USM allocation type was found
OPENVINO_ASSERT(false, "[GPU] Couldn't find proper allocation type in get_preferred_memory_allocation_type method");
} }
memory::ptr engine::attach_memory(const layout& layout, void* ptr) { memory::ptr engine::attach_memory(const layout& layout, void* ptr) {

View File

@ -16,6 +16,7 @@ command_queues_builder::command_queues_builder()
_priority_mode(priority_mode_types::disabled), _priority_mode(priority_mode_types::disabled),
_throttle_mode(throttle_mode_types::disabled) {} _throttle_mode(throttle_mode_types::disabled) {}
#if CL_TARGET_OPENCL_VERSION >= 200
std::vector<cl_queue_properties> command_queues_builder::get_properties(const cl::Device& device, uint16_t stream_id) { std::vector<cl_queue_properties> command_queues_builder::get_properties(const cl::Device& device, uint16_t stream_id) {
std::vector<cl_queue_properties> properties; std::vector<cl_queue_properties> properties;
@ -75,6 +76,14 @@ std::vector<cl_queue_properties> command_queues_builder::get_properties(const cl
return properties; return properties;
} }
#else
// Variant compiled when CL_TARGET_OPENCL_VERSION < 200: builds the queue property
// bitfield from the builder's profiling and out-of-order settings.
// The device and stream_id arguments are not used by this variant.
cl_command_queue_properties command_queues_builder::get_properties(const cl::Device& device, uint16_t stream_id) {
    cl_command_queue_properties queue_flags = 0;
    if (_profiling)
        queue_flags |= CL_QUEUE_PROFILING_ENABLE;
    if (_out_of_order)
        queue_flags |= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
    return queue_flags;
}
#endif
ocl_queue_type command_queues_builder::build(const cl::Context& context, const cl::Device& device) { ocl_queue_type command_queues_builder::build(const cl::Context& context, const cl::Device& device) {
ocl_queue_type queue; ocl_queue_type queue;
@ -82,9 +91,11 @@ ocl_queue_type command_queues_builder::build(const cl::Context& context, const c
static std::atomic<uint16_t> stream_id{0}; static std::atomic<uint16_t> stream_id{0};
auto properties = get_properties(device, stream_id++); auto properties = get_properties(device, stream_id++);
#if CL_TARGET_OPENCL_VERSION >= 200
queue = clCreateCommandQueueWithProperties(context.get(), device.get(), properties.data(), &error_code); queue = clCreateCommandQueueWithProperties(context.get(), device.get(), properties.data(), &error_code);
#else
queue = clCreateCommandQueue(context.get(), device.get(), properties, &error_code);
#endif
if (error_code != CL_SUCCESS) { if (error_code != CL_SUCCESS) {
CLDNN_ERROR_MESSAGE("Command queues builders", CLDNN_ERROR_MESSAGE("Command queues builders",
"clCreateCommandQueueWithPropertiesINTEL error " + std::to_string(error_code)); "clCreateCommandQueueWithPropertiesINTEL error " + std::to_string(error_code));

View File

@ -26,8 +26,11 @@ private:
bool _supports_queue_families; bool _supports_queue_families;
priority_mode_types _priority_mode; priority_mode_types _priority_mode;
throttle_mode_types _throttle_mode; throttle_mode_types _throttle_mode;
#if CL_TARGET_OPENCL_VERSION >= 200
std::vector<cl_queue_properties> get_properties(const cl::Device& device, uint16_t stream_id = 0); std::vector<cl_queue_properties> get_properties(const cl::Device& device, uint16_t stream_id = 0);
#else
cl_command_queue_properties get_properties(const cl::Device& device, uint16_t stream_id = 0);
#endif
}; };
} // namespace ocl } // namespace ocl

View File

@ -12,14 +12,6 @@
namespace cldnn { namespace cldnn {
namespace ocl { namespace ocl {
typedef cl::vector<cl::vector<unsigned char>> kernels_binaries_vector;
typedef cl::vector<kernels_binaries_vector> kernels_binaries_container;
typedef CL_API_ENTRY cl_command_queue(CL_API_CALL* pfn_clCreateCommandQueueWithPropertiesINTEL)(
cl_context context,
cl_device_id device,
const cl_queue_properties* properties,
cl_int* errcodeRet);
using ocl_queue_type = cl::CommandQueue; using ocl_queue_type = cl::CommandQueue;
using ocl_kernel_type = cl::KernelIntel; using ocl_kernel_type = cl::KernelIntel;

View File

@ -25,6 +25,17 @@ static const char create_device_error_msg[] =
"[GPU] No supported OCL devices found or unexpected error happened during devices query.\n" "[GPU] No supported OCL devices found or unexpected error happened during devices query.\n"
"[GPU] Please check OpenVINO documentation for GPU drivers setup guide.\n"; "[GPU] Please check OpenVINO documentation for GPU drivers setup guide.\n";
// Splits @p s into the substrings separated by @p delim.
// Empty tokens between consecutive delimiters (and before a leading delimiter) are kept;
// a delimiter at the very end of the string does not produce a trailing empty token,
// and an empty input yields an empty vector.
std::vector<std::string> split(const std::string& s, char delim) {
    std::vector<std::string> parts;
    std::string::size_type token_begin = 0;
    while (token_begin < s.size()) {
        const std::string::size_type token_end = s.find(delim, token_begin);
        if (token_end == std::string::npos) {
            // Last token: everything up to the end of the string
            parts.push_back(s.substr(token_begin));
            break;
        }
        parts.push_back(s.substr(token_begin, token_end - token_begin));
        token_begin = token_end + 1;
    }
    return parts;
}
bool does_device_match_config(bool out_of_order, const cl::Device& device) { bool does_device_match_config(bool out_of_order, const cl::Device& device) {
if (device.getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU) { if (device.getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU) {
return false; return false;
@ -44,6 +55,32 @@ bool does_device_match_config(bool out_of_order, const cl::Device& device) {
} }
} }
// Reject devices whose reported OpenCL version is lower than the version this build targets.
// -1 marks a version component that could not be extracted from the device string.
int32_t ocl_major = -1;
int32_t ocl_minor = -1;
// Spec says that the format of this string is OpenCL<space><major_version.minor_version><space><vendor-specific information>
auto ocl_version_string = device.getInfo<CL_DEVICE_VERSION>();
auto tokens = split(ocl_version_string, ' ');
// The second space-separated token is expected to hold "<major>.<minor>"
if (tokens.size() > 1) {
auto version_string = tokens[1];
auto version_tokens = split(version_string, '.');
if (version_tokens.size() == 2) {
// NOTE(review): std::stoi throws if a token does not start with a number - confirm callers tolerate that
ocl_major = std::stoi(version_tokens[0]);
ocl_minor = std::stoi(version_tokens[1]);
}
}
// If the version could not be parsed, the check is skipped and the device is not rejected here
if (ocl_major != -1 && ocl_minor != -1) {
// Encode as major*100 + minor*10 (e.g. 1.2 -> 120) so it is comparable with the thresholds below
int32_t ocl_version = ocl_major*100 + ocl_minor*10;
// Minimum accepted device version follows the compile-time target version
#if CL_TARGET_OPENCL_VERSION >= 200
int32_t min_ocl_version = 200;
#else
int32_t min_ocl_version = 120;
#endif
if (ocl_version < min_ocl_version)
return false;
}
return true; return true;
} }

View File

@ -15,9 +15,20 @@
// we want exceptions // we want exceptions
#define CL_HPP_ENABLE_EXCEPTIONS #define CL_HPP_ENABLE_EXCEPTIONS
// Use 200 as the target OpenCL version unless it has already been defined
// (for example on the compiler command line).
#ifndef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 200
#endif
// The CL_HPP_* target version defaults to the same value as CL_TARGET_OPENCL_VERSION
// unless it has been defined explicitly.
#ifndef CL_HPP_TARGET_OPENCL_VERSION
#define CL_HPP_TARGET_OPENCL_VERSION CL_TARGET_OPENCL_VERSION
#endif
// Likewise, the CL_HPP_* minimum version defaults to the target version.
#ifndef CL_HPP_MINIMUM_OPENCL_VERSION
#define CL_HPP_MINIMUM_OPENCL_VERSION CL_TARGET_OPENCL_VERSION
#endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION >= 200
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_HPP_MINIMUM_OPENCL_VERSION 200 #endif
#define CL_HPP_TARGET_OPENCL_VERSION 200
// Check for compiler and change specific diagnostics. // Check for compiler and change specific diagnostics.
#if defined __INTEL_COMPILER #if defined __INTEL_COMPILER
@ -59,11 +70,13 @@
#include "ocl_ext.hpp" #include "ocl_ext.hpp"
#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION >= 200
namespace cl { namespace cl {
namespace detail { namespace detail {
CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_)
} }
} }
#endif
// Restore specific diagnostics. // Restore specific diagnostics.
#if defined __INTEL_COMPILER #if defined __INTEL_COMPILER

View File

@ -190,7 +190,7 @@ TEST(cl_mem_check, check_input) {
image_desc.image_slice_pitch = 0; image_desc.image_slice_pitch = 0;
image_desc.num_mip_levels = 0; image_desc.num_mip_levels = 0;
image_desc.num_samples = 0; image_desc.num_samples = 0;
image_desc.mem_object = NULL; image_desc.buffer = NULL;
cl_mem img = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL, cl_mem img = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL,
&image_format, &image_desc, NULL, &err); &image_format, &image_desc, NULL, &err);
@ -198,7 +198,7 @@ TEST(cl_mem_check, check_input) {
image_desc.image_width = 0; image_desc.image_width = 0;
image_desc.image_height = 0; image_desc.image_height = 0;
image_desc.mem_object = img; image_desc.buffer = img;
image_desc.image_depth = 0; image_desc.image_depth = 0;
image_format.image_channel_order = CL_R; image_format.image_channel_order = CL_R;