[GPU] Ability to compile OV with OCL 1.2 (#13706)

Vladimir Paramuzov 2022-11-04 12:31:01 +04:00 committed by GitHub
parent 8789fcda04
commit 8b93e3f2ec
10 changed files with 103 additions and 22 deletions


@ -25,6 +25,10 @@ if(ENABLE_GPU_DEBUG_CAPS)
add_definitions(-DGPU_DEBUG_CONFIG=1)
endif()
set(INTEL_GPU_TARGET_OCL_VERSION "200" CACHE STRING "Target version of OpenCL which should be used by GPU plugin")
add_definitions(-DCL_TARGET_OPENCL_VERSION=${INTEL_GPU_TARGET_OCL_VERSION})
set(MAIN_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
set(INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
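
Note (not part of the commit): the hunk above adds an INTEL_GPU_TARGET_OCL_VERSION cache variable (default "200") and forwards it as the CL_TARGET_OPENCL_VERSION compile definition, so an OpenCL 1.2 build can be requested at configure time, e.g. with -DINTEL_GPU_TARGET_OCL_VERSION=120. A minimal, illustrative C++ sketch of how a source file can branch on that definition:

#include <cstdio>

#ifndef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 200  // mirrors the CMake cache default when nothing is injected
#endif

int main() {
#if CL_TARGET_OPENCL_VERSION >= 200
    std::puts("OpenCL 2.0+ build: USM and queue-properties APIs are available.");
#else
    std::puts("OpenCL 1.2 build: only 1.2-era APIs may be used.");
#endif
    return 0;
}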


@ -85,6 +85,9 @@ public:
/// Returns preferred allocation type which can be mapped to host ptr
allocation_type get_lockable_preferred_memory_allocation_type(bool is_image_layout = false) const;
/// Returns preferred device allocation type which may be not lockable
allocation_type get_preferred_memory_allocation_type(bool is_image_layout = false) const;
/// Checks if the current engine supports specified allocation @p type
bool supports_allocation(allocation_type type) const;
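
Hedged usage sketch for the two helpers declared above (assumes the plugin's engine header, intel_gpu/runtime/engine.hpp, and the cldnn types it declares): callers that need to map a buffer on the host ask for the lockable preferred type, while pure device buffers take the non-lockable preferred type.

#include "intel_gpu/runtime/engine.hpp"  // assumed header location

cldnn::memory::ptr allocate_buffer(cldnn::engine& engine,
                                   const cldnn::layout& layout,
                                   bool needs_host_access) {
    auto type = needs_host_access
        ? engine.get_lockable_preferred_memory_allocation_type()  // host-mappable (e.g. usm_host)
        : engine.get_preferred_memory_allocation_type();          // usm_device when supported
    return engine.allocate_memory(layout, type);
}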


@ -510,7 +510,7 @@ void primitive_inst::allocate_internal_buffers(void) {
auto total_device_mem_size = std::accumulate(inst_deps.begin(), inst_deps.end(), size_t(0), device_mem_acc);
for (const auto& output : _outputs) {
if (output->get_allocation_type() == allocation_type::usm_device)
total_device_mem_size += output->size();
}
@ -530,10 +530,13 @@ void primitive_inst::allocate_internal_buffers(void) {
GPU_DEBUG_IF(debug_config->verbose >= 2) {
GPU_DEBUG_COUT << "[" << _node->id() << ": internal buf]" << std::endl;
}
if (input_device_mem && (available_device_mem_size - (int64_t)layout.bytes_count() >= 0))
_intermediates_memory.push_back(engine.allocate_memory(layout, allocation_type::usm_device));
else
_intermediates_memory.push_back(engine.allocate_memory(layout, allocation_type::usm_host));
auto alloc_type = allocation_type::unknown;
if (input_device_mem && (available_device_mem_size - (int64_t)layout.bytes_count() >= 0)) {
alloc_type = engine.get_preferred_memory_allocation_type();
} else {
alloc_type = engine.get_lockable_preferred_memory_allocation_type();
}
_intermediates_memory.push_back(engine.allocate_memory(layout, alloc_type));
}
}
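
The hunk above replaces hard-coded usm_device/usm_host choices with the engine's preferred types, so an engine built against OpenCL 1.2 without USM can still allocate intermediate buffers. A self-contained sketch of the selection policy (the names below are illustrative stand-ins, not the plugin's):

#include <cstdint>
#include <cstddef>

enum class alloc_kind { device_preferred, lockable_preferred };  // stand-ins for the engine helpers

alloc_kind choose_intermediate_alloc(bool inputs_on_device,
                                     int64_t available_device_mem,
                                     std::size_t buffer_bytes) {
    // Use device-preferred memory only when the inputs already live on the device
    // and the buffer still fits into the remaining device-memory budget; otherwise
    // fall back to a lockable (host-visible) allocation.
    if (inputs_on_device &&
        available_device_mem - static_cast<int64_t>(buffer_bytes) >= 0)
        return alloc_kind::device_preferred;
    return alloc_kind::lockable_preferred;
}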
@ -598,7 +601,8 @@ event::ptr primitive_inst::update_weights() {
}
_impl_params->reordered_weights = engine.reinterpret_buffer(*_impl_params->reordered_weights, expected_layout);
} else {
_impl_params->reordered_weights = engine.allocate_memory(expected_layout, allocation_type::usm_device);
auto alloc_type = engine.get_preferred_memory_allocation_type();
_impl_params->reordered_weights = engine.allocate_memory(expected_layout, alloc_type);
}
kernel_arguments_data args;


@ -111,7 +111,21 @@ allocation_type engine::get_lockable_preferred_memory_allocation_type(bool is_im
if (support_usm_host)
return allocation_type::usm_host;
throw std::runtime_error("[clDNN internal error] Could not find proper allocation type!");
OPENVINO_ASSERT(false, "[GPU] Couldn't find proper allocation type in get_lockable_preferred_memory_allocation_type method");
}
allocation_type engine::get_preferred_memory_allocation_type(bool is_image_layout) const {
if (!use_unified_shared_memory() || is_image_layout)
return get_default_allocation_type();
if (supports_allocation(allocation_type::usm_device))
return allocation_type::usm_device;
// Fall back to host allocations in case device allocations are not supported for some reason
if (supports_allocation(allocation_type::usm_host))
return allocation_type::usm_host;
OPENVINO_ASSERT(false, "[GPU] Couldn't find proper allocation type in get_preferred_memory_allocation_type method");
}
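
The implementation above walks a fallback chain (usm_device, then usm_host) and now fails via OPENVINO_ASSERT instead of a bare std::runtime_error. How supports_allocation() detects USM availability is not shown in this diff; one plausible, hedged way to back it on the OpenCL side is to look for the Intel USM extension in the device extension string:

#include <CL/cl.h>
#include <string>

bool device_reports_usm(cl_device_id device) {
    size_t size = 0;
    if (clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, nullptr, &size) != CL_SUCCESS)
        return false;
    std::string extensions(size, '\0');
    if (clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, size, &extensions[0], nullptr) != CL_SUCCESS)
        return false;
    return extensions.find("cl_intel_unified_shared_memory") != std::string::npos;
}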
memory::ptr engine::attach_memory(const layout& layout, void* ptr) {


@ -16,6 +16,7 @@ command_queues_builder::command_queues_builder()
_priority_mode(priority_mode_types::disabled),
_throttle_mode(throttle_mode_types::disabled) {}
#if CL_TARGET_OPENCL_VERSION >= 200
std::vector<cl_queue_properties> command_queues_builder::get_properties(const cl::Device& device, uint16_t stream_id) {
std::vector<cl_queue_properties> properties;
@ -75,6 +76,14 @@ std::vector<cl_queue_properties> command_queues_builder::get_properties(const cl
return properties;
}
#else
cl_command_queue_properties command_queues_builder::get_properties(const cl::Device& device, uint16_t stream_id) {
cl_command_queue_properties cl_queue_properties =
((_profiling ? CL_QUEUE_PROFILING_ENABLE : 0) | (_out_of_order ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0));
return cl_queue_properties;
}
#endif
ocl_queue_type command_queues_builder::build(const cl::Context& context, const cl::Device& device) {
ocl_queue_type queue;
@ -82,9 +91,11 @@ ocl_queue_type command_queues_builder::build(const cl::Context& context, const c
static std::atomic<uint16_t> stream_id{0};
auto properties = get_properties(device, stream_id++);
#if CL_TARGET_OPENCL_VERSION >= 200
queue = clCreateCommandQueueWithProperties(context.get(), device.get(), properties.data(), &error_code);
#else
queue = clCreateCommandQueue(context.get(), device.get(), properties, &error_code);
#endif
if (error_code != CL_SUCCESS) {
CLDNN_ERROR_MESSAGE("Command queues builders",
"clCreateCommandQueueWithPropertiesINTEL error " + std::to_string(error_code));


@ -26,8 +26,11 @@ private:
bool _supports_queue_families;
priority_mode_types _priority_mode;
throttle_mode_types _throttle_mode;
#if CL_TARGET_OPENCL_VERSION >= 200
std::vector<cl_queue_properties> get_properties(const cl::Device& device, uint16_t stream_id = 0);
#else
cl_command_queue_properties get_properties(const cl::Device& device, uint16_t stream_id = 0);
#endif
};
} // namespace ocl


@ -12,14 +12,6 @@
namespace cldnn {
namespace ocl {
typedef cl::vector<cl::vector<unsigned char>> kernels_binaries_vector;
typedef cl::vector<kernels_binaries_vector> kernels_binaries_container;
typedef CL_API_ENTRY cl_command_queue(CL_API_CALL* pfn_clCreateCommandQueueWithPropertiesINTEL)(
cl_context context,
cl_device_id device,
const cl_queue_properties* properties,
cl_int* errcodeRet);
using ocl_queue_type = cl::CommandQueue;
using ocl_kernel_type = cl::KernelIntel;


@ -25,6 +25,17 @@ static const char create_device_error_msg[] =
"[GPU] No supported OCL devices found or unexpected error happened during devices query.\n"
"[GPU] Please check OpenVINO documentation for GPU drivers setup guide.\n";
std::vector<std::string> split(const std::string& s, char delim) {
std::vector<std::string> result;
std::stringstream ss(s);
std::string item;
while (getline(ss, item, delim)) {
result.push_back(item);
}
return result;
}
bool does_device_match_config(bool out_of_order, const cl::Device& device) {
if (device.getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU) {
return false;
@ -44,6 +55,32 @@ bool does_device_match_config(bool out_of_order, const cl::Device& device) {
}
}
int32_t ocl_major = -1;
int32_t ocl_minor = -1;
// Spec says that the format of this string is OpenCL<space><major_version.minor_version><space><vendor-specific information>
auto ocl_version_string = device.getInfo<CL_DEVICE_VERSION>();
auto tokens = split(ocl_version_string, ' ');
if (tokens.size() > 1) {
auto version_string = tokens[1];
auto version_tokens = split(version_string, '.');
if (version_tokens.size() == 2) {
ocl_major = std::stoi(version_tokens[0]);
ocl_minor = std::stoi(version_tokens[1]);
}
}
if (ocl_major != -1 && ocl_minor != -1) {
int32_t ocl_version = ocl_major*100 + ocl_minor*10;
#if CL_TARGET_OPENCL_VERSION >= 200
int32_t min_ocl_version = 200;
#else
int32_t min_ocl_version = 120;
#endif
if (ocl_version < min_ocl_version)
return false;
}
return true;
}
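
The parsing above follows the CL_DEVICE_VERSION format ("OpenCL <major>.<minor> <vendor-specific information>") and rejects devices older than the compile-time target. A standalone sketch of the same arithmetic (1.2 maps to 120, 2.0 to 200); the example string is illustrative:

#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

static std::vector<std::string> split(const std::string& s, char delim) {
    std::vector<std::string> result;
    std::stringstream ss(s);
    std::string item;
    while (std::getline(ss, item, delim))
        result.push_back(item);
    return result;
}

int main() {
    std::string version = "OpenCL 1.2 NEO";  // example CL_DEVICE_VERSION value
    auto tokens = split(version, ' ');
    int ocl_version = -1;
    if (tokens.size() > 1) {
        auto v = split(tokens[1], '.');
        if (v.size() == 2)
            ocl_version = std::stoi(v[0]) * 100 + std::stoi(v[1]) * 10;  // "1.2" -> 120
    }
    std::printf("parsed version: %d\n", ocl_version);  // prints 120
    return 0;
}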


@ -15,9 +15,20 @@
// we want exceptions
#define CL_HPP_ENABLE_EXCEPTIONS
#ifndef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 200
#endif
#ifndef CL_HPP_TARGET_OPENCL_VERSION
#define CL_HPP_TARGET_OPENCL_VERSION CL_TARGET_OPENCL_VERSION
#endif
#ifndef CL_HPP_MINIMUM_OPENCL_VERSION
#define CL_HPP_MINIMUM_OPENCL_VERSION CL_TARGET_OPENCL_VERSION
#endif
#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION >= 200
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_HPP_MINIMUM_OPENCL_VERSION 200
#define CL_HPP_TARGET_OPENCL_VERSION 200
#endif
// Check for compiler and change specific diagnostics.
#if defined __INTEL_COMPILER
@ -59,11 +70,13 @@
#include "ocl_ext.hpp"
#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION >= 200
namespace cl {
namespace detail {
CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_)
}
}
#endif
// Restore specific diagnostics.
#if defined __INTEL_COMPILER
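
With the guards above, the cl.hpp wrapper macros now follow CL_TARGET_OPENCL_VERSION instead of being pinned to 2.0, and the 2.0-only parameter traits are compiled only for a 2.0 target. A hedged sketch of what a 1.2 consumer ends up with (the include path is assumed, not taken from the diff):

#define CL_TARGET_OPENCL_VERSION 120   // normally injected by CMake via add_definitions()
#include "ocl_wrapper.hpp"             // assumed in-tree path of the wrapper patched above

// Given the #ifndef guards above, after inclusion:
//   CL_HPP_TARGET_OPENCL_VERSION  == 120
//   CL_HPP_MINIMUM_OPENCL_VERSION == 120
// so the C++ wrapper exposes only OpenCL 1.2 functionality and no 2.0-only declarations.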


@ -190,7 +190,7 @@ TEST(cl_mem_check, check_input) {
image_desc.image_slice_pitch = 0;
image_desc.num_mip_levels = 0;
image_desc.num_samples = 0;
image_desc.mem_object = NULL;
image_desc.buffer = NULL;
cl_mem img = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL,
&image_format, &image_desc, NULL, &err);
@ -198,7 +198,7 @@ TEST(cl_mem_check, check_input) {
image_desc.image_width = 0;
image_desc.image_height = 0;
image_desc.mem_object = img;
image_desc.buffer = img;
image_desc.image_depth = 0;
image_format.image_channel_order = CL_R;
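
Note (not part of the commit): the test switches from cl_image_desc::mem_object to cl_image_desc::buffer because 'buffer' has existed since the OpenCL 1.2 headers, while 'mem_object' only appeared with the 2.0 union, so 'buffer' is the spelling that builds against both header generations. A hedged, self-contained sketch of portable cl_image_desc setup (helper name is illustrative):

#include <CL/cl.h>
#include <cstddef>
#include <cstring>

cl_image_desc make_image2d_desc(size_t width, size_t height) {
    cl_image_desc desc;
    std::memset(&desc, 0, sizeof(desc));   // zeroes pitches, mip levels, samples, etc.
    desc.image_type = CL_MEM_OBJECT_IMAGE2D;
    desc.image_width = width;
    desc.image_height = height;
    desc.buffer = nullptr;                 // same storage as mem_object in OpenCL 2.0 headers
    return desc;
}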