[GPU] Ability to compile OV with OCL 1.2 (#13706)
This commit is contained in:
parent
8789fcda04
commit
8b93e3f2ec
@ -25,6 +25,10 @@ if(ENABLE_GPU_DEBUG_CAPS)
|
||||
add_definitions(-DGPU_DEBUG_CONFIG=1)
|
||||
endif()
|
||||
|
||||
# OpenCL version the GPU plugin is compiled against; the value is forwarded to the
# compiler as CL_TARGET_OPENCL_VERSION. Cache entries take <type> before <docstring>.
set(INTEL_GPU_TARGET_OCL_VERSION "200" CACHE STRING "Target version of OpenCL which should be used by GPU plugin")
|
||||
|
||||
add_definitions(-DCL_TARGET_OPENCL_VERSION=${INTEL_GPU_TARGET_OCL_VERSION})
|
||||
|
||||
set(MAIN_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
set(INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
|
||||
|
||||
|
@ -85,6 +85,9 @@ public:
|
||||
/// Returns preferred allocation type which can be mapped to host ptr
|
||||
allocation_type get_lockable_preferred_memory_allocation_type(bool is_image_layout = false) const;
|
||||
|
||||
/// Returns preferred device allocation type which may not be lockable
|
||||
allocation_type get_preferred_memory_allocation_type(bool is_image_layout = false) const;
|
||||
|
||||
/// Checks if the current engine supports the specified allocation @p type
|
||||
bool supports_allocation(allocation_type type) const;
|
||||
|
||||
|
@ -510,7 +510,7 @@ void primitive_inst::allocate_internal_buffers(void) {
|
||||
|
||||
auto total_device_mem_size = std::accumulate(inst_deps.begin(), inst_deps.end(), size_t(0), device_mem_acc);
|
||||
for (const auto& output : _outputs) {
|
||||
if (output->get_allocation_type() == allocation_type::usm_device)
|
||||
if (output->get_allocation_type() == allocation_type::usm_device)
|
||||
total_device_mem_size += output->size();
|
||||
}
|
||||
|
||||
@ -530,10 +530,13 @@ void primitive_inst::allocate_internal_buffers(void) {
|
||||
GPU_DEBUG_IF(debug_config->verbose >= 2) {
|
||||
GPU_DEBUG_COUT << "[" << _node->id() << ": internal buf]" << std::endl;
|
||||
}
|
||||
if (input_device_mem && (available_device_mem_size - (int64_t)layout.bytes_count() >= 0))
|
||||
_intermediates_memory.push_back(engine.allocate_memory(layout, allocation_type::usm_device));
|
||||
else
|
||||
_intermediates_memory.push_back(engine.allocate_memory(layout, allocation_type::usm_host));
|
||||
auto alloc_type = allocation_type::unknown;
|
||||
if (input_device_mem && (available_device_mem_size - (int64_t)layout.bytes_count() >= 0)) {
|
||||
alloc_type = engine.get_preferred_memory_allocation_type();
|
||||
} else {
|
||||
alloc_type = engine.get_lockable_preferred_memory_allocation_type();
|
||||
}
|
||||
_intermediates_memory.push_back(engine.allocate_memory(layout, alloc_type));
|
||||
}
|
||||
}
|
||||
|
||||
@ -598,7 +601,8 @@ event::ptr primitive_inst::update_weights() {
|
||||
}
|
||||
_impl_params->reordered_weights = engine.reinterpret_buffer(*_impl_params->reordered_weights, expected_layout);
|
||||
} else {
|
||||
_impl_params->reordered_weights = engine.allocate_memory(expected_layout, allocation_type::usm_device);
|
||||
auto alloc_type = engine.get_preferred_memory_allocation_type();
|
||||
_impl_params->reordered_weights = engine.allocate_memory(expected_layout, alloc_type);
|
||||
}
|
||||
|
||||
kernel_arguments_data args;
|
||||
|
@ -111,7 +111,21 @@ allocation_type engine::get_lockable_preferred_memory_allocation_type(bool is_im
|
||||
if (support_usm_host)
|
||||
return allocation_type::usm_host;
|
||||
|
||||
throw std::runtime_error("[clDNN internal error] Could not find proper allocation type!");
|
||||
OPENVINO_ASSERT(false, "[GPU] Couldn't find proper allocation type in get_lockable_preferred_memory_allocation_type method");
|
||||
}
|
||||
|
||||
/// Picks the allocation type this engine prefers for device-side buffers.
/// @param is_image_layout when true, the engine's default allocation type is returned.
/// @return the default allocation type when unified shared memory is not in use or the
///         layout is an image; otherwise usm_device if supported, else usm_host.
/// Fails through OPENVINO_ASSERT when neither USM type is supported.
allocation_type engine::get_preferred_memory_allocation_type(bool is_image_layout) const {
    const bool usm_applicable = use_unified_shared_memory() && !is_image_layout;
    if (!usm_applicable) {
        return get_default_allocation_type();
    }

    // Candidates in priority order: device memory first, host memory as the fallback
    // for the case where device allocations are not supported for some reason.
    const allocation_type usm_candidates[] = {allocation_type::usm_device, allocation_type::usm_host};
    for (const auto candidate : usm_candidates) {
        if (supports_allocation(candidate)) {
            return candidate;
        }
    }

    OPENVINO_ASSERT(false, "[GPU] Couldn't find proper allocation type in get_preferred_memory_allocation_type method");
}
|
||||
|
||||
memory::ptr engine::attach_memory(const layout& layout, void* ptr) {
|
||||
|
@ -16,6 +16,7 @@ command_queues_builder::command_queues_builder()
|
||||
_priority_mode(priority_mode_types::disabled),
|
||||
_throttle_mode(throttle_mode_types::disabled) {}
|
||||
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200
|
||||
std::vector<cl_queue_properties> command_queues_builder::get_properties(const cl::Device& device, uint16_t stream_id) {
|
||||
std::vector<cl_queue_properties> properties;
|
||||
|
||||
@ -75,6 +76,14 @@ std::vector<cl_queue_properties> command_queues_builder::get_properties(const cl
|
||||
|
||||
return properties;
|
||||
}
|
||||
#else
|
||||
/// Builds the command-queue property bitfield used when the target OpenCL version is below 2.0.
/// Only the builder's profiling and out-of-order settings contribute to the result;
/// @p device and @p stream_id are not read by this variant.
/// @return OR-combination of CL_QUEUE_PROFILING_ENABLE and CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE.
cl_command_queue_properties command_queues_builder::get_properties(const cl::Device& device, uint16_t stream_id) {
    cl_command_queue_properties queue_flags = 0;

    if (_profiling) {
        queue_flags |= CL_QUEUE_PROFILING_ENABLE;
    }
    if (_out_of_order) {
        queue_flags |= CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
    }

    return queue_flags;
}
|
||||
#endif
|
||||
|
||||
ocl_queue_type command_queues_builder::build(const cl::Context& context, const cl::Device& device) {
|
||||
ocl_queue_type queue;
|
||||
@ -82,9 +91,11 @@ ocl_queue_type command_queues_builder::build(const cl::Context& context, const c
|
||||
static std::atomic<uint16_t> stream_id{0};
|
||||
|
||||
auto properties = get_properties(device, stream_id++);
|
||||
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200
|
||||
queue = clCreateCommandQueueWithProperties(context.get(), device.get(), properties.data(), &error_code);
|
||||
|
||||
#else
|
||||
queue = clCreateCommandQueue(context.get(), device.get(), properties, &error_code);
|
||||
#endif
|
||||
if (error_code != CL_SUCCESS) {
|
||||
CLDNN_ERROR_MESSAGE("Command queues builders",
|
||||
"clCreateCommandQueueWithPropertiesINTEL error " + std::to_string(error_code));
|
||||
|
@ -26,8 +26,11 @@ private:
|
||||
bool _supports_queue_families;
|
||||
priority_mode_types _priority_mode;
|
||||
throttle_mode_types _throttle_mode;
|
||||
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200
|
||||
std::vector<cl_queue_properties> get_properties(const cl::Device& device, uint16_t stream_id = 0);
|
||||
#else
|
||||
cl_command_queue_properties get_properties(const cl::Device& device, uint16_t stream_id = 0);
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace ocl
|
||||
|
@ -12,14 +12,6 @@
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
typedef cl::vector<cl::vector<unsigned char>> kernels_binaries_vector;
|
||||
typedef cl::vector<kernels_binaries_vector> kernels_binaries_container;
|
||||
typedef CL_API_ENTRY cl_command_queue(CL_API_CALL* pfn_clCreateCommandQueueWithPropertiesINTEL)(
|
||||
cl_context context,
|
||||
cl_device_id device,
|
||||
const cl_queue_properties* properties,
|
||||
cl_int* errcodeRet);
|
||||
|
||||
using ocl_queue_type = cl::CommandQueue;
|
||||
using ocl_kernel_type = cl::KernelIntel;
|
||||
|
||||
|
@ -25,6 +25,17 @@ static const char create_device_error_msg[] =
|
||||
"[GPU] No supported OCL devices found or unexpected error happened during devices query.\n"
|
||||
"[GPU] Please check OpenVINO documentation for GPU drivers setup guide.\n";
|
||||
|
||||
/// Splits @p s into the substrings separated by @p delim.
/// @param s     string to split
/// @param delim delimiter character; it is not included in the returned tokens
/// @return tokens in order of appearance. Adjacent delimiters produce empty tokens,
///         a trailing delimiter does not add a final empty token, and an empty
///         input yields an empty vector.
std::vector<std::string> split(const std::string& s, char delim) {
    std::vector<std::string> result;
    std::stringstream ss(s);
    std::string item;

    // Explicitly qualified so the call does not depend on argument-dependent lookup.
    while (std::getline(ss, item, delim)) {
        result.push_back(item);
    }
    return result;
}
|
||||
|
||||
bool does_device_match_config(bool out_of_order, const cl::Device& device) {
|
||||
if (device.getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU) {
|
||||
return false;
|
||||
@ -44,6 +55,32 @@ bool does_device_match_config(bool out_of_order, const cl::Device& device) {
|
||||
}
|
||||
}
|
||||
|
||||
int32_t ocl_major = -1;
|
||||
int32_t ocl_minor = -1;
|
||||
// Spec says that the format of this string is OpenCL<space><major_version.minor_version><space><vendor-specific information>
|
||||
auto ocl_version_string = device.getInfo<CL_DEVICE_VERSION>();
|
||||
auto tokens = split(ocl_version_string, ' ');
|
||||
|
||||
if (tokens.size() > 1) {
|
||||
auto version_string = tokens[1];
|
||||
auto version_tokens = split(version_string, '.');
|
||||
if (version_tokens.size() == 2) {
|
||||
ocl_major = std::stoi(version_tokens[0]);
|
||||
ocl_minor = std::stoi(version_tokens[1]);
|
||||
}
|
||||
}
|
||||
|
||||
if (ocl_major != -1 && ocl_minor != -1) {
|
||||
int32_t ocl_version = ocl_major*100 + ocl_minor*10;
|
||||
#if CL_TARGET_OPENCL_VERSION >= 200
|
||||
int32_t min_ocl_version = 200;
|
||||
#else
|
||||
int32_t min_ocl_version = 120;
|
||||
#endif
|
||||
if (ocl_version < min_ocl_version)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -15,9 +15,20 @@
|
||||
|
||||
// we want exceptions
|
||||
#define CL_HPP_ENABLE_EXCEPTIONS
|
||||
|
||||
#ifndef CL_TARGET_OPENCL_VERSION
|
||||
#define CL_TARGET_OPENCL_VERSION 200
|
||||
#endif
|
||||
|
||||
#ifndef CL_HPP_TARGET_OPENCL_VERSION
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION CL_TARGET_OPENCL_VERSION
|
||||
#endif
|
||||
#ifndef CL_HPP_MINIMUM_OPENCL_VERSION
|
||||
#define CL_HPP_MINIMUM_OPENCL_VERSION CL_TARGET_OPENCL_VERSION
|
||||
#endif
|
||||
#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION >= 200
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#define CL_HPP_MINIMUM_OPENCL_VERSION 200
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 200
|
||||
#endif
|
||||
|
||||
// Check for compiler and change specific diagnostics.
|
||||
#if defined __INTEL_COMPILER
|
||||
@ -59,11 +70,13 @@
|
||||
|
||||
#include "ocl_ext.hpp"
|
||||
|
||||
#if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION >= 200
|
||||
namespace cl {
|
||||
namespace detail {
|
||||
CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Restore specific diagnostics.
|
||||
#if defined __INTEL_COMPILER
|
||||
|
@ -190,7 +190,7 @@ TEST(cl_mem_check, check_input) {
|
||||
image_desc.image_slice_pitch = 0;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.mem_object = NULL;
|
||||
image_desc.buffer = NULL;
|
||||
|
||||
cl_mem img = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL,
|
||||
&image_format, &image_desc, NULL, &err);
|
||||
@ -198,7 +198,7 @@ TEST(cl_mem_check, check_input) {
|
||||
|
||||
image_desc.image_width = 0;
|
||||
image_desc.image_height = 0;
|
||||
image_desc.mem_object = img;
|
||||
image_desc.buffer = img;
|
||||
image_desc.image_depth = 0;
|
||||
image_format.image_channel_order = CL_R;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user