[GPU] Baseline for enabling GPUs from other vendors (#12577)

This commit is contained in:
Vladimir Paramuzov
2022-11-01 18:02:29 +04:00
committed by GitHub
parent 7595fd4c4e
commit af9724e8da
13 changed files with 281 additions and 153 deletions

View File

@@ -6,11 +6,14 @@
#include "device_info.hpp"
#include "memory_caps.hpp"
#include "layout.hpp"
#include <memory>
namespace cldnn {
const uint32_t INTEL_VENDOR_ID = 0x8086;
/// @brief Represents detected GPU device object. Use device_query to get list of available objects.
struct device {
public:
@@ -20,6 +23,8 @@ public:
virtual bool is_same(const device::ptr other) = 0;
float get_gops(cldnn::data_types dt) const;
virtual ~device() = default;
};

View File

@@ -1548,7 +1548,9 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {
#ifdef ENABLE_ONEDNN_FOR_GPU
auto& engine = get_engine();
if (engine.get_device_info().supports_immad && engine.configuration().queue_type == queue_types::in_order)
if (engine.get_device_info().supports_immad &&
engine.get_device_info().vendor_id == INTEL_VENDOR_ID &&
engine.configuration().queue_type == queue_types::in_order)
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, 1);
#endif
}

View File

@@ -85,25 +85,25 @@ KERNEL(reorder_biplanar_nv12)(
B -= VALUE_TO_SUBTRACT[2];
#elif defined MEAN_SUBTRACT_IN_BUFFER
uint8 msv = RESHAPE_DIMS(INPUT0, MEAN_SUBTRACT, b, 0, w, z, y, x);
R -= mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv[1], msv[2], msv[5], msv[6])];
R -= mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv.s1, msv.s2, msv.s5, msv.s6)];
msv = RESHAPE_DIMS(INPUT0, MEAN_SUBTRACT, b, 1, w, z, y, x);
G -= mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv[1], msv[2], msv[5], msv[6])];
G -= mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv.s1, msv.s2, msv.s5, msv.s6)];
msv = RESHAPE_DIMS(INPUT0, MEAN_SUBTRACT, b, 2, w, z, y, x);
B -= mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv[1], msv[2], msv[5], msv[6])];
B -= mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv.s1, msv.s2, msv.s5, msv.s6)];
#endif
uint8 ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 0, w, z, y, x);
uint output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
uint output_idx = FUNC_CALL(get_output_index)(ov.s1, ov.s2, ov.s3, ov.s4, ov.s5, ov.s6);
output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(R), NL_M, NL_N);
ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 1, w, z, y, x);
output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
output_idx = FUNC_CALL(get_output_index)(ov.s1, ov.s2, ov.s3, ov.s4, ov.s5, ov.s6);
output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(G), NL_M, NL_N);
ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 2, w, z, y, x);
output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
output_idx = FUNC_CALL(get_output_index)(ov.s1, ov.s2, ov.s3, ov.s4, ov.s5, ov.s6);
output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(B), NL_M, NL_N);

View File

@@ -117,7 +117,7 @@ KERNEL (reorder_data)(
#else
uint8 ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, f, w, z, y, x);
const uint input_idx = FUNC_CALL(get_input_index)(b, f, w, z, y, x);
const uint output_idx = FUNC_CALL(get_output_index)(ov[1],ov[2],ov[3],ov[4], ov[5], ov[6]);
const uint output_idx = FUNC_CALL(get_output_index)(ov.s1,ov.s2,ov.s3,ov.s4,ov.s5,ov.s6);
#if defined MEAN_SUBTRACT_INSIDE_PARAMS
float res = TO_MEAN_TYPE(input[input_idx]);
@@ -130,7 +130,7 @@ KERNEL (reorder_data)(
// TODO Add support for 6D mean
MEAN_SUBTRACT_TYPE res = TO_MEAN_TYPE(input[input_idx]);
uint8 msv = RESHAPE_DIMS(INPUT0, MEAN_SUBTRACT, b, f, w, z, y, x);
res = MEAN_OP(res, mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv[1], msv[2], /*msv[3], msv[4],*/ msv[5], msv[6])]);
res = MEAN_OP(res, mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv.s1, msv.s2, /*msv.s3, msv.s4,*/ msv.s5, msv.s6)]);
#endif
#else
CALC_TYPE res = TO_CALC_TYPE(input[input_idx]);
@@ -139,27 +139,27 @@ KERNEL (reorder_data)(
#if defined INPUT0_LAYOUT_NV12
uint8 ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 0, w, z, y, x);
uint output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
uint output_idx = FUNC_CALL(get_output_index)(ov.s1, ov.s2, ov.s3, ov.s4, ov.s5, ov.s6);
output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(R), NL_M, NL_N);
ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 1, w, z, y, x);
output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
output_idx = FUNC_CALL(get_output_index)(ov.s1, ov.s2, ov.s3, ov.s4, ov.s5, ov.s6);
output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(G), NL_M, NL_N);
ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 2, w, z, y, x);
output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
output_idx = FUNC_CALL(get_output_index)(ov.s1, ov.s2, ov.s3, ov.s4, ov.s5, ov.s6);
output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(B), NL_M, NL_N);
#elif INPUT0_LAYOUT_IMAGE_2D_RGBA
uint8 ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 0, w, z, y, x);
uint output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
uint output_idx = FUNC_CALL(get_output_index)(ov.s1, ov.s2, ov.s3, ov.s4, ov.s5, ov.s6);
output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(colorRGBA.s0), NL_M, NL_N);
ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 1, w, z, y, x);
output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
output_idx = FUNC_CALL(get_output_index)(ov.s1, ov.s2, ov.s3, ov.s4, ov.s5, ov.s6);
output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(colorRGBA.s1), NL_M, NL_N);
ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 2, w, z, y, x);
output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
output_idx = FUNC_CALL(get_output_index)(ov.s1, ov.s2, ov.s3, ov.s4, ov.s5, ov.s6);
output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(colorRGBA.s2), NL_M, NL_N);
#if INPUT0_FEATURE_NUM == 4
ov = RESHAPE_DIMS(INPUT0, OUTPUT, b, 3, w, z, y, x);
output_idx = FUNC_CALL(get_output_index)(ov[1], ov[2], ov[3], ov[4], ov[5], ov[6]);
output_idx = FUNC_CALL(get_output_index)(ov.s1, ov.s2, ov.s3, ov.s4, ov.s5, ov.s6);
output[output_idx] = ACTIVATION_FUNC_TYPED(OUTPUT_REORDER, TO_OUTPUT_REORDER_TYPE(colorRGBA.s3), NL_M, NL_N);
#endif
#elif OUTPUT_LAYOUT_IMAGE_2D_RGBA

View File

@@ -229,7 +229,7 @@ KERNEL (reorder_data_fast_b1)(
const uint output_idx = data_idx;
#else
uint8 ov = RESHAPE_DIMS(OUTPUT, INPUT0, b, f, w, z, y, x);
const uint input_idx = FUNC_CALL(get_input_index)(ov[1], ov[2], ov[3], ov[4], ov[5],ov[6]);
const uint input_idx = FUNC_CALL(get_input_index)(ov.s1, ov.s2, ov.s3, ov.s4, ov.s5, ov.s6);
const uint output_idx = FUNC_CALL(get_output_index)(b, f, w, z, y, x);
#endif
@@ -239,7 +239,7 @@ KERNEL (reorder_data_fast_b1)(
#elif defined MEAN_SUBTRACT_IN_BUFFER
MEAN_SUBTRACT_TYPE res = TO_MEAN_TYPE(input[input_idx]);
uint8 msv = RESHAPE_DIMS(INPUT0, MEAN_SUBTRACT, b, f, w, z, y, x);
res -= mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv[1], msv[2], msv[5], msv[6])];
res -= mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv.s1, msv.s2, msv.s5, msv.s6)];
#else
CALC_TYPE res = TO_CALC_TYPE(input[input_idx]);
#endif

View File

@@ -452,7 +452,7 @@ KERNEL (reorder_weights)(const __global INPUT0_TYPE* input, write_only image2d_t
MAKE_VECTOR_TYPE(UNIT_TYPE, 4) input_val = (MAKE_VECTOR_TYPE(UNIT_TYPE, 4))(UNIT_VAL_ZERO, UNIT_VAL_ZERO, UNIT_VAL_ZERO, UNIT_VAL_ZERO);
const int2 coord = (int2)(o, iyx);
uint8 ir = RESHAPE_WEIGHT_DIMS(OUTPUT, INPUT0, o, i, 0, 0, y, x);
input_val.s0 = TO_OUTPUT_TYPE(input[FUNC_CALL(get_input_index)(ir[0],ir[1],ir[2],ir[4],ir[5],ir[6])]);
input_val.s0 = TO_OUTPUT_TYPE(input[FUNC_CALL(get_input_index)(ir.s0,ir.s1,ir.s2,ir.s4,ir.s5,ir.s6)]);
IMAGE_WRITE(output, coord, input_val);
}
#else
@@ -489,7 +489,7 @@ KERNEL (reorder_weights)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE
uint8 ir = RESHAPE_WEIGHT_DIMS(OUTPUT, INPUT0, o, i, 0, z, y, x);
#endif
uint input_idx = FUNC_CALL(get_input_index)(ir[0],ir[1],ir[2],ir[4],ir[5],ir[6]);
uint input_idx = FUNC_CALL(get_input_index)(ir.s0,ir.s1,ir.s2,ir.s4,ir.s5,ir.s6);
#if !REORDER_ROTATE
uint output_idx = FUNC_CALL(get_output_index)(g, o, i, z, y, x);
#else

View File

@@ -512,58 +512,6 @@ auto StringRightTrim = [](std::string string, std::string substring, bool case_s
return ret_str;
};
static float GetGOPS(cldnn::device_info info, cldnn::data_types dt) {
auto freqGHz = info.gpu_frequency / 1000.f;
auto numEUs = info.execution_units_count;
auto opsPerComputeBlock = 0;
auto computeBlockIPC = 1.0f;
switch (dt) {
case cldnn::data_types::u8:
case cldnn::data_types::i8: {
if (info.supports_immad) {
if (info.gfx_ver.major == 12) {
if (info.gfx_ver.minor == 5)
opsPerComputeBlock = 512;
else if (info.gfx_ver.minor == 7)
opsPerComputeBlock = 256;
}
} else if (info.supports_imad) {
// fma * simd size
opsPerComputeBlock = 2 * 32;
} else {
// separate mul + add instructions for int8 data type
opsPerComputeBlock = 2 * 16;
// mul/add instructions can't be executed in parallel, so we need 2 clocks to execute compute block
computeBlockIPC = 0.5f;
}
break;
}
case cldnn::data_types::f16: {
if (info.supports_immad) {
if (info.gfx_ver.major == 12) {
if (info.gfx_ver.minor == 5)
opsPerComputeBlock = 256;
else if (info.gfx_ver.minor == 7)
opsPerComputeBlock = 128;
}
} else {
// fma * simd size
opsPerComputeBlock = 2 * 16;
}
break;
}
case cldnn::data_types::f32: {
// fma * simd size
opsPerComputeBlock = 2 * 8;
break;
}
default: throw std::runtime_error("GetGOPS: Unsupported precision");
}
return freqGHz * opsPerComputeBlock * computeBlockIPC * numEUs;
}
Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string, Parameter>& options) const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::GetMetric");
GPU_DEBUG_GET_INSTANCE(debug_config);
@@ -648,17 +596,17 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string,
} else if (name == ov::device::gops) {
if (is_new_api) {
std::map<element::Type, float> gops;
gops[element::i8] = GetGOPS(device_info, cldnn::data_types::i8);
gops[element::u8] = GetGOPS(device_info, cldnn::data_types::u8);
gops[element::f16] = GetGOPS(device_info, cldnn::data_types::f16);
gops[element::f32] = GetGOPS(device_info, cldnn::data_types::f32);
gops[element::i8] = device->get_gops(cldnn::data_types::i8);
gops[element::u8] = device->get_gops(cldnn::data_types::u8);
gops[element::f16] = device->get_gops(cldnn::data_types::f16);
gops[element::f32] = device->get_gops(cldnn::data_types::f32);
return decltype(ov::device::gops)::value_type {gops};
} else {
std::map<InferenceEngine::Precision, float> gops;
gops[InferenceEngine::Precision::I8] = GetGOPS(device_info, cldnn::data_types::i8);
gops[InferenceEngine::Precision::U8] = GetGOPS(device_info, cldnn::data_types::u8);
gops[InferenceEngine::Precision::FP16] = GetGOPS(device_info, cldnn::data_types::f16);
gops[InferenceEngine::Precision::FP32] = GetGOPS(device_info, cldnn::data_types::f32);
gops[InferenceEngine::Precision::I8] = device->get_gops(cldnn::data_types::i8);
gops[InferenceEngine::Precision::U8] = device->get_gops(cldnn::data_types::u8);
gops[InferenceEngine::Precision::FP16] = device->get_gops(cldnn::data_types::f16);
gops[InferenceEngine::Precision::FP32] = device->get_gops(cldnn::data_types::f32);
IE_SET_METRIC_RETURN(DEVICE_GOPS, gops);
}
} else if (name == ov::intel_gpu::execution_units_count) {

View File

@@ -0,0 +1,66 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "intel_gpu/runtime/device.hpp"
namespace cldnn {
// Estimates peak throughput in GOPS for the given data type on this device.
// Returns 0 for non-Intel GPUs, for which the EU/frequency model below does not apply.
float device::get_gops(cldnn::data_types dt) const {
    const auto info = get_info();
    if (info.vendor_id != INTEL_VENDOR_ID) {
        // GOPS calculation is not supported for non-Intel GPUs
        return 0.0f;
    }

    const auto freq_ghz = info.gpu_frequency / 1000.f;
    const auto eu_count = info.execution_units_count;
    int ops_per_block = 0;      // ops one EU compute block retires per clock
    float block_ipc = 1.0f;     // compute blocks issued per clock
    switch (dt) {
    case cldnn::data_types::u8:
    case cldnn::data_types::i8: {
        if (info.supports_immad) {
            if (info.gfx_ver.major == 12) {
                if (info.gfx_ver.minor == 5)
                    ops_per_block = 512;
                else if (info.gfx_ver.minor == 7)
                    ops_per_block = 256;
            }
        } else if (info.supports_imad) {
            // fma * simd size
            ops_per_block = 2 * 32;
        } else {
            // separate mul + add instructions for int8 data type
            ops_per_block = 2 * 16;
            // mul/add can't run in parallel, so a compute block costs 2 clocks
            block_ipc = 0.5f;
        }
        break;
    }
    case cldnn::data_types::f16: {
        if (info.supports_immad) {
            if (info.gfx_ver.major == 12) {
                if (info.gfx_ver.minor == 5)
                    ops_per_block = 256;
                else if (info.gfx_ver.minor == 7)
                    ops_per_block = 128;
            }
        } else {
            // fma * simd size
            ops_per_block = 2 * 16;
        }
        break;
    }
    case cldnn::data_types::f32: {
        // fma * simd size
        ops_per_block = 2 * 8;
        break;
    }
    default: OPENVINO_ASSERT(false, "[GPU] get_gops: unsupported precision: ", dt);
    }
    return freq_ghz * ops_per_block * block_ipc * eu_count;
}
} // namespace cldnn

View File

@@ -22,25 +22,43 @@
namespace {
bool does_device_match_config(bool out_of_order, const cl::Device& device) {
// Is it intel gpu
if (device.getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU ||
device.getInfo<CL_DEVICE_VENDOR_ID>() != 0x8086) {
return false;
}
// Does device support OOOQ?
if (out_of_order) {
auto queue_properties = device.getInfo<CL_DEVICE_QUEUE_PROPERTIES>();
using cmp_t = std::common_type<decltype(queue_properties),
typename std::underlying_type<cl::QueueProperties>::type>::type;
if (!(static_cast<cmp_t>(queue_properties) & static_cast<cmp_t>(cl::QueueProperties::OutOfOrder))) {
if (device.getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU) {
return false;
}
// TODO: Remove the check below once kernels are fixed
if (device.getInfo<CL_DEVICE_VENDOR_ID>() != cldnn::INTEL_VENDOR_ID)
return false;
// Does device support OOOQ?
if (out_of_order) {
auto queue_properties = device.getInfo<CL_DEVICE_QUEUE_PROPERTIES>();
using cmp_t = std::common_type<decltype(queue_properties),
typename std::underlying_type<cl::QueueProperties>::type>::type;
if (!(static_cast<cmp_t>(queue_properties) & static_cast<cmp_t>(cl::QueueProperties::OutOfOrder))) {
return false;
}
}
return true;
}
return true;
// The priority returned by this function impacts the order of devices reported by GPU plugin and devices enumeration
// Lower priority value means lower device ID
// Current behavior is: Intel iGPU < Intel dGPU < any other GPU
// Order of Intel dGPUs is undefined and depends on the OCL impl
// Order of other vendor GPUs is undefined and depends on the OCL impl
size_t get_device_priority(const cldnn::device_info& info) {
    // Non-Intel devices always sort last.
    if (info.vendor_id != cldnn::INTEL_VENDOR_ID)
        return std::numeric_limits<size_t>::max();
    // Intel iGPU (0) precedes Intel dGPU (1).
    return info.dev_type == cldnn::device_type::integrated_gpu ? 0 : 1;
}
} // namespace
namespace cldnn {
namespace ocl {
static constexpr auto INTEL_PLATFORM_VENDOR = "Intel(R) Corporation";
@@ -56,10 +74,8 @@ static std::vector<cl::Device> getSubDevices(cl::Device& rootDevice) {
sizeof(maxSubDevices),
&maxSubDevices, &maxSubDevicesSize);
if (err != CL_SUCCESS || maxSubDevicesSize != sizeof(maxSubDevices)) {
throw cl::Error(err, "clGetDeviceInfo(..., CL_DEVICE_PARTITION_MAX_SUB_DEVICES,...)");
}
OPENVINO_ASSERT(err == CL_SUCCESS && maxSubDevicesSize == sizeof(maxSubDevices),
"[GPU] clGetDeviceInfo(..., CL_DEVICE_PARTITION_MAX_SUB_DEVICES,...)");
if (maxSubDevices == 0) {
return {};
}
@@ -91,47 +107,50 @@ static std::vector<cl::Device> getSubDevices(cl::Device& rootDevice) {
return subDevices;
}
// Returns a copy of devices_list ordered by get_device_priority()
// (Intel iGPU < Intel dGPU < other vendors). stable_sort preserves the
// OCL-reported order among devices with equal priority.
std::vector<device::ptr> ocl_device_detector::sort_devices(const std::vector<device::ptr>& devices_list) {
    std::vector<device::ptr> sorted_list = devices_list;
    // Take the shared_ptrs by const reference: passing by value would copy
    // the pointer (an atomic refcount inc/dec) on every comparison.
    std::stable_sort(sorted_list.begin(), sorted_list.end(), [](const device::ptr& d1, const device::ptr& d2) {
        return get_device_priority(d1->get_info()) < get_device_priority(d2->get_info());
    });
    return sorted_list;
}
std::map<std::string, device::ptr> ocl_device_detector::get_available_devices(void* user_context,
void* user_device,
int ctx_device_id,
int target_tile_id) const {
bool host_out_of_order = true; // Change to false, if debug requires in-order queue.
std::vector<device::ptr> dev_orig, dev_sorted;
std::vector<device::ptr> devices_list;
if (user_context != nullptr) {
dev_orig = create_device_list_from_user_context(host_out_of_order, user_context, ctx_device_id);
devices_list = create_device_list_from_user_context(host_out_of_order, user_context, ctx_device_id);
} else if (user_device != nullptr) {
dev_orig = create_device_list_from_user_device(host_out_of_order, user_device);
devices_list = create_device_list_from_user_device(host_out_of_order, user_device);
} else {
dev_orig = create_device_list(host_out_of_order);
devices_list = create_device_list(host_out_of_order);
}
devices_list = sort_devices(devices_list);
std::map<std::string, device::ptr> ret;
for (auto& dptr : dev_orig) {
if (dptr->get_info().dev_type == cldnn::device_type::integrated_gpu)
dev_sorted.insert(dev_sorted.begin(), dptr);
else
dev_sorted.push_back(dptr);
}
uint32_t idx = 0;
for (auto& dptr : dev_sorted) {
for (auto& dptr : devices_list) {
auto map_id = std::to_string(idx++);
ret[map_id] = dptr;
auto rootDevice = std::dynamic_pointer_cast<ocl_device>(dptr);
if (!rootDevice) {
throw std::runtime_error("Invalid device type created in ocl_device_detector");
}
auto root_device = std::dynamic_pointer_cast<ocl_device>(dptr);
OPENVINO_ASSERT(root_device != nullptr, "[GPU] Invalid device type created in ocl_device_detector");
auto subDevices = getSubDevices(rootDevice->get_device());
if (!subDevices.empty()) {
auto sub_devices = getSubDevices(root_device->get_device());
if (!sub_devices.empty()) {
uint32_t sub_idx = 0;
for (auto& subdevice : subDevices) {
for (auto& sub_device : sub_devices) {
if (target_tile_id != -1 && static_cast<int>(sub_idx) != target_tile_id) {
sub_idx++;
continue;
}
auto subdPtr = std::make_shared<ocl_device>(subdevice, cl::Context(subdevice), rootDevice->get_platform());
ret[map_id+"."+std::to_string(sub_idx++)] = subdPtr;
auto sub_device_ptr = std::make_shared<ocl_device>(sub_device, cl::Context(sub_device), root_device->get_platform());
ret[map_id + "." + std::to_string(sub_idx++)] = sub_device_ptr;
}
}
}
@@ -142,72 +161,56 @@ std::vector<device::ptr> ocl_device_detector::create_device_list(bool out_out_or
cl_uint n = 0;
// Get number of platforms available
cl_int err = clGetPlatformIDs(0, NULL, &n);
if (err != CL_SUCCESS) {
throw std::runtime_error("[CLDNN ERROR]. clGetPlatformIDs error " + std::to_string(err));
}
OPENVINO_ASSERT(err == CL_SUCCESS, "[GPU] clGetPlatformIDs error ", err);
// Get platform list
std::vector<cl_platform_id> platform_ids(n);
err = clGetPlatformIDs(n, platform_ids.data(), NULL);
if (err != CL_SUCCESS) {
throw std::runtime_error("[CLDNN ERROR]. clGetPlatformIDs error " + std::to_string(err));
}
OPENVINO_ASSERT(err == CL_SUCCESS, "[GPU] clGetPlatformIDs error ", err);
std::vector<device::ptr> ret;
std::vector<device::ptr> supported_devices;
for (auto& id : platform_ids) {
cl::Platform platform = cl::Platform(id);
if (platform.getInfo<CL_PLATFORM_VENDOR>() != INTEL_PLATFORM_VENDOR)
continue;
std::vector<cl::Device> devices;
platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
for (auto& device : devices) {
if (!does_device_match_config(out_out_order, device))
continue;
ret.emplace_back(std::make_shared<ocl_device>(device, cl::Context(device), id));
supported_devices.emplace_back(std::make_shared<ocl_device>(device, cl::Context(device), id));
}
}
if (ret.empty()) {
throw std::runtime_error("[CLDNN ERROR]. No GPU device was found.");
}
return ret;
OPENVINO_ASSERT(!supported_devices.empty(), "[GPU] No GPU device was found.");
return supported_devices;
}
std::vector<device::ptr> ocl_device_detector::create_device_list_from_user_context(bool out_out_order, void* user_context, int ctx_device_id) const {
cl::Context ctx = cl::Context(static_cast<cl_context>(user_context), true);
auto all_devices = ctx.getInfo<CL_CONTEXT_DEVICES>();
std::vector<device::ptr> ret;
std::vector<device::ptr> supported_devices;
for (size_t i = 0; i < all_devices.size(); i++) {
auto& device = all_devices[i];
if (!does_device_match_config(out_out_order, device) || static_cast<int>(i) != ctx_device_id)
continue;
ret.emplace_back(std::make_shared<ocl_device>(device, ctx, device.getInfo<CL_DEVICE_PLATFORM>()));
supported_devices.emplace_back(std::make_shared<ocl_device>(device, ctx, device.getInfo<CL_DEVICE_PLATFORM>()));
}
if (ret.empty()) {
throw std::runtime_error("[CLDNN ERROR]. User defined context does not have GPU device included!");
}
return ret;
OPENVINO_ASSERT(!supported_devices.empty(), "[GPU] User defined context does not have GPU device included.");
return supported_devices;
}
std::vector<device::ptr> ocl_device_detector::create_device_list_from_user_device(bool out_out_order, void* user_device) const {
std::vector<device::ptr> ocl_device_detector::create_device_list_from_user_device(bool out_out_order, void* user_device) const {
cl_uint n = 0;
// Get number of platforms available
cl_int err = clGetPlatformIDs(0, NULL, &n);
if (err != CL_SUCCESS) {
throw std::runtime_error("[CLDNN ERROR]. clGetPlatformIDs error " + std::to_string(err));
}
OPENVINO_ASSERT(err == CL_SUCCESS, "[GPU] clGetPlatformIDs error ", err);
// Get platform list
std::vector<cl_platform_id> platform_ids(n);
err = clGetPlatformIDs(n, platform_ids.data(), NULL);
if (err != CL_SUCCESS) {
throw std::runtime_error("[CLDNN ERROR]. clGetPlatformIDs error " + std::to_string(err));
}
OPENVINO_ASSERT(err == CL_SUCCESS, "[GPU] clGetPlatformIDs error ", err);
std::vector<device::ptr> ret;
std::vector<device::ptr> supported_devices;
for (auto& id : platform_ids) {
cl::PlatformVA platform = cl::PlatformVA(id);
@@ -250,13 +253,11 @@ std::vector<device::ptr> ocl_device_detector::create_device_list_from_user_devi
CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE,
CL_CONTEXT_PLATFORM, (cl_context_properties)id,
0 };
ret.emplace_back(std::make_shared<ocl_device>(device, cl::Context(device, props), id));
supported_devices.emplace_back(std::make_shared<ocl_device>(device, cl::Context(device, props), id));
}
}
if (ret.empty()) {
throw std::runtime_error("[CLDNN ERROR]. No corresponding GPU device was found.");
}
return ret;
OPENVINO_ASSERT(!supported_devices.empty(), "[GPU] User specified device is not supported.");
return supported_devices;
}
} // namespace ocl

View File

@@ -21,6 +21,8 @@ public:
std::map<std::string, device::ptr> get_available_devices(void *user_context, void *user_device, int ctx_device_id = 0, int target_tile_id = -1) const;
static std::vector<device::ptr> sort_devices(const std::vector<device::ptr>& devices_list);
private:
std::vector<device::ptr> create_device_list(bool out_out_order) const;
std::vector<device::ptr> create_device_list_from_user_context(bool out_out_order, void* user_context, int ctx_device_id = 0) const;

View File

@@ -57,6 +57,7 @@ ocl_engine::ocl_engine(const device::ptr dev, runtime_types runtime_type,
#ifdef ENABLE_ONEDNN_FOR_GPU
dnnl::engine& ocl_engine::get_onednn_engine() const {
const std::lock_guard<std::mutex> lock(onednn_mutex);
OPENVINO_ASSERT(_device->get_info().vendor_id == INTEL_VENDOR_ID, "[GPU] OneDNN engine can be used for Intel GPUs only");
if (!_onednn_engine) {
auto casted = std::dynamic_pointer_cast<ocl_device>(_device);
if (!casted)

View File

@@ -291,7 +291,7 @@ ocl_stream::ocl_stream(const ocl_engine &engine)
_command_queue = queue_builder.build(context, device);
#ifdef ENABLE_ONEDNN_FOR_GPU
if (config.queue_type == queue_types::in_order) {
if (config.queue_type == queue_types::in_order && engine.get_device_info().vendor_id == INTEL_VENDOR_ID) {
auto onednn_engine = engine.get_onednn_engine();
_onednn_stream = std::make_shared<dnnl::stream>(dnnl::ocl_interop::make_stream(engine.get_onednn_engine(), _command_queue.get()));
}

View File

@@ -0,0 +1,103 @@
// Copyright (C) 2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include "intel_gpu/runtime/device.hpp"
#include "runtime/ocl/ocl_device_detector.hpp"
#include <memory>
using namespace cldnn;
using namespace ::tests;
namespace {
struct dummy_device : public device {
public:
dummy_device(uint32_t vendor_id, device_type type, size_t device_id) : _mem_caps({}) {
_info = device_info{};
_info.vendor_id = vendor_id;
_info.dev_type = type;
_info.device_id = device_id;
}
device_info get_info() const override { return _info; }
memory_capabilities get_mem_caps() const override { return _mem_caps; }
bool is_same(const device::ptr other) override {
return this == other.get();
}
~dummy_device() = default;
private:
device_info _info;
memory_capabilities _mem_caps;
};
} // namespace
// All-Intel list: the single iGPU must be promoted to the front while the
// dGPUs keep their original relative order (stable sort).
TEST(devices_test, sort_order_single_vendor) {
    // device_id records the insertion position so the result can be checked.
    size_t next_id = 0;
    std::vector<device::ptr> devices;
    devices.push_back(std::make_shared<dummy_device>(INTEL_VENDOR_ID, device_type::discrete_gpu, next_id++));
    devices.push_back(std::make_shared<dummy_device>(INTEL_VENDOR_ID, device_type::discrete_gpu, next_id++));
    devices.push_back(std::make_shared<dummy_device>(INTEL_VENDOR_ID, device_type::integrated_gpu, next_id++));
    devices.push_back(std::make_shared<dummy_device>(INTEL_VENDOR_ID, device_type::discrete_gpu, next_id++));
    devices.push_back(std::make_shared<dummy_device>(INTEL_VENDOR_ID, device_type::discrete_gpu, next_id++));

    auto sorted = ocl::ocl_device_detector::sort_devices(devices);

    const std::vector<size_t> expected_order = {2, 0, 1, 3, 4};
    std::vector<size_t> actual_order;
    for (const auto& dev : sorted)
        actual_order.push_back(dev->get_info().device_id);
    ASSERT_EQ(expected_order, actual_order);
}
// Two vendors: Intel devices (iGPU first, then dGPU) must precede the other
// vendor's devices, which stay in their original relative order.
TEST(devices_test, sort_order_two_vendors) {
    size_t next_id = 0;
    const auto OTHER_VENDOR_ID = 0x123;
    std::vector<device::ptr> devices;
    devices.push_back(std::make_shared<dummy_device>(OTHER_VENDOR_ID, device_type::discrete_gpu, next_id++));
    devices.push_back(std::make_shared<dummy_device>(OTHER_VENDOR_ID, device_type::discrete_gpu, next_id++));
    devices.push_back(std::make_shared<dummy_device>(INTEL_VENDOR_ID, device_type::discrete_gpu, next_id++));
    devices.push_back(std::make_shared<dummy_device>(INTEL_VENDOR_ID, device_type::integrated_gpu, next_id++));

    auto sorted = ocl::ocl_device_detector::sort_devices(devices);

    const std::vector<size_t> expected_order = {3, 2, 0, 1};
    std::vector<size_t> actual_order;
    for (const auto& dev : sorted)
        actual_order.push_back(dev->get_info().device_id);
    ASSERT_EQ(expected_order, actual_order);
}
// Three vendors: Intel iGPU, then Intel dGPU, then both foreign vendors'
// devices in their original (OCL-reported) order.
TEST(devices_test, sort_order_three_vendors) {
    size_t next_id = 0;
    const auto OTHER_VENDOR_ID1 = 0x123;
    const auto OTHER_VENDOR_ID2 = 0x1234;
    std::vector<device::ptr> devices;
    devices.push_back(std::make_shared<dummy_device>(OTHER_VENDOR_ID1, device_type::discrete_gpu, next_id++));
    devices.push_back(std::make_shared<dummy_device>(OTHER_VENDOR_ID1, device_type::discrete_gpu, next_id++));
    devices.push_back(std::make_shared<dummy_device>(INTEL_VENDOR_ID, device_type::integrated_gpu, next_id++));
    devices.push_back(std::make_shared<dummy_device>(INTEL_VENDOR_ID, device_type::discrete_gpu, next_id++));
    devices.push_back(std::make_shared<dummy_device>(OTHER_VENDOR_ID2, device_type::discrete_gpu, next_id++));
    devices.push_back(std::make_shared<dummy_device>(OTHER_VENDOR_ID2, device_type::discrete_gpu, next_id++));

    auto sorted = ocl::ocl_device_detector::sort_devices(devices);

    const std::vector<size_t> expected_order = {2, 3, 0, 1, 4, 5};
    std::vector<size_t> actual_order;
    for (const auto& dev : sorted)
        actual_order.push_back(dev->get_info().device_id);
    ASSERT_EQ(expected_order, actual_order);
}