[IE CLDNN] QueryAPI extension with gpu device info (#5440)
This commit is contained in:
committed by
GitHub
parent
ae9f3ebc5d
commit
d52c4d433a
@@ -3,6 +3,7 @@
|
||||
//
|
||||
|
||||
#include "ie_api_impl.hpp"
|
||||
#include "ie_plugin_config.hpp"
|
||||
|
||||
#include "hetero/hetero_plugin_config.hpp"
|
||||
#include "ie_iinfer_request.hpp"
|
||||
@@ -70,6 +71,11 @@ PyObject* parse_parameter(const InferenceEngine::Parameter& param) {
|
||||
auto val = param.as<unsigned int>();
|
||||
return PyLong_FromLong((unsigned long)val);
|
||||
}
|
||||
// Check for uint64_t
|
||||
else if (param.is<uint64_t>()) {
|
||||
auto val = param.as<uint64_t>();
|
||||
return PyLong_FromLong((unsigned long)val);
|
||||
}
|
||||
// Check for float
|
||||
else if (param.is<float>()) {
|
||||
auto val = param.as<float>();
|
||||
@@ -151,6 +157,21 @@ PyObject* parse_parameter(const InferenceEngine::Parameter& param) {
|
||||
PyDict_SetItemString(dict, it.first.c_str(), PyLong_FromLong((long)it.second));
|
||||
}
|
||||
return dict;
|
||||
} else if (param.is<std::map<InferenceEngine::Precision, float>>()) {
|
||||
auto val = param.as<std::map<InferenceEngine::Precision, float>>();
|
||||
PyObject* dict = PyDict_New();
|
||||
for (const auto& it : val) {
|
||||
std::stringstream s;
|
||||
s << it.first;
|
||||
PyDict_SetItemString(dict, s.str().c_str(), PyFloat_FromDouble((double)it.second));
|
||||
}
|
||||
return dict;
|
||||
} else if (param.is<InferenceEngine::Metrics::DeviceType>()) {
|
||||
auto val = param.as<InferenceEngine::Metrics::DeviceType>();
|
||||
using namespace InferenceEngine;
|
||||
std::stringstream s;
|
||||
s << val;
|
||||
return PyUnicode_FromString(s.str().c_str());
|
||||
} else {
|
||||
PyErr_SetString(PyExc_TypeError, "Failed to convert parameter to Python representation!");
|
||||
return (PyObject*)NULL;
|
||||
|
||||
@@ -14,6 +14,44 @@
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
namespace Metrics {
|
||||
|
||||
/**
|
||||
* @def GPU_METRIC_KEY(name)
|
||||
* @brief shortcut for defining GPU plugin metrics
|
||||
*/
|
||||
#define GPU_METRIC_KEY(name) METRIC_KEY(GPU_##name)
|
||||
#define DECLARE_GPU_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(GPU_##name, __VA_ARGS__)
|
||||
|
||||
/**
|
||||
* @def DECLARE_GPU_METRIC_VALUE(name)
|
||||
* @brief shortcut for defining gpu metric values
|
||||
*/
|
||||
#define DECLARE_GPU_METRIC_VALUE(name) DECLARE_METRIC_VALUE(GPU_##name)
|
||||
|
||||
/**
|
||||
* @brief Metric which defines the size of memory, in bytes, available to the device. For an iGPU it returns the host memory size; for a dGPU, the dedicated GPU memory size
|
||||
*/
|
||||
DECLARE_GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE, uint64_t);
|
||||
|
||||
/**
|
||||
* @brief Metric to get microarchitecture identifier in major.minor.revision format
|
||||
*/
|
||||
DECLARE_GPU_METRIC_KEY(UARCH_VERSION, std::string);
|
||||
|
||||
/**
|
||||
* @brief Metric to get count of execution units for current GPU
|
||||
*/
|
||||
DECLARE_GPU_METRIC_KEY(EXECUTION_UNITS_COUNT, int);
|
||||
|
||||
/**
|
||||
* @brief Possible return value for OPTIMIZATION_CAPABILITIES metric
|
||||
* - "HW_MATMUL" - Defines if device has hardware block for matrix multiplication
|
||||
*/
|
||||
DECLARE_GPU_METRIC_VALUE(HW_MATMUL);
|
||||
|
||||
} // namespace Metrics
|
||||
|
||||
/**
|
||||
* @brief GPU plugin configuration
|
||||
*/
|
||||
|
||||
@@ -13,6 +13,9 @@
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#include "ie_precision.hpp"
|
||||
|
||||
namespace InferenceEngine {
|
||||
|
||||
@@ -148,6 +151,36 @@ DECLARE_METRIC_KEY(NUMBER_OF_EXEC_INFER_REQUESTS, unsigned int);
|
||||
*/
|
||||
DECLARE_METRIC_KEY(DEVICE_ARCHITECTURE, std::string);
|
||||
|
||||
/**
|
||||
* @brief Enum to define possible device types
|
||||
*/
|
||||
enum class DeviceType {
|
||||
integrated = 0,
|
||||
discrete = 1,
|
||||
};
|
||||
|
||||
/** @cond INTERNAL */
|
||||
// Writes the lowercase name of a DeviceType ("integrated" or "discrete") to the stream.
// Any value outside the enum's two named enumerators is written as "unknown".
inline std::ostream& operator<<(std::ostream& os, const InferenceEngine::Metrics::DeviceType& deviceType) {
    const char* label = "unknown";
    if (deviceType == InferenceEngine::Metrics::DeviceType::discrete) {
        label = "discrete";
    } else if (deviceType == InferenceEngine::Metrics::DeviceType::integrated) {
        label = "integrated";
    }

    return os << label;
}
|
||||
/** @endcond */
|
||||
|
||||
/**
|
||||
* @brief Metric to get a type of device. See DeviceType enum definition for possible return values
|
||||
*/
|
||||
DECLARE_METRIC_KEY(DEVICE_TYPE, DeviceType);
|
||||
|
||||
/**
|
||||
* @brief Metric which defines Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported by specified device
|
||||
*/
|
||||
DECLARE_METRIC_KEY(DEVICE_GOPS, std::map<InferenceEngine::Precision, float>);
|
||||
|
||||
/**
|
||||
* @brief Metric which defines support of import/export functionality by plugin
|
||||
*/
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
//
|
||||
|
||||
#include <cstdlib>
|
||||
#include <ie_plugin_config.hpp>
|
||||
#include <inference_engine.hpp>
|
||||
#include <iomanip>
|
||||
#include <memory>
|
||||
@@ -41,6 +42,8 @@ void printParameterValue(const Parameter& value) {
|
||||
std::cout << value.as<int>() << std::endl;
|
||||
} else if (value.is<unsigned int>()) {
|
||||
std::cout << value.as<unsigned int>() << std::endl;
|
||||
} else if (value.is<uint64_t>()) {
|
||||
std::cout << value.as<uint64_t>() << std::endl;
|
||||
} else if (value.is<float>()) {
|
||||
std::cout << value.as<float>() << std::endl;
|
||||
} else if (value.is<std::string>()) {
|
||||
@@ -62,6 +65,17 @@ void printParameterValue(const Parameter& value) {
|
||||
std::cout << std::get<2>(values);
|
||||
std::cout << " }";
|
||||
std::cout << std::endl;
|
||||
} else if (value.is<Metrics::DeviceType>()) {
|
||||
auto v = value.as<Metrics::DeviceType>();
|
||||
std::cout << v << std::endl;
|
||||
} else if (value.is<std::map<InferenceEngine::Precision, float>>()) {
|
||||
auto values = value.as<std::map<InferenceEngine::Precision, float>>();
|
||||
std::cout << "{ ";
|
||||
for (auto& kv : values) {
|
||||
std::cout << kv.first << ": " << kv.second << "; ";
|
||||
}
|
||||
std::cout << " }";
|
||||
std::cout << std::endl;
|
||||
} else if (value.is<std::tuple<unsigned int, unsigned int>>()) {
|
||||
auto values = value.as<std::tuple<unsigned int, unsigned int>>();
|
||||
std::cout << "{ ";
|
||||
|
||||
@@ -79,11 +79,17 @@
|
||||
#include "cldnn_executable_network.h"
|
||||
#include "cldnn_custom_layer.h"
|
||||
#include "cldnn_itt.h"
|
||||
#include "cldnn/cldnn_config.hpp"
|
||||
|
||||
#ifdef __linux__
|
||||
# include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
// Undefine the DEVICE_TYPE macro, which may be defined somewhere in Windows headers as DWORD and would conflict with our metric
|
||||
#ifdef DEVICE_TYPE
|
||||
#undef DEVICE_TYPE
|
||||
#endif
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace InferenceEngine::gpu;
|
||||
using namespace InferenceEngine::details;
|
||||
@@ -814,6 +820,42 @@ auto StringRightTrim = [](std::string string, std::string substring, bool case_s
|
||||
return ret_str;
|
||||
};
|
||||
|
||||
static float GetGOPS(cldnn::device_info info, cldnn::data_types dt) {
|
||||
auto freqGHz = info.core_frequency / 1000.f;
|
||||
auto numEUs = info.cores_count;
|
||||
auto opsPerComputeBlock = 0;
|
||||
auto computeBlockIPC = 1.0f;
|
||||
switch (dt) {
|
||||
case cldnn::data_types::u8:
|
||||
case cldnn::data_types::i8: {
|
||||
if (info.supports_imad) {
|
||||
// fma * simd size
|
||||
opsPerComputeBlock = 2 * 32;
|
||||
} else {
|
||||
// separate mul + add instructions for int8 data type
|
||||
opsPerComputeBlock = 2 * 16;
|
||||
// mul/add instructions can't be executed in parallel, so we need 2 clocks to execute compute block
|
||||
computeBlockIPC = 0.5f;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case cldnn::data_types::f16: {
|
||||
// fma * simd size
|
||||
opsPerComputeBlock = 2 * 16;
|
||||
break;
|
||||
}
|
||||
case cldnn::data_types::f32: {
|
||||
// fma * simd size
|
||||
opsPerComputeBlock = 2 * 8;
|
||||
break;
|
||||
}
|
||||
|
||||
default: throw std::runtime_error("GetGOPS: Unsupported precision");
|
||||
}
|
||||
|
||||
return freqGHz * opsPerComputeBlock * computeBlockIPC * numEUs;
|
||||
}
|
||||
|
||||
Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& options) const {
|
||||
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::GetMetric");
|
||||
auto device_id = GetConfig(CONFIG_KEY(DEVICE_ID), {});
|
||||
@@ -834,12 +876,42 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
|
||||
metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
|
||||
metrics.push_back(METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS));
|
||||
metrics.push_back(METRIC_KEY(RANGE_FOR_STREAMS));
|
||||
metrics.push_back(METRIC_KEY(DEVICE_TYPE));
|
||||
metrics.push_back(METRIC_KEY(DEVICE_GOPS));
|
||||
metrics.push_back(GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE));
|
||||
metrics.push_back(GPU_METRIC_KEY(UARCH_VERSION));
|
||||
metrics.push_back(GPU_METRIC_KEY(EXECUTION_UNITS_COUNT));
|
||||
|
||||
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
|
||||
} else if (name == METRIC_KEY(AVAILABLE_DEVICES)) {
|
||||
std::vector<std::string> availableDevices = { };
|
||||
for (auto const& dev : device_map)
|
||||
availableDevices.push_back(dev.first);
|
||||
IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
|
||||
} else if (name == GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE)) {
|
||||
IE_SET_METRIC_RETURN(GPU_DEVICE_TOTAL_MEM_SIZE, device_info.max_global_mem_size);
|
||||
} else if (name == METRIC_KEY(DEVICE_TYPE)) {
|
||||
auto dev_type = device_info.dev_type == cldnn::device_type::discrete_gpu ? Metrics::DeviceType::discrete : Metrics::DeviceType::integrated;
|
||||
IE_SET_METRIC_RETURN(DEVICE_TYPE, dev_type);
|
||||
} else if (name == METRIC_KEY(DEVICE_GOPS)) {
|
||||
std::map<InferenceEngine::Precision, float> gops;
|
||||
gops[InferenceEngine::Precision::I8] = GetGOPS(device_info, cldnn::data_types::i8);
|
||||
gops[InferenceEngine::Precision::U8] = GetGOPS(device_info, cldnn::data_types::u8);
|
||||
gops[InferenceEngine::Precision::FP16] = GetGOPS(device_info, cldnn::data_types::f16);
|
||||
gops[InferenceEngine::Precision::FP32] = GetGOPS(device_info, cldnn::data_types::f32);
|
||||
IE_SET_METRIC_RETURN(DEVICE_GOPS, gops);
|
||||
} else if (name == GPU_METRIC_KEY(EXECUTION_UNITS_COUNT)) {
|
||||
IE_SET_METRIC_RETURN(GPU_EXECUTION_UNITS_COUNT, device_info.cores_count);
|
||||
} else if (name == GPU_METRIC_KEY(UARCH_VERSION)) {
|
||||
std::stringstream s;
|
||||
if (device_info.gfx_ver.major == 0 && device_info.gfx_ver.minor == 0 && device_info.gfx_ver.revision == 0) {
|
||||
s << "unknown";
|
||||
} else {
|
||||
s << static_cast<int>(device_info.gfx_ver.major) << "."
|
||||
<< static_cast<int>(device_info.gfx_ver.minor) << "."
|
||||
<< static_cast<int>(device_info.gfx_ver.revision);
|
||||
}
|
||||
IE_SET_METRIC_RETURN(GPU_UARCH_VERSION, s.str());
|
||||
} else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
|
||||
auto deviceName = StringRightTrim(device_info.dev_name, "NEO", false);
|
||||
deviceName += std::string(" (") + (device_info.dev_type == cldnn::device_type::discrete_gpu ? "dGPU" : "iGPU") + ")";
|
||||
@@ -859,6 +931,8 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
|
||||
capabilities.push_back(METRIC_VALUE(FP16));
|
||||
if (device_info.supports_imad || device_info.supports_immad)
|
||||
capabilities.push_back(METRIC_VALUE(INT8));
|
||||
if (device_info.supports_immad)
|
||||
capabilities.push_back(METRIC_VALUE(GPU_HW_MATMUL));
|
||||
|
||||
IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
|
||||
} else if (name == METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)) {
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
#endif
|
||||
#include "gpu/gpu_context_api_ocl.hpp"
|
||||
|
||||
#include "cldnn/cldnn_config.hpp"
|
||||
|
||||
using namespace BehaviorTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
@@ -56,6 +58,16 @@ INSTANTIATE_TEST_CASE_P(
|
||||
::testing::Values("GPU")
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
nightly_IEClassGetMetricTest, IEClassGetMetricTest_DEVICE_GOPS,
|
||||
::testing::Values("GPU")
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
nightly_IEClassGetMetricTest, IEClassGetMetricTest_DEVICE_TYPE,
|
||||
::testing::Values("GPU")
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
nightly_IEClassGetMetricTest, IEClassGetMetricTest_RANGE_FOR_ASYNC_INFER_REQUESTS,
|
||||
::testing::Values("GPU")
|
||||
@@ -81,6 +93,66 @@ INSTANTIATE_TEST_CASE_P(
|
||||
::testing::Values("GPU")
|
||||
);
|
||||
|
||||
//
|
||||
// GPU specific metrics
|
||||
//
|
||||
using IEClassGetMetricTest_GPU_DEVICE_TOTAL_MEM_SIZE = IEClassBaseTestP;
|
||||
// Reads the GPU_DEVICE_TOTAL_MEM_SIZE metric from the device under test, prints it,
// and then checks that the key is reported as supported.
TEST_P(IEClassGetMetricTest_GPU_DEVICE_TOTAL_MEM_SIZE, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metricValue;

    ASSERT_NO_THROW(metricValue = ie.GetMetric(deviceName, GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE)));

    // The metric is declared as uint64_t; convert before printing.
    uint64_t totalMemSize = metricValue;
    std::cout << "GPU device total memory size: " << totalMemSize << std::endl;

    ASSERT_METRIC_SUPPORTED(GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE));
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_DEVICE_TOTAL_MEM_SIZE,
|
||||
::testing::Values("GPU")
|
||||
);
|
||||
|
||||
using IEClassGetMetricTest_GPU_UARCH_VERSION = IEClassBaseTestP;
|
||||
// Reads the GPU_UARCH_VERSION metric from the device under test, prints it,
// and then checks that the key is reported as supported.
TEST_P(IEClassGetMetricTest_GPU_UARCH_VERSION, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metricValue;

    ASSERT_NO_THROW(metricValue = ie.GetMetric(deviceName, GPU_METRIC_KEY(UARCH_VERSION)));

    // The metric is declared as std::string; convert before printing.
    std::string uarchVersion = metricValue;
    std::cout << "GPU device uarch: " << uarchVersion << std::endl;

    ASSERT_METRIC_SUPPORTED(GPU_METRIC_KEY(UARCH_VERSION));
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_UARCH_VERSION,
|
||||
::testing::Values("GPU")
|
||||
);
|
||||
|
||||
using IEClassGetMetricTest_GPU_EXECUTION_UNITS_COUNT = IEClassBaseTestP;
|
||||
// Reads the GPU_EXECUTION_UNITS_COUNT metric from the device under test, prints it,
// and then checks that the key is reported as supported.
TEST_P(IEClassGetMetricTest_GPU_EXECUTION_UNITS_COUNT, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metricValue;

    ASSERT_NO_THROW(metricValue = ie.GetMetric(deviceName, GPU_METRIC_KEY(EXECUTION_UNITS_COUNT)));

    // The metric is declared as int; convert before printing.
    int executionUnits = metricValue;
    std::cout << "GPU EUs count: " << executionUnits << std::endl;

    ASSERT_METRIC_SUPPORTED(GPU_METRIC_KEY(EXECUTION_UNITS_COUNT));
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_EXECUTION_UNITS_COUNT,
|
||||
::testing::Values("GPU")
|
||||
);
|
||||
|
||||
//
|
||||
// IE Class GetConfig
|
||||
//
|
||||
|
||||
@@ -138,6 +138,8 @@ using IEClassGetMetricTest_SUPPORTED_CONFIG_KEYS = IEClassBaseTestP;
|
||||
using IEClassGetMetricTest_AVAILABLE_DEVICES = IEClassBaseTestP;
|
||||
using IEClassGetMetricTest_FULL_DEVICE_NAME = IEClassBaseTestP;
|
||||
using IEClassGetMetricTest_OPTIMIZATION_CAPABILITIES = IEClassBaseTestP;
|
||||
using IEClassGetMetricTest_DEVICE_GOPS = IEClassBaseTestP;
|
||||
using IEClassGetMetricTest_DEVICE_TYPE = IEClassBaseTestP;
|
||||
using IEClassGetMetricTest_NUMBER_OF_WAITING_INFER_REQUESTS = IEClassBaseTestP;
|
||||
using IEClassGetMetricTest_NUMBER_OF_EXEC_INFER_REQUESTS = IEClassBaseTestP;
|
||||
using IEClassGetMetricTest_RANGE_FOR_ASYNC_INFER_REQUESTS = IEClassBaseTestP;
|
||||
@@ -772,6 +774,35 @@ TEST_P(IEClassGetMetricTest_OPTIMIZATION_CAPABILITIES, GetMetricAndPrintNoThrow)
|
||||
ASSERT_METRIC_SUPPORTED(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
|
||||
}
|
||||
|
||||
// Reads the DEVICE_GOPS metric (a precision -> GOPS map) from the device under test,
// prints one line per precision, and then checks that the key is reported as supported.
TEST_P(IEClassGetMetricTest_DEVICE_GOPS, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metricValue;

    ASSERT_NO_THROW(metricValue = ie.GetMetric(deviceName, METRIC_KEY(DEVICE_GOPS)));

    std::map<InferenceEngine::Precision, float> gopsByPrecision = metricValue;
    std::cout << "Device GOPS: " << std::endl;
    for (const auto& entry : gopsByPrecision) {
        std::cout << entry.first << ": " << entry.second << std::endl;
    }

    ASSERT_METRIC_SUPPORTED(METRIC_KEY(DEVICE_GOPS));
}
|
||||
|
||||
// Reads the DEVICE_TYPE metric from the device under test, prints it,
// and then checks that the key is reported as supported.
TEST_P(IEClassGetMetricTest_DEVICE_TYPE, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metricValue;

    ASSERT_NO_THROW(metricValue = ie.GetMetric(deviceName, METRIC_KEY(DEVICE_TYPE)));

    // The metric is declared as Metrics::DeviceType; convert before printing.
    InferenceEngine::Metrics::DeviceType deviceType = metricValue;
    std::cout << "Device Type: " << deviceType << std::endl;

    ASSERT_METRIC_SUPPORTED(METRIC_KEY(DEVICE_TYPE));
}
|
||||
|
||||
TEST_P(IEClassGetMetricTest_NUMBER_OF_WAITING_INFER_REQUESTS, GetMetricAndPrintNoThrow) {
|
||||
SKIP_IF_CURRENT_TEST_IS_DISABLED()
|
||||
Core ie;
|
||||
@@ -1483,4 +1514,3 @@ TEST_P(IEClassLoadNetworkAfterCoreRecreateTest, LoadAfterRecreateCoresAndPlugins
|
||||
});
|
||||
};
|
||||
} // namespace BehaviorTestsDefinitions
|
||||
|
||||
|
||||
13
inference-engine/thirdparty/clDNN/api/device.hpp
vendored
13
inference-engine/thirdparty/clDNN/api/device.hpp
vendored
@@ -23,6 +23,12 @@ enum class device_type {
|
||||
discrete_gpu = 1
|
||||
};
|
||||
|
||||
/// @brief Device graphics version split into major / minor / revision components.
struct gfx_version {
|
||||
uint16_t major;  ///< Major version component.
|
||||
uint8_t minor;  ///< Minor version component.
|
||||
uint8_t revision;  ///< Revision component.
|
||||
};
|
||||
|
||||
/// @brief Information about the device properties and capabilities.
|
||||
struct device_info {
|
||||
uint32_t cores_count; ///< Number of available HW cores.
|
||||
@@ -53,6 +59,13 @@ struct device_info {
|
||||
std::string driver_version; ///< Version of OpenCL driver
|
||||
|
||||
device_type dev_type; ///< Defines type of current GPU device (integrated or discrete)
|
||||
|
||||
gfx_version gfx_ver;
|
||||
uint32_t device_id;
|
||||
uint32_t num_slices;
|
||||
uint32_t num_sub_slices_per_slice;
|
||||
uint32_t num_eus_per_sub_slice;
|
||||
uint32_t num_threads_per_eu;
|
||||
};
|
||||
|
||||
struct device_impl;
|
||||
|
||||
@@ -19,6 +19,34 @@ typedef cl_va_api_device_source_intel cl_device_source_intel;
|
||||
typedef cl_va_api_device_set_intel cl_device_set_intel;
|
||||
#endif
|
||||
|
||||
// cl_intel_device_attribute_query
|
||||
#define CL_DEVICE_IP_VERSION_INTEL 0x4250
|
||||
#define CL_DEVICE_ID_INTEL 0x4251
|
||||
#define CL_DEVICE_NUM_SLICES_INTEL 0x4252
|
||||
#define CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL 0x4253
|
||||
#define CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL 0x4254
|
||||
#define CL_DEVICE_NUM_THREADS_PER_EU_INTEL 0x4255
|
||||
#define CL_DEVICE_FEATURE_CAPABILITIES_INTEL 0x4256
|
||||
|
||||
typedef cl_bitfield cl_device_feature_capabilities_intel;
|
||||
|
||||
/* For GPU devices, version 1.0.0: */
|
||||
|
||||
#define CL_DEVICE_FEATURE_FLAG_DP4A_INTEL (1 << 0)
|
||||
|
||||
|
||||
namespace cl {
|
||||
namespace detail {
|
||||
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_IP_VERSION_INTEL, cl_uint)
|
||||
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_ID_INTEL, cl_uint)
|
||||
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_SLICES_INTEL, cl_uint)
|
||||
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL, cl_uint)
|
||||
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL, cl_uint)
|
||||
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_THREADS_PER_EU_INTEL, cl_uint)
|
||||
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_FEATURE_CAPABILITIES_INTEL, cl_device_feature_capabilities_intel)
|
||||
}
|
||||
}
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -109,6 +109,14 @@ static device_type get_device_type(const cl::Device& device) {
|
||||
return unified_mem ? device_type::integrated_gpu : device_type::discrete_gpu;
|
||||
}
|
||||
|
||||
// Splits a packed version word into a gfx_version:
//   bits 16 and up -> major (narrowed to uint16_t),
//   bits 8..15     -> minor,
//   bits 0..7      -> revision.
gfx_version parse_version(cl_uint ver) {
    gfx_version version;
    version.major = static_cast<uint16_t>(ver >> 16);
    version.minor = static_cast<uint8_t>((ver >> 8) & 0xFF);
    version.revision = static_cast<uint8_t>(ver & 0xFF);
    return version;
}
|
||||
|
||||
static bool get_imad_support(const cl::Device& device) {
|
||||
std::string dev_name = device.getInfo<CL_DEVICE_NAME>();
|
||||
|
||||
@@ -222,6 +230,27 @@ device_info_internal::device_info_internal(const cl::Device& device) {
|
||||
supports_optimization_hints = false;
|
||||
supports_local_block_io = extensions.find("cl_intel_subgroup_local_block_io") != std::string::npos &&
|
||||
is_local_block_io_supported(device);
|
||||
|
||||
bool device_attr_supported = extensions.find("cl_intel_device_attribute_query") != std::string::npos;
|
||||
|
||||
if (device_attr_supported) {
|
||||
gfx_ver = parse_version(device.getInfo<CL_DEVICE_IP_VERSION_INTEL>());
|
||||
device_id = device.getInfo<CL_DEVICE_ID_INTEL>();
|
||||
num_slices = device.getInfo<CL_DEVICE_NUM_SLICES_INTEL>();
|
||||
num_sub_slices_per_slice = device.getInfo<CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL>();
|
||||
num_eus_per_sub_slice = device.getInfo<CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL>();
|
||||
num_threads_per_eu = device.getInfo<CL_DEVICE_NUM_THREADS_PER_EU_INTEL>();
|
||||
auto features = device.getInfo<CL_DEVICE_FEATURE_CAPABILITIES_INTEL>();
|
||||
|
||||
supports_imad = supports_imad || (features & CL_DEVICE_FEATURE_FLAG_DP4A_INTEL);
|
||||
} else {
|
||||
gfx_ver = {0, 0, 0};
|
||||
device_id = driver_dev_id();
|
||||
num_slices = 0;
|
||||
num_sub_slices_per_slice = 0;
|
||||
num_eus_per_sub_slice = 0;
|
||||
num_threads_per_eu = 0;
|
||||
}
|
||||
}
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
||||
|
||||
@@ -43,7 +43,13 @@ struct device_info_internal : cldnn::device_info {
|
||||
supports_usm,
|
||||
dev_name,
|
||||
driver_version,
|
||||
dev_type
|
||||
dev_type,
|
||||
gfx_ver,
|
||||
device_id,
|
||||
num_slices,
|
||||
num_sub_slices_per_slice,
|
||||
num_eus_per_sub_slice,
|
||||
num_threads_per_eu,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user