[IE CLDNN] QueryAPI extension with gpu device info (#5440)

This commit is contained in:
Vladimir Paramuzov
2021-05-19 16:44:40 +03:00
committed by GitHub
parent ae9f3ebc5d
commit d52c4d433a
11 changed files with 360 additions and 2 deletions

View File

@@ -3,6 +3,7 @@
//
#include "ie_api_impl.hpp"
#include "ie_plugin_config.hpp"
#include "hetero/hetero_plugin_config.hpp"
#include "ie_iinfer_request.hpp"
@@ -70,6 +71,11 @@ PyObject* parse_parameter(const InferenceEngine::Parameter& param) {
auto val = param.as<unsigned int>();
return PyLong_FromLong((unsigned long)val);
}
// Check for uint64_t
else if (param.is<uint64_t>()) {
    auto val = param.as<uint64_t>();
    // PyLong_FromLong((unsigned long)val) would truncate on LLP64 platforms (Windows,
    // where long is 32-bit) and produce negative values for inputs above LONG_MAX.
    // Use the unsigned 64-bit constructor so the full range is preserved.
    return PyLong_FromUnsignedLongLong((unsigned long long)val);
}
// Check for float
else if (param.is<float>()) {
auto val = param.as<float>();
@@ -151,6 +157,21 @@ PyObject* parse_parameter(const InferenceEngine::Parameter& param) {
PyDict_SetItemString(dict, it.first.c_str(), PyLong_FromLong((long)it.second));
}
return dict;
} else if (param.is<std::map<InferenceEngine::Precision, float>>()) {
    // Convert a {Precision -> float} map into a python dict keyed by the precision's
    // textual name (obtained through the Precision stream operator).
    auto precisionMap = param.as<std::map<InferenceEngine::Precision, float>>();
    PyObject* result = PyDict_New();
    for (const auto& entry : precisionMap) {
        std::stringstream key;
        key << entry.first;
        PyDict_SetItemString(result, key.str().c_str(), PyFloat_FromDouble((double)entry.second));
    }
    return result;
} else if (param.is<InferenceEngine::Metrics::DeviceType>()) {
    using namespace InferenceEngine;
    // Render the device type through its stream operator ("integrated"/"discrete")
    // and hand the resulting text back as a python string.
    std::stringstream ss;
    ss << param.as<InferenceEngine::Metrics::DeviceType>();
    return PyUnicode_FromString(ss.str().c_str());
} else {
PyErr_SetString(PyExc_TypeError, "Failed to convert parameter to Python representation!");
return (PyObject*)NULL;

View File

@@ -14,6 +14,44 @@
namespace InferenceEngine {
namespace Metrics {
/**
 * @def GPU_METRIC_KEY(name)
 * @brief shortcut for defining GPU plugin metrics
 */
#define GPU_METRIC_KEY(name) METRIC_KEY(GPU_##name)
/**
 * @def DECLARE_GPU_METRIC_KEY(name, ...)
 * @brief shortcut for declaring a GPU plugin metric key together with its value type
 */
#define DECLARE_GPU_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(GPU_##name, __VA_ARGS__)
/**
 * @def DECLARE_GPU_METRIC_VALUE(name)
 * @brief shortcut for defining gpu metric values
 */
#define DECLARE_GPU_METRIC_VALUE(name) DECLARE_METRIC_VALUE(GPU_##name)
/**
 * @brief Metric which defines size of memory in bytes available for the device. For iGPU it returns host memory size, for dGPU - dedicated gpu memory size
 */
DECLARE_GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE, uint64_t);
/**
 * @brief Metric to get microarchitecture identifier in major.minor.revision format
 */
DECLARE_GPU_METRIC_KEY(UARCH_VERSION, std::string);
/**
 * @brief Metric to get count of execution units for current GPU
 */
DECLARE_GPU_METRIC_KEY(EXECUTION_UNITS_COUNT, int);
/**
 * @brief Possible return value for OPTIMIZATION_CAPABILITIES metric
 *  - "HW_MATMUL" - Defines if device has hardware block for matrix multiplication
 */
DECLARE_GPU_METRIC_VALUE(HW_MATMUL);
} // namespace Metrics
/**
* @brief GPU plugin configuration
*/

View File

@@ -13,6 +13,9 @@
#include <string>
#include <tuple>
#include <vector>
#include <map>
#include "ie_precision.hpp"
namespace InferenceEngine {
@@ -148,6 +151,36 @@ DECLARE_METRIC_KEY(NUMBER_OF_EXEC_INFER_REQUESTS, unsigned int);
*/
DECLARE_METRIC_KEY(DEVICE_ARCHITECTURE, std::string);
/**
* @brief Enum to define possible device types
*/
enum class DeviceType {
    integrated = 0,  // iGPU: device sharing memory with the host
    discrete = 1,    // dGPU: device with its own dedicated memory
};
/** @cond INTERNAL */
inline std::ostream& operator<<(std::ostream& os, const InferenceEngine::Metrics::DeviceType& deviceType) {
    // Print a readable name for the enum; any unrecognized value falls back to "unknown".
    if (deviceType == InferenceEngine::Metrics::DeviceType::discrete) {
        os << "discrete";
    } else if (deviceType == InferenceEngine::Metrics::DeviceType::integrated) {
        os << "integrated";
    } else {
        os << "unknown";
    }
    return os;
}
/** @endcond */
/**
* @brief Metric to get a type of device. See DeviceType enum definition for possible return values
*/
DECLARE_METRIC_KEY(DEVICE_TYPE, DeviceType);
/**
* @brief Metric which defines Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported by specified device
*/
DECLARE_METRIC_KEY(DEVICE_GOPS, std::map<InferenceEngine::Precision, float>);
/**
* @brief Metric which defines support of import/export functionality by plugin
*/

View File

@@ -3,6 +3,7 @@
//
#include <cstdlib>
#include <ie_plugin_config.hpp>
#include <inference_engine.hpp>
#include <iomanip>
#include <memory>
@@ -41,6 +42,8 @@ void printParameterValue(const Parameter& value) {
std::cout << value.as<int>() << std::endl;
} else if (value.is<unsigned int>()) {
std::cout << value.as<unsigned int>() << std::endl;
} else if (value.is<uint64_t>()) {
std::cout << value.as<uint64_t>() << std::endl;
} else if (value.is<float>()) {
std::cout << value.as<float>() << std::endl;
} else if (value.is<std::string>()) {
@@ -62,6 +65,17 @@ void printParameterValue(const Parameter& value) {
std::cout << std::get<2>(values);
std::cout << " }";
std::cout << std::endl;
} else if (value.is<Metrics::DeviceType>()) {
    // DeviceType has a stream operator that prints "integrated"/"discrete"
    std::cout << value.as<Metrics::DeviceType>() << std::endl;
} else if (value.is<std::map<InferenceEngine::Precision, float>>()) {
    // Print the precision->GOPS map as "{ PREC: value; ...  }"
    auto gopsMap = value.as<std::map<InferenceEngine::Precision, float>>();
    std::cout << "{ ";
    for (const auto& entry : gopsMap) {
        std::cout << entry.first << ": " << entry.second << "; ";
    }
    std::cout << " }" << std::endl;
} else if (value.is<std::tuple<unsigned int, unsigned int>>()) {
auto values = value.as<std::tuple<unsigned int, unsigned int>>();
std::cout << "{ ";

View File

@@ -79,11 +79,17 @@
#include "cldnn_executable_network.h"
#include "cldnn_custom_layer.h"
#include "cldnn_itt.h"
#include "cldnn/cldnn_config.hpp"
#ifdef __linux__
# include <dlfcn.h>
#endif
// Undef DEVICE_TYPE macro which can be defined somewhere in windows headers as DWORD and conflict with our metric
#ifdef DEVICE_TYPE
#undef DEVICE_TYPE
#endif
using namespace InferenceEngine;
using namespace InferenceEngine::gpu;
using namespace InferenceEngine::details;
@@ -814,6 +820,42 @@ auto StringRightTrim = [](std::string string, std::string substring, bool case_s
return ret_str;
};
static float GetGOPS(cldnn::device_info info, cldnn::data_types dt) {
auto freqGHz = info.core_frequency / 1000.f;
auto numEUs = info.cores_count;
auto opsPerComputeBlock = 0;
auto computeBlockIPC = 1.0f;
switch (dt) {
case cldnn::data_types::u8:
case cldnn::data_types::i8: {
if (info.supports_imad) {
// fma * simd size
opsPerComputeBlock = 2 * 32;
} else {
// separate mul + add instructions for int8 data type
opsPerComputeBlock = 2 * 16;
// mul/add instructions can't be executed in parallel, so we need 2 clocks to execute compute block
computeBlockIPC = 0.5f;
}
break;
}
case cldnn::data_types::f16: {
// fma * simd size
opsPerComputeBlock = 2 * 16;
break;
}
case cldnn::data_types::f32: {
// fma * simd size
opsPerComputeBlock = 2 * 8;
break;
}
default: throw std::runtime_error("GetGOPS: Unsupported precision");
}
return freqGHz * opsPerComputeBlock * computeBlockIPC * numEUs;
}
Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& options) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::GetMetric");
auto device_id = GetConfig(CONFIG_KEY(DEVICE_ID), {});
@@ -834,12 +876,42 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
metrics.push_back(METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS));
metrics.push_back(METRIC_KEY(RANGE_FOR_STREAMS));
metrics.push_back(METRIC_KEY(DEVICE_TYPE));
metrics.push_back(METRIC_KEY(DEVICE_GOPS));
metrics.push_back(GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE));
metrics.push_back(GPU_METRIC_KEY(UARCH_VERSION));
metrics.push_back(GPU_METRIC_KEY(EXECUTION_UNITS_COUNT));
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
} else if (name == METRIC_KEY(AVAILABLE_DEVICES)) {
std::vector<std::string> availableDevices = { };
for (auto const& dev : device_map)
availableDevices.push_back(dev.first);
IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
} else if (name == GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE)) {
IE_SET_METRIC_RETURN(GPU_DEVICE_TOTAL_MEM_SIZE, device_info.max_global_mem_size);
} else if (name == METRIC_KEY(DEVICE_TYPE)) {
auto dev_type = device_info.dev_type == cldnn::device_type::discrete_gpu ? Metrics::DeviceType::discrete : Metrics::DeviceType::integrated;
IE_SET_METRIC_RETURN(DEVICE_TYPE, dev_type);
} else if (name == METRIC_KEY(DEVICE_GOPS)) {
std::map<InferenceEngine::Precision, float> gops;
gops[InferenceEngine::Precision::I8] = GetGOPS(device_info, cldnn::data_types::i8);
gops[InferenceEngine::Precision::U8] = GetGOPS(device_info, cldnn::data_types::u8);
gops[InferenceEngine::Precision::FP16] = GetGOPS(device_info, cldnn::data_types::f16);
gops[InferenceEngine::Precision::FP32] = GetGOPS(device_info, cldnn::data_types::f32);
IE_SET_METRIC_RETURN(DEVICE_GOPS, gops);
} else if (name == GPU_METRIC_KEY(EXECUTION_UNITS_COUNT)) {
IE_SET_METRIC_RETURN(GPU_EXECUTION_UNITS_COUNT, device_info.cores_count);
} else if (name == GPU_METRIC_KEY(UARCH_VERSION)) {
std::stringstream s;
if (device_info.gfx_ver.major == 0 && device_info.gfx_ver.minor == 0 && device_info.gfx_ver.revision == 0) {
s << "unknown";
} else {
s << static_cast<int>(device_info.gfx_ver.major) << "."
<< static_cast<int>(device_info.gfx_ver.minor) << "."
<< static_cast<int>(device_info.gfx_ver.revision);
}
IE_SET_METRIC_RETURN(GPU_UARCH_VERSION, s.str());
} else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
auto deviceName = StringRightTrim(device_info.dev_name, "NEO", false);
deviceName += std::string(" (") + (device_info.dev_type == cldnn::device_type::discrete_gpu ? "dGPU" : "iGPU") + ")";
@@ -859,6 +931,8 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
capabilities.push_back(METRIC_VALUE(FP16));
if (device_info.supports_imad || device_info.supports_immad)
capabilities.push_back(METRIC_VALUE(INT8));
if (device_info.supports_immad)
capabilities.push_back(METRIC_VALUE(GPU_HW_MATMUL));
IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
} else if (name == METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)) {

View File

@@ -11,6 +11,8 @@
#endif
#include "gpu/gpu_context_api_ocl.hpp"
#include "cldnn/cldnn_config.hpp"
using namespace BehaviorTestsDefinitions;
namespace {
@@ -56,6 +58,16 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values("GPU")
);
INSTANTIATE_TEST_CASE_P(
nightly_IEClassGetMetricTest, IEClassGetMetricTest_DEVICE_GOPS,
::testing::Values("GPU")
);
INSTANTIATE_TEST_CASE_P(
nightly_IEClassGetMetricTest, IEClassGetMetricTest_DEVICE_TYPE,
::testing::Values("GPU")
);
INSTANTIATE_TEST_CASE_P(
nightly_IEClassGetMetricTest, IEClassGetMetricTest_RANGE_FOR_ASYNC_INFER_REQUESTS,
::testing::Values("GPU")
@@ -81,6 +93,66 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values("GPU")
);
//
// GPU specific metrics
//
using IEClassGetMetricTest_GPU_DEVICE_TOTAL_MEM_SIZE = IEClassBaseTestP;
TEST_P(IEClassGetMetricTest_GPU_DEVICE_TOTAL_MEM_SIZE, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metric;
    // The metric must be retrievable without throwing and be listed in SUPPORTED_METRICS
    ASSERT_NO_THROW(metric = ie.GetMetric(deviceName, GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE)));
    uint64_t totalMemSize = metric;
    std::cout << "GPU device total memory size: " << totalMemSize << std::endl;
    ASSERT_METRIC_SUPPORTED(GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE));
}
INSTANTIATE_TEST_CASE_P(
nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_DEVICE_TOTAL_MEM_SIZE,
::testing::Values("GPU")
);
using IEClassGetMetricTest_GPU_UARCH_VERSION = IEClassBaseTestP;
TEST_P(IEClassGetMetricTest_GPU_UARCH_VERSION, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metric;
    // Query the microarchitecture version string and verify the metric is advertised
    ASSERT_NO_THROW(metric = ie.GetMetric(deviceName, GPU_METRIC_KEY(UARCH_VERSION)));
    std::string uarchVersion = metric;
    std::cout << "GPU device uarch: " << uarchVersion << std::endl;
    ASSERT_METRIC_SUPPORTED(GPU_METRIC_KEY(UARCH_VERSION));
}
INSTANTIATE_TEST_CASE_P(
nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_UARCH_VERSION,
::testing::Values("GPU")
);
using IEClassGetMetricTest_GPU_EXECUTION_UNITS_COUNT = IEClassBaseTestP;
TEST_P(IEClassGetMetricTest_GPU_EXECUTION_UNITS_COUNT, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metric;
    // Query the EU count and verify the metric is advertised by the plugin
    ASSERT_NO_THROW(metric = ie.GetMetric(deviceName, GPU_METRIC_KEY(EXECUTION_UNITS_COUNT)));
    int euCount = metric;
    std::cout << "GPU EUs count: " << euCount << std::endl;
    ASSERT_METRIC_SUPPORTED(GPU_METRIC_KEY(EXECUTION_UNITS_COUNT));
}
INSTANTIATE_TEST_CASE_P(
nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_EXECUTION_UNITS_COUNT,
::testing::Values("GPU")
);
//
// IE Class GetConfig
//

View File

@@ -138,6 +138,8 @@ using IEClassGetMetricTest_SUPPORTED_CONFIG_KEYS = IEClassBaseTestP;
using IEClassGetMetricTest_AVAILABLE_DEVICES = IEClassBaseTestP;
using IEClassGetMetricTest_FULL_DEVICE_NAME = IEClassBaseTestP;
using IEClassGetMetricTest_OPTIMIZATION_CAPABILITIES = IEClassBaseTestP;
using IEClassGetMetricTest_DEVICE_GOPS = IEClassBaseTestP;
using IEClassGetMetricTest_DEVICE_TYPE = IEClassBaseTestP;
using IEClassGetMetricTest_NUMBER_OF_WAITING_INFER_REQUESTS = IEClassBaseTestP;
using IEClassGetMetricTest_NUMBER_OF_EXEC_INFER_REQUESTS = IEClassBaseTestP;
using IEClassGetMetricTest_RANGE_FOR_ASYNC_INFER_REQUESTS = IEClassBaseTestP;
@@ -772,6 +774,35 @@ TEST_P(IEClassGetMetricTest_OPTIMIZATION_CAPABILITIES, GetMetricAndPrintNoThrow)
ASSERT_METRIC_SUPPORTED(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
}
TEST_P(IEClassGetMetricTest_DEVICE_GOPS, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metric;
    // DEVICE_GOPS returns a map of precision -> estimated GOPS; print every entry
    ASSERT_NO_THROW(metric = ie.GetMetric(deviceName, METRIC_KEY(DEVICE_GOPS)));
    std::map<InferenceEngine::Precision, float> gopsPerPrecision = metric;
    std::cout << "Device GOPS: " << std::endl;
    for (const auto& entry : gopsPerPrecision) {
        std::cout << entry.first << ": " << entry.second << std::endl;
    }
    ASSERT_METRIC_SUPPORTED(METRIC_KEY(DEVICE_GOPS));
}
TEST_P(IEClassGetMetricTest_DEVICE_TYPE, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metric;
    // DEVICE_TYPE returns the DeviceType enum, printable via its stream operator
    ASSERT_NO_THROW(metric = ie.GetMetric(deviceName, METRIC_KEY(DEVICE_TYPE)));
    InferenceEngine::Metrics::DeviceType deviceType = metric;
    std::cout << "Device Type: " << deviceType << std::endl;
    ASSERT_METRIC_SUPPORTED(METRIC_KEY(DEVICE_TYPE));
}
TEST_P(IEClassGetMetricTest_NUMBER_OF_WAITING_INFER_REQUESTS, GetMetricAndPrintNoThrow) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Core ie;
@@ -1483,4 +1514,3 @@ TEST_P(IEClassLoadNetworkAfterCoreRecreateTest, LoadAfterRecreateCoresAndPlugins
});
};
} // namespace BehaviorTestsDefinitions

View File

@@ -23,6 +23,12 @@ enum class device_type {
discrete_gpu = 1
};
/// @brief GPU microarchitecture version in major.minor.revision form
struct gfx_version {
    uint16_t major;    ///< Major version component
    uint8_t minor;     ///< Minor version component
    uint8_t revision;  ///< Revision component
};
/// @brief Information about the device properties and capabilities.
struct device_info {
uint32_t cores_count; ///< Number of available HW cores.
@@ -53,6 +59,13 @@ struct device_info {
std::string driver_version; ///< Version of OpenCL driver
device_type dev_type; ///< Defines type of current GPU device (integrated or discrete)
gfx_version gfx_ver;                ///< Microarchitecture version ({0,0,0} when it can't be queried)
uint32_t device_id;                 ///< Device ID reported by the driver
uint32_t num_slices;                ///< Number of slices (0 when attribute query is unsupported)
uint32_t num_sub_slices_per_slice;  ///< Number of subslices in each slice
uint32_t num_eus_per_sub_slice;     ///< Number of execution units in each subslice
uint32_t num_threads_per_eu;        ///< Number of HW threads per execution unit
};
};
struct device_impl;

View File

@@ -19,6 +19,34 @@ typedef cl_va_api_device_source_intel cl_device_source_intel;
typedef cl_va_api_device_set_intel cl_device_set_intel;
#endif
// cl_intel_device_attribute_query
// Attribute tokens of the cl_intel_device_attribute_query OpenCL extension,
// used below to query GPU topology and feature capabilities.
#define CL_DEVICE_IP_VERSION_INTEL 0x4250
#define CL_DEVICE_ID_INTEL 0x4251
#define CL_DEVICE_NUM_SLICES_INTEL 0x4252
#define CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL 0x4253
#define CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL 0x4254
#define CL_DEVICE_NUM_THREADS_PER_EU_INTEL 0x4255
#define CL_DEVICE_FEATURE_CAPABILITIES_INTEL 0x4256
typedef cl_bitfield cl_device_feature_capabilities_intel;
/* For GPU devices, version 1.0.0: */
#define CL_DEVICE_FEATURE_FLAG_DP4A_INTEL (1 << 0)
// Register a return type for each extension token so that
// cl::Device::getInfo<TOKEN>() yields a correctly typed value.
namespace cl {
namespace detail {
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_IP_VERSION_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_ID_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_SLICES_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_THREADS_PER_EU_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_FEATURE_CAPABILITIES_INTEL, cl_device_feature_capabilities_intel)
}  // namespace detail
}  // namespace cl
#include <memory>
namespace {

View File

@@ -109,6 +109,14 @@ static device_type get_device_type(const cl::Device& device) {
return unified_mem ? device_type::integrated_gpu : device_type::discrete_gpu;
}
// Decodes a packed version value (as returned by CL_DEVICE_IP_VERSION_INTEL):
// bits 31..16 - major, bits 15..8 - minor, bits 7..0 - revision.
gfx_version parse_version(cl_uint ver) {
    gfx_version result = {};
    result.major = static_cast<uint16_t>(ver >> 16);
    result.minor = static_cast<uint8_t>((ver >> 8) & 0xFF);
    result.revision = static_cast<uint8_t>(ver & 0xFF);
    return result;
}
static bool get_imad_support(const cl::Device& device) {
std::string dev_name = device.getInfo<CL_DEVICE_NAME>();
@@ -222,6 +230,27 @@ device_info_internal::device_info_internal(const cl::Device& device) {
supports_optimization_hints = false;
supports_local_block_io = extensions.find("cl_intel_subgroup_local_block_io") != std::string::npos &&
is_local_block_io_supported(device);
// cl_intel_device_attribute_query exposes detailed HW topology; when it is absent
// the topology fields are zeroed and the device id falls back to the driver-reported one.
bool has_device_attr_query = extensions.find("cl_intel_device_attribute_query") != std::string::npos;
if (!has_device_attr_query) {
    gfx_ver = {0, 0, 0};
    device_id = driver_dev_id();
    num_slices = 0;
    num_sub_slices_per_slice = 0;
    num_eus_per_sub_slice = 0;
    num_threads_per_eu = 0;
} else {
    gfx_ver = parse_version(device.getInfo<CL_DEVICE_IP_VERSION_INTEL>());
    device_id = device.getInfo<CL_DEVICE_ID_INTEL>();
    num_slices = device.getInfo<CL_DEVICE_NUM_SLICES_INTEL>();
    num_sub_slices_per_slice = device.getInfo<CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL>();
    num_eus_per_sub_slice = device.getInfo<CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL>();
    num_threads_per_eu = device.getInfo<CL_DEVICE_NUM_THREADS_PER_EU_INTEL>();
    // DP4A capability implies IMAD support even when the name-based heuristic missed it
    auto feature_caps = device.getInfo<CL_DEVICE_FEATURE_CAPABILITIES_INTEL>();
    supports_imad = supports_imad || (feature_caps & CL_DEVICE_FEATURE_FLAG_DP4A_INTEL);
}
}
} // namespace gpu
} // namespace cldnn

View File

@@ -43,7 +43,13 @@ struct device_info_internal : cldnn::device_info {
supports_usm,
dev_name,
driver_version,
dev_type
dev_type,
gfx_ver,
device_id,
num_slices,
num_sub_slices_per_slice,
num_eus_per_sub_slice,
num_threads_per_eu,
};
}
};