[IE CLDNN] QueryAPI extension with gpu device info (#5440)

This commit is contained in:
Vladimir Paramuzov
2021-05-19 16:44:40 +03:00
committed by GitHub
parent ae9f3ebc5d
commit d52c4d433a
11 changed files with 360 additions and 2 deletions

View File

@@ -3,6 +3,7 @@
//
#include "ie_api_impl.hpp"
#include "ie_plugin_config.hpp"
#include "hetero/hetero_plugin_config.hpp"
#include "ie_iinfer_request.hpp"
@@ -70,6 +71,11 @@ PyObject* parse_parameter(const InferenceEngine::Parameter& param) {
auto val = param.as<unsigned int>();
return PyLong_FromLong((unsigned long)val);
}
// Check for uint64_t
else if (param.is<uint64_t>()) {
    auto val = param.as<uint64_t>();
    // PyLong_FromLong((unsigned long)val) would truncate on LLP64 platforms (Windows,
    // where long is 32-bit) and produce negative values for inputs above LONG_MAX.
    // Use the unsigned 64-bit constructor so the full range is preserved.
    return PyLong_FromUnsignedLongLong((unsigned long long)val);
}
// Check for float
else if (param.is<float>()) {
auto val = param.as<float>();
@@ -151,6 +157,21 @@ PyObject* parse_parameter(const InferenceEngine::Parameter& param) {
PyDict_SetItemString(dict, it.first.c_str(), PyLong_FromLong((long)it.second));
}
return dict;
} else if (param.is<std::map<InferenceEngine::Precision, float>>()) {
    // Convert a {Precision -> float} map into a python dict keyed by the precision's
    // textual name (obtained through the Precision stream operator).
    auto precisionMap = param.as<std::map<InferenceEngine::Precision, float>>();
    PyObject* result = PyDict_New();
    for (const auto& entry : precisionMap) {
        std::stringstream key;
        key << entry.first;
        PyDict_SetItemString(result, key.str().c_str(), PyFloat_FromDouble((double)entry.second));
    }
    return result;
} else if (param.is<InferenceEngine::Metrics::DeviceType>()) {
    using namespace InferenceEngine;
    // Render the device type through its stream operator ("integrated"/"discrete")
    // and hand the resulting text back as a python string.
    std::stringstream ss;
    ss << param.as<InferenceEngine::Metrics::DeviceType>();
    return PyUnicode_FromString(ss.str().c_str());
} else {
PyErr_SetString(PyExc_TypeError, "Failed to convert parameter to Python representation!");
return (PyObject*)NULL;

View File

@@ -14,6 +14,44 @@
namespace InferenceEngine {
namespace Metrics {
/**
 * @def GPU_METRIC_KEY(name)
 * @brief shortcut for defining GPU plugin metrics
 */
#define GPU_METRIC_KEY(name) METRIC_KEY(GPU_##name)
/**
 * @def DECLARE_GPU_METRIC_KEY(name, ...)
 * @brief shortcut for declaring a GPU plugin metric key together with its value type
 */
#define DECLARE_GPU_METRIC_KEY(name, ...) DECLARE_METRIC_KEY(GPU_##name, __VA_ARGS__)
/**
 * @def DECLARE_GPU_METRIC_VALUE(name)
 * @brief shortcut for defining gpu metric values
 */
#define DECLARE_GPU_METRIC_VALUE(name) DECLARE_METRIC_VALUE(GPU_##name)
/**
 * @brief Metric which defines size of memory in bytes available for the device. For iGPU it returns host memory size, for dGPU - dedicated gpu memory size
 */
DECLARE_GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE, uint64_t);
/**
 * @brief Metric to get microarchitecture identifier in major.minor.revision format
 */
DECLARE_GPU_METRIC_KEY(UARCH_VERSION, std::string);
/**
 * @brief Metric to get count of execution units for current GPU
 */
DECLARE_GPU_METRIC_KEY(EXECUTION_UNITS_COUNT, int);
/**
 * @brief Possible return value for OPTIMIZATION_CAPABILITIES metric
 *  - "HW_MATMUL" - Defines if device has hardware block for matrix multiplication
 */
DECLARE_GPU_METRIC_VALUE(HW_MATMUL);
} // namespace Metrics
/**
* @brief GPU plugin configuration
*/

View File

@@ -13,6 +13,9 @@
#include <string>
#include <tuple>
#include <vector>
#include <map>
#include "ie_precision.hpp"
namespace InferenceEngine {
@@ -148,6 +151,36 @@ DECLARE_METRIC_KEY(NUMBER_OF_EXEC_INFER_REQUESTS, unsigned int);
*/
DECLARE_METRIC_KEY(DEVICE_ARCHITECTURE, std::string);
/**
* @brief Enum to define possible device types
*/
enum class DeviceType {
    integrated = 0,  // iGPU: device sharing memory with the host
    discrete = 1,    // dGPU: device with its own dedicated memory
};
/** @cond INTERNAL */
inline std::ostream& operator<<(std::ostream& os, const InferenceEngine::Metrics::DeviceType& deviceType) {
    // Print a readable name for the enum; any unrecognized value falls back to "unknown".
    if (deviceType == InferenceEngine::Metrics::DeviceType::discrete) {
        os << "discrete";
    } else if (deviceType == InferenceEngine::Metrics::DeviceType::integrated) {
        os << "integrated";
    } else {
        os << "unknown";
    }
    return os;
}
/** @endcond */
/**
* @brief Metric to get a type of device. See DeviceType enum definition for possible return values
*/
DECLARE_METRIC_KEY(DEVICE_TYPE, DeviceType);
/**
* @brief Metric which defines Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported by specified device
*/
DECLARE_METRIC_KEY(DEVICE_GOPS, std::map<InferenceEngine::Precision, float>);
/**
* @brief Metric which defines support of import/export functionality by plugin
*/

View File

@@ -3,6 +3,7 @@
//
#include <cstdlib>
#include <ie_plugin_config.hpp>
#include <inference_engine.hpp>
#include <iomanip>
#include <memory>
@@ -41,6 +42,8 @@ void printParameterValue(const Parameter& value) {
std::cout << value.as<int>() << std::endl;
} else if (value.is<unsigned int>()) {
std::cout << value.as<unsigned int>() << std::endl;
} else if (value.is<uint64_t>()) {
std::cout << value.as<uint64_t>() << std::endl;
} else if (value.is<float>()) {
std::cout << value.as<float>() << std::endl;
} else if (value.is<std::string>()) {
@@ -62,6 +65,17 @@ void printParameterValue(const Parameter& value) {
std::cout << std::get<2>(values);
std::cout << " }";
std::cout << std::endl;
} else if (value.is<Metrics::DeviceType>()) {
    // DeviceType has a stream operator that prints "integrated"/"discrete"
    std::cout << value.as<Metrics::DeviceType>() << std::endl;
} else if (value.is<std::map<InferenceEngine::Precision, float>>()) {
    // Print the precision->GOPS map as "{ PREC: value; ...  }"
    auto gopsMap = value.as<std::map<InferenceEngine::Precision, float>>();
    std::cout << "{ ";
    for (const auto& entry : gopsMap) {
        std::cout << entry.first << ": " << entry.second << "; ";
    }
    std::cout << " }" << std::endl;
} else if (value.is<std::tuple<unsigned int, unsigned int>>()) {
auto values = value.as<std::tuple<unsigned int, unsigned int>>();
std::cout << "{ ";

View File

@@ -79,11 +79,17 @@
#include "cldnn_executable_network.h"
#include "cldnn_custom_layer.h"
#include "cldnn_itt.h"
#include "cldnn/cldnn_config.hpp"
#ifdef __linux__
# include <dlfcn.h>
#endif
// Undef DEVICE_TYPE macro which can be defined somewhere in windows headers as DWORD and conflict with our metric
#ifdef DEVICE_TYPE
#undef DEVICE_TYPE
#endif
using namespace InferenceEngine;
using namespace InferenceEngine::gpu;
using namespace InferenceEngine::details;
@@ -814,6 +820,42 @@ auto StringRightTrim = [](std::string string, std::string substring, bool case_s
return ret_str;
};
static float GetGOPS(cldnn::device_info info, cldnn::data_types dt) {
auto freqGHz = info.core_frequency / 1000.f;
auto numEUs = info.cores_count;
auto opsPerComputeBlock = 0;
auto computeBlockIPC = 1.0f;
switch (dt) {
case cldnn::data_types::u8:
case cldnn::data_types::i8: {
if (info.supports_imad) {
// fma * simd size
opsPerComputeBlock = 2 * 32;
} else {
// separate mul + add instructions for int8 data type
opsPerComputeBlock = 2 * 16;
// mul/add instructions can't be executed in parallel, so we need 2 clocks to execute compute block
computeBlockIPC = 0.5f;
}
break;
}
case cldnn::data_types::f16: {
// fma * simd size
opsPerComputeBlock = 2 * 16;
break;
}
case cldnn::data_types::f32: {
// fma * simd size
opsPerComputeBlock = 2 * 8;
break;
}
default: throw std::runtime_error("GetGOPS: Unsupported precision");
}
return freqGHz * opsPerComputeBlock * computeBlockIPC * numEUs;
}
Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& options) const {
OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::GetMetric");
auto device_id = GetConfig(CONFIG_KEY(DEVICE_ID), {});
@@ -834,12 +876,42 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
metrics.push_back(METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS));
metrics.push_back(METRIC_KEY(RANGE_FOR_STREAMS));
metrics.push_back(METRIC_KEY(DEVICE_TYPE));
metrics.push_back(METRIC_KEY(DEVICE_GOPS));
metrics.push_back(GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE));
metrics.push_back(GPU_METRIC_KEY(UARCH_VERSION));
metrics.push_back(GPU_METRIC_KEY(EXECUTION_UNITS_COUNT));
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
} else if (name == METRIC_KEY(AVAILABLE_DEVICES)) {
std::vector<std::string> availableDevices = { };
for (auto const& dev : device_map)
availableDevices.push_back(dev.first);
IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices);
} else if (name == GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE)) {
IE_SET_METRIC_RETURN(GPU_DEVICE_TOTAL_MEM_SIZE, device_info.max_global_mem_size);
} else if (name == METRIC_KEY(DEVICE_TYPE)) {
auto dev_type = device_info.dev_type == cldnn::device_type::discrete_gpu ? Metrics::DeviceType::discrete : Metrics::DeviceType::integrated;
IE_SET_METRIC_RETURN(DEVICE_TYPE, dev_type);
} else if (name == METRIC_KEY(DEVICE_GOPS)) {
std::map<InferenceEngine::Precision, float> gops;
gops[InferenceEngine::Precision::I8] = GetGOPS(device_info, cldnn::data_types::i8);
gops[InferenceEngine::Precision::U8] = GetGOPS(device_info, cldnn::data_types::u8);
gops[InferenceEngine::Precision::FP16] = GetGOPS(device_info, cldnn::data_types::f16);
gops[InferenceEngine::Precision::FP32] = GetGOPS(device_info, cldnn::data_types::f32);
IE_SET_METRIC_RETURN(DEVICE_GOPS, gops);
} else if (name == GPU_METRIC_KEY(EXECUTION_UNITS_COUNT)) {
IE_SET_METRIC_RETURN(GPU_EXECUTION_UNITS_COUNT, device_info.cores_count);
} else if (name == GPU_METRIC_KEY(UARCH_VERSION)) {
std::stringstream s;
if (device_info.gfx_ver.major == 0 && device_info.gfx_ver.minor == 0 && device_info.gfx_ver.revision == 0) {
s << "unknown";
} else {
s << static_cast<int>(device_info.gfx_ver.major) << "."
<< static_cast<int>(device_info.gfx_ver.minor) << "."
<< static_cast<int>(device_info.gfx_ver.revision);
}
IE_SET_METRIC_RETURN(GPU_UARCH_VERSION, s.str());
} else if (name == METRIC_KEY(FULL_DEVICE_NAME)) {
auto deviceName = StringRightTrim(device_info.dev_name, "NEO", false);
deviceName += std::string(" (") + (device_info.dev_type == cldnn::device_type::discrete_gpu ? "dGPU" : "iGPU") + ")";
@@ -859,6 +931,8 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::st
capabilities.push_back(METRIC_VALUE(FP16));
if (device_info.supports_imad || device_info.supports_immad)
capabilities.push_back(METRIC_VALUE(INT8));
if (device_info.supports_immad)
capabilities.push_back(METRIC_VALUE(GPU_HW_MATMUL));
IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities);
} else if (name == METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)) {

View File

@@ -11,6 +11,8 @@
#endif
#include "gpu/gpu_context_api_ocl.hpp"
#include "cldnn/cldnn_config.hpp"
using namespace BehaviorTestsDefinitions;
namespace {
@@ -56,6 +58,16 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values("GPU")
);
INSTANTIATE_TEST_CASE_P(
nightly_IEClassGetMetricTest, IEClassGetMetricTest_DEVICE_GOPS,
::testing::Values("GPU")
);
INSTANTIATE_TEST_CASE_P(
nightly_IEClassGetMetricTest, IEClassGetMetricTest_DEVICE_TYPE,
::testing::Values("GPU")
);
INSTANTIATE_TEST_CASE_P(
nightly_IEClassGetMetricTest, IEClassGetMetricTest_RANGE_FOR_ASYNC_INFER_REQUESTS,
::testing::Values("GPU")
@@ -81,6 +93,66 @@ INSTANTIATE_TEST_CASE_P(
::testing::Values("GPU")
);
//
// GPU specific metrics
//
using IEClassGetMetricTest_GPU_DEVICE_TOTAL_MEM_SIZE = IEClassBaseTestP;
TEST_P(IEClassGetMetricTest_GPU_DEVICE_TOTAL_MEM_SIZE, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metric;
    // The metric must be retrievable without throwing and be listed in SUPPORTED_METRICS
    ASSERT_NO_THROW(metric = ie.GetMetric(deviceName, GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE)));
    uint64_t totalMemSize = metric;
    std::cout << "GPU device total memory size: " << totalMemSize << std::endl;
    ASSERT_METRIC_SUPPORTED(GPU_METRIC_KEY(DEVICE_TOTAL_MEM_SIZE));
}
INSTANTIATE_TEST_CASE_P(
nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_DEVICE_TOTAL_MEM_SIZE,
::testing::Values("GPU")
);
using IEClassGetMetricTest_GPU_UARCH_VERSION = IEClassBaseTestP;
TEST_P(IEClassGetMetricTest_GPU_UARCH_VERSION, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metric;
    // Query the microarchitecture version string and verify the metric is advertised
    ASSERT_NO_THROW(metric = ie.GetMetric(deviceName, GPU_METRIC_KEY(UARCH_VERSION)));
    std::string uarchVersion = metric;
    std::cout << "GPU device uarch: " << uarchVersion << std::endl;
    ASSERT_METRIC_SUPPORTED(GPU_METRIC_KEY(UARCH_VERSION));
}
INSTANTIATE_TEST_CASE_P(
nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_UARCH_VERSION,
::testing::Values("GPU")
);
using IEClassGetMetricTest_GPU_EXECUTION_UNITS_COUNT = IEClassBaseTestP;
TEST_P(IEClassGetMetricTest_GPU_EXECUTION_UNITS_COUNT, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metric;
    // Query the EU count and verify the metric is advertised by the plugin
    ASSERT_NO_THROW(metric = ie.GetMetric(deviceName, GPU_METRIC_KEY(EXECUTION_UNITS_COUNT)));
    int euCount = metric;
    std::cout << "GPU EUs count: " << euCount << std::endl;
    ASSERT_METRIC_SUPPORTED(GPU_METRIC_KEY(EXECUTION_UNITS_COUNT));
}
INSTANTIATE_TEST_CASE_P(
nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_EXECUTION_UNITS_COUNT,
::testing::Values("GPU")
);
//
// IE Class GetConfig
//

View File

@@ -138,6 +138,8 @@ using IEClassGetMetricTest_SUPPORTED_CONFIG_KEYS = IEClassBaseTestP;
using IEClassGetMetricTest_AVAILABLE_DEVICES = IEClassBaseTestP;
using IEClassGetMetricTest_FULL_DEVICE_NAME = IEClassBaseTestP;
using IEClassGetMetricTest_OPTIMIZATION_CAPABILITIES = IEClassBaseTestP;
using IEClassGetMetricTest_DEVICE_GOPS = IEClassBaseTestP;
using IEClassGetMetricTest_DEVICE_TYPE = IEClassBaseTestP;
using IEClassGetMetricTest_NUMBER_OF_WAITING_INFER_REQUESTS = IEClassBaseTestP;
using IEClassGetMetricTest_NUMBER_OF_EXEC_INFER_REQUESTS = IEClassBaseTestP;
using IEClassGetMetricTest_RANGE_FOR_ASYNC_INFER_REQUESTS = IEClassBaseTestP;
@@ -772,6 +774,35 @@ TEST_P(IEClassGetMetricTest_OPTIMIZATION_CAPABILITIES, GetMetricAndPrintNoThrow)
ASSERT_METRIC_SUPPORTED(METRIC_KEY(OPTIMIZATION_CAPABILITIES));
}
TEST_P(IEClassGetMetricTest_DEVICE_GOPS, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metric;
    // DEVICE_GOPS returns a map of precision -> estimated GOPS; print every entry
    ASSERT_NO_THROW(metric = ie.GetMetric(deviceName, METRIC_KEY(DEVICE_GOPS)));
    std::map<InferenceEngine::Precision, float> gopsPerPrecision = metric;
    std::cout << "Device GOPS: " << std::endl;
    for (const auto& entry : gopsPerPrecision) {
        std::cout << entry.first << ": " << entry.second << std::endl;
    }
    ASSERT_METRIC_SUPPORTED(METRIC_KEY(DEVICE_GOPS));
}
TEST_P(IEClassGetMetricTest_DEVICE_TYPE, GetMetricAndPrintNoThrow) {
    SKIP_IF_CURRENT_TEST_IS_DISABLED()
    Core ie;
    Parameter metric;
    // DEVICE_TYPE returns the DeviceType enum, printable via its stream operator
    ASSERT_NO_THROW(metric = ie.GetMetric(deviceName, METRIC_KEY(DEVICE_TYPE)));
    InferenceEngine::Metrics::DeviceType deviceType = metric;
    std::cout << "Device Type: " << deviceType << std::endl;
    ASSERT_METRIC_SUPPORTED(METRIC_KEY(DEVICE_TYPE));
}
TEST_P(IEClassGetMetricTest_NUMBER_OF_WAITING_INFER_REQUESTS, GetMetricAndPrintNoThrow) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Core ie;
@@ -1483,4 +1514,3 @@ TEST_P(IEClassLoadNetworkAfterCoreRecreateTest, LoadAfterRecreateCoresAndPlugins
});
};
} // namespace BehaviorTestsDefinitions

View File

@@ -23,6 +23,12 @@ enum class device_type {
discrete_gpu = 1
};
/// @brief GPU microarchitecture version in major.minor.revision form
struct gfx_version {
    uint16_t major;    ///< Major version component
    uint8_t minor;     ///< Minor version component
    uint8_t revision;  ///< Revision component
};
/// @brief Information about the device properties and capabilities.
struct device_info {
uint32_t cores_count; ///< Number of available HW cores.
@@ -53,6 +59,13 @@ struct device_info {
std::string driver_version; ///< Version of OpenCL driver
device_type dev_type; ///< Defines type of current GPU device (integrated or discrete)
gfx_version gfx_ver;                ///< Microarchitecture version ({0,0,0} when it can't be queried)
uint32_t device_id;                 ///< Device ID reported by the driver
uint32_t num_slices;                ///< Number of slices (0 when attribute query is unsupported)
uint32_t num_sub_slices_per_slice;  ///< Number of subslices in each slice
uint32_t num_eus_per_sub_slice;     ///< Number of execution units in each subslice
uint32_t num_threads_per_eu;        ///< Number of HW threads per execution unit
};
};
struct device_impl;

View File

@@ -19,6 +19,34 @@ typedef cl_va_api_device_source_intel cl_device_source_intel;
typedef cl_va_api_device_set_intel cl_device_set_intel;
#endif
// cl_intel_device_attribute_query
// Attribute tokens of the cl_intel_device_attribute_query OpenCL extension,
// used below to query GPU topology and feature capabilities.
#define CL_DEVICE_IP_VERSION_INTEL 0x4250
#define CL_DEVICE_ID_INTEL 0x4251
#define CL_DEVICE_NUM_SLICES_INTEL 0x4252
#define CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL 0x4253
#define CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL 0x4254
#define CL_DEVICE_NUM_THREADS_PER_EU_INTEL 0x4255
#define CL_DEVICE_FEATURE_CAPABILITIES_INTEL 0x4256
typedef cl_bitfield cl_device_feature_capabilities_intel;
/* For GPU devices, version 1.0.0: */
#define CL_DEVICE_FEATURE_FLAG_DP4A_INTEL (1 << 0)
// Register a return type for each extension token so that
// cl::Device::getInfo<TOKEN>() yields a correctly typed value.
namespace cl {
namespace detail {
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_IP_VERSION_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_ID_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_SLICES_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_THREADS_PER_EU_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_FEATURE_CAPABILITIES_INTEL, cl_device_feature_capabilities_intel)
}  // namespace detail
}  // namespace cl
#include <memory>
namespace {

View File

@@ -109,6 +109,14 @@ static device_type get_device_type(const cl::Device& device) {
return unified_mem ? device_type::integrated_gpu : device_type::discrete_gpu;
}
// Decodes a packed version value (as returned by CL_DEVICE_IP_VERSION_INTEL):
// bits 31..16 - major, bits 15..8 - minor, bits 7..0 - revision.
gfx_version parse_version(cl_uint ver) {
    gfx_version result = {};
    result.major = static_cast<uint16_t>(ver >> 16);
    result.minor = static_cast<uint8_t>((ver >> 8) & 0xFF);
    result.revision = static_cast<uint8_t>(ver & 0xFF);
    return result;
}
static bool get_imad_support(const cl::Device& device) {
std::string dev_name = device.getInfo<CL_DEVICE_NAME>();
@@ -222,6 +230,27 @@ device_info_internal::device_info_internal(const cl::Device& device) {
supports_optimization_hints = false;
supports_local_block_io = extensions.find("cl_intel_subgroup_local_block_io") != std::string::npos &&
is_local_block_io_supported(device);
// cl_intel_device_attribute_query exposes detailed HW topology; when it is absent
// the topology fields are zeroed and the device id falls back to the driver-reported one.
bool has_device_attr_query = extensions.find("cl_intel_device_attribute_query") != std::string::npos;
if (!has_device_attr_query) {
    gfx_ver = {0, 0, 0};
    device_id = driver_dev_id();
    num_slices = 0;
    num_sub_slices_per_slice = 0;
    num_eus_per_sub_slice = 0;
    num_threads_per_eu = 0;
} else {
    gfx_ver = parse_version(device.getInfo<CL_DEVICE_IP_VERSION_INTEL>());
    device_id = device.getInfo<CL_DEVICE_ID_INTEL>();
    num_slices = device.getInfo<CL_DEVICE_NUM_SLICES_INTEL>();
    num_sub_slices_per_slice = device.getInfo<CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL>();
    num_eus_per_sub_slice = device.getInfo<CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL>();
    num_threads_per_eu = device.getInfo<CL_DEVICE_NUM_THREADS_PER_EU_INTEL>();
    // DP4A capability implies IMAD support even when the name-based heuristic missed it
    auto feature_caps = device.getInfo<CL_DEVICE_FEATURE_CAPABILITIES_INTEL>();
    supports_imad = supports_imad || (feature_caps & CL_DEVICE_FEATURE_FLAG_DP4A_INTEL);
}
}
} // namespace gpu
} // namespace cldnn

View File

@@ -43,7 +43,13 @@ struct device_info_internal : cldnn::device_info {
supports_usm,
dev_name,
driver_version,
dev_type
dev_type,
gfx_ver,
device_id,
num_slices,
num_sub_slices_per_slice,
num_eus_per_sub_slice,
num_threads_per_eu,
};
}
};