Fix GPU stress tests under the proxy (#18700)

* Add debug messages

* Add more debug messages

* Add more messages

* Disable proxy plugin

* Enable proxy and suppress NVIDIA

* Remove disabling NVIDIA

* Update hidden device only if proxy settings were changed

* Use a static set of unavailable devices so that each unavailable device
is loaded only once per process

* Remove debug messages and add global mutexes for unavailable plugins
This commit is contained in:
Ilya Churaev 2023-07-28 09:56:39 +04:00 committed by GitHub
parent 1fcdc90989
commit c4f8afb741
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -5,6 +5,7 @@
#include "openvino/proxy/plugin.hpp"
#include <memory>
#include <mutex>
#include <stdexcept>
#include "compiled_model.hpp"
@ -22,6 +23,26 @@
namespace {
// Checks whether two string vectors hold equal elements in the same order.
//
// c1, c2 - vectors to compare; both are taken by const reference so that a call does not copy either of them.
// Returns true when the sizes match and the elements at every index are equal, false otherwise.
bool compare_containers(const std::vector<std::string>& c1, const std::vector<std::string>& c2) {
    // std::vector::operator== checks the sizes first and then compares the elements pairwise
    return c1 == c2;
}
// Checks whether two string sets contain exactly the same elements (iteration order is irrelevant).
//
// c1, c2 - sets to compare; both are taken by const reference so that a call does not copy and re-hash either of them.
// Returns true when the sizes match and every element of c1 is present in c2, false otherwise.
bool compare_containers(const std::unordered_set<std::string>& c1, const std::unordered_set<std::string>& c2) {
    // std::unordered_set::operator== checks the sizes and then looks every element of one set up in the other
    return c1 == c2;
}
size_t string_to_size_t(const std::string& s) {
std::stringstream sstream(s);
size_t idx;
@ -152,19 +173,21 @@ void ov::proxy::Plugin::set_property(const ov::AnyMap& properties) {
it = hw_config.find(ov::proxy::alias_for.name());
bool fill_order = hw_config.find(ov::proxy::device_priorities.name()) == hw_config.end() && m_device_order.empty();
if (it != hw_config.end()) {
std::unordered_set<std::string> new_alias;
for (auto&& dev : it->second.as<std::vector<std::string>>()) {
proxy_config_was_changed = true;
m_alias_for.emplace(dev);
new_alias.emplace(dev);
if (fill_order)
m_device_order.emplace_back(dev);
}
if (!compare_containers(m_alias_for, new_alias)) {
proxy_config_was_changed = true;
m_alias_for = new_alias;
}
}
// Restore device order
it = hw_config.find(ov::proxy::device_priorities.name());
if (it != hw_config.end()) {
proxy_config_was_changed = true;
m_device_order.clear();
std::vector<std::pair<std::string, size_t>> priority_order;
// Biggest number means minimum priority
size_t min_priority(0);
@ -192,18 +215,24 @@ void ov::proxy::Plugin::set_property(const ov::AnyMap& properties) {
[](const std::pair<std::string, size_t>& v1, const std::pair<std::string, size_t>& v2) {
return v1.second < v2.second;
});
m_device_order.reserve(priority_order.size());
std::vector<std::string> new_device_order;
new_device_order.reserve(priority_order.size());
for (const auto& dev : priority_order) {
m_device_order.emplace_back(dev.first);
new_device_order.emplace_back(dev.first);
}
// Align sizes of device order with alias
if (m_device_order.size() < m_alias_for.size()) {
if (new_device_order.size() < m_alias_for.size()) {
for (const auto& dev : m_alias_for) {
if (std::find(std::begin(m_device_order), std::end(m_device_order), dev) == std::end(m_device_order)) {
m_device_order.emplace_back(dev);
if (std::find(std::begin(new_device_order), std::end(new_device_order), dev) ==
std::end(new_device_order)) {
new_device_order.emplace_back(dev);
}
}
}
if (!compare_containers(m_device_order, new_device_order)) {
m_device_order = new_device_order;
proxy_config_was_changed = true;
}
}
{
@ -211,12 +240,19 @@ void ov::proxy::Plugin::set_property(const ov::AnyMap& properties) {
std::lock_guard<std::mutex> lock(m_plugin_mutex);
it = hw_config.find(ov::device::priorities.name());
if (it != hw_config.end()) {
proxy_config_was_changed = true;
m_configs[config_name][ov::device::priorities.name()] = it->second;
if (m_configs[config_name].find(ov::device::priorities.name()) == m_configs[config_name].end() ||
!compare_containers(
m_configs[config_name][ov::device::priorities.name()].as<std::vector<std::string>>(),
it->second.as<std::vector<std::string>>())) {
proxy_config_was_changed = true;
m_configs[config_name][ov::device::priorities.name()] = it->second;
}
// The main device is needed when we don't have an alias and would like to be able to change the fallback order
// per device
if (m_alias_for.empty() && config_name.empty())
if (m_alias_for.empty() && config_name.empty()) {
proxy_config_was_changed = true;
m_alias_for.insert(it->second.as<std::vector<std::string>>()[0]);
}
}
}
if (proxy_config_was_changed) {
@ -535,15 +571,25 @@ std::vector<std::vector<std::string>> ov::proxy::Plugin::get_hidden_devices() co
// 2. Use individual fallback priorities to fill each list
std::vector<DeviceID_t> all_highlevel_devices;
std::set<std::array<uint8_t, ov::device::UUID::MAX_UUID_SIZE>> unique_devices;
std::unordered_set<std::string> unavailable_devices;
// Static set of unavailable devices, used to avoid loading the same unavailable plugin from different ov::Core objects
static std::unordered_set<std::string> unavailable_devices;
static std::unordered_map<std::string, std::mutex> unavailable_plugin_mutex;
for (const auto& device : m_device_order) {
std::vector<std::string> supported_device_ids;
try {
supported_device_ids = core->get_property(device, ov::available_devices);
} catch (const std::runtime_error&) {
unavailable_devices.emplace(device);
// Device cannot be loaded
// Avoid trying to load an unavailable device several times
if (unavailable_devices.count(device))
continue;
std::vector<std::string> supported_device_ids;
{
std::lock_guard<std::mutex> lock(unavailable_plugin_mutex[device]);
if (unavailable_devices.count(device))
continue;
try {
supported_device_ids = core->get_property(device, ov::available_devices);
} catch (const std::runtime_error&) {
unavailable_devices.emplace(device);
// Device cannot be loaded
continue;
}
}
for (const auto& device_id : supported_device_ids) {
const std::string full_device_name = device_id.empty() ? device : device + '.' + device_id;