Fix GPU stress tests under the proxy (#18700)
* Add debug messages * Add more debug messages * Add more messages * Disable proxy plugin * Enable proxy and suppress NVIDIA * Remove disabling NVIDIA * Update hidden device only if proxy settings were changed * Use static unavailable device to load unavailable device only one time for the process * Removed debug messages and add global mutexes for unavailable plugins
This commit is contained in:
parent
1fcdc90989
commit
c4f8afb741
@ -5,6 +5,7 @@
|
||||
#include "openvino/proxy/plugin.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "compiled_model.hpp"
|
||||
@ -22,6 +23,26 @@
|
||||
|
||||
namespace {
|
||||
|
||||
bool compare_containers(const std::vector<std::string>& c1, const std::vector<std::string> c2) {
|
||||
if (c1.size() != c2.size())
|
||||
return false;
|
||||
for (size_t i = 0; i < c1.size(); i++) {
|
||||
if (c1.at(i) != c2.at(i))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool compare_containers(const std::unordered_set<std::string>& c1, const std::unordered_set<std::string> c2) {
|
||||
if (c1.size() != c2.size())
|
||||
return false;
|
||||
for (const auto& val : c1) {
|
||||
if (c2.find(val) == c2.end())
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t string_to_size_t(const std::string& s) {
|
||||
std::stringstream sstream(s);
|
||||
size_t idx;
|
||||
@ -152,19 +173,21 @@ void ov::proxy::Plugin::set_property(const ov::AnyMap& properties) {
|
||||
it = hw_config.find(ov::proxy::alias_for.name());
|
||||
bool fill_order = hw_config.find(ov::proxy::device_priorities.name()) == hw_config.end() && m_device_order.empty();
|
||||
if (it != hw_config.end()) {
|
||||
std::unordered_set<std::string> new_alias;
|
||||
for (auto&& dev : it->second.as<std::vector<std::string>>()) {
|
||||
proxy_config_was_changed = true;
|
||||
m_alias_for.emplace(dev);
|
||||
new_alias.emplace(dev);
|
||||
if (fill_order)
|
||||
m_device_order.emplace_back(dev);
|
||||
}
|
||||
if (!compare_containers(m_alias_for, new_alias)) {
|
||||
proxy_config_was_changed = true;
|
||||
m_alias_for = new_alias;
|
||||
}
|
||||
}
|
||||
|
||||
// Restore device order
|
||||
it = hw_config.find(ov::proxy::device_priorities.name());
|
||||
if (it != hw_config.end()) {
|
||||
proxy_config_was_changed = true;
|
||||
m_device_order.clear();
|
||||
std::vector<std::pair<std::string, size_t>> priority_order;
|
||||
// Biggest number means minimum priority
|
||||
size_t min_priority(0);
|
||||
@ -192,18 +215,24 @@ void ov::proxy::Plugin::set_property(const ov::AnyMap& properties) {
|
||||
[](const std::pair<std::string, size_t>& v1, const std::pair<std::string, size_t>& v2) {
|
||||
return v1.second < v2.second;
|
||||
});
|
||||
m_device_order.reserve(priority_order.size());
|
||||
std::vector<std::string> new_device_order;
|
||||
new_device_order.reserve(priority_order.size());
|
||||
for (const auto& dev : priority_order) {
|
||||
m_device_order.emplace_back(dev.first);
|
||||
new_device_order.emplace_back(dev.first);
|
||||
}
|
||||
// Align sizes of device order with alias
|
||||
if (m_device_order.size() < m_alias_for.size()) {
|
||||
if (new_device_order.size() < m_alias_for.size()) {
|
||||
for (const auto& dev : m_alias_for) {
|
||||
if (std::find(std::begin(m_device_order), std::end(m_device_order), dev) == std::end(m_device_order)) {
|
||||
m_device_order.emplace_back(dev);
|
||||
if (std::find(std::begin(new_device_order), std::end(new_device_order), dev) ==
|
||||
std::end(new_device_order)) {
|
||||
new_device_order.emplace_back(dev);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!compare_containers(m_device_order, new_device_order)) {
|
||||
m_device_order = new_device_order;
|
||||
proxy_config_was_changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
@ -211,12 +240,19 @@ void ov::proxy::Plugin::set_property(const ov::AnyMap& properties) {
|
||||
std::lock_guard<std::mutex> lock(m_plugin_mutex);
|
||||
it = hw_config.find(ov::device::priorities.name());
|
||||
if (it != hw_config.end()) {
|
||||
proxy_config_was_changed = true;
|
||||
m_configs[config_name][ov::device::priorities.name()] = it->second;
|
||||
if (m_configs[config_name].find(ov::device::priorities.name()) == m_configs[config_name].end() ||
|
||||
!compare_containers(
|
||||
m_configs[config_name][ov::device::priorities.name()].as<std::vector<std::string>>(),
|
||||
it->second.as<std::vector<std::string>>())) {
|
||||
proxy_config_was_changed = true;
|
||||
m_configs[config_name][ov::device::priorities.name()] = it->second;
|
||||
}
|
||||
// Main device is needed in case if we don't have alias and would like to be able change fallback order per
|
||||
// device
|
||||
if (m_alias_for.empty() && config_name.empty())
|
||||
if (m_alias_for.empty() && config_name.empty()) {
|
||||
proxy_config_was_changed = true;
|
||||
m_alias_for.insert(it->second.as<std::vector<std::string>>()[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (proxy_config_was_changed) {
|
||||
@ -535,15 +571,25 @@ std::vector<std::vector<std::string>> ov::proxy::Plugin::get_hidden_devices() co
|
||||
// 2. Use individual fallback priorities to fill each list
|
||||
std::vector<DeviceID_t> all_highlevel_devices;
|
||||
std::set<std::array<uint8_t, ov::device::UUID::MAX_UUID_SIZE>> unique_devices;
|
||||
std::unordered_set<std::string> unavailable_devices;
|
||||
// Static unavailable device in order to avoid loading from different ov::Core the same unavailable plugin
|
||||
static std::unordered_set<std::string> unavailable_devices;
|
||||
static std::unordered_map<std::string, std::mutex> unavailable_plugin_mutex;
|
||||
for (const auto& device : m_device_order) {
|
||||
std::vector<std::string> supported_device_ids;
|
||||
try {
|
||||
supported_device_ids = core->get_property(device, ov::available_devices);
|
||||
} catch (const std::runtime_error&) {
|
||||
unavailable_devices.emplace(device);
|
||||
// Device cannot be loaded
|
||||
// Avoid loading unavailable device for several times
|
||||
if (unavailable_devices.count(device))
|
||||
continue;
|
||||
std::vector<std::string> supported_device_ids;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(unavailable_plugin_mutex[device]);
|
||||
if (unavailable_devices.count(device))
|
||||
continue;
|
||||
try {
|
||||
supported_device_ids = core->get_property(device, ov::available_devices);
|
||||
} catch (const std::runtime_error&) {
|
||||
unavailable_devices.emplace(device);
|
||||
// Device cannot be loaded
|
||||
continue;
|
||||
}
|
||||
}
|
||||
for (const auto& device_id : supported_device_ids) {
|
||||
const std::string full_device_name = device_id.empty() ? device : device + '.' + device_id;
|
||||
|
Loading…
Reference in New Issue
Block a user