WIP: moving the auto-batching to the icore to let MULTI/AUTO support that; ALLOW_AUTO_BATCHING as a conventional config key. Still fails hot device swap.
parent ac21d71321
commit 776624ff2a
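For context, the new key is meant to be set like any other load-time config option. A minimal usage sketch, assuming the conventional stringized key/value forms; "model.xml" is a placeholder path, not part of this commit:

    #include <ie_core.hpp>
    #include <map>
    #include <string>

    int main() {
        InferenceEngine::Core core;
        auto network = core.ReadNetwork("model.xml");  // placeholder model path
        // The conventional key this commit introduces; YES lets the core wrap
        // the target device with the BATCH plugin when a batch size > 1 pays off.
        std::map<std::string, std::string> config = {{"ALLOW_AUTO_BATCHING", "YES"}};
        auto execNet = core.LoadNetwork(network, "GPU", config);
        return 0;
    }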
@@ -244,6 +244,10 @@ DECLARE_CONFIG_VALUE(THROUGHPUT);
  * usually this value comes from the actual use-case (e.g. number of video-cameras, or other sources of inputs)
  */
 DECLARE_CONFIG_KEY(PERFORMANCE_HINT_NUM_REQUESTS);
+/**
+ * @brief (Optional) config key that governs Auto-Batching
+ */
+DECLARE_CONFIG_KEY(ALLOW_AUTO_BATCHING);
 
 /**
  * @brief generic boolean values
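For readers outside the codebase: DECLARE_CONFIG_KEY follows the usual InferenceEngine convention of stringizing the identifier, which is what makes ALLOW_AUTO_BATCHING usable as a plain string map key. A simplified sketch of that convention (the real macros live in the plugin-config headers and also handle declaration/definition details):

    // Simplified stand-ins for the real macros:
    #define DECLARE_CONFIG_KEY(name) static constexpr auto KEY_##name = #name
    #define CONFIG_KEY(name) KEY_##name

    DECLARE_CONFIG_KEY(ALLOW_AUTO_BATCHING);
    // Now CONFIG_KEY(ALLOW_AUTO_BATCHING) == "ALLOW_AUTO_BATCHING"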
@@ -11,6 +11,7 @@
 #include <mutex>
 #include <string>
 #include <vector>
+#include <ie_performance_hints.hpp>
 
 #include "cnn_network_ngraph_impl.hpp"
 #include "compilation_context.hpp"
@@ -41,6 +42,7 @@
 #include "xml_parse_utils.h"
 
+using namespace InferenceEngine::PluginConfigParams;
 using namespace InferenceEngine;
 using namespace std::placeholders;
 
 namespace ov {
@@ -489,11 +491,60 @@ public:
     }
 
     ie::SoExecutableNetworkInternal LoadNetwork(const ie::CNNNetwork& network,
-                                                const std::string& deviceName,
+                                                const std::string& deviceNameOrig,
                                                 const std::map<std::string, std::string>& config) override {
         OV_ITT_SCOPE(FIRST_INFERENCE, ie::itt::domains::IE_LT, "Core::LoadNetwork::CNN");
-        bool forceDisableCache = config.count(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE)) > 0;
-        auto parsed = parseDeviceNameIntoConfig(deviceName, config);
+        std::string deviceName = deviceNameOrig;
+        std::map<std::string, std::string> config_with_batch = config;
+
+        const auto& batch_mode = config_with_batch.find(CONFIG_KEY(ALLOW_AUTO_BATCHING));
+        if (batch_mode != config_with_batch.end() && batch_mode->second == CONFIG_VALUE(YES)) {
+            std::map<std::string, ie::Parameter> options;
+            options["MODEL_ADDRESS"] = &network;
+            auto optimalBatchSize = GetCPPPluginByName(DeviceIDParser(deviceNameOrig).getDeviceName()).
+                    get_metric(METRIC_KEY(OPTIMAL_BATCH), options).as<unsigned int>();
+            const auto& reqs = config.find(KEY_PERFORMANCE_HINT_NUM_REQUESTS);
+            if (reqs != config.end()) {
+                auto r = (uint)PerfHintsConfig::CheckPerformanceHintRequestValue(reqs->second);
+                std::cout << "!!!!!!!!!!!!!!!Detected reqs_limitation: " << r << std::endl;
+                optimalBatchSize = std::min(r, optimalBatchSize);
+            }
+            auto function = network.getFunction();
+            bool bDetectionOutput = false;
+            for (auto&& node : function->get_ops()) {
+                auto isDetectionOutputParent = [](decltype(node)& nd) {
+                    for (size_t n = 0; n < nd->get_input_size(); n++) {
+                        if (!std::strcmp("DetectionOutput", nd->get_input_node_ptr(n)->get_type_info().name))
+                            return true;
+                    }
+                    return false;
+                };
+
+                if (!std::strcmp("DetectionOutput", node->get_type_info().name) ||
+                    (!std::strcmp("Result", node->get_type_info().name) && isDetectionOutputParent(node))) {
+                    node->get_rt_info()["affinity"] = std::make_shared<ngraph::VariantWrapper<std::string>>(deviceNameOrig);
+                    std::cout << "!!! AFF !!! type: " << node->get_type_info().name
+                              << ", name: " << node->get_friendly_name() << std::endl;
+                    bDetectionOutput = true;
+                } else {
+                    node->get_rt_info()["affinity"] = std::make_shared<ngraph::VariantWrapper<std::string>>("BATCH");
+                }
+            }
+            if (optimalBatchSize > 1) {
+                if (bDetectionOutput) {
+                    deviceName = "HETERO:BATCH," + deviceNameOrig;
+                    std::cout << "HETERO code path!!!!" << std::endl;
+                    // config["AUTO_BATCH"] = deviceNameOrig + "(" + std::to_string(optimalBatchSize) + ")";
+                    SetConfigForPlugins({{"AUTO_BATCH", deviceNameOrig + "(" + std::to_string(optimalBatchSize) + ")"}}, "BATCH");
+                } else {
+                    std::string deviceBatch = "BATCH:" + deviceNameOrig + "(" + std::to_string(optimalBatchSize) + ")";
+                    deviceName = deviceBatch;
+                }
+            }
+            config_with_batch.erase(batch_mode);
+        }
+        bool forceDisableCache = config_with_batch.count(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE)) > 0;
+        auto parsed = parseDeviceNameIntoConfig(deviceName, config_with_batch);
         if (forceDisableCache) {
             // remove this config key from parsed as plugins can throw unsupported exception
             parsed._config.erase(CONFIG_KEY_INTERNAL(FORCE_DISABLE_CACHE));
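The net effect of the hunk above is the choice of a virtual device name: a plain BATCH wrapper in the common case, or a HETERO split when a DetectionOutput (or a Result fed by one) pins part of the graph to the original device via rt_info affinities. A condensed, self-contained sketch of that naming step (the helper name is ours, not the diff's; in the hunk it only runs when optimalBatchSize > 1):

    #include <string>

    // "BATCH" is the auto-batching plugin; "HETERO" dispatches ops by the
    // "affinity" tags written into each node's rt_info in the hunk above.
    std::string makeBatchDevice(const std::string& device,
                                unsigned int optimalBatchSize,
                                bool hasDetectionOutput) {
        if (hasDetectionOutput)  // batch size reaches BATCH via SetConfigForPlugins instead
            return "HETERO:BATCH," + device;
        return "BATCH:" + device + "(" + std::to_string(optimalBatchSize) + ")";
    }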
@@ -1147,49 +1198,15 @@ CNNNetwork Core::ReadNetwork(const std::string& model, const Blob::CPtr& weights
 }
 
 ExecutableNetwork Core::LoadNetwork(const CNNNetwork& network,
-                                    const std::string& deviceNameOrig,
+                                    const std::string& deviceName,
                                     const std::map<std::string, std::string>& config) {
-    auto deviceName = deviceNameOrig;
-    if (deviceNameOrig.find("GPU") != std::string::npos) {
-        std::map<std::string, Parameter> options;
-        options["MODEL_ADDRESS"] = &network;
-        auto optimalBatchSize =
-            _impl->GetCPPPluginByName(DeviceIDParser(deviceName).getDeviceName()).
-                get_metric(METRIC_KEY(OPTIMAL_BATCH), options).as<unsigned int>();
-        auto function = network.getFunction();
-        bool bDetectionOutput = false;
-        for (auto&& node : function->get_ops()) {
-            auto isDetectionOutputParent = [](decltype(node)& nd) {
-                for (size_t n = 0; n < nd->get_input_size(); n++) {
-                    if (!std::strcmp("DetectionOutput", nd->get_input_node_ptr(n)->get_type_info().name))
-                        return true;
-                }
-                return false;
-            };
+    std::map<std::string, std::string> config_with_batch = config;
+    // const auto& mode = config.find(KEY_PERFORMANCE_HINT);
+    // if (mode != config.end() && mode->second == CONFIG_VALUE(THROUGHPUT) && deviceName.find("GPU") != std::string::npos)
+    if (deviceName.find("GPU") != std::string::npos)
+        config_with_batch[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(YES);
 
-            if (!std::strcmp("DetectionOutput", node->get_type_info().name) ||
-                (!std::strcmp("Result", node->get_type_info().name) && isDetectionOutputParent(node))) {
-                node->get_rt_info()["affinity"] = std::make_shared<ngraph::VariantWrapper<std::string>>(deviceNameOrig);
-                std::cout << "!!! AFF !!! type: " << node->get_type_info().name
-                          << ", name: " << node->get_friendly_name() << std::endl;
-                bDetectionOutput = true;
-            } else {
-                node->get_rt_info()["affinity"] = std::make_shared<ngraph::VariantWrapper<std::string>>("BATCH");
-            }
-        }
-        if (optimalBatchSize > 1) {
-            if (bDetectionOutput) {
-                deviceName = "HETERO:BATCH," + deviceNameOrig;
-                std::cout << "HETERO code path!!!!" << std::endl;
-                // config["AUTO_BATCH"] = deviceNameOrig + "(" + std::to_string(optimalBatchSize) + ")";
-                SetConfig({{"AUTO_BATCH", deviceNameOrig + "(" + std::to_string(optimalBatchSize) + ")"}}, "BATCH");
-            } else {
-                std::string deviceBatch = "BATCH:" + deviceNameOrig + "(" + std::to_string(optimalBatchSize) + ")";
-                deviceName = deviceBatch;
-            }
-        }
-    }
-    auto exec = _impl->LoadNetwork(network, deviceName, config);
+    auto exec = _impl->LoadNetwork(network, deviceName, config_with_batch);
     return {exec._so, exec._ptr};
 }
 
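With the decision moved into the ICore, the public Core::LoadNetwork shrinks to key injection plus delegation. A condensed view of what the wrapper now contributes, written as a standalone helper (the function name is ours, not the diff's):

    #include <map>
    #include <string>

    // GPU is the only target opted in by default in this WIP commit; every
    // other caller passes the key explicitly, which is what lets MULTI/AUTO
    // reach the same batching path inside the ICore.
    std::map<std::string, std::string> withAutoBatching(const std::string& deviceName,
                                                        std::map<std::string, std::string> config) {
        if (deviceName.find("GPU") != std::string::npos)
            config["ALLOW_AUTO_BATCHING"] = "YES";
        return config;  // forwarded to the ICore's LoadNetwork
    }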
@@ -200,11 +200,15 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
         _workerRequests[p.deviceName];
         _inferPipelineTasksDeviceSpecific[p.deviceName] = NULL;
         const auto device = p.deviceName;
-        const auto deviceConfig = p.config;
+        auto deviceConfig = p.config;
+        if (device == "GPU") {
+            deviceConfig[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(YES);
+        }
         // will not wait for loading accelerator network,
         // so some parameters need to be transferred by value.
         _executor->run([&, modelPath, network, device, deviceConfig]() {
-            SoExecutableNetworkInternal executableNetwork;
+            std::cout << "DEVICE in AUTO:" << device << std::endl;
+            SoExecutableNetworkInternal executableNetwork;
             if (!modelPath.empty()) {
                 executableNetwork = _core->LoadNetwork(modelPath, device, deviceConfig);
             } else {
@@ -212,6 +216,7 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
             }
 
             GenerateWorkers(device, executableNetwork);
+            std::cout << "DEVICE in AUTO:" << device << " ENDED" << std::endl;
 
             if (device.find("CPU") == std::string::npos) {
                 _alreadyActualNetwork = true;
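The MULTI/AUTO side applies the same convention per device, and the hunk's comment about transferring parameters by value matters because the load runs on an executor thread that can outlive the loop iteration that spawned it. A sketch of that capture pattern with a plain std::thread standing in for the executor (names simplified, not the plugin's actual members):

    #include <iostream>
    #include <map>
    #include <string>
    #include <thread>

    void loadAsync(std::string device, std::map<std::string, std::string> deviceConfig) {
        if (device == "GPU")
            deviceConfig["ALLOW_AUTO_BATCHING"] = "YES";
        // Capture by value: the worker may run after the caller's locals are gone.
        std::thread worker([device, deviceConfig]() {
            std::cout << "loading on " << device << " with "
                      << deviceConfig.size() << " config entries" << std::endl;
        });
        worker.join();
    }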
@@ -55,6 +55,7 @@ namespace {
 }
 std::vector<std::string> supported_configKeys = []() -> decltype(PerfHintsConfig::SupportedKeys()) {
     auto res = PerfHintsConfig::SupportedKeys();
     res.push_back(PluginConfigParams::KEY_PERF_COUNT);
     res.push_back(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
     res.push_back(CONFIG_KEY_INTERNAL(MULTI_WORK_MODE_AS_AUTO));
     return res;
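Since the plugin assembles its supported keys on top of PerfHintsConfig::SupportedKeys(), an application can enumerate whatever ends up registered; the hunk header promises one added line that the truncated page does not show. A query sketch assuming the standard SUPPORTED_CONFIG_KEYS metric that IE plugins conventionally expose:

    #include <ie_core.hpp>
    #include <ie_plugin_config.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        InferenceEngine::Core core;
        auto keys = core.GetMetric("MULTI", METRIC_KEY(SUPPORTED_CONFIG_KEYS))
                        .as<std::vector<std::string>>();
        for (const auto& k : keys)
            std::cout << k << std::endl;  // prints PERF_COUNT, device priorities, etc.
        return 0;
    }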