enable first_inference_latency (first_inference + time_to_inference) (#14829)

* add time_to_first_inference_result KPI as sum of inference latency values
* append unit of time after the value (tested locally, worked)
* Revert "append unit time after the value" (reverts commit 428b8cafdc)
* remove metric
* revert 42dd271c3b
* enable { SCOPED_TIMER(first_inference_latency); }
* enable first_inference_latency (tested locally, it works)
* remove trailing whitespace

Co-authored-by: Daria Ilina <daria.krupnova@intel.com>
parent e2a1bd78a4
commit 711e2cbab0
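For context, SCOPED_TIMER in this test code is used as an RAII-style helper: a timer starts when the scope is entered and the elapsed time is reported under the given name when the scope exits. The actual macro is not part of this diff, so the sketch below is only a minimal illustration of the idea (the ScopedTimer class and its printed output are hypothetical, not the harness implementation); it shows why placing first_inference_latency in the enclosing scope makes it the sum of time_to_inference and first_inference.

// Minimal sketch only; not the real SCOPED_TIMER from the test harness.
#include <chrono>
#include <cstdio>
#include <string>
#include <thread>
#include <utility>

class ScopedTimer {
public:
    explicit ScopedTimer(std::string name)
        : name_(std::move(name)), start_(std::chrono::steady_clock::now()) {}
    ~ScopedTimer() {
        auto end = std::chrono::steady_clock::now();
        auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start_).count();
        std::printf("%s: %lld ms\n", name_.c_str(), static_cast<long long>(ms));
    }
private:
    std::string name_;
    std::chrono::steady_clock::time_point start_;
};

int main() {
    {
        ScopedTimer total("first_inference_latency");  // enclosing scope covers both phases
        {
            ScopedTimer load("time_to_inference");      // phase 1: plugin + network loading
            std::this_thread::sleep_for(std::chrono::milliseconds(30));  // stand-in for loading work
        }
        {
            ScopedTimer infer("first_inference");       // phase 2: the first inference call
            std::this_thread::sleep_for(std::chrono::milliseconds(10));  // stand-in for inference
        }
    }  // the outer timer reports roughly the sum of the two inner timers
    return 0;
}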
@@ -55,7 +55,6 @@ def aggregate_stats(stats: dict):
                        "stdev": statistics.stdev(duration_list) if len(duration_list) > 1 else 0}
            for step_name, duration_list in stats.items()}


def prepare_executable_cmd(args: dict):
    """Generate common part of cmd from arguments to execute"""
    return [
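The Python hunk above is context around the step-statistics aggregation: each timed step's duration samples are reduced to an average and a sample standard deviation, with the standard deviation defaulting to 0 when there is only one sample. A rough C++ sketch of that same aggregation (helper names and the sample values are illustrative only, not part of the harness) could look like this:

#include <cmath>
#include <cstdio>
#include <vector>

// Average and sample standard deviation, mirroring statistics.mean / statistics.stdev
// with the "stdev is 0 for fewer than two samples" guard used in aggregate_stats.
static double mean(const std::vector<double>& xs) {
    double sum = 0.0;
    for (double x : xs) sum += x;
    return sum / xs.size();
}

static double sample_stdev(const std::vector<double>& xs) {
    if (xs.size() < 2) return 0.0;
    double m = mean(xs), acc = 0.0;
    for (double x : xs) acc += (x - m) * (x - m);
    return std::sqrt(acc / (xs.size() - 1));
}

int main() {
    std::vector<double> first_inference_latency = {185.0, 179.5, 182.3};  // illustrative samples, ms
    std::printf("avg: %.2f ms, stdev: %.2f ms\n",
                mean(first_inference_latency), sample_stdev(first_inference_latency));
    return 0;
}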
@@ -28,50 +28,53 @@ int runPipeline(const std::string &model, const std::string &device, const bool

    // first_inference_latency = time_to_inference + first_inference
    {
        SCOPED_TIMER(first_inference_latency);
        {
            SCOPED_TIMER(time_to_inference);
            {
                SCOPED_TIMER(load_plugin);
                TimeTest::setPerformanceConfig(ie, device);
                ie.GetVersions(device);

                if (isCacheEnabled)
                    ie.SetConfig({{CONFIG_KEY(CACHE_DIR), "models_cache"}});
            }
            {
                SCOPED_TIMER(create_exenetwork);
                if (!isCacheEnabled) {
                    if (TimeTest::fileExt(model) == "blob") {
                        SCOPED_TIMER(import_network);
                        exeNetwork = ie.ImportNetwork(model, device);
                    }
                    else {
                        {
                            SCOPED_TIMER(read_network);
                            cnnNetwork = ie.ReadNetwork(model);
                            batchSize = cnnNetwork.getBatchSize();
                        }
                        {
                            SCOPED_TIMER(load_network);
                            exeNetwork = ie.LoadNetwork(cnnNetwork, device);
                        }
                    }
                }
                else {
                    SCOPED_TIMER(load_network_cache);
                    exeNetwork = ie.LoadNetwork(model, device);
                }
                inferRequest = exeNetwork.CreateInferRequest();
            }
        }
        {
            SCOPED_TIMER(first_inference);
            {
                SCOPED_TIMER(fill_inputs);
                const InferenceEngine::ConstInputsDataMap inputsInfo(exeNetwork.GetInputsInfo());
                batchSize = batchSize != 0 ? batchSize : 1;
                fillBlobs(inferRequest, inputsInfo, batchSize);
            }
            inferRequest.Infer();
        }
    }
};
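The hunk above covers the pipeline built on the legacy InferenceEngine C++ API. Stripped of the harness, the same first_inference_latency measurement can be sketched with plain std::chrono around the two phases; the model path and device below are placeholders, the harness's input-filling step (fillBlobs) is skipped for brevity, and error handling is omitted:

#include <chrono>
#include <cstdio>
#include <string>
#include <inference_engine.hpp>

int main() {
    using clock = std::chrono::steady_clock;
    const std::string model = "model.xml";   // placeholder path
    const std::string device = "CPU";        // placeholder device

    auto t0 = clock::now();
    InferenceEngine::Core ie;
    InferenceEngine::CNNNetwork network = ie.ReadNetwork(model);
    InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, device);
    InferenceEngine::InferRequest request = exeNetwork.CreateInferRequest();
    auto t1 = clock::now();                  // end of the "time_to_inference" phase

    request.Infer();                         // first inference; inputs left at their defaults here
    auto t2 = clock::now();                  // end of the "first_inference" phase

    auto ms = [](auto a, auto b) {
        return std::chrono::duration_cast<std::chrono::milliseconds>(b - a).count();
    };
    std::printf("time_to_inference: %lld ms\n", static_cast<long long>(ms(t0, t1)));
    std::printf("first_inference: %lld ms\n", static_cast<long long>(ms(t1, t2)));
    std::printf("first_inference_latency: %lld ms\n", static_cast<long long>(ms(t0, t2)));
    return 0;
}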
@@ -36,62 +36,65 @@ int runPipeline(const std::string &model, const std::string &device, const bool

    // first_inference_latency = time_to_inference + first_inference
    {
        SCOPED_TIMER(first_inference_latency);
        {
            SCOPED_TIMER(time_to_inference);
            {
                SCOPED_TIMER(load_plugin);
                TimeTest::setPerformanceConfig(ie, device);
                ie.get_versions(device);

                if (isCacheEnabled)
                    ie.set_property({{CONFIG_KEY(CACHE_DIR), "models_cache"}});
            }
            {
                SCOPED_TIMER(create_exenetwork);
                if (!isCacheEnabled) {
                    if (TimeTest::fileExt(model) == "blob") {
                        SCOPED_TIMER(import_network);
                        std::ifstream streamModel{model};
                        exeNetwork = ie.import_model(streamModel, device);
                    }
                    else {
                        {
                            SCOPED_TIMER(read_network);
                            cnnNetwork = ie.read_model(model);
                        }
                        if (reshape) {
                            {
                                SCOPED_TIMER(reshape);
                                defaultInputs = getCopyOfDefaultInputs(cnnNetwork->inputs());
                                cnnNetwork->reshape(reshapeShapes);
                            }
                        }
                        {
                            SCOPED_TIMER(load_network);
                            exeNetwork = ie.compile_model(cnnNetwork, device);
                        }
                    }
                }
                else {
                    SCOPED_TIMER(load_network_cache);
                    exeNetwork = ie.compile_model(model, device);
                }
                inferRequest = exeNetwork.create_infer_request();
            }
        }
        {
            SCOPED_TIMER(first_inference);
            {
                SCOPED_TIMER(fill_inputs);
                std::vector<ov::Output<const ov::Node>> inputs = exeNetwork.inputs();
                if (reshape && dataShapes.empty()) {
                    fillTensors(inferRequest, defaultInputs);
                } else if (reshape && !dataShapes.empty()) {
                    fillTensorsWithSpecifiedShape(inferRequest, inputs, dataShapes);
                } else {
                    fillTensors(inferRequest, inputs);
                }
            }
            inferRequest.infer();
        }
    }
};
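The second hunk applies the same change to the pipeline built on the OpenVINO 2.0 API (ov::Core, read_model, compile_model). A minimal standalone equivalent of the measured phases, again with a placeholder model path and device and with the harness's reshape and input-filling logic left out, might look like:

#include <chrono>
#include <cstdio>
#include <memory>
#include <string>
#include <openvino/openvino.hpp>

int main() {
    using clock = std::chrono::steady_clock;
    const std::string model_path = "model.xml";  // placeholder path
    const std::string device = "CPU";            // placeholder device

    auto t0 = clock::now();
    ov::Core core;
    std::shared_ptr<ov::Model> model = core.read_model(model_path);
    ov::CompiledModel compiled = core.compile_model(model, device);
    ov::InferRequest request = compiled.create_infer_request();
    auto t1 = clock::now();                      // "time_to_inference"

    request.infer();                             // "first_inference"; inputs left at their defaults here
    auto t2 = clock::now();

    auto ms = [](auto a, auto b) {
        return std::chrono::duration_cast<std::chrono::milliseconds>(b - a).count();
    };
    std::printf("time_to_inference: %lld ms\n", static_cast<long long>(ms(t0, t1)));
    std::printf("first_inference: %lld ms\n", static_cast<long long>(ms(t1, t2)));
    std::printf("first_inference_latency: %lld ms\n", static_cast<long long>(ms(t0, t2)));
    return 0;
}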