[IE Myriad/35383] vpu_profile is removed (#1393)
parent 093a02fcef
commit 5b9a5a6293
@@ -13,10 +13,6 @@
# limitations under the License.

if(ENABLE_MYRIAD)
    add_subdirectory(vpu_perfcheck)
    add_subdirectory(vpu_profile)
endif()

if (ENABLE_MYRIAD)
    add_subdirectory(vpu_compile)
    add_subdirectory(vpu_perfcheck)
endif()

@@ -1,46 +0,0 @@
#
# Copyright (C) 2018-2020 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set(TARGET_NAME vpu_profile)

file(GLOB SOURCES
    ${IE_MAIN_SOURCE_DIR}/tools/vpu/common/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
)

add_executable(${TARGET_NAME} ${SOURCES})

if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    target_compile_options(${TARGET_NAME} PRIVATE
        "-Wall"
    )
endif()

target_include_directories(${TARGET_NAME} SYSTEM PRIVATE
    "${IE_MAIN_SOURCE_DIR}/samples/common"
    "${IE_MAIN_SOURCE_DIR}/samples/common/format_reader"
    "${IE_MAIN_SOURCE_DIR}/tools/vpu/common"
)

target_link_libraries(${TARGET_NAME} PRIVATE
    inference_engine format_reader vpu_graph_transformer
    gflags
)

set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}
                                                FOLDER tools)

add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})

@@ -1,104 +0,0 @@
# vpu_profile tool {#openvino_inference_engine_tools_vpu_vpu_profile_README}

This topic demonstrates how to run the `vpu_profile` tool application, which is intended to collect per-layer or per-stage
performance statistics for the VPU plugins of the Inference Engine, controlled by configuration options.

## How It Works

Upon start-up, the tool reads the command line parameters, loads the network into the Inference Engine plugin,
performs inference, and prints the performance statistics, based on the performance counts provided by the plugin, to standard output.
Only bitmap (.bmp) images are supported as inputs; convert your inputs from other formats if they are not bitmaps.

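For reference, the following minimal sketch (not part of the tool itself) shows roughly what `vpu_profile` does with the Inference Engine API, based on its `main.cpp` removed in this commit: load the model, pass a profiling key to `LoadNetwork`, run inference, and read back the performance counters. The model path is a placeholder, and the raw key/value strings are the ones shown in the Configuration file section below.

```cpp
#include <iostream>
#include <map>
#include <string>

#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;

    // Placeholder path to an IR produced by the Model Optimizer.
    auto network = ie.ReadNetwork("model.xml");

    // Ask the MYRIAD plugin for per-stage reporting; the same key/value pair
    // can also come from the tool's -config file.
    std::map<std::string, std::string> config = {
        {"VPU_PERF_REPORT_MODE", "VPU_PER_STAGE"}
    };
    auto executableNetwork = ie.LoadNetwork(network, "MYRIAD", config);

    // vpu_profile fills the input blobs from .bmp/.bin files in -inputs_dir;
    // here the inputs are left as allocated by the plugin for brevity.
    auto request = executableNetwork.CreateInferRequest();
    request.Infer();

    // Per-stage (or per-layer) counters, which the tool prints as its report.
    for (const auto &counter : request.GetPerformanceCounts()) {
        std::cout << counter.first << ": "
                  << counter.second.realTime_uSec / 1000.0 << " ms" << std::endl;
    }
    return 0;
}
```
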
## Running

Running the application with the "-h" option yields the following usage message:

```sh
$./vpu_profile -h
Inference Engine:
        API version ............ <version>
        Build .................. <build>
vpu_profile [OPTIONS]
[OPTIONS]:
        -h                          Print a help(this) message.
        -model           <value>    Path to xml model.
        -inputs_dir      <value>    Path to folder with images, only bitmap(.bmp) supported. Default: ".".
        -config          <value>    Path to the configuration file. Default value: "config".
        -iterations      <value>    Specifies number of iterations. Default value: 16.
        -plugin          <value>    Specifies plugin. Supported values: myriad, hddl.
                                    Default value: "myriad".
        -report          <value>    Specifies report type. Supported values: per_layer, per_stage.
                                    Overrides value in configuration file if provided. Default value: "per_layer"
```

Running the application with an empty list of options yields an error message.

You can use the following command to simply execute the network:

```sh
$./vpu_profile -model <path_to_model>/model_name.xml
```
> **NOTE**: Models should first be converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](https://software.intel.com/en-us/articles/OpenVINO-ModelOptimizer).

## Plugin Option

You have to select between the Myriad and HDDL plugins manually; by default vpu_profile tries to use the myriad plugin.
If you need to run on HDDL, set it explicitly:

```sh
$./vpu_profile -model <path_to_model>/model_name.xml -plugin hddl
```

## Iterations Option

Sets the number of infer requests to be executed. It affects the overall inference time; performance counts are reported for the last iteration.

```sh
$./vpu_profile -model <path_to_model>/model_name.xml -iterations 30
```

## Configuration file

Sets configuration keys for the plugin. The file contents are converted to a config map that is used for the LoadNetwork call.
File format: each line represents a config key and its value, like
"<CONFIG_KEY> <CONFIG_VALUE>"
The example below shows how to set performance reporting:

```sh
$echo "VPU_PERF_REPORT_MODE VPU_PER_STAGE" > config.txt
```
```sh
$./vpu_profile -model <path_to_model>/model_name.xml -config ./config.txt
```

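The parsing itself is done by the shared `parseConfig()` helper used by `main.cpp` below, which is not part of this diff; as an illustration only, a self-contained sketch of reading such a file into the `std::map<std::string, std::string>` that is later passed to `LoadNetwork` could look like this:

```cpp
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>

// Read "<CONFIG_KEY> <CONFIG_VALUE>" pairs, one per line, into a config map.
std::map<std::string, std::string> readConfig(const std::string &fileName) {
    std::map<std::string, std::string> config;
    std::ifstream file(fileName);
    std::string line;
    while (std::getline(file, line)) {
        std::istringstream parser(line);
        std::string key, value;
        if (parser >> key >> value) {
            config[key] = value;  // e.g. config["VPU_PERF_REPORT_MODE"] = "VPU_PER_STAGE"
        }
    }
    return config;
}

int main() {
    // With the config.txt from the example above, this prints the single key/value pair.
    for (const auto &entry : readConfig("config.txt")) {
        std::cout << entry.first << " = " << entry.second << std::endl;
    }
    return 0;
}
```
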
## Report Option

By default, performance counts are reported per layer in the Inference Engine.
vpu_profile sets "per_layer" profiling by default, which means the performance report is provided for each layer.
To switch the report you can use the configuration file or the report option; when changed to "per_stage",
statistics are provided with finer granularity, for each executed stage.
If the value is specified incorrectly, the tool switches back to the default mode.

```sh
$./vpu_profile -model <path_to_model>/model_name.xml -report "per_stage"
```

Performance counts are provided only for executed stages/layers, in the following format:
```sh
Index Name Type Time (ms)
<stage index> <stage name> <stage exec_type> <time in milliseconds>
```
Where
* stage index - corresponds to the execution order of a stage; in case of per_layer output this corresponds to the order of the layer's first stage
* stage name - corresponds to the name of a stage, or of a layer in case of per_layer output,
* stage exec_type - corresponds to the stage execution type, e.g. MyriadXHwOp means that the stage was executed on a HW fixed function, otherwise on SHAVEs.
In case of per_layer output exec_type may be inaccurate: if the first stage of a layer is performed on SHAVEs, the whole layer won't be shown as a HW operation
* time in milliseconds - the time in milliseconds that the corresponding stage or layer took to execute.

At the end of the report the total accumulated execution time is printed. `Important`: this timing doesn't represent throughput, but the latency of execution on the device.
Throughput depends on the asynchronous operation depth and device capacity; for example, by default MyriadX usually runs 2 infer requests asynchronously,
which are executed in parallel on the device, so the number below can be close to 1000/(total_time*2).
```sh
Total inference time: <total_time>
```

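The tool's own report printing goes through the `printPerformanceCounts()` helper from `tools/vpu/common`, which is not part of this diff. As an illustration of the columns described above, one possible way to produce the same Index / Name / Type / Time (ms) layout from the map returned by `InferRequest::GetPerformanceCounts()` is sketched below; the field names come from `InferenceEngine::InferenceEngineProfileInfo`.

```cpp
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include <inference_engine.hpp>

// Print counters in the Index / Name / Type / Time (ms) layout described above,
// ordered by execution index and skipping stages that were not executed.
void printReport(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &performance) {
    using Entry = std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>;
    std::vector<Entry> entries(performance.begin(), performance.end());
    std::sort(entries.begin(), entries.end(), [](const Entry &a, const Entry &b) {
        return a.second.execution_index < b.second.execution_index;
    });

    std::cout << std::left << std::setw(8) << "Index" << std::setw(40) << "Name"
              << std::setw(20) << "Type" << "Time (ms)" << std::endl;
    for (const auto &entry : entries) {
        if (entry.second.status != InferenceEngine::InferenceEngineProfileInfo::EXECUTED) {
            continue;
        }
        std::cout << std::left << std::setw(8) << entry.second.execution_index
                  << std::setw(40) << entry.first
                  << std::setw(20) << entry.second.exec_type
                  << entry.second.realTime_uSec / 1000.0 << std::endl;
    }
}
```

Such a helper would be called with the counters collected in `main.cpp` below, e.g. `printReport(request.GetPerformanceCounts());`.
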
@@ -1,256 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <memory>
#include <map>
#include <cmath>
#include <future>
#include <atomic>
#include <algorithm>
#include <string>
#include <vector>
#include <unordered_map>

#include <gflags/gflags.h>

#include "inference_engine.hpp"
#include "precision_utils.h"

#include "vpu_tools_common.hpp"
#include "vpu/vpu_plugin_config.hpp"
#include "vpu/private_plugin_config.hpp"
#include "samples/common.hpp"

static constexpr char help_message[] = "Print a help(this) message.";
static constexpr char model_message[] = "Path to xml model.";
static constexpr char inputs_dir_message[] = "Path to folder with images, only bitmap(.bmp) supported. Default: \".\".";
static constexpr char config_message[] = "Path to the configuration file. Default value: \"config\".";
static constexpr char iterations_message[] = "Specifies number of iterations. Default value: 16.";
static constexpr char plugin_message[] = "Specifies plugin. Supported values: myriad, hddl.\n"
                                         "\t \t \tDefault value: \"myriad\".";
static constexpr char report_message[] = "Specifies report type. Supported values: per_layer, per_stage.\n"
                                         "\t \t \tOverrides value in configuration file if provided. Default value: \"per_stage\"";

DEFINE_bool(h, false, help_message);
DEFINE_string(model, "", model_message);
DEFINE_string(inputs_dir, ".", inputs_dir_message);
DEFINE_string(config, "", config_message);
DEFINE_int32(iterations, 16, iterations_message);
DEFINE_string(plugin, "myriad", plugin_message);
DEFINE_string(report, "", report_message);

static void showUsage() {
    std::cout << std::endl;
    std::cout << "vpu_profile [OPTIONS]" << std::endl;
    std::cout << "[OPTIONS]:" << std::endl;
    std::cout << "\t-h \t \t" << help_message << std::endl;
    std::cout << "\t-model \t <value> \t" << model_message << std::endl;
    std::cout << "\t-inputs_dir \t <value> \t" << inputs_dir_message << std::endl;
    std::cout << "\t-config \t <value> \t" << config_message << std::endl;
    std::cout << "\t-iterations \t <value> \t" << iterations_message << std::endl;
    std::cout << "\t-plugin \t <value> \t" << plugin_message << std::endl;
    std::cout << "\t-report \t <value> \t" << report_message << std::endl;
    std::cout << std::endl;
}

static bool parseCommandLine(int *argc, char ***argv) {
    gflags::ParseCommandLineNonHelpFlags(argc, argv, true);

    if (FLAGS_h) {
        showUsage();
        return false;
    }

    if (FLAGS_model.empty()) {
        throw std::invalid_argument("Path to model xml file is required");
    }

    if (1 < *argc) {
        std::stringstream message;
        message << "Unknown arguments: ";
        for (auto arg = 1; arg < *argc; arg++) {
            message << (*argv)[arg];
            if (arg + 1 < *argc) {
                message << " ";
            }
        }
        throw std::invalid_argument(message.str());
    }

    return true;
}

static std::map<std::string, std::string> configure(const std::string& confFileName, const std::string& report) {
    auto config = parseConfig(confFileName);

    /* Since user can specify config file we probably can avoid it */
    config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING);
    config[VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME)] = CONFIG_VALUE(YES);

    /* Default is PER_LAYER */
    if (report == "per_layer") {
        config[VPU_CONFIG_KEY(PERF_REPORT_MODE)] = VPU_CONFIG_VALUE(PER_LAYER);
    } else if (report == "per_stage") {
        config[VPU_CONFIG_KEY(PERF_REPORT_MODE)] = VPU_CONFIG_VALUE(PER_STAGE);
    } else if (config.find(VPU_CONFIG_KEY(PERF_REPORT_MODE)) == config.end()) {
        config[VPU_CONFIG_KEY(PERF_REPORT_MODE)] = VPU_CONFIG_VALUE(PER_LAYER);
    }
    return config;
}

template<typename T>
static bool isImage(const T& blob) {
    auto descriptor = blob->getTensorDesc();
    if (descriptor.getLayout() != InferenceEngine::NCHW) {
        return false;
    }

    auto channels = descriptor.getDims()[1];
    return channels == 3;
}

static void loadInputs(std::size_t requestIdx, const std::vector<std::string>& images,
                       const std::vector<std::string>& binaries, InferenceEngine::InferRequest& request,
                       InferenceEngine::CNNNetwork& network) {
    for (auto &&input : network.getInputsInfo()) {
        auto blob = request.GetBlob(input.first);

        if (isImage(blob)) {
            loadImage(images[requestIdx % images.size()], blob);
        } else {
            loadBinaryTensor(binaries[requestIdx % binaries.size()], blob);
        }
    }
}

static std::string process_user_input(const std::string &src) {
    std::string name = src;
    std::transform(name.begin(), name.end(), name.begin(), ::toupper);
    name.erase(std::remove_if(name.begin(), name.end(), ::isspace), name.end());

    return name;
}

static std::size_t getNumberRequests(const std::string &plugin) {
    static const std::unordered_map<std::string, std::size_t> supported_plugins = {
        { "MYRIAD", 4 }
    };

    auto num_requests = supported_plugins.find(plugin);
    if (num_requests == supported_plugins.end()) {
        throw std::invalid_argument("Unknown plugin " + plugin);
    }

    return num_requests->second;
}

int main(int argc, char* argv[]) {
    try {
        std::cout << "Inference Engine: " << InferenceEngine::GetInferenceEngineVersion() << std::endl;

        if (!parseCommandLine(&argc, &argv)) {
            return EXIT_SUCCESS;
        }

        auto network = readNetwork(FLAGS_model);
        setPrecisions(network);

        auto user_plugin = process_user_input(FLAGS_plugin);

        InferenceEngine::Core ie;
        auto executableNetwork = ie.LoadNetwork(network, user_plugin, configure(FLAGS_config, FLAGS_report));

        auto num_requests = getNumberRequests(user_plugin);

        auto images = extractFilesByExtension(FLAGS_inputs_dir, "bmp", 1);
        auto hasImageInput = [](const InferenceEngine::CNNNetwork &network) {
            auto inputs = network.getInputsInfo();
            auto isImageInput = [](const InferenceEngine::InputsDataMap::value_type &input) {
                return isImage(input.second);
            };
            return std::any_of(inputs.begin(), inputs.end(), isImageInput);
        };

        if (hasImageInput(network) && images.empty()) {
            throw std::invalid_argument(FLAGS_inputs_dir + " does not contain images for network");
        }

        auto binaries = extractFilesByExtension(FLAGS_inputs_dir, "bin", 1);
        auto hasBinaryInput = [](const InferenceEngine::CNNNetwork &network) {
            auto inputs = network.getInputsInfo();
            auto isBinaryInput = [](const InferenceEngine::InputsDataMap::value_type &input) {
                return !isImage(input.second);
            };
            return std::any_of(inputs.begin(), inputs.end(), isBinaryInput);
        };

        if (hasBinaryInput(network) && binaries.empty()) {
            throw std::invalid_argument(FLAGS_inputs_dir + " does not contain binaries for network");
        }

        std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> performance;

        std::atomic<std::size_t> iteration{0};
        std::promise<void> done;
        bool needStartAsync{true};
        // Performance counters are collected at this iteration index:
        // 2 * num_requests initial iterations plus the requested -iterations.
        std::size_t profiledIteration = 2 * num_requests + FLAGS_iterations;

        std::vector<InferenceEngine::InferRequest> requests(num_requests);
        std::vector<std::size_t> current_iterations(num_requests);

        using callback_t = std::function<void(InferenceEngine::InferRequest, InferenceEngine::StatusCode)>;

        for (std::size_t request = 0; request < num_requests; ++request) {
            requests[request] = executableNetwork.CreateInferRequest();
            current_iterations[request] = 0;

            loadInputs(request, images, binaries, requests[request], network);

            // Completion callback: on the profiled iteration it grabs the performance
            // counters and signals completion; otherwise it re-submits the request
            // with the next iteration index.
            callback_t callback =
                [request, profiledIteration, &done, &needStartAsync, &performance, &iteration, &current_iterations]
                (InferenceEngine::InferRequest inferRequest, InferenceEngine::StatusCode code) {
                if (code != InferenceEngine::StatusCode::OK) {
                    THROW_IE_EXCEPTION << "Infer request failed with code " << code;
                }

                auto current_iteration = current_iterations[request];
                if (current_iteration == profiledIteration) {
                    performance = inferRequest.GetPerformanceCounts();
                    needStartAsync = false;
                    done.set_value();
                }

                if (needStartAsync) {
                    current_iterations[request] = iteration++;
                    inferRequest.StartAsync();
                }
            };

            requests[request].SetCompletionCallback<callback_t>(callback);
        }

        auto doneFuture = done.get_future();

        // Start all requests; from here on, the callbacks keep re-submitting them
        // until the profiled iteration completes.
        for (std::size_t request = 0; request < num_requests; ++request) {
            current_iterations[request] = iteration++;
            requests[request].StartAsync();
        }

        doneFuture.wait();
        printPerformanceCounts(performance, FLAGS_report);
    } catch (const std::exception &error) {
        std::cerr << error.what() << std::endl;
        return EXIT_FAILURE;
    } catch (...) {
        std::cerr << "Unknown/internal exception happened." << std::endl;
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}