Moved Post-training Optimization Tool to open-source (#7940)

* Moved POT to open source
* Added OMZ as a submodule
* Excluded OMZ from ShellCheck

parent 2e4514b4df
commit bbeec714aa
.gitattributes (vendored) | 10
@@ -64,3 +64,13 @@
*.gif filter=lfs diff=lfs merge=lfs -text
*.vsdx filter=lfs diff=lfs merge=lfs -text
*.bmp filter=lfs diff=lfs merge=lfs -text

# POT attributes
tools/pot/tests/data/test_cases_refs/* filter=lfs diff=lfs merge=lfs -text
/tools/pot/tests/** -pot_package
/configs/accuracy_checker/** -pot_package
/configs/quantization/** -pot_package
/tools/pot/tools/auxilary/** -pot_package
/tools/pot/tools/run_series_experiments.py -pot_package
/tools/pot/.pylintrc -pot_package
/tools/pot/README_dev.md -pot_package
.gitmodules (vendored) | 3
@@ -56,3 +56,6 @@
[submodule "thirdparty/onednn_gpu"]
    path = thirdparty/onednn_gpu
    url = https://github.com/oneapi-src/oneDNN.git
[submodule "tools/pot/thirdparty/open_model_zoo"]
    path = tools/pot/thirdparty/open_model_zoo
    url = https://github.com/openvinotoolkit/open_model_zoo.git
@@ -12,6 +12,7 @@ ie_shellcheck_process(DIRECTORY "${OpenVINO_SOURCE_DIR}"
    "${OpenVINO_SOURCE_DIR}/thirdparty"
    "${OpenVINO_SOURCE_DIR}/runtime/bindings/python/thirdparty/pybind11"
    "${IE_MAIN_SOURCE_DIR}/thirdparty"
    "${OpenVINO_SOURCE_DIR}/tools/pot/thirdparty"
    "${TEMP}"
    # TODO fix and enable back:
    "${OpenVINO_SOURCE_DIR}/inference-engine/scripts/dependencies.sh"
tools/pot/.gitignore (new file, vendored) | 107
@@ -0,0 +1,107 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# PyCharm
.idea

# snapshots
*.tar
tools/pot/.pylintrc (new file) | 29
@@ -0,0 +1,29 @@
[MASTER]
disable = fixme,
          invalid-name,
          missing-docstring,
          no-self-use,
          too-few-public-methods,
          too-many-arguments,
          too-many-locals
max-attributes=20
max-line-length = 120
ignore-docstrings = yes
ignored-modules = mo,accuracy_checker,extensions,openvino.inference_engine,cv2,open_model_zoo.model_tools._configuration,open_model_zoo.model_tools._common
ignore-patterns = ac_imports.py
extension-pkg-whitelist = numpy

[SIMILARITIES]
min-similarity-lines = 19
ignore-imports = yes

[BASIC]
good-names=logger,fn

[DESIGN]
max-statements=120
max-branches=14
max-nested-blocks=7

[OPTIONS]
generated-members=torch.*
tools/pot/CODEOWNERS (new file) | 5
@@ -0,0 +1,5 @@
# See help here: https://docs.gitlab.com/ee/user/project/code_owners.html

# Control third-party dependencies
**/*requirements*.* openvino.configuration.mgmt@intel.com
**/setup.py openvino.configuration.mgmt@intel.com
tools/pot/README.md (new file) | 59
@@ -0,0 +1,59 @@
# Post-Training Optimization Tool {#pot_README}

## Introduction

Post-training Optimization Tool (POT) is designed to accelerate the inference of deep learning models by applying
special methods that do not require model retraining or fine-tuning, such as post-training quantization. Therefore, the tool does not
require a training dataset or a training pipeline. To apply post-training algorithms from the POT, you need:
* A floating-point precision model, FP32 or FP16, converted into the OpenVINO™ Intermediate Representation (IR) format
and run on CPU with OpenVINO™.
* A representative calibration dataset that reflects a use case scenario, for example, 300 images.

Post-training Optimization Tool provides the following key features:

* Two post-training 8-bit quantization algorithms: fast [DefaultQuantization](openvino/tools/pot/algorithms/quantization/default/README.md) and precise [AccuracyAwareQuantization](openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md).
* Compression for different hardware targets such as CPU and GPU.
* Multiple domains: Computer Vision, Natural Language Processing, Recommendation Systems, Speech Recognition.
* [API](openvino/tools/pot/api/README.md) that helps to apply optimization methods within a custom inference script written with the OpenVINO Python* API.
* Symmetric and asymmetric quantization schemes. For details, see the [Quantization](openvino/tools/pot/algorithms/quantization/README.md) section.
* Per-channel quantization for Convolutional and Fully-Connected layers.
* Global optimization of post-training quantization parameters using the [Tree-Structured Parzen Estimator](openvino/tools/pot/optimization/tpe/README.md).

The tool aims to fully automate the model transformation process without requiring changes to the model on the user's side.
The POT is available only in the Intel® distribution of OpenVINO™ toolkit and is not open-sourced. For details
about the low-precision flow in OpenVINO™, see the [Low Precision Optimization Guide](docs/LowPrecisionOptimizationGuide.md).

For benchmarking results collected for the models optimized with the POT, see [INT8 vs FP32 Comparison on Select Networks and Platforms](@ref openvino_docs_performance_int8_vs_fp32).

Further documentation presumes that you are familiar with basic Deep Learning concepts, such as model inference,
dataset preparation, and model optimization, as well as with the OpenVINO™ toolkit and its components, such
as the [Model Optimizer](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide)
and the [Accuracy Checker Tool](@ref omz_tools_accuracy_checker_README).
## Use POT

*(figure)*

The POT provides three basic usage scenarios:
* **[Command-line interface](docs/CLI.md)**: this is the recommended path if the model comes from the OpenVINO™
[Model Zoo](https://github.com/openvinotoolkit/open_model_zoo) or if there is a valid [Accuracy Checker Tool](@ref omz_tools_accuracy_checker_README)
configuration file for the model, which allows validating model accuracy with the [Accuracy Checker Tool](@ref omz_tools_accuracy_checker_README).
* **[Python* API](openvino/tools/pot/api/README.md)**: it allows integrating the optimization methods implemented in POT into
a Python* inference script written with the [Python* API](@ref openvino_inference_engine_ie_bridges_python_docs_api_overview).
This flow is recommended if it is not possible to use the [Accuracy Checker Tool](@ref omz_tools_accuracy_checker_README)
for validation on the dedicated dataset; see the sketch after this list.
* **[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench)**: a web-based graphical environment
that enables you to optimize, fine-tune, analyze, visualize, and compare the performance of deep learning models.
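As an illustration of the Python* API flow, below is a minimal sketch that applies DefaultQuantization through the API. The import paths follow the API samples of this release and may differ between versions; `CalibrationLoader` and the model file names are assumptions, not part of POT:

```python
# Minimal sketch of the POT Python* API flow. Import paths follow the API
# samples of this release and may differ between versions; CalibrationLoader
# is a hypothetical data loader supplied by the user.
import numpy as np
from addict import Dict
from openvino.tools.pot.api import DataLoader
from openvino.tools.pot.engines.ie_engine import IEEngine
from openvino.tools.pot.graph import load_model, save_model
from openvino.tools.pot.pipeline.initializer import create_pipeline

class CalibrationLoader(DataLoader):
    """Feeds pre-processed calibration images to the engine."""
    def __init__(self):
        super().__init__(config=None)  # the base class stores an optional config

    def __len__(self):
        return 300  # size of the representative calibration subset

    def __getitem__(self, index):
        # Returns ((img_id, annotation), image); DefaultQuantization
        # does not need annotations, so None is used here.
        return (index, None), np.zeros((3, 224, 224), dtype=np.float32)

model_config = Dict({"model_name": "model", "model": "model.xml", "weights": "model.bin"})
engine_config = Dict({"device": "CPU"})
algorithms = [{"name": "DefaultQuantization",
               "params": {"target_device": "ANY", "preset": "performance",
                          "stat_subset_size": 300}}]

model = load_model(model_config)                       # read the FP32/FP16 IR
engine = IEEngine(config=engine_config, data_loader=CalibrationLoader())
pipeline = create_pipeline(algorithms, engine)         # build the optimization pipeline
compressed_model = pipeline.run(model)                 # apply post-training quantization
save_model(compressed_model, save_path="./optimized")  # write the quantized IR
```

The command-line interface drives the same pipeline from a JSON configuration file instead of Python code.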
> **Note**: POT also supports optimization in the so-called [**Simplified mode**](@ref pot_configs_README), which is essentially a local
implementation of the POT Python API aimed at quantizing Computer Vision models with a simple pre-processing and inference flow. Note
that this mode can lead to an inaccurate model after optimization due to differences in the model pre-processing.

To get started with POT, follow the [Installation Guide](docs/InstallationGuide.md).

## See Also

* [Low Precision Optimization Guide](docs/LowPrecisionOptimizationGuide.md)
* [Post-Training Optimization Best Practices](docs/BestPractices.md)
* [POT Frequently Asked Questions](docs/FrequentlyAskedQuestions.md)
* [INT8 Quantization by Using Web-Based Interface of the DL Workbench](https://docs.openvinotoolkit.org/latest/workbench_docs_Workbench_DG_Int_8_Quantization.html)
tools/pot/README_dev.md (new file) | 54
@@ -0,0 +1,54 @@
# Post-training Optimization Tool {#pot_README_dev}

Starting with the 2020.1 version, OpenVINO™ toolkit delivers the Post-training Optimization Tool designed to accelerate the inference of DL models by converting them into a more hardware-friendly representation, applying specific methods that do not require re-training, for example, post-training quantization.
For more details about the low-precision flow in OpenVINO™, refer to the [Low Precision Optimization Guide](docs/LowPrecisionOptimizationGuide.md).

Post-training Optimization Tool includes a standalone command-line tool and a Python* API that provide the following key features.

## Key features

* Two supported post-training quantization algorithms: fast [DefaultQuantization](openvino/tools/pot/algorithms/quantization/default/README.md) and precise [AccuracyAwareQuantization](openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md), as well as multiple experimental methods.
* Global optimization of post-training quantization parameters using the [Tree-structured Parzen Estimator](openvino/tools/pot/optimization/tpe/README.md).
* Symmetric and asymmetric quantization schemes. For more details, see the [Quantization](openvino/tools/pot/algorithms/quantization/README.md) section.
* Per-channel quantization for Convolutional and Fully-Connected layers.
* Multiple domains: Computer Vision, Recommendation Systems.
* Ability to implement a custom calibration pipeline via the supported [API](openvino/tools/pot/api/README.md).
* Compression for different HW targets such as CPU, GPU, VPU.
* Post-training sparsity.

## Usage

### System requirements
- Ubuntu 18.04 or later (64-bit)
- Python 3.6 or later
- OpenVINO
### Installation (Temporary)
1) Clone the compression tool repo: `git clone git@gitlab-icv.inn.intel.com:algo/post-training-compression-tool.git`
2) Download the submodules:
```
git submodule init
git submodule update
```
3) Clone the DLDT repo: `git clone https://gitlab-icv.inn.intel.com/inference-engine/dldt` (not into the post-training-compression-tool)
4) Switch DLDT to the required branch: `feature/low_precision/develop_fp_v10`
5) Build the Inference Engine (instructions can be found in the DLDT repo)
6) Switch DLDT to the _mkaglins/poc_ branch (the Inference Engine is built from the _feature/low_precision/develop_fp_v10_ branch to support `FakeQuantize` layers, while the Model Optimizer is used from the _mkaglins/poc_ branch; so stay on _mkaglins/poc_ once you have built the IE and do not rebuild it from there)
7) Set the _PYTHONPATH_ variable: `export PYTHONPATH=<path to DLDT bins>/bin/intel64/Release/lib/python_api/python3.6:<path to DLDT>/dldt/model-optimizer` (you can verify the setup with the sanity check after these steps)
8) Install requirements for the Accuracy Checker:
   - From the POT root: `cd ./thirdparty/open_model_zoo/tools/accuracy_checker`
   - Call the setup script: `python3 setup.py install`
   - Get back to the POT root dir: `cd <PATH_TO_POT_DIR>`
9) Install requirements for the tool:
   - Call the setup script: `python3 setup.py install`
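Before running the tool, it can be useful to check that the `PYTHONPATH` set in step 7 actually exposes the Inference Engine Python API and the Model Optimizer package. A minimal sanity-check sketch (package names assumed from the paths above and may differ depending on the DLDT branch you built):

```python
# Environment sanity check; assumes the PYTHONPATH set in step 7.
# Package names may differ depending on the DLDT branch you built.
from openvino.inference_engine import IECore  # Inference Engine Python API
import mo                                     # Model Optimizer package

print("Available devices:", IECore().available_devices)
```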
### Run
1) Prepare a configuration file for the tool based on the examples in the `configs` folder
2) Navigate to the compression tool directory
3) Launch the tool by running the following command:
`python3 main.py -c <path to config file> -e`

To test the tool, you can use the PyTorch MobileNetV2 model from `tests/data/models/mobilenetv2/mobilenetv2.onnx`

- If there are errors with imports in the Model Optimizer, first of all take the following steps:
  - Check out the _mkaglins/poc_ branch in DLDT (it's important!)
tools/pot/__init__.py (new file) | 2
@@ -0,0 +1,2 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
tools/pot/configs/README.md (new file) | 64
@@ -0,0 +1,64 @@
# Configuration File Description {#pot_configs_README}

In the instructions below, the Post-training Optimization Tool directory `<INSTALL_DIR>/deployment_tools/tools/post_training_optimization_toolkit` is referred to as `<POT_DIR>`. `<INSTALL_DIR>` is the directory where the Intel® Distribution of OpenVINO™ toolkit is installed.
> **NOTE**: The installation directory is different in the case of PyPI installation and does not contain examples of
> configuration files.

The tool is designed to work with a configuration file where all the parameters required for the optimization are specified. These parameters are organized as a dictionary and stored in
a JSON file. The JSON file may contain comments, which are supported by the `jstyleson` Python* package.
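For example, such a commented configuration file can be loaded with `jstyleson` in the same way as with the standard `json` module (the file name below is illustrative):

```python
# Minimal sketch: jstyleson mirrors the standard json module API but also
# tolerates the // and /* */ comments used in the POT configuration files.
import jstyleson

with open("default_quantization_template.json") as f:  # illustrative file name
    config = jstyleson.load(f)

print(config["compression"]["algorithms"][0]["name"])  # -> "DefaultQuantization"
```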
Logically, all parameters are divided into three groups:
- **Model parameters** that are related to the model definition (e.g. model name, model path)
- **Engine parameters** that define parameters of the engine responsible for the model inference and the data preparation used for optimization and evaluation (e.g. preprocessing parameters, dataset path)
- **Compression parameters** that are related to the optimization algorithm (e.g. algorithm name and specific parameters)
## Model Parameters

```json
"model": {
    "model_name": "model_name",
    "model": "<MODEL_PATH>",
    "weights": "<PATH_TO_WEIGHTS>"
}
```

This section contains only three parameters:
- `"model_name"` - string parameter that defines a model name, e.g. `"MobileNetV2"`
- `"model"` - string parameter that defines the path to the input model topology (.xml)
- `"weights"` - string parameter that defines the path to the input model weights (.bin)
## Engine Parameters

```json
"engine": {
    "type": "accuracy_checker",
    "config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml"
}
```
The main parameter is `"type"`, which can take two possible values: `"accuracy_checker"` (default) and `"simplified"`. It
specifies the engine that is used for model inference and validation (if supported):
- **Simplified mode** engine. This engine can be used only with the `DefaultQuantization` algorithm to get a fully quantized model
using a subset of images. It does not use the Accuracy Checker tool or annotations. To measure accuracy, you should implement
your own validation pipeline with the OpenVINO API.
  - To run the simplified mode, define the engine section similar to the example `mobilenetV2_tf_int8_simple_mode.json` file from the `<POT_DIR>/configs/examples/quantization/classification/` directory.
- **Accuracy Checker** engine. It relies on the [Deep Learning Accuracy Validation Framework](@ref omz_tools_accuracy_checker_README) (Accuracy Checker) when inferencing DL models and working with datasets.
The benefit of this mode is that you can compute accuracy in case you have annotations. It is possible to use the accuracy-aware
algorithms family when this mode is selected.
There are two options to define engine parameters in this mode:
  - Refer to an existing Accuracy Checker configuration file, which is represented by a YAML file. It can be a file used for full-precision model validation. In this case, you should define only the `"config"` parameter containing the path to the Accuracy Checker configuration file.
  - Define all the [required Accuracy Checker parameters](@ref omz_tools_accuracy_checker_accuracy_checker_launcher_dlsdk_launcher_readme)
directly in the JSON file. In this case, POT just passes the corresponding dictionary of parameters to the Accuracy Checker when instantiating it.
For more details, refer to the corresponding Accuracy Checker information and the examples of configuration files provided with the tool:
  - For the SSD-MobileNet model:<br>`<POT_DIR>/configs/examples/quantization/object_detection/ssd_mobilenetv1_int8.json`
## Compression Parameters

This section defines optimization algorithms and their parameters. For more details about the parameters of a concrete optimization algorithm, please refer to the corresponding
[documentation](@ref pot_compression_algorithms_quantization_README).

## Examples of the Configuration File

For a quick start, many examples of configuration files are provided and placed in the `<POT_DIR>/configs/examples`
folder. There you can find ready-to-use configurations for models from various domains: Computer Vision (Image
Classification, Object Detection, Segmentation), Natural Language Processing, Recommendation Systems. We mostly
provide configuration files for models that require non-default configuration settings in order to get accurate results.
For details on how to run the Post-Training Optimization Tool with a sample configuration file, see the [example](@ref pot_configs_examples_README).
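Note that the example and template files keep `<MODEL_PATH>`-style placeholders that must be substituted before use. A small illustrative sketch (the `fill_template` helper is not part of POT) that fills the placeholders and parses the commented JSON with `jstyleson`:

```python
# Illustrative helper (not part of POT): substitute <...> placeholders in a
# commented JSON template and parse the result with jstyleson.
import jstyleson

def fill_template(template_path, substitutions):
    with open(template_path) as f:
        text = f.read()
    for placeholder, value in substitutions.items():
        text = text.replace(placeholder, value)
    return jstyleson.loads(text)

config = fill_template("default_quantization_template.json",
                       {"<MODEL_PATH>": "mobilenet-v2.xml",
                        "<PATH_TO_WEIGHTS>": "mobilenet-v2.bin",
                        "<CONFIG_PATH>": "mobilenet_v2.yaml"})
print(config["compression"]["algorithms"][0]["params"]["preset"])  # -> "performance"
```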
tools/pot/configs/accuracy_aware_quantization_spec.json (new file) | 204
@@ -0,0 +1,204 @@
{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
    },

    /* Parameters of the engine used for model inference */

    // Post-Training Optimization Tool supports an engine based on the Accuracy Checker and a custom engine.
    // For a custom engine, you should specify your own set of parameters.
    // The engine based on the Accuracy Checker uses Accuracy Checker parameters.
    // You can specify the parameters via an Accuracy Checker config file or directly in the engine section.
    // More information about Accuracy Checker parameters can be found here:
    // https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker

    "engine": {
        "stat_requests_number": 8, // Number of requests during statistics collection
        "eval_requests_number": 8, // Number of requests during evaluation
        "config": "<CONFIG_PATH>",

        /* OR */

        "name": "model_name",
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ],
        "datasets": [
            {
                "name": "dataset_name",
                "data_source": "<DATASET_PATH>",
                "annotation": "<ANNOTATION_PATH>",
                "preprocessing": [
                    {
                        "type": "resize",
                        "interpolation": "BILINEAR",
                        "aspect_ratio_scale": "greater",
                        "size": 224
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    }
                ]
            }
        ]
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "inplace_statistic": true, // An optional parameter that changes the method of collecting statistics;
                                   // it reduces the amount of memory consumed
                                   // but increases the calibration time;
                                   // the default value is true
        "model_type": "None", // An optional parameter needed for additional patterns in the model;
                              // the default value is None (only "transformer" is supported now)
        "algorithms": [
            {
                "name": "AccuracyAwareQuantization", // Optimization algorithm name
                "params": {
                    "ranking_subset_size": 300, // Size of the subset that is used to rank layers by their
                                                // contribution to the accuracy drop

                    "max_iter_num": 20, // Maximum number of iterations of the algorithm (maximum number of layers
                                        // that may be reverted back to full precision)

                    "maximal_drop": 0.005, // Maximum accuracy drop that has to be achieved after the quantization

                    "drop_type": "absolute", // Drop type of the accuracy metric: relative or absolute (default)

                    "use_prev_if_drop_increase": false, // Whether to use the NN snapshot from the previous algorithm
                                                        // iteration in case the drop increases

                    "base_algorithm": "DefaultQuantization", // Base algorithm that is used to quantize the model
                                                             // at the beginning

                    "annotation_free": false, // Whether to compute the accuracy drop on a dataset without annotation

                    "annotation_conf_threshold": 0.6, // Threshold for annotation creation in case of annotation-free
                                                      // algorithm execution. Images on which the original model predicts
                                                      // with confidence below this threshold will be skipped during
                                                      // evaluation

                    "convert_to_mixed_preset": false, // Whether to convert the model to the mixed mode if
                                                      // the accuracy criteria of the symmetrically quantized
                                                      // model are not satisfied

                    // An optional list of metrics that are taken into account during optimization.
                    // If not specified, all metrics defined in the engine config are used
                    "metrics": [
                        {
                            "name": "accuracy", // Metric name to optimize
                            "baseline_value": 0.72 // Baseline metric value of the original model
                        }
                    ],

                    "metric_subset_ratio": 0.5, // Part of the validation set that is used to compare element-wise
                                                // full-precision and quantized models in case of predefined metric
                                                // values of the original model

                    "tune_hyperparams": false, // Whether to search for the best quantization parameters for the model.
                                               // This algorithm uses a grid-search engine based on a special subset of samples from the dataset

                    "ignored": {
                        // List of nodes that are excluded from optimization
                        "scope": [
                            "<NODE_NAME>"
                        ],
                        // List of types that are excluded from optimization
                        "operations": [
                            {
                                "type": "<NODE_TYPE>",
                                // Allows excluding by attributes
                                "attributes": {
                                    "<NAME>": "<VALUE>" // Lists of values are not supported
                                }
                            },
                            {
                                "type": "<NODE_TYPE>" // Excluding only by type
                            }
                        ]
                    },

                    "preset": "mixed", // A preset is a collection of optimization algorithm parameters that
                                       // specifies which metric the algorithm should concentrate on improving.
                                       // Each optimization algorithm
                                       // supports [performance, accuracy, mixed] presets

                    "stat_subset_size": 100, // Size of the subset used to calculate activation statistics that can be used
                                             // for calculating quantization parameters

                    /* Manual specification of quantization parameters */

                    /* Quantization parameters for weights */

                    "weights": {
                        "bits": 8, // Number of quantization bits
                        "mode": "symmetric", // Quantization mode
                        "granularity": "perchannel", // Granularity: a scale for each output channel
                        "level_low": -127, // Low quantization level
                        "level_high": 127, // High quantization level

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "max": {
                                "type": "quantile",
                                "outlier_prob": 0.0001
                            }
                        }
                    },

                    /* Quantization parameters for activations */

                    "activations": {
                        "bits": 8, // Number of quantization bits
                        "mode": "asymmetric", // Quantization mode
                        "granularity": "pertensor", // Granularity: one scale for the output tensor

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "preset": "quantile",

                            /* OR */

                            /* Minimum of the quantization range */

                            "min": {
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability used by the quantile estimator
                            },

                            /* Maximum of the quantization range */

                            "max": {
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability used by the quantile estimator
                            }
                        }
                    }
                }
            }
        ]
    }
}
tools/pot/configs/accuracy_aware_quantization_template.json (new file) | 44
@@ -0,0 +1,44 @@
/* This configuration file is the fastest way to get started with the accuracy-aware
quantization algorithm. It contains only mandatory options with commonly used
values. All other options can be considered an advanced mode that requires
deep knowledge of the quantization process. An overall description of all possible
parameters can be found in accuracy_aware_quantization_spec.json */

{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
    },

    /* Parameters of the engine used for model inference */

    "engine": {
        "config": "<CONFIG_PATH>" // Path to Accuracy Checker config
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "AccuracyAwareQuantization", // Optimization algorithm name
                "params": {
                    "preset": "performance", // Preset [performance, mixed, accuracy] that controls the quantization
                                             // mode (symmetric, mixed (weights symmetric and activations asymmetric),
                                             // and fully asymmetric, respectively)

                    "stat_subset_size": 300, // Size of the subset used to calculate activation statistics that can be used
                                             // for calculating quantization parameters

                    "maximal_drop": 0.01, // Maximum accuracy drop that has to be achieved after the quantization
                    "tune_hyperparams": false // Whether to search for the best quantization parameters for the model
                }
            }
        ]
    }
}
@@ -0,0 +1,84 @@
{
    "model": {
        "model_name": "model_name",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ],
        "datasets": {
            /* Global dataset preprocessing that will be used for all datasets
               if no local configuration is specified */

            "preprocessing": [
                {
                    "type": "crop",
                    "central_fraction": 0.875
                },
                {
                    "type": "resize",
                    "size": 224
                }
            ],

            /* Dataset for statistics collection */

            "optimization": {
                "name": "classification_dataset",
                "data_source": "<DATASET_PATH>"
            },

            /* Dataset for final evaluation */

            "evaluation": {
                "name": "classification_dataset",
                "data_source": "<DATASET_PATH>",
                "annotation_conversion": {
                    "converter": "imagenet",
                    "annotation_file": "<ANNOTATION_FILE_PATH>"
                },

                /* Local preprocessing config. Overwrites the global config */

                "preprocessing": [
                    {
                        "type": "crop",
                        "central_fraction": 0.875
                    },
                    {
                        "type": "resize",
                        "size": 224
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    }
                ]
            }
        },
        "stat_requests_number": 8,
        "eval_requests_number": 8
    },
    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
tools/pot/configs/cascaded_model_default_quantizatoin_spec.json (new file) | 251
@@ -0,0 +1,251 @@
{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name (name of the whole cascade)

        /* List of models in the cascade */
        "cascade": [
            {
                /* The first model of the cascade */
                "name": "<FIRST_MODEL_NAME>", // Name of the first model of the cascade (should be taken from the engine section)
                "model": "<MODEL_PATH>", // Path to the first model (.xml format)
                "weights": "<PATH_TO_WEIGHTS>" // Path to the first model weights (.bin format)
            },

            /* ... */

            {
                /* The last model of the cascade */
                "name": "<LAST_MODEL_NAME>", // Name of the last model of the cascade (should be taken from the engine section)
                "model": "<MODEL_PATH>", // Path to the last model (.xml format)
                "weights": "<PATH_TO_WEIGHTS>" // Path to the last model weights (.bin format)
            }
        ]
    },

    /* Parameters of the engine used for model inference */

    // Post-Training Optimization Tool supports an engine based on the Accuracy Checker and a custom engine.
    // For a custom engine, you should specify your own set of parameters.
    // The engine based on the Accuracy Checker uses Accuracy Checker parameters.
    // You can specify the parameters via an Accuracy Checker config file or directly in the engine section.
    // More information about Accuracy Checker parameters can be found here:
    // https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker

    "engine": {
        /* Accuracy Checker mode (default) */

        "config": "<CONFIG_PATH>",

        /* OR */

        "module": "<EVALUATOR_CORRESPONDING_TO_CURRENT_CASCADE>",
        "module_config": {
            "network_info": {
                "<FIRST_MODEL_NAME>": { // Name of the first model of the cascade (this name should be used in the model section)
                    "outputs": {
                        "probability_out": "prob1",
                        "region_out": "conv4-2"
                    },
                    "inputs": [
                        {
                            "name": "data",
                            "type": "INPUT",
                            "layout": "NCWH"
                        }
                    ],
                    "preprocessing": [
                        {
                            "type": "bgr_to_rgb"
                        },
                        {
                            "type": "pyramid",
                            "min_size": 10,
                            "factor": 0.79
                        }
                    ]
                },

                /* ... */

                "<LAST_MODEL_NAME>": { // Name of the last model of the cascade (this name should be used in the model section)
                    "outputs": {
                        "probability_out": "prob1",
                        "region_out": "conv6-2"
                    },
                    "inputs": [
                        {
                            "name": "data",
                            "type": "INPUT",
                            "layout": "NCWH"
                        }
                    ],
                    "preprocessing": [
                        {
                            "type": "bgr_to_rgb"
                        }
                    ]
                }
            },
            "launchers": [
                {
                    "framework": "dlsdk"
                }
            ],
            "datasets": [
                {
                    "name": "<DATASET_NAME>",
                    "data_source": "<DATASET_PATH>",
                    "postprocessing": [
                        {
                            "type": "filter",
                            "apply_to": "prediction",
                            "is_empty": true
                        },
                        {
                            "type": "filter",
                            "height_range": 60,
                            "apply_to": "annotation"
                        }
                    ],
                    "metrics": [
                        {
                            "type": "map",
                            "ignore_difficult": true,
                            "include_boundaries": true,
                            "allow_multiple_matches_per_ignored": true,
                            "distinct_conf": false
                        }
                    ]
                }
            ]
        }
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "DefaultQuantization", // Optimization algorithm name
                "params": {
                    "ignored": {
                        /* Ignored block for the first model of the cascade */
                        "<FIRST_MODEL_NAME>": {
                            // List of nodes that are excluded from optimization
                            "scope": [
                                "<NODE_NAME>"
                            ],
                            // List of types that are excluded from optimization
                            "operations": [
                                {
                                    "type": "<NODE_TYPE>",
                                    // Allows excluding by attributes
                                    "attributes": {
                                        "<NAME>": "<VALUE>" // Lists of values are not supported
                                    }
                                },
                                {
                                    "type": "<NODE_TYPE>" // Excluding only by type
                                }
                            ]
                        },

                        /* ... */

                        /* Ignored block for the last model of the cascade */
                        "<LAST_MODEL_NAME>": {
                            // List of nodes that are excluded from optimization
                            "scope": [
                                "<NODE_NAME>"
                            ],
                            // List of types that are excluded from optimization
                            "operations": [
                                {
                                    "type": "<NODE_TYPE>",
                                    // Allows excluding by attributes
                                    "attributes": {
                                        "<NAME>": "<VALUE>" // Lists of values are not supported
                                    }
                                },
                                {
                                    "type": "<NODE_TYPE>" // Excluding only by type
                                }
                            ]
                        }
                    },

                    "preset": "accuracy", // A preset is a collection of optimization algorithm parameters that
                                          // specifies which metric the algorithm should concentrate on improving.
                                          // Each optimization algorithm
                                          // supports [performance, mixed, accuracy] presets

                    "stat_subset_size": 100, // Size of the subset used to calculate activation statistics that can be used
                                             // for calculating quantization parameters

                    /* Manual specification of quantization parameters */

                    /* Quantization parameters for weights */

                    "weights": {
                        "bits": 8, // Number of quantization bits
                        "mode": "symmetric", // Quantization mode
                        "granularity": "perchannel", // Granularity: a scale for each output channel
                        "level_low": -127, // Low quantization level
                        "level_high": 127, // High quantization level

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "max": {
                                "type": "quantile",
                                "outlier_prob": 0.0001
                            }
                        }
                    },

                    /* Quantization parameters for activations */

                    "activations": {
                        "bits": 8, // Number of quantization bits
                        "mode": "asymmetric", // Quantization mode
                        "granularity": "pertensor", // Granularity: one scale for the output tensor

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "preset": "quantile",

                            /* OR */

                            /* Minimum of the quantization range */

                            "min": {
                                "clipping_value": 0, // Threshold for min statistic value clipping (lower bound)
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability used by the quantile estimator
                            },

                            /* Maximum of the quantization range */

                            "max": {
                                "clipping_value": 6, // Threshold for max statistic value clipping (upper bound)
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability used by the quantile estimator
                            }
                        }
                    }
                }
            }
        ]
    }
}
tools/pot/configs/default_quantization_spec.json (new file) | 182
@@ -0,0 +1,182 @@
{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
    },

    /* Parameters of the engine used for model inference */

    // Post-Training Optimization Tool supports an engine based on the Accuracy Checker and a custom engine.
    // For a custom engine, you should specify your own set of parameters.
    // The engine based on the Accuracy Checker uses Accuracy Checker parameters.
    // You can specify the parameters via an Accuracy Checker config file or directly in the engine section.
    // More information about Accuracy Checker parameters can be found here:
    // https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker

    "engine": {
        /* Accuracy Checker mode (default) */

        "stat_requests_number": 8, // Number of requests during statistics collection
        "eval_requests_number": 8, // Number of requests during evaluation
        "config": "<CONFIG_PATH>",

        /* OR */

        "name": "model_name",
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ],
        "datasets": [
            {
                "name": "dataset_name",
                "data_source": "<DATASET_PATH>",
                "annotation": "<ANNOTATION_PATH>",
                "preprocessing": [
                    {
                        "type": "resize",
                        "interpolation": "BILINEAR",
                        "aspect_ratio_scale": "greater",
                        "size": 224
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    }
                ]
            }
        ],

        /* OR */

        /* Simplified mode */

        "type": "simplified", // OR the default value "type": "accuracy_checker" for non-simplified mode

        "data_source": "PATH_TO_SOURCE" // You can specify the path to a directory with images. You can also
                                        // specify a template for file names to filter the images to load.
                                        // Templates are Unix style (this option is valid only in simplified mode)
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization

        "model_type": "None", // An optional parameter needed for additional patterns in the model;
                              // the default value is None (only "transformer" is supported now)
        "dump_intermediate_model": false, // Save intermediate models for DefaultAlgorithm
        "inplase_statistics": true, // An optional parameter that changes the method of collecting statistics;
                                    // it reduces the amount of memory consumed
                                    // but increases the calibration time;
                                    // the default value is true
        "algorithms": [
            {
                "name": "DefaultQuantization", // Optimization algorithm name
                "params": {
                    "ignored": {
                        // List of nodes that are excluded from optimization
                        "scope": [
                            "<NODE_NAME>"
                        ],
                        // List of types that are excluded from optimization
                        "operations": [
                            {
                                "type": "<NODE_TYPE>",
                                // Allows excluding by attributes
                                "attributes": {
                                    "<NAME>": "<VALUE>" // Lists of values are not supported
                                }
                            },
                            {
                                "type": "<NODE_TYPE>" // Excluding only by type
                            }
                        ]
                    },

                    "preset": "mixed", // A preset is a collection of optimization algorithm parameters that
                                       // specifies which metric the algorithm should concentrate on improving.
                                       // Each optimization algorithm
                                       // supports [performance, mixed, accuracy] presets

                    "stat_subset_size": 100, // Size of the subset used to calculate activation statistics that can be used
                                             // for calculating quantization parameters

                    "shuffle_data": false, // Shuffle the data before selecting the subset to calculate activation
                                           // statistics. An optional parameter, the default value is false

                    "seed": 0, // Seed for data shuffling. An optional parameter, the default value is 0

                    /* Manual specification of quantization parameters */

                    /* Quantization parameters for weights */

                    "weights": {
                        "bits": 8, // Number of quantization bits
                        "mode": "symmetric", // Quantization mode
                        "granularity": "perchannel", // Granularity: a scale for each output channel
                        "level_low": -127, // Low quantization level
                        "level_high": 127, // High quantization level

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "max": {
                                "type": "quantile",
                                "outlier_prob": 0.0001
                            }
                        }
                    },

                    /* Quantization parameters for activations */

                    "activations": {
                        "bits": 8, // Number of quantization bits
                        "mode": "symmetric", // Quantization mode
                        "granularity": "pertensor", // Granularity: one scale for the output tensor

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "preset": "quantile",

                            /* OR */

                            /* Minimum of the quantization range */

                            "min": {
                                "clipping_value": 0, // Threshold for min statistic value clipping (lower bound)
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability used by the quantile estimator
                            },

                            /* Maximum of the quantization range */

                            "max": {
                                "clipping_value": 6, // Threshold for max statistic value clipping (upper bound)
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability used by the quantile estimator
                            }
                        }
                    }
                }
            }
        ]
    }
}
tools/pot/configs/default_quantization_template.json (new file) | 41
@ -0,0 +1,41 @@
|
||||
/* This configuration file is the fastest way to get started with the default
|
||||
quantization algorithm. It contains only mandatory options with commonly used
|
||||
values. All other options can be considered as an advanced mode and requires
|
||||
deep knowledge of the quantization process. An overall description of all possible
|
||||
parameters can be found in the default_quantization_spec.json */
|
||||
|
||||
{
|
||||
/* Model parameters */
|
||||
|
||||
"model": {
|
||||
"model_name": "model_name", // Model name
|
||||
"model": "<MODEL_PATH>", // Path to model (.xml format)
|
||||
"weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
|
||||
},
|
||||
|
||||
/* Parameters of the engine used for model inference */
|
||||
|
||||
"engine": {
|
||||
"config": "<CONFIG_PATH>" // Path to Accuracy Checker config
|
||||
},
|
||||
|
||||
/* Optimization hyperparameters */
|
||||
|
||||
"compression": {
|
||||
"target_device": "ANY", // Target device, the specificity of which will be taken
|
||||
// into account during optimization
|
||||
"algorithms": [
|
||||
{
|
||||
"name": "DefaultQuantization", // Optimization algorithm name
|
||||
"params": {
|
||||
"preset": "performance", // Preset [performance, mixed, accuracy] which control the quantization
|
||||
// mode (symmetric, mixed (weights symmetric and activations asymmetric)
|
||||
// and fully asymmetric respectively)
|
||||
|
||||
"stat_subset_size": 300 // Size of subset to calculate activations statistics that can be used
|
||||
// for quantization parameters calculation
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
@@ -0,0 +1,54 @@
models:
  - name: bert_base_squad1_1
    launchers:
      - framework: dlsdk
        device: cpu
        batch: 1
        adapter:
          type: bert_question_answering
          start_token_logits_output: "unstack/Squeeze_"
          end_token_logits_output: "unstack/Split.1"
        mo_params:
          data_type: FP32
          input: input_ids_1,input_mask_1,segment_ids_1
          input_shape: "[1,384], [1,384], [1,384]"
          output: unstack/Squeeze_,unstack/Split.1
        mo_flags:
          - disable_nhwc_to_nchw
        inputs:
          - name: "input_ids_1"
            type: INPUT
            value: "input_ids"
            precision: I32
          - name: "input_mask_1"
            type: INPUT
            value: 'input_mask'
          - name: "segment_ids_1"
            type: INPUT
            value: 'segment_ids'
            precision: I32

    datasets:
      - name: squad
        data_source: <PATH_TO_DATASET>/squad1.1/
        annotation: <PATH_TO_DATASET>/squad.pickle
        reader:
          type: annotation_features_extractor
          features:
            - input_ids
            - input_mask
            - segment_ids
        postprocessing:
          - type: extract_answers_tokens
            max_answer: 30
            n_best_size: 20
        metrics:
          - name: 'F1'
            type: 'f1'
            reference: 88.57
            threshold: 0.01

          - name: 'EM'
            type: 'exact_match'
            reference: 81.25
            threshold: 0.01
@@ -0,0 +1,54 @@
models:
  - name: bert_large_squad1_1
    launchers:
      - framework: dlsdk
        device: cpu
        batch: 1
        adapter:
          type: bert_question_answering
          start_token_logits_output: "unstack/Squeeze_"
          end_token_logits_output: "unstack/Split.1"
        mo_params:
          data_type: FP32
          input: input_ids_1,input_mask_1,segment_ids_1
          input_shape: "[1,384], [1,384], [1,384]"
          output: unstack/Squeeze_,unstack/Split.1
        mo_flags:
          - disable_nhwc_to_nchw
        inputs:
          - name: "input_ids_1"
            type: INPUT
            value: "input_ids"
            precision: I32
          - name: "input_mask_1"
            type: INPUT
            value: 'input_mask'
          - name: "segment_ids_1"
            type: INPUT
            value: 'segment_ids'
            precision: I32

    datasets:
      - name: squad
        data_source: <PATH_TO_DATASET>/squad1.1/
        annotation: <PATH_TO_DATASET>/squad.pickle
        reader:
          type: annotation_features_extractor
          features:
            - input_ids
            - input_mask
            - segment_ids
        postprocessing:
          - type: extract_answers_tokens
            max_answer: 30
            n_best_size: 20
        metrics:
          - name: 'F1'
            type: 'f1'
            reference: 90.63
            threshold: 0.05

          - name: 'EM'
            type: 'exact_match'
            reference: 83.59
            threshold: 0.05
tools/pot/configs/examples/accuracy_checker/densnet_121.yaml (new file) | 29
@@ -0,0 +1,29 @@
models:
  - name: DensNet-121

    launchers:
      - framework: dlsdk
        device: CPU
        adapter: classification

    datasets:
      - name: classification_dataset
        data_source: <PATH TO VALIDATION DATASET>
        annotation_conversion:
          converter: imagenet
          annotation_file: <PATH TO ANNOTATION FILE>

        preprocessing:
          - type: resize
            size: 256
          - type: crop
            size: 224

        metrics:
          - name: accuracy@top1
            type: accuracy
            top_k: 1

          - name: accuracy@top5
            type: accuracy
            top_k: 5
@@ -0,0 +1,29 @@
models:
  - name: east
    launchers:
      - framework: dlsdk
        device: CPU
        allow_reshape_input: True
        adapter:
          type: east_text_detection
          score_map_out: feature_fusion/Conv_7/Sigmoid
          geometry_map_out: feature_fusion/concat_3

    datasets:
      - name: ICDAR2015
        data_source: <PATH_TO_DATASET>
        annotation_conversion:
          converter: icdar_detection
          data_dir: <PATH_TO_GROUND_TRUTH>

        preprocessing:
          - type: resize
            dst_height: 2400
            dst_width: 32
            aspect_ratio_scale: east_keep_aspect_ratio
          - type: bgr_to_rgb

        metrics:
          - type: incidental_text_hmean
            name: f-measure
            ignore_difficult: True
@@ -0,0 +1,37 @@
models:
  - name: faster_rcnn_resnet101_coco
    launchers:
      - framework: dlsdk
        device: CPU
        adapter: ssd
        inputs:
          - name: image_info
            type: CONST_INPUT
            value: [[600, 1024, 1]]
    datasets:
      - name: ms_coco_detection_91_classes
        annotation_conversion:
          converter: mscoco_detection
          annotation_file: <ANNOTATION_PATH>/instances_val2017.json
          has_background: True
          sort_annotations: True
          use_full_label_map: True
        data_source: <DATA_PATH>/val2017
        preprocessing:
          - type: resize
            aspect_ratio_scale: fit_to_window
            dst_height: 600
            dst_width: 1024
          - type: padding
            dst_height: 600
            dst_width: 1024
            pad_type: right_bottom

        postprocessing:
          - type: faster_rcnn_postprocessing_resize
            dst_height: 600
            dst_width: 1024

        metrics:
          - type: coco_precision
            max_detections: 100
@@ -0,0 +1,37 @@
models:
  - name: faster_rcnn_resnet50_coco
    launchers:
      - framework: dlsdk
        device: CPU
        adapter: ssd
        inputs:
          - name: image_info
            type: CONST_INPUT
            value: [[600, 1024, 1]]
    datasets:
      - name: ms_coco_detection_91_classes
        annotation_conversion:
          converter: mscoco_detection
          annotation_file: <ANNOTATION_PATH>/instances_val2017.json
          has_background: True
          sort_annotations: True
          use_full_label_map: True
        data_source: <DATA_PATH>/val2017
        preprocessing:
          - type: resize
            aspect_ratio_scale: fit_to_window
            dst_height: 600
            dst_width: 1024
          - type: padding
            dst_height: 600
            dst_width: 1024
            pad_type: right_bottom

        postprocessing:
          - type: faster_rcnn_postprocessing_resize
            dst_height: 600
            dst_width: 1024

        metrics:
          - type: coco_precision
            max_detections: 100
@@ -0,0 +1,40 @@
models:
  - name: mask_rcnn_resnet50_atrous_coco

    launchers:
      - framework: dlsdk
        tags:
          - FP32
        adapter:
          type: mask_rcnn
          detection_out: reshape_do_2d
          raw_masks_out: masks
        inputs:
          - name: image_info
            type: CONST_INPUT
            value: [[800, 1365, 1]]
    datasets:
      - name: ms_coco_mask_rcnn_short_91_classes
        annotation_conversion:
          converter: mscoco_mask_rcnn
          annotation_file: <ANNOTATION_PATH>/instances_val2017.json
          has_background: True
          sort_annotations: True
          use_full_label_map: True
        data_source: <DATA_PATH>/val2017
        preprocessing:
          - type: resize
            aspect_ratio_scale: fit_to_window
            dst_height: 800
            dst_width: 1365
          - type: padding
            dst_height: 800
            dst_width: 1365
            pad_type: right_bottom
        postprocessing:
          - type: faster_rcnn_postprocessing_resize
            dst_height: 800
            dst_width: 1365
        metrics:
          - type: coco_orig_segm_precision
          - type: coco_orig_precision
@@ -0,0 +1,31 @@
models:
  - name: MobileNet_v1

    launchers:
      - framework: dlsdk
        device: CPU
        adapter: classification

    datasets:
      - name: classification_dataset
        data_source: <PATH TO VALIDATION DATASET>
        annotation_conversion:
          converter: imagenet
          annotation_file: <PATH TO ANNOTATION FILE>
          has_background: true

        preprocessing:
          - type: resize
            size: 256
            aspect_ratio_scale: greater
          - type: crop
            size: 224

        metrics:
          - name: accuracy@top1
            type: accuracy
            top_k: 1

          - name: accuracy@top5
            type: accuracy
            top_k: 5
@@ -0,0 +1,34 @@
models:
  - name: MobileNet_v2

    launchers:
      - framework: dlsdk
        device: CPU
        adapter: classification

    datasets:
      - name: classification_dataset
        data_source: <PATH TO VALIDATION DATASET>
        annotation_conversion:
          converter: imagenet
          annotation_file: <PATH TO ANNOTATION FILE>
        reader: pillow_imread

        preprocessing:
          - type: resize
            size: 256
            aspect_ratio_scale: greater
            use_pillow: True
          - type: crop
            size: 224
            use_pillow: True
          - type: bgr_to_rgb

        metrics:
          - name: accuracy@top1
            type: accuracy
            top_k: 1

          - name: accuracy@top5
            type: accuracy
            top_k: 5
tools/pot/configs/examples/accuracy_checker/mtcnn.yaml (new file) | 73
@@ -0,0 +1,73 @@
evaluations:
  - name: mtcnn
    module: custom_evaluators.mtcnn_evaluator.MTCNNEvaluator
    module_config:
      network_info:
        pnet:
          outputs:
            probability_out: prob1
            region_out: conv4-2
          inputs:
            - name: data
              type: INPUT
              layout: NCWH
          preprocessing:
            - type: bgr_to_rgb
            - type: pyramid
              min_size: 10
              factor: 0.79

        rnet:
          outputs:
            probability_out: prob1
            region_out: conv5-2
          inputs:
            - name: data
              type: INPUT
              layout: NCWH
          preprocessing:
            - type: bgr_to_rgb

        onet:
          outputs:
            probability_out: prob1
            region_out: conv6-2
          inputs:
            - name: data
              type: INPUT
              layout: NCWH
          preprocessing:
            - type: bgr_to_rgb

      launchers:
        - framework: dlsdk
          device: CPU

      datasets:
        - name: wider
          data_source: <PATH_TO_DATASET>/WIDER_val/images
          annotation_conversion:
            converter: wider
            annotation_file: <PATH_TO_DATASET>/wider_face_split/wider_face_val_bbx_gt.txt

          postprocessing:
            - type: filter
              apply_to: prediction
              is_empty: True
            - type: filter
              height_range: 60
              apply_to: annotation

          metrics:
            - type: recall
              ignore_difficult: True
              include_boundaries: True
              allow_multiple_matches_per_ignored: True
              distinct_conf: False

            - type: map
              ignore_difficult: True
              include_boundaries: True
              allow_multiple_matches_per_ignored: True
              distinct_conf: False
38
tools/pot/configs/examples/accuracy_checker/ncf.yaml
Normal file
@ -0,0 +1,38 @@
models:
  - name: NCF

    launchers:
      - framework: dlsdk
        device: CPU
        adapter: hit_ratio_adapter
        batch: 2048
        inputs:
          - type: INPUT
            value: "u"
            name: embedding/embedding_lookup/placeholder_port_1
          - type: INPUT
            value: "i"
            name: embedding_1/embedding_lookup/placeholder_port_1
          - type: INPUT
            value: "u"
            name: embedding_2/embedding_lookup/placeholder_port_1
          - type: INPUT
            value: "i"
            name: embedding_3/embedding_lookup/placeholder_port_1
        allow_reshape_input: True

    datasets:
      - name: ncf_dataset
        data_source: ncf
        annotation: <EXISTING FOLDER PATH TO STORE ANNOTATION FILES>/ncf_converter.pickle
        dataset_meta: <EXISTING FOLDER PATH TO STORE ANNOTATION FILES>/ncf_converter.json
        annotation_conversion:
          converter: movie_lens_converter
          rating_file: <PATH TO RATING FILE>/ml-20m-test-ratings.csv
          negative_file: <PATH TO NEGATIVE FILE>/ml-20m-test-negative.csv
          users_max_number: 2048
        reader: ncf_data_reader

        metrics:
          - type: hit_ratio
          - type: ndcg
@ -0,0 +1,31 @@
models:
  - name: ssd-mobilenetv1

    launchers:
      - framework: dlsdk
        device: CPU
        adapter: ssd
        batch: 1

    datasets:
      - name: classification_dataset
        data_source: <PATH TO VALIDATION DATASET>/VOC2007/JPEGImages
        annotation_conversion:
          converter: "voc_detection"
          annotations_dir: <PATH TO VALIDATION DATASET>/VOC2007/Annotations
          images_dir: <PATH TO VALIDATION DATASET>/VOC2007/JPEGImages
          imageset_file: <PATH TO VALIDATION DATASET>/VOC2007/ImageSets/Main/test.txt
          has_background: true

        preprocessing:
          - type: resize
            size: 300

        postprocessing:
          - type: resize_prediction_boxes

        metrics:
          - type: map
            integral: 11point
            ignore_difficult: true
            presenter: print_scalar
@ -0,0 +1,37 @@
models:
  - name: SSD_ResNet34
    launchers:
      - framework: dlsdk
        adapter:
          type: ssd_onnx
          scores_out: .*scores.*
          labels_out: .*labels.*
          bboxes_out: .*bboxes.*

    datasets:
      - name: COCO2017_80cl_bkgr
        data_source: <PATH TO VALIDATION DATASET>/val2017
        annotation_conversion:
          converter: mscoco_detection
          annotation_file: <PATH TO VALIDATION DATASET>/annotations/instances_val2017.json
          has_background: True
          use_full_label_map: False

        reader: pillow_imread
        preprocessing:
          - type: resize
            size: 1200
            use_pillow: true
            interpolation: BILINEAR

        postprocessing:
          - type: resize_prediction_boxes

        metrics:
          - type: map
            integral: 11point
            ignore_difficult: true
            presenter: print_scalar
          - type: coco_precision
          - type: coco_orig_precision
@ -0,0 +1,30 @@
models:
  - name: ssd_resnet_50_512

    launchers:
      - framework: dlsdk
        device: CPU
        adapter: ssd

    datasets:
      - name: VOC2007_bkgr
        data_source: <PATH TO VALIDATION DATASET>/VOC2007/JPEGImages
        annotation_conversion:
          converter: voc_detection
          has_background: False
          annotations_dir: <PATH TO VALIDATION DATASET>/VOC2007/Annotations
          images_dir: <PATH TO VALIDATION DATASET>/VOC2007/JPEGImages
          imageset_file: <PATH TO VALIDATION DATASET>/VOC2007/ImageSets/Main/test.txt

        preprocessing:
          - type: resize
            size: 512

        postprocessing:
          - type: resize_prediction_boxes

        metrics:
          - type: map
            integral: 11point
            ignore_difficult: True
            presenter: print_scalar
@ -0,0 +1,21 @@
{
    "model": {
        "model_name": "densnet-121",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/densnet_121.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
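A note on how configs like the one above are consumed: they are normally passed to the POT command line (pot -c <config>.json), but the same settings can also be expressed through the Python API. Below is a minimal sketch, assuming the openvino.tools.pot entry points (load_model, save_model, IEEngine, create_pipeline) and user-supplied data_loader/metric objects implementing the POT DataLoader/Metric interfaces (see the DataLoader sketch further below); it is illustrative, not the tool's canonical code path.

    # Illustrative sketch only: mirrors the JSON config above via the Python API.
    from openvino.tools.pot import IEEngine, create_pipeline, load_model, save_model

    def quantize(data_loader, metric=None):
        """Run DefaultQuantization with the settings from the JSON config above."""
        model = load_model({
            "model_name": "densnet-121",
            "model": "<MODEL_PATH>",         # .xml path, as in the "model" section
            "weights": "<PATH_TO_WEIGHTS>",  # .bin path
        })
        engine = IEEngine(config={"device": "CPU"}, data_loader=data_loader, metric=metric)
        pipeline = create_pipeline([{
            "name": "DefaultQuantization",
            "params": {"preset": "mixed", "stat_subset_size": 300},
        }], engine)
        compressed_model = pipeline.run(model)
        save_model(compressed_model, "./optimized")
        return compressed_model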
@ -0,0 +1,60 @@
{
    "model": {
        "model_name": "inceptionv3",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "datasets": [
            {
                "name": "imagenet_1001_classes",
                "data_source": "PATH_TO_DATASET",
                "annotation_conversion": {
                    "annotation_file": "PATH_TO_ANNOTATION_FILE",
                    "has_background": true,
                    "converter": "imagenet"
                },
                "preprocessing": [
                    {
                        "type": "crop",
                        "central_fraction": 0.875
                    },
                    {
                        "type": "resize",
                        "size": 299
                    }
                ],
                "metrics": [
                    {
                        "type": "accuracy",
                        "name": "accuracy@top1",
                        "top_k": 1
                    },
                    {
                        "type": "accuracy",
                        "name": "accuracy@top5",
                        "top_k": 5
                    }
                ]
            }
        ],
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -0,0 +1,21 @@
{
    "model": {
        "model_name": "mobilenetv1",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mobilenet_v1_tf.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -0,0 +1,68 @@
{
    "model": {
        "model_name": "mobilenet_v2_1.0_224",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "adapter": "classification"
            }
        ],
        "datasets": [
            {
                "name": "imagenet_1000_classes",
                "reader": "pillow_imread",
                "annotation_conversion": {
                    "converter": "imagenet",
                    "annotation_file": "PATH_TO_ANNOTATION_FILE"
                },
                "data_source": "PATH_TO_VALIDATION_IMAGES",
                "preprocessing": [
                    {
                        "type": "bgr_to_rgb"
                    },
                    {
                        "type": "resize",
                        "size": 256,
                        "aspect_ratio_scale": "greater",
                        "use_pillow": true,
                        "interpolation": "BILINEAR"
                    },
                    {
                        "type": "crop",
                        "size": 224,
                        "use_pillow": true
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    },
                    {
                        "name": "accuracy@top5",
                        "type": "accuracy",
                        "top_k": 5
                    }
                ]
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -0,0 +1,21 @@
{
    "model": {
        "model_name": "mobilenetv2",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -0,0 +1,21 @@
{
    "model": {
        "model_name": "mobilenet_v2_1.0_224",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -0,0 +1,21 @@
{
    "model": {
        "model_name": "mobilenet_v2_1.0_224",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "AccuracyAwareQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -0,0 +1,25 @@
{
    "model": {
        "model_name": "mobilenet_v2_1.0_224",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "type": "simplified",
        // You can specify a path to a directory with images or to a video file.
        // You can also specify a template for file names to filter the images to load.
        // Templates are Unix style.
        "data_source": "PATH_TO_IMAGES"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
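Simplified mode needs only raw images, no annotations, which corresponds to returning empty annotations from the Python API's DataLoader interface. A minimal sketch, assuming the openvino.tools.pot.DataLoader base class and OpenCV for image reading; the class name and flat directory layout are illustrative:

    import os

    import cv2
    from openvino.tools.pot import DataLoader

    class ImageFolderLoader(DataLoader):
        """Yields (annotation, image) pairs from a flat directory of images."""

        def __init__(self, config):
            super().__init__(config)
            self._dir = config["data_source"]
            self._files = sorted(os.listdir(self._dir))

        def __len__(self):
            return len(self._files)

        def __getitem__(self, index):
            image = cv2.imread(os.path.join(self._dir, self._files[index]))
            # No ground truth is available in simplified mode, so the
            # annotation slot carries only the sample index.
            return (index, None), image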
@ -0,0 +1,30 @@
{
    "model": {
        "model_name": "mobilenet_v2_1.0_224",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            },
            {
                "name": "QuantNoiseEstimator",
                "params": {
                    "stat_subset_size": 100,
                    "mode": "full_fq_noise",
                    "type": "sqnr",
                    "results_dump_filename": "./mobilenetv2_sqnr_data.csv"
                }
            }
        ]
    }
}
@ -0,0 +1,29 @@
{
    "model": {
        "model_name": "mobilenet_v2_1.0_224",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            },
            {
                "name": "INT4MixedQuantization",
                "params": {
                    "stat_subset_size": 300,
                    "ranking_subset_size": 300,
                    "maximal_drop": 0.01
                }
            }
        ]
    }
}
@ -0,0 +1,102 @@
{
    "model": {
        "model_name": "se_resnet50",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ],
        "datasets": [
            {
                "name": "classification_dataset",
                "data_source": "<PATH_TO_DATASET>",
                "annotation_conversion": {
                    "converter": "imagenet",
                    "annotation_file": "<PATH_TO_ANNOTATION_FILE>"
                },
                "reader": "pillow_imread",
                "preprocessing": [
                    {
                        "type": "bgr_to_rgb"
                    },
                    {
                        "use_pillow": true,
                        "type": "resize",
                        "size": 256,
                        "interpolation": "BILINEAR",
                        "aspect_ratio_scale": "greater"
                    },
                    {
                        "type": "crop",
                        "size": 224,
                        "use_pillow": true
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    },
                    {
                        "name": "accuracy@top5",
                        "type": "accuracy",
                        "top_k": 5
                    }
                ]
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "MinMaxQuantization",
                "params": {
                    "target_device": "CPU",
                    "preset": "mixed",
                    "stat_subset_size": 1000,
                    "ignored": {
                        "scope": [
                            "400", "402",
                            "416", "418",
                            "432", "434",
                            "450", "452",
                            "466", "468",
                            "482", "484",
                            "498", "500",
                            "516", "518",
                            "532", "534",
                            "548", "550",
                            "564", "566",
                            "580", "582",
                            "596", "598",
                            "614", "616",
                            "630", "632",
                            "646", "648"
                        ]
                    },
                    "weights": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "perchannel"
                    },
                    "activations": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "pertensor"
                    }
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 1000
                }
            }
        ]
    }
}
@ -0,0 +1,77 @@
{
    "model": {
        "model_name": "squeezenet1_1",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ],
        "datasets": [
            {
                "name": "classification_dataset",
                "data_source": "<PATH_TO_DATASET>",
                "annotation_conversion": {
                    "converter": "imagenet",
                    "annotation_file": "<PATH_TO_ANNOTATION_FILE>",
                    "has_background": false
                },
                "reader": "pillow_imread",
                "preprocessing": [
                    {
                        "type": "bgr_to_rgb"
                    },
                    {
                        "use_pillow": true,
                        "type": "resize",
                        "size": 256,
                        "interpolation": "BILINEAR",
                        "aspect_ratio_scale": "greater"
                    },
                    {
                        "type": "crop",
                        "size": 224,
                        "use_pillow": true
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    },
                    {
                        "name": "accuracy@top5",
                        "type": "accuracy",
                        "top_k": 5
                    }
                ]
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 1000,
                    "weights": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "perchannel"
                    },
                    "activations": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "pertensor"
                    }
                }
            }
        ]
    }
}
@ -0,0 +1,72 @@
{
    "model": {
        "name": "bert_base_squad_1_1",
        "model": "<PATH_TO_MODEL>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/bert_base_squad_1_1_tf_int8.yml"
    },
    "compression": {
        "model_type": "transformer",
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "accuracy",
                    "stat_subset_size": 1000,
                    "weights": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "perchannel",
                        "level_low": -127,
                        "level_high": 127
                    },
                    "activations": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "pertensor"
                    },
                    "ignored": {
                        "scope": [
                            "bert/encoder/layer_0/output/dense/MatMul",
                            "bert/encoder/layer_0/intermediate/dense/MatMul",
                            "bert/encoder/layer_0/attention/self/key/MatMul",
                            "bert/encoder/layer_0/attention/output/dense/MatMul",
                            "bert/encoder/layer_0/attention/self/MatMul",
                            "bert/encoder/layer_0/attention/self/MatMul_1",
                            "bert/encoder/layer_1/attention/self/key/MatMul",
                            "bert/encoder/layer_1/attention/self/MatMul",
                            "bert/encoder/layer_1/attention/self/MatMul_1",
                            "bert/encoder/layer_2/attention/self/MatMul",
                            "bert/encoder/layer_2/attention/self/MatMul_1",
                            "bert/encoder/layer_3/output/dense/MatMul",
                            "bert/encoder/layer_3/intermediate/dense/MatMul",
                            "bert/encoder/layer_3/attention/output/dense/MatMul",
                            "bert/encoder/layer_3/attention/self/MatMul",
                            "bert/encoder/layer_3/attention/self/MatMul_1",
                            "bert/encoder/layer_4/attention/self/value/MatMul",
                            "bert/encoder/layer_4/attention/self/MatMul",
                            "bert/encoder/layer_4/attention/self/MatMul_1",
                            "bert/encoder/layer_5/attention/self/MatMul",
                            "bert/encoder/layer_5/attention/self/MatMul_1",
                            "bert/encoder/layer_6/attention/self/MatMul",
                            "bert/encoder/layer_6/attention/self/MatMul_1",
                            "bert/encoder/layer_7/attention/self/MatMul",
                            "bert/encoder/layer_7/attention/self/MatMul_1",
                            "bert/encoder/layer_8/attention/self/MatMul",
                            "bert/encoder/layer_8/attention/self/MatMul_1",
                            "bert/encoder/layer_9/attention/self/MatMul",
                            "bert/encoder/layer_9/attention/self/MatMul_1",
                            "bert/encoder/layer_10/attention/self/MatMul",
                            "bert/encoder/layer_10/attention/self/MatMul_1",
                            "bert/encoder/layer_11/attention/self/MatMul",
                            "bert/encoder/layer_11/attention/self/MatMul_1",
                            "loss/MatMul"
                        ]
                    }
                }
            }
        ]
    }
}
@ -0,0 +1,69 @@
{
    "model": {
        "name": "bert_base_squad_1_1",
        "model": "<PATH_TO_MODEL>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/bert_base_squad_1_1_tf_int8.yml"
    },
    "compression": {
        "model_type": "transformer",
        "algorithms": [
            {
                "name": "AccuracyAwareQuantization",
                "params": {
                    "metric_subset_ratio": 1,
                    "ranking_subset_size": 300,
                    "max_iter_num": 500,
                    "maximal_drop": 0.01,
                    "drop_type": "relative",
                    "base_algorithm": "DefaultQuantization",
                    "use_prev_if_drop_increase": true,
                    "range_estimator": {
                        "preset": "default"
                    },
                    "stat_subset_size": 1000,
                    "ignored": {
                        "scope": [
                            "bert/encoder/layer_0/output/dense/MatMul",
                            "bert/encoder/layer_0/intermediate/dense/MatMul",
                            "bert/encoder/layer_0/attention/self/key/MatMul",
                            "bert/encoder/layer_0/attention/output/dense/MatMul",
                            "bert/encoder/layer_0/attention/self/MatMul",
                            "bert/encoder/layer_0/attention/self/MatMul_1",
                            "bert/encoder/layer_1/attention/self/key/MatMul",
                            "bert/encoder/layer_1/attention/self/MatMul",
                            "bert/encoder/layer_1/attention/self/MatMul_1",
                            "bert/encoder/layer_2/attention/self/MatMul",
                            "bert/encoder/layer_2/attention/self/MatMul_1",
                            "bert/encoder/layer_3/output/dense/MatMul",
                            "bert/encoder/layer_3/intermediate/dense/MatMul",
                            "bert/encoder/layer_3/attention/output/dense/MatMul",
                            "bert/encoder/layer_3/attention/self/MatMul",
                            "bert/encoder/layer_3/attention/self/MatMul_1",
                            "bert/encoder/layer_4/attention/self/value/MatMul",
                            "bert/encoder/layer_4/attention/self/MatMul",
                            "bert/encoder/layer_4/attention/self/MatMul_1",
                            "bert/encoder/layer_5/attention/self/MatMul",
                            "bert/encoder/layer_5/attention/self/MatMul_1",
                            "bert/encoder/layer_6/attention/self/MatMul",
                            "bert/encoder/layer_6/attention/self/MatMul_1",
                            "bert/encoder/layer_7/attention/self/MatMul",
                            "bert/encoder/layer_7/attention/self/MatMul_1",
                            "bert/encoder/layer_8/attention/self/MatMul",
                            "bert/encoder/layer_8/attention/self/MatMul_1",
                            "bert/encoder/layer_9/attention/self/MatMul",
                            "bert/encoder/layer_9/attention/self/MatMul_1",
                            "bert/encoder/layer_10/attention/self/MatMul",
                            "bert/encoder/layer_10/attention/self/MatMul_1",
                            "bert/encoder/layer_11/attention/self/MatMul",
                            "bert/encoder/layer_11/attention/self/MatMul_1",
                            "loss/MatMul"
                        ]
                    }
                }
            }
        ]
    }
}
@ -0,0 +1,86 @@
{
    "model": {
        "name": "bert_large_squad_1_1",
        "model": "<PATH_TO_MODEL>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/bert_large_squad_1_1_tf_int8.yml"
    },
    "compression": {
        "model_type": "transformer",
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 100,
                    "ignored": {
                        "scope": [
                            "bert/encoder/layer_0/attention/self/MatMul",
                            "bert/encoder/layer_1/attention/self/MatMul",
                            "bert/encoder/layer_2/attention/self/MatMul",
                            "bert/encoder/layer_3/attention/self/MatMul",
                            "bert/encoder/layer_4/attention/self/MatMul",
                            "bert/encoder/layer_5/attention/self/MatMul",
                            "bert/encoder/layer_6/attention/self/MatMul",
                            "bert/encoder/layer_7/attention/self/MatMul",
                            "bert/encoder/layer_8/attention/self/MatMul",
                            "bert/encoder/layer_9/attention/self/MatMul",
                            "bert/encoder/layer_10/attention/self/MatMul",
                            "bert/encoder/layer_11/attention/self/MatMul",
                            "bert/encoder/layer_12/attention/self/MatMul",
                            "bert/encoder/layer_13/attention/self/MatMul",
                            "bert/encoder/layer_14/attention/self/MatMul",
                            "bert/encoder/layer_15/attention/self/MatMul",
                            "bert/encoder/layer_16/attention/self/MatMul",
                            "bert/encoder/layer_17/attention/self/MatMul",
                            "bert/encoder/layer_18/attention/self/MatMul",
                            "bert/encoder/layer_19/attention/self/MatMul",
                            "bert/encoder/layer_20/attention/self/MatMul",
                            "bert/encoder/layer_21/attention/self/MatMul",
                            "bert/encoder/layer_22/attention/self/MatMul",
                            "bert/encoder/layer_23/attention/self/MatMul",
                            "bert/encoder/layer_23/attention/self/MatMul_1",
                            "bert/encoder/layer_22/attention/self/MatMul_1",
                            "bert/encoder/layer_21/attention/self/MatMul_1",
                            "bert/encoder/layer_20/attention/self/MatMul_1",
                            "bert/encoder/layer_19/attention/self/MatMul_1",
                            "bert/encoder/layer_18/attention/self/MatMul_1",
                            "bert/encoder/layer_17/attention/self/MatMul_1",
                            "bert/encoder/layer_16/attention/self/MatMul_1",
                            "bert/encoder/layer_15/attention/self/MatMul_1",
                            "bert/encoder/layer_14/attention/self/MatMul_1",
                            "bert/encoder/layer_13/attention/self/MatMul_1",
                            "bert/encoder/layer_12/attention/self/MatMul_1",
                            "bert/encoder/layer_11/attention/self/MatMul_1",
                            "bert/encoder/layer_10/attention/self/MatMul_1",
                            "bert/encoder/layer_9/attention/self/MatMul_1",
                            "bert/encoder/layer_8/attention/self/MatMul_1",
                            "bert/encoder/layer_7/attention/self/MatMul_1",
                            "bert/encoder/layer_6/attention/self/MatMul_1",
                            "bert/encoder/layer_5/attention/self/MatMul_1",
                            "bert/encoder/layer_4/attention/self/MatMul_1",
                            "bert/encoder/layer_3/attention/self/MatMul_1",
                            "bert/encoder/layer_2/attention/self/MatMul_1",
                            "bert/encoder/layer_1/attention/self/MatMul_1",
                            "bert/encoder/layer_0/attention/self/MatMul_1",
                            "bert/encoder/layer_0/attention/output/dense/MatMul",
                            "bert/encoder/layer_3/attention/output/dense/MatMul",
                            "bert/encoder/layer_4/attention/self/value/MatMul",
                            "bert/encoder/layer_0/attention/self/key/MatMul",
                            "bert/encoder/layer_1/attention/self/key/MatMul",
                            "bert/encoder/layer_0/intermediate/dense/MatMul",
                            "bert/encoder/layer_0/output/dense/MatMul",
                            "bert/encoder/layer_3/intermediate/dense/MatMul",
                            "bert/encoder/layer_3/output/dense/MatMul",
                            "bert/encoder/layer_7/attention/self/key/MatMul"
                        ]
                    }
                }
            }
        ]
    }
}
@ -0,0 +1,87 @@
{
    "model": {
        "name": "bert_large_squad_1_1",
        "model": "<PATH_TO_MODEL>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/bert_large_squad_1_1_tf_int8.yml"
    },
    "compression": {
        "target_device": "CPU",
        "model_type": "transformer",
        "algorithms": [
            {
                "name": "AccuracyAwareQuantization",
                "params": {
                    "max_iter_num": 500,
                    "stat_subset_size": 100,
                    "ignored": {
                        "scope": [
                            "bert/encoder/layer_0/attention/self/MatMul",
                            "bert/encoder/layer_1/attention/self/MatMul",
                            "bert/encoder/layer_2/attention/self/MatMul",
                            "bert/encoder/layer_3/attention/self/MatMul",
                            "bert/encoder/layer_4/attention/self/MatMul",
                            "bert/encoder/layer_5/attention/self/MatMul",
                            "bert/encoder/layer_6/attention/self/MatMul",
                            "bert/encoder/layer_7/attention/self/MatMul",
                            "bert/encoder/layer_8/attention/self/MatMul",
                            "bert/encoder/layer_9/attention/self/MatMul",
                            "bert/encoder/layer_10/attention/self/MatMul",
                            "bert/encoder/layer_11/attention/self/MatMul",
                            "bert/encoder/layer_12/attention/self/MatMul",
                            "bert/encoder/layer_13/attention/self/MatMul",
                            "bert/encoder/layer_14/attention/self/MatMul",
                            "bert/encoder/layer_15/attention/self/MatMul",
                            "bert/encoder/layer_16/attention/self/MatMul",
                            "bert/encoder/layer_17/attention/self/MatMul",
                            "bert/encoder/layer_18/attention/self/MatMul",
                            "bert/encoder/layer_19/attention/self/MatMul",
                            "bert/encoder/layer_20/attention/self/MatMul",
                            "bert/encoder/layer_21/attention/self/MatMul",
                            "bert/encoder/layer_22/attention/self/MatMul",
                            "bert/encoder/layer_23/attention/self/MatMul",
                            "bert/encoder/layer_23/attention/self/MatMul_1",
                            "bert/encoder/layer_22/attention/self/MatMul_1",
                            "bert/encoder/layer_21/attention/self/MatMul_1",
                            "bert/encoder/layer_20/attention/self/MatMul_1",
                            "bert/encoder/layer_19/attention/self/MatMul_1",
                            "bert/encoder/layer_18/attention/self/MatMul_1",
                            "bert/encoder/layer_17/attention/self/MatMul_1",
                            "bert/encoder/layer_16/attention/self/MatMul_1",
                            "bert/encoder/layer_15/attention/self/MatMul_1",
                            "bert/encoder/layer_14/attention/self/MatMul_1",
                            "bert/encoder/layer_13/attention/self/MatMul_1",
                            "bert/encoder/layer_12/attention/self/MatMul_1",
                            "bert/encoder/layer_11/attention/self/MatMul_1",
                            "bert/encoder/layer_10/attention/self/MatMul_1",
                            "bert/encoder/layer_9/attention/self/MatMul_1",
                            "bert/encoder/layer_8/attention/self/MatMul_1",
                            "bert/encoder/layer_7/attention/self/MatMul_1",
                            "bert/encoder/layer_6/attention/self/MatMul_1",
                            "bert/encoder/layer_5/attention/self/MatMul_1",
                            "bert/encoder/layer_4/attention/self/MatMul_1",
                            "bert/encoder/layer_3/attention/self/MatMul_1",
                            "bert/encoder/layer_2/attention/self/MatMul_1",
                            "bert/encoder/layer_1/attention/self/MatMul_1",
                            "bert/encoder/layer_0/attention/self/MatMul_1",
                            "bert/encoder/layer_0/attention/output/dense/MatMul",
                            "bert/encoder/layer_3/attention/output/dense/MatMul",
                            "bert/encoder/layer_4/attention/self/value/MatMul",
                            "bert/encoder/layer_0/attention/self/key/MatMul",
                            "bert/encoder/layer_1/attention/self/key/MatMul",
                            "bert/encoder/layer_0/intermediate/dense/MatMul",
                            "bert/encoder/layer_0/output/dense/MatMul",
                            "bert/encoder/layer_3/intermediate/dense/MatMul",
                            "bert/encoder/layer_3/output/dense/MatMul",
                            "bert/encoder/layer_7/attention/self/key/MatMul"
                        ]
                    }
                }
            }
        ]
    }
}
@ -0,0 +1,34 @@
{
    "model": {
        "model_name": "faster_rcnn_resnet101_coco",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/faster_rcnn_resnet101_coco.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300,
                    "ignored": {
                        "scope": [
                            "proposals/conv"
                        ]
                    },
                    "activations": {
                        "range_estimator": {
                            "max": {
                                "aggregator": "max",
                                "type": "abs_max"
                            }
                        }
                    }
                }
            }
        ]
    }
}
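The "range_estimator" override above deserves a note: read literally, "type": "abs_max" with "aggregator": "max" pins the upper activation bound to the largest absolute value observed, maximized across the statistics subset. As a sketch of the intended statistic (our notation, not the tool's):

    \text{max\_range} = \max_{b \in \text{subset}} \max_{i} \lvert x^{(b)}_{i} \rvert

A plausible reading is that the proposal-generating layers emit rare, large activations that milder aggregation would clip, which would also fit the ignored scope around proposals/conv.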
@ -0,0 +1,34 @@
{
    "model": {
        "model_name": "faster_rcnn_resnet50_coco",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/faster_rcnn_resnet50_coco.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300,
                    "ignored": {
                        "scope": [
                            "proposals/conv"
                        ]
                    },
                    "activations": {
                        "range_estimator": {
                            "max": {
                                "aggregator": "max",
                                "type": "abs_max"
                            }
                        }
                    }
                }
            }
        ]
    }
}
@ -0,0 +1,32 @@
{
    "model": {
        "model_name": "mask_rcnn_resnet50_atrous_coco",
        "model": "<MODEL_PATH>",
        "weights": "<WEIGHTS_PATH>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mask_rcnn_resnet50_atrous_coco.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300,
                    "ignored": {
                        "scope": [
                            "FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/block1/unit_1/bottleneck_v1/add",
                            "proposals/conv",
                            "proposals/reshape_4d",
                            "SecondStageFeatureExtractor_1/resnet_v1_50/block4/unit_1/bottleneck_v1/shortcut/Conv2D",
                            "SecondStageFeatureExtractor_1/resnet_v1_50/block4/unit_1/bottleneck_v1/conv1/Conv2D",
                            "SecondStageFeatureExtractor/resnet_v1_50/block4/unit_1/bottleneck_v1/shortcut/Conv2D",
                            "SecondStageFeatureExtractor/resnet_v1_50/block4/unit_1/bottleneck_v1/conv1/Conv2D"
                        ]
                    }
                }
            }
        ]
    }
}
@ -0,0 +1,36 @@
{
    "model": {
        "model_name": "mtcnn",
        "cascade": [
            {
                "name": "pnet",
                "model": "<MODEL_PATH>",
                "weights": "<PATH_TO_WEIGHTS>"
            },
            {
                "name": "rnet",
                "model": "<MODEL_PATH>",
                "weights": "<PATH_TO_WEIGHTS>"
            },
            {
                "name": "onet",
                "model": "<MODEL_PATH>",
                "weights": "<PATH_TO_WEIGHTS>"
            }
        ]
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mtcnn.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -0,0 +1,21 @@
{
    "model": {
        "model_name": "mobilenet_ssd",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -0,0 +1,59 @@
{
    "model": {
        "model_name": "mobilenet-ssd",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "ssd"
            }
        ],
        "datasets": [
            {
                "name": "VOC2007",
                "data_source": "<PATH TO VALIDATION DATASET>/VOC2007/JPEGImages",
                "annotation_conversion": {
                    "converter": "voc_detection",
                    "annotations_dir": "<PATH TO VALIDATION DATASET>/VOC2007/Annotations",
                    "images_dir": "<PATH TO VALIDATION DATASET>/VOC2007/JPEGImages",
                    "imageset_file": "<PATH TO VALIDATION DATASET>/VOC2007/ImageSets/Main/test.txt"
                },
                "preprocessing": [
                    {
                        "type": "resize",
                        "size": 300
                    }
                ],
                "postprocessing": [
                    {
                        "type": "resize_prediction_boxes"
                    }
                ],
                "metrics": [
                    {
                        "type": "map",
                        "integral": "11point",
                        "ignore_difficult": true,
                        "presenter": "print_scalar"
                    }
                ]
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "MinMaxQuantization",
                "params": {
                    "preset": "mixed",
                    "range_estimator": {
                        "preset": "quantile"
                    },
                    "stat_subset_size": 1000
                }
            }
        ]
    }
}
@ -0,0 +1,42 @@
{
    "model": {
        "model_name": "ssd_resnet34_1200",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/ssd_resnet34.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "stat_subset_size": 300,
                    "preset": "performance",
                    "ignored": {
                        "scope": [
                            "Mul_490",
                            "Mul_509",
                            "Add_511",
                            "Mul_507",
                            "Exp_512",
                            "Mul_514",
                            "Mul_548/Fused_Mul_",
                            "Mul_583/Fused_Mul_",
                            "Mul_618",
                            "Mul_653",
                            "Sub_549/add_",
                            "Sub_584/add_",
                            "Add_619",
                            "Add_654",
                            "Mul_703",
                            "Add_704",
                            "Add_labels"
                        ]
                    }
                }
            }
        ]
    }
}
@ -0,0 +1,57 @@
{
    "model": {
        "model_name": "ssd_resnet50_512_mxnet",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "ssd"
            }
        ],
        "datasets": [
            {
                "name": "VOC2007",
                "data_source": "<PATH TO VALIDATION DATASET>/VOC2007/JPEGImages",
                "annotation_conversion": {
                    "converter": "voc_detection",
                    "has_background": false,
                    "annotations_dir": "<PATH TO VALIDATION DATASET>/VOC2007/Annotations",
                    "images_dir": "<PATH TO VALIDATION DATASET>/VOC2007/JPEGImages",
                    "imageset_file": "<PATH TO VALIDATION DATASET>/VOC2007/ImageSets/Main/test.txt"
                },
                "preprocessing": [
                    {
                        "type": "resize",
                        "size": 512
                    }
                ],
                "postprocessing": [
                    {
                        "type": "resize_prediction_boxes"
                    }
                ],
                "metrics": [
                    {
                        "type": "map",
                        "integral": "11point",
                        "ignore_difficult": true,
                        "presenter": "print_scalar"
                    }
                ]
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -0,0 +1,56 @@
{
    "model": {
        "model_name": "mobilenetv1",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mobilenet_v1_tf.yaml"
    },
    "optimizer": {
        "name": "Tpe",
        "params": {
            "max_trials": 200,
            "max_minutes": 1440,
            "trials_load_method": "cold_start",
            "accuracy_loss": 0.1,
            "latency_reduce": 1.5,
            "accuracy_weight": 1.0,
            "latency_weight": 1.0,
            "benchmark": {
                "performance_count": false,
                "batch_size": 1,
                "nthreads": 8,
                "nstreams": 1,
                "nireq": 1,
                "api_type": "async",
                "niter": 1,
                "duration_seconds": 30
            }
        }
    },
    "compression": {
        "algorithms": [
            {
                "name": "ActivationChannelAlignment",
                "params": {
                    "stat_subset_size": 1000
                }
            },
            {
                "name": "TunableQuantization",
                "params": {
                    "stat_subset_size": 1000,
                    "preset": "performance",
                    "tuning_scope": ["layer"]
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 1000
                }
            }
        ]
    }
}
@ -0,0 +1,47 @@
{
    "model": {
        "model_name": "mobilenetv2",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "MinMaxQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 1000,
                    "weights": {
                        "bits": 8,
                        "mode": "asymmetric",
                        "granularity": "perchannel"
                    },
                    "activations": {
                        "bits": 8,
                        "mode": "asymmetric",
                        "granularity": "pertensor"
                    }
                }
            },
            {
                "name": "RangeOptimization",
                "params": {
                    "stat_subset_size": 5000,
                    "result_filename": "rangeopt_results.csv",
                    "lower_boxsize": 0.1,
                    "upper_boxsize": 0.1,
                    "maxiter": 1500,
                    "optimization_scope": ["317", "315"],
                    "metric_name": "accuracy@top1",
                    "opt_backend": "nevergrad",
                    "optimizer_name": "CMA"
                    // Use the option below to validate range values:
                    // "activation_ranges_to_set": {"317": [-0.0380698, 2.35978142]}
                }
            }
        ]
    }
}
@ -0,0 +1,56 @@
{
    "model": {
        "model_name": "mobilenetv2",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml"
    },
    "optimizer": {
        "name": "Tpe",
        "params": {
            "max_trials": 200,
            "max_minutes": 1440,
            "trials_load_method": "cold_start",
            "accuracy_loss": 0.1,
            "latency_reduce": 1.5,
            "accuracy_weight": 1.0,
            "latency_weight": 1.0,
            "benchmark": {
                "performance_count": false,
                "batch_size": 1,
                "nthreads": 8,
                "nstreams": 1,
                "nireq": 1,
                "api_type": "async",
                "niter": 1,
                "duration_seconds": 30
            }
        }
    },
    "compression": {
        "algorithms": [
            {
                "name": "ActivationChannelAlignment",
                "params": {
                    "stat_subset_size": 1000
                }
            },
            {
                "name": "TunableQuantization",
                "params": {
                    "stat_subset_size": 1000,
                    "preset": "performance",
                    "tuning_scope": ["layer"]
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 1000
                }
            }
        ]
    }
}
@ -0,0 +1,56 @@
{
    "model": {
        "model_name": "mobilenetv2",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "QuantileTuningAlgorithm",
                "params": {
                    "opt_backend": "skopt",
                    "maxiter": 100,
                    "optimization_subset_size": 1000,
                    "preset": "mixed",
                    "stat_subset_size": 1000,
                    "weights": {
                        "bits": 8,
                        "mode": "asymmetric",
                        "granularity": "perchannel",
                        "range_estimator": {
                            "min": {
                                "type": "quantile",
                                "outlier_prob": 0.0015
                            },
                            "max": {
                                "type": "quantile",
                                "outlier_prob": 0.0015
                            }
                        }
                    },
                    "activations": {
                        "bits": 8,
                        "mode": "asymmetric",
                        "granularity": "pertensor",
                        "range_estimator": {
                            "min": {
                                "aggregator": "mean",
                                "type": "quantile",
                                "outlier_prob": 1e-3
                            },
                            "max": {
                                "aggregator": "mean",
                                "type": "quantile",
                                "outlier_prob": 1e-3
                            }
                        }
                    }
                }
            }
        ]
    }
}
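For contrast with plain min/max statistics, the quantile estimators above trim outliers: with outlier probability p, the range endpoints sit at the p-th and (1-p)-th quantiles of the observed values, mean-aggregated across batches for the activations here. As a sketch (our notation):

    \text{min} = Q_{p}(x), \qquad \text{max} = Q_{1-p}(x)

so with "outlier_prob": 0.0015, roughly the most extreme 0.15% of values on each side are ignored when the quantization ranges are chosen.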
@ -0,0 +1,59 @@
{
    "model": {
        "model_name": "mobilenet-ssd",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/ssd_mobilenet_v1.yaml"
    },
    "optimizer": {
        "name": "Tpe",
        "params": {
            "max_trials": 200,
            "max_minutes": 1440,
            "trials_load_method": "cold_start",
            "accuracy_loss": 0.1,
            "latency_reduce": 1.5,
            "accuracy_weight": 1.0,
            "latency_weight": 0.0,
            "benchmark": {
                "performance_count": false,
                "batch_size": 1,
                "nthreads": 8,
                "nstreams": 1,
                "nireq": 1,
                "api_type": "async",
                "niter": 1,
                "duration_seconds": 30
            }
        }
    },
    "compression": {
        "algorithms": [
            {
                "name": "ActivationChannelAlignment",
                "params": {
                    "stat_subset_size": 1000
                }
            },
            {
                "name": "TunableQuantization",
                "params": {
                    "stat_subset_size": 1000,
                    "preset": "performance",
                    "tuning_scope": ["range_estimator"],
                    "estimator_tuning_scope": ["preset", "outlier_prob"],
                    "outlier_prob_choices": [1e-3, 1e-4, 1e-5]
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 1000
                }
            }
        ]
    }
}
@ -0,0 +1,33 @@
{
    "model": {
        "model_name": "ncf",
        "model": "<PATH_TO_MODEL>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/ncf.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "MinMaxQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 1000,
                    "weights": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "perchannel",
                        "level_low": -127,
                        "level_high": 127
                    },
                    "activations": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "pertensor"
                    }
                }
            }
        ]
    }
}
@ -0,0 +1,60 @@
{
    "model": {
        "model_name": "ncf",
        "model": "<PATH_TO_MODEL>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/ncf.yaml"
    },
    "optimizer": {
        "name": "Tpe",
        "params": {
            "max_trials": 100,
            "max_minutes": 10,
            "trials_load_method": "cold_start",
            "accuracy_loss": 0.1,
            "latency_reduce": 1.5,
            "expected_quantization_ratio": 0.5,
            "accuracy_weight": 1.0,
            "latency_weight": 1.0,
            "quantization_ratio_weight": 1.0,
            "benchmark": {
                "cpu_bind_thread": "YES",
                "nthreads": 4,
                "nstreams": 0,
                "nireq": 0,
                "api_type": "async",
                "duration_seconds": 30,
                "benchmark_app_dir": ""
            }
        }
    },
    "compression": {
        "algorithms": [
            {
                "name": "ActivationChannelAlignment",
                "params": {
                    "stat_subset_size": 1000
                }
            },
            {
                "name": "TunableQuantization",
                "params": {
                    "stat_subset_size": 1000,
                    "preset": "performance",
                    "tuning_scope": ["range_estimator"],
                    "estimator_tuning_scope": ["preset", "aggregator", "type", "outlier_prob"],
                    "outlier_prob_choices": [1e-3, 1e-4, 1e-5]
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 1000
                }
            }
        ]
    }
}
@ -0,0 +1,49 @@
{
    "model": {
        "model_name": "brain-tumor-segmentation-0001",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "adapter": {
                    "type": "brain_tumor_segmentation"
                }
            }
        ],
        "datasets": [
            {
                "name": "BraTS",
                "data_source": "<PATH_TO_DATASET>",
                "annotation_conversion": {
                    "converter": "brats_numpy",
                    "data_dir": "<PATH_TO_DATASET>",
                    "ids_file": "<PATH_TO_IDS_FILE>",
                    "labels_file": "<PATH_TO_LABELS_FILE>"
                },
                "reader": "numpy_reader",
                "metrics": [
                    {
                        "type": "dice_index",
                        "median": true,
                        "presenter": "print_vector"
                    }
                ]
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "stat_subset_size": 72,
                    "preset": "performance"
                }
            }
        ]
    }
}
@ -0,0 +1,26 @@
{
    "model": {
        "model_name": "east",
        "model": "<MODEL_PATH>",
        "weights": "<WEIGHTS_PATH>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/east_resnet_v1_50.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "ignored": {
                        "scope": [
                            "feature_fusion/Conv_4/Conv2D"
                        ]
                    },
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
30
tools/pot/configs/examples/sparsity/resnet-50-pytorch.json
Normal file
@ -0,0 +1,30 @@
{
    "model": {
        "model_name": "resnet-50-pytorch",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "WeightSparsity",
                "params": {
                    "sparsity_level": 0.5,
                    "apply_for_all_nodes": true,
                    "stat_subset_size": 300,
                    "use_layerwise_tuning": true
                }
            },
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
29
tools/pot/configs/examples/sparsity/ssd_resnet50_512.json
Normal file
@ -0,0 +1,29 @@
{
    "model": {
        "model_name": "ssd_resnet50_512",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "WeightSparsity",
                "params": {
                    "sparsity_level": 0.5,
                    "apply_for_all_nodes": true,
                    "stat_subset_size": 300
                }
            },
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
44
tools/pot/configs/simplified_mode_template.json
Normal file
@ -0,0 +1,44 @@
/* This configuration file is the fastest way to get started with the default
quantization algorithm in simplified mode. It contains only mandatory options
with commonly used values. All other options can be considered as an advanced
mode and require deep knowledge of the quantization process. An overall description
of all possible parameters can be found in default_quantization_spec.json */

{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to the model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to the weights (.bin format)
    },

    /* Parameters of the engine used for model inference */

    "engine": {
        "type": "simplified",
        "data_source": "PATH_TO_SOURCE" // You can specify a path to a directory with images. You can also
                                        // specify a template for file names to filter the images to load.
                                        // Templates are Unix style (this option is valid only in simplified mode).
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "DefaultQuantization", // Optimization algorithm name
                "params": {
                    "preset": "performance", // Preset [performance, mixed, accuracy] which controls the quantization
                                             // mode (symmetric, mixed (weights symmetric and activations asymmetric),
                                             // and fully asymmetric, respectively)

                    "stat_subset_size": 300 // Size of the subset used to calculate activation statistics
                                            // for quantization parameter calculation
                }
            }
        ]
    }
}
@ -0,0 +1,44 @@
/* This configuration file is the fastest way to get started with the default
sparsity and default quantization algorithms. It contains only mandatory options
with commonly used values. All other options can be considered as an advanced
mode and require deep knowledge of the quantization process. An overall description
of all possible parameters can be found in default_quantization_spec.json */

{
    /* Model parameters */
    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to the model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to the weights (.bin format)
    },
    /* Parameters of the engine used for model inference */
    "engine": {
        "config": "<CONFIG_PATH>" // Path to the Accuracy Checker config
    },
    /* Optimization hyperparameters */
    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "WeightSparsity",
                "params": {
                    "sparsity_level": 0.3,
                    "stat_subset_size": 300 // Size of the subset used to calculate activation statistics
                                            // for quantization parameter calculation
                }
            },
            {
                "name": "DefaultQuantization", // Optimization algorithm name
                "params": {
                    // Preset [performance, mixed, accuracy] which controls the quantization mode
                    // (symmetric, mixed (weights symmetric and activations asymmetric), and fully
                    // asymmetric, respectively)
                    "preset": "performance",
                    "stat_subset_size": 300 // Size of the subset used to calculate activation statistics
                                            // for quantization parameter calculation
                }
            }
        ]
    }
}
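The "sparsity_level" parameter in the template above is the target fraction of weight values driven to exactly zero. A minimal sketch of the quantity being targeted, assuming NumPy (purely illustrative, not POT code):

    import numpy as np

    def sparsity_level(weights: np.ndarray) -> float:
        """Fraction of exactly-zero values in a weight tensor."""
        return float(np.count_nonzero(weights == 0)) / weights.size

    # A tensor where 30% of the values are zero satisfies the template's
    # "sparsity_level": 0.3 target.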
133
tools/pot/configs/tpe_spec.json
Normal file
@ -0,0 +1,133 @@
{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to a model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
    },

    /* Parameters of the engine used for model inference. */

    /* Post-Training Optimization Tool supports an engine based on the Accuracy Checker and a custom engine.
       For a custom engine, specify your own set of parameters.
       The engine based on the Accuracy Checker uses Accuracy Checker parameters. You can specify the parameters
       via the Accuracy Checker config file or directly in the engine section.
       Find more information about Accuracy Checker parameters at
       https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker */

    "engine": {
        "stat_requests_number": 8, // Number of requests during statistics collection
        "eval_requests_number": 8, // Number of requests during evaluation
        "config": "<CONFIG_PATH>",
        /* OR */
        "name": "model_name",
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ],
        "datasets": [
            {
                "name": "dataset_name",
                "data_source": "<DATASET_PATH>",
                "annotation": "<ANNOTATION_PATH>",
                "preprocessing": [
                    {
                        "type": "resize",
                        "interpolation": "BILINEAR",
                        "aspect_ratio_scale": "greater",
                        "size": 224
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    }
                ]
            }
        ]
    },

    /* Global optimizer used to find "optimal" hyperparameters */

    "optimizer": {
        "name": "Tpe", // Global optimizer name
        "params": {
            "max_trials": 100, // Maximum number of trials
            "max_minutes": 10, // [Optional] Trial time limit. When it expires, the last trial is completed and the best result is returned.
            "stop_on_target": true, // [Optional] Flag to stop TPE trials when the accuracy_loss and latency_reduce targets are reached.
                                    // If false or not specified, TPE will continue until max_trials or max_minutes is reached, even if the targets are reached earlier.
            "eval_subset_size": 2000, // [Optional] Subset of test data used to evaluate hyperparameters. The whole dataset is used if not specified.
            "trials_load_method": "cold_start", // Start from scratch or reuse previous results, supported options: [cold_start, warm_start, fine_tune, eval]
            "accuracy_loss": 0.1, // Accuracy threshold (%)
            "latency_reduce": 1.5, // Target latency improvement versus the original model
            "accuracy_weight": 1.0, // Accuracy weight in the loss function
            "latency_weight": 1.0, // Latency weight in the loss function
            // An optional list of reference metric values.
            // If not specified, all metrics will be calculated from the original model.
            "metrics": [
                {
                    "name": "accuracy", // Metric name
                    "baseline_value": 0.72 // Baseline metric value of the original model
                }
            ],
            "benchmark": {
                // Latency measurement benchmark configuration (https://docs.openvinotoolkit.org/latest/_inference_engine_samples_benchmark_app_README.html)
                "performance_count": false,
                "batch_size": 0,
                "nthreads": 4,
                "nstreams": 0,
                "nireq": 0,
                "api_type": "sync",
                "niter": 4,
                "duration_seconds": 30,
                "benchmark_app_dir": "<path to benchmark_app>" // Path to benchmark_app. If not specified, the Python-based benchmark is used. Use benchmark_app to reduce jitter in results.
            }
        }
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "ActivationChannelAlignment",
                "params": {
                    "stat_subset_size": 300 // Size of the subset used to calculate activation statistics
                                            // for quantization parameter calculation
                }
            },
            {
                "name": "TunableQuantization",
                "params": {
                    /* Preset is a collection of optimization algorithm parameters that tells the algorithm
                       which metric to concentrate on improving. Each optimization algorithm supports
                       [performance, mixed, accuracy] presets, which control the quantization mode
                       (symmetric, mixed (weights symmetric and activations asymmetric), and fully asymmetric, respectively) */
                    "preset": "performance",
                    "stat_subset_size": 300, // Size of the subset used to calculate activation statistics
                                             // for quantization parameter calculation
                    "tuning_scope": ["layer"], // List of quantization parameters that will be tuned,
                                               // available options: [bits, mode, granularity, layer, range_estimator]
                    "estimator_tuning_scope": ["preset", "aggregator", "type", "outlier_prob"], // List of range_estimator parameters that will be tuned,
                                                                                                // available options: [preset, aggregator, type, outlier_prob]
                    "outlier_prob_choices": [1e-3, 1e-4, 1e-5] // List of outlier_prob values to use when tuning the outlier_prob parameter
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 300 // Size of the subset used to calculate activation statistics
                                            // for quantization parameter calculation
                }
            }
        ]
    }
}
84
tools/pot/configs/tpe_template.json
Normal file
@ -0,0 +1,84 @@
/* This configuration file is the fastest way to get started with the TPE
   optimization algorithm. It contains only the mandatory options with commonly used
   values. All other options can be considered advanced mode and require
   deep knowledge of the quantization process. Find an overall description of all
   possible parameters in tpe_spec.json. */

{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to a model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
    },

    /* Parameters of the engine used for model inference */

    "engine": {
        "config": "<CONFIG_PATH>" // Path to the Accuracy Checker config
    },

    /* Optimizer used to find "optimal" hyperparameters */

    "optimizer": {
        "name": "Tpe", // Global optimizer name
        "params": {
            "max_trials": 200, // Maximum number of trials
            "trials_load_method": "cold_start", // Start from scratch or reuse previous results; supported options: [cold_start, warm_start, fine_tune, eval]
            "accuracy_loss": 0.1, // Accuracy threshold (%)
            "latency_reduce": 1.5, // Target latency improvement versus the original model
            "accuracy_weight": 1.0, // Accuracy weight in the loss function
            "latency_weight": 1.0, // Latency weight in the loss function
            "benchmark": {
                // Latency measurement benchmark configuration (https://docs.openvinotoolkit.org/latest/_inference_engine_samples_benchmark_app_README.html)
                "performance_count": false,
                "batch_size": 0,
                "nthreads": 4,
                "nstreams": 0,
                "nireq": 0,
                "api_type": "sync",
                "niter": 4,
                "duration_seconds": 30,
                "benchmark_app_dir": "<path to benchmark_app>" // Path to benchmark_app. If not specified, the Python-based benchmark is used. Use benchmark_app to reduce jitter in results.
            }
        }
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specifics of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "ActivationChannelAlignment",
                "params": {
                    "stat_subset_size": 300 // Size of the subset used to calculate activation statistics
                                            // for quantization parameters calculation
                }
            },
            {
                "name": "TunableQuantization",
                "params": {
                    /* Preset is a collection of optimization algorithm parameters that specifies
                       which metric the algorithm should concentrate on improving. Each optimization
                       algorithm supports the [performance, mixed, accuracy] presets, which control the
                       quantization mode: symmetric, mixed (weights symmetric and activations asymmetric),
                       and fully asymmetric, respectively. */
                    "preset": "performance",
                    "stat_subset_size": 300, // Size of the subset used to calculate activation statistics
                                             // for quantization parameters calculation
                    "tuning_scope": ["layer"] // List of quantization parameters that will be tuned;
                                              // available options: [bits, mode, granularity, layer, range_estimator]
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 300 // Size of the subset used to calculate activation statistics
                                            // for quantization parameters calculation
                }
            }
        ]
    }
}
274
tools/pot/docs/BestPractices.md
Normal file
@ -0,0 +1,274 @@
# Post-Training Optimization Best Practices {#pot_docs_BestPractices}

This document describes the most common insights about model optimization using the Post-training Optimization Tool (POT). Post-training optimization is usually
the fastest and easiest way to get a low-precision model: it does not require model fine-tuning, so there is no need for a training dataset, a training pipeline, or
powerful training hardware. In some cases, it may lead to an unsatisfactory accuracy drop, especially when optimizing the whole model.
However, it can still be helpful for fast performance evaluation, in order to understand the possible speedup
from one or another optimization method. Before going into details,
we suggest reading the main [POT documentation](../README.md).

> **NOTE**: POT uses inference on the CPU during model optimization. This means the ability to infer the original
> floating-point model is a prerequisite for model optimization.
> It is also worth mentioning that in the case of 8-bit quantization it is recommended to run POT on the same CPU
> architecture when optimizing for CPU, or on a VNNI-based CPU when quantizing for a non-CPU device, such as GPU, VPU, or GNA.
> This helps to avoid the impact of the saturation issue that occurs on AVX- and SSE-based CPU devices.

## Get Started with Post-Training Quantization

Post-training quantization is a basic feature of the POT and it has lots of knobs that can be used to get an accurate
quantized model. However, as a starting point we suggest using the `DefaultQuantization` algorithm with default settings.
In many cases it leads to satisfactory accuracy and a good performance speedup.

A fragment of the configuration file (`config/default_quantization_template.json` in the POT directory) with default settings is shown below:
```
"compression": {
    "target_device": "ANY", // Target device, the specificity of which will be taken into account during optimization.
                            // The default value "ANY" stands for compatible quantization supported by any HW.
    "algorithms": [
        {
            "name": "DefaultQuantization", // Optimization algorithm name
            "params": {
                "preset": "performance", // Preset [performance, mixed] which controls the quantization
                                         // mode: symmetric, or mixed (weights symmetric and
                                         // activations asymmetric), respectively

                "stat_subset_size": 300  // Size of the subset used to calculate activation statistics
                                         // for quantization parameters calculation
            }
        }
    ]
}
```

In the case of substantial accuracy degradation after applying the `DefaultQuantization` algorithm, there are three alternatives:
1. Hyperparameters tuning
2. AccuracyAwareQuantization algorithm
3. Layer-wise hyperparameters tuning

## Tuning Hyperparameters of the DefaultQuantization
The `DefaultQuantization` algorithm provides multiple hyperparameters which can be used to improve accuracy results for the fully-quantized model.
Below is a list of best practices which can be applied to improve accuracy without a substantial performance reduction with respect to default settings:
1. The first option that we recommend changing is `preset`, which can be varied from `performance` to `mixed`. It enables asymmetric quantization of
activations and can be helpful for NNs with non-ReLU activation functions, e.g. YOLO, EfficientNet, etc.
2. The next option is `use_fast_bias`. Setting this option to `false` enables a different bias correction method which is, in general, more accurate
and is applied after model quantization as a part of the `DefaultQuantization` algorithm.
> **NOTE**: Changing this option can substantially increase quantization time in the POT tool.
3. Another important option is `range_estimator`. It defines how to calculate the minimum and maximum of the quantization range for weights and activations.
For example, the following `range_estimator` for activations can improve the accuracy for Faster R-CNN based networks:
```
"compression": {
    "target_device": "ANY",
    "algorithms": [
        {
            "name": "DefaultQuantization",
            "params": {
                "preset": "performance",
                "stat_subset_size": 300,

                "activations": {
                    "range_estimator": {
                        "max": {
                            "aggregator": "max",
                            "type": "abs_max"
                        }
                    }
                }
            }
        }
    ]
}
```

Please find the possible options and their description in the `config/default_quantization_spec.json` file in the POT directory.

4. The next option is `stat_subset_size`. It controls the size of the calibration dataset used by POT to collect statistics for quantization parameters initialization.
It is assumed that this dataset should contain a sufficient number of representative samples. Thus, varying this parameter may affect accuracy (the higher, the better).
However, we empirically found that 300 samples are sufficient to get representative statistics in most cases.
5. The last option is `ignored_scope`. It allows excluding some layers from the quantization process, i.e. their inputs will not be quantized. It may be helpful for some patterns that are known in advance to drop accuracy when executed in low precision.
For example, the `DetectionOutput` layer of an SSD model, expressed as a subgraph, should not be quantized to preserve the accuracy of Object Detection models.
One of the sources for the ignored scope can be the AccuracyAware algorithm, which can revert layers back to the original precision (see details below).

## AccuracyAwareQuantization
In case the steps above do not lead to an accurate quantized model, you may use the so-called `AccuracyAwareQuantization` algorithm, which produces mixed-precision models.
The idea behind it is to revert quantized layers back to floating-point precision, based on their contribution to the accuracy drop, until the desired accuracy degradation with respect to
the full-precision model is satisfied.

A fragment of the configuration file with default settings is shown below (`configs/accuracy_aware_quantization_template.json`):
```
"compression": {
    "target_device": "ANY", // Target device, the specificity of which will be taken into account during optimization.
                            // The default value "ANY" stands for compatible quantization supported by any HW.
    "algorithms": [
        {
            "name": "AccuracyAwareQuantization", // Optimization algorithm name
            "params": {
                "preset": "performance", // Preset [performance, mixed, accuracy] which controls the quantization
                                         // mode: symmetric, mixed (weights symmetric and activations asymmetric),
                                         // and fully asymmetric, respectively

                "stat_subset_size": 300, // Size of the subset used to calculate activation statistics
                                         // for quantization parameters calculation

                "maximal_drop": 0.01 // Maximum accuracy drop which has to be achieved after the quantization
            }
        }
    ]
}
```

Since `AccuracyAwareQuantization` calls `DefaultQuantization` as its first step, all the parameters of the latter are also valid and can be applied in the
accuracy-aware scenario.

> **NOTE**: In the general case, the possible speedup after applying the `AccuracyAwareQuantization` algorithm is smaller than after `DefaultQuantization`, where the model gets fully quantized.

If you do not achieve the desired accuracy and performance after applying the
`AccuracyAwareQuantization` algorithm, or you need an accurate fully-quantized model,
we recommend either using layer-wise hyperparameters tuning with TPE or using
Quantization-Aware Training from [the supported frameworks](LowPrecisionOptimizationGuide.md).

## Layer-Wise Hyperparameters Tuning Using TPE

As the last step in post-training optimization, you may try layer-wise hyperparameter
tuning using TPE, which stands for Tree-structured Parzen Estimator: a hyperparameter optimizer
that searches through available configurations trying to find an optimal one.
For post-training optimization, TPE assigns every layer multiple available configuration
options to choose from, and by evaluating different sets of parameters
it creates a probabilistic model of their impact on accuracy and latency, which it uses to
iteratively find an optimal one.
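
The exact objective that TPE minimizes is internal to POT, but as a mental model the `accuracy_weight` and `latency_weight` options can be read as weighting the two competing terms of a scalar per-trial loss, roughly as in the sketch below. This is an illustrative Python sketch only, not POT source code; the threshold semantics of `accuracy_loss` and `latency_reduce` are assumptions based on their descriptions in `tpe_spec.json`.

```python
# Illustrative only: one plausible shape of the per-trial objective controlled
# by accuracy_weight / latency_weight. The actual POT loss function may differ.
def trial_loss(accuracy_drop, latency_gain,
               accuracy_weight=1.0, latency_weight=1.0,
               accuracy_loss=0.1, latency_reduce=1.5):
    # Penalize accuracy degradation beyond the allowed threshold (in %)...
    accuracy_term = max(0.0, accuracy_drop - accuracy_loss)
    # ...and penalize falling short of the target speedup versus the original model.
    latency_term = max(0.0, latency_reduce - latency_gain)
    return accuracy_weight * accuracy_term + latency_weight * latency_term
```

Under this reading, setting `latency_weight` to 0, as in the Range Estimator Configuration below, makes benchmark jitter irrelevant to the trial score.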

You can run TPE with any combination of parameters in `tuning_scope`, but it is
recommended to use one of the two configurations described below. Try the
Range Estimator Configuration first. If it is not able to reach the accuracy
target, then run the Layer Configuration. If for some reason,
like a HW failure or power shutdown, TPE trials stop before completion, you can
rerun them starting from the last trial by changing `trials_load_method`
from `cold_start` to `warm_start`, as long as logs from the previous execution are available.

> **NOTE**: TPE requires many iterations to converge to an optimal solution, and
> it is recommended to run it for at least 200 iterations. Because every iteration
> requires evaluation of a generated model, which means accuracy measurements on a
> dataset and latency measurements using a benchmark, this process may take from
> 24 hours up to a few days to complete, depending on the model.
> To run this configuration on multiple machines and reduce the execution time,
> see [Multi-node](../openvino/tools/pot/optimization/tpe/multinode.md).

### Range Estimator Configuration

To run TPE with range estimator tuning, use the following configuration:
```json
"optimizer": {
    "name": "Tpe",
    "params": {
        "max_trials": 200,
        "trials_load_method": "cold_start",
        "accuracy_loss": 0.1,
        "latency_reduce": 1.5,
        "accuracy_weight": 1.0,
        "latency_weight": 0.0,
        "benchmark": {
            "performance_count": false,
            "batch_size": 1,
            "nthreads": 8,
            "nstreams": 1,
            "nireq": 1,
            "api_type": "async",
            "niter": 1,
            "duration_seconds": 30,
            "benchmark_app_dir": "<path to benchmark_app>" // Path to benchmark_app. If not specified, the Python-based benchmark is used. Use benchmark_app to reduce jitter in results.
        }
    }
},
"compression": {
    "target_device": "ANY",
    "algorithms": [
        {
            "name": "ActivationChannelAlignment",
            "params": {
                "stat_subset_size": 300
            }
        },
        {
            "name": "TunableQuantization",
            "params": {
                "stat_subset_size": 300,
                "preset": "performance",
                "tuning_scope": ["range_estimator"],
                "estimator_tuning_scope": ["preset", "outlier_prob"],
                "outlier_prob_choices": [1e-3, 1e-4, 1e-5]
            }
        },
        {
            "name": "FastBiasCorrection",
            "params": {
                "stat_subset_size": 300
            }
        }
    ]
}
```

This configuration searches for an optimal preset for `range_estimator` and an optimal
outlier probability for quantiles for every layer. Because this configuration
only changes the final values provided to [FakeQuantize](https://docs.openvinotoolkit.org/latest/_docs_ops_quantization_FakeQuantize_1.html) layers, changes in parameters
do not impact inference latency; thus we set `latency_weight` to 0 to prevent
jitter in benchmark results from negatively impacting model evaluation. Experiments
show that this configuration can give much better accuracy than the approach of
just changing the `range_estimator` configuration globally.

### Layer Configuration

To run TPE with layer tuning, use the following configuration:
```json
"optimizer": {
    "name": "Tpe",
    "params": {
        "max_trials": 200,
        "trials_load_method": "cold_start",
        "accuracy_loss": 0.1,
        "latency_reduce": 1.5,
        "accuracy_weight": 1.0,
        "latency_weight": 1.0,
        "benchmark": {
            "performance_count": false,
            "batch_size": 1,
            "nthreads": 8,
            "nstreams": 1,
            "nireq": 1,
            "api_type": "async",
            "niter": 1,
            "duration_seconds": 30,
            "benchmark_app_dir": "<path to benchmark_app>" // Path to benchmark_app. If not specified, the Python-based benchmark is used. Use benchmark_app to reduce jitter in results.
        }
    }
},
"compression": {
    "target_device": "ANY",
    "algorithms": [
        {
            "name": "ActivationChannelAlignment",
            "params": {
                "stat_subset_size": 300
            }
        },
        {
            "name": "TunableQuantization",
            "params": {
                "stat_subset_size": 300,
                "preset": "performance",
                "tuning_scope": ["layer"]
            }
        },
        {
            "name": "FastBiasCorrection",
            "params": {
                "stat_subset_size": 300
            }
        }
    ]
}
```

This configuration is similar to `AccuracyAwareQuantization`, because it also
tries to revert quantized layers back to floating-point precision, but it uses a
different algorithm to choose layers, which can lead to better results.
79
tools/pot/docs/CLI.md
Normal file
@ -0,0 +1,79 @@
# Use Post-Training Optimization Tool Command-Line Interface {#pot_compression_cli_README}

The POT command-line interface (CLI) is designed to optimize models that are supported by the [Accuracy Checker Tool](@ref omz_tools_accuracy_checker_README) used for accuracy measurement.
If your model comes from the OpenVINO™ [Model Zoo](https://github.com/openvinotoolkit/open_model_zoo) or is similar to one of
its models, you can employ the POT CLI to optimize it.
In other cases, you should consider using the POT [API](@ref pot_compression_api_README). To start with the POT CLI, please refer to the
following [example](@ref pot_configs_examples_README).

Note: There is also the so-called [**Simplified mode**](@ref pot_configs_README), aimed mainly at INT8 quantization of models from the Computer Vision
domain with simple dataset preprocessing, like image resize and crop. In this case, you can also use the POT CLI for
optimization. However, the accuracy results are not guaranteed. Moreover, you are also limited in the
choice of optimization methods, since accuracy measurement is not available.

## Prerequisites
1. Install POT following the [Installation Guide](@ref pot_InstallationGuide).
2. Convert your model from the framework representation into the OpenVINO™ IR format with the
[Model Optimizer](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide).
3. Prepare the Accuracy Checker configuration file and make sure that the model can be successfully inferred and achieves
accuracy numbers similar to the reference model from the original framework.
4. Activate the Python environment in the command-line shell where the POT and the Accuracy Checker were installed.
5. (Optional) Set up the OpenVINO™ environment in the command-line shell with the following script if you
installed it from the distribution file:
   ```sh
   source <INSTALL_DIR>/bin/setupvars.sh
   ```
   Note: this step is not required if you use the PyPI distribution.

## Run POT CLI
There are two ways to run POT via the command line:

- **Basic usage**. In this case you can run POT with basic settings, specifying all the options via the command line:
   ```sh
   pot -q default -m <path_to_xml> -w <path_to_bin> --ac-config <path_to_AC_config_yml>
   ```
- **Advanced usage**. In this case you should prepare a configuration file for the POT where you can specify advanced options for the available optimization
methods. See [POT configuration file description](@ref pot_configs_README) for more details.
To launch the command-line tool with the configuration file run:
   ```sh
   pot -c <path_to_config_file>
   ```
For all available usage options, use the `-h`, `--help` arguments or refer to the Command-Line Arguments section below.

By default, the results are dumped into a separate output subfolder inside the `./results` folder that is created
in the same directory where the tool is run from. Use the `-e` option to evaluate the accuracy directly from the tool.

See also the [End-to-end example](@ref pot_configs_examples_README) on how to run a particular example of 8-bit
quantization with the POT.

### Command-Line Arguments

The following command-line options are available to run the tool:

| Argument                                           | Description                                             |
| ------------------------------------------------- | ------------------------------------------------------- |
| `-h`, `--help`                                     | Optional. Show help message and exit. |
| `-q`, `--quantize`                                 | Quantize the model to 8 bits with the specified quantization method: `default` or `accuracy_aware`. |
| `--preset`                                         | Use `performance` for fully symmetric quantization or the `mixed` preset for symmetric quantization of weights and asymmetric quantization of activations. Applicable only when the `-q` option is used.|
| `-m`, `--model`                                    | Path to the model file being optimized (.xml). Applicable only when the `-q` option is used. |
| `-w`, `--weights`                                  | Path to the weights file of the model being optimized (.bin). Applicable only when the `-q` option is used. |
| `-n`, `--name`                                     | Model name. Applicable only when the `-q` option is used. |
| `--ac-config`                                      | Path to the Accuracy Checker configuration file. Applicable only when the `-q` option is used. |
| `--max-drop`                                       | Optional. Maximum accuracy drop. Valid only for accuracy-aware quantization. Applicable only when the `-q` option is used and the `accuracy_aware` method is selected. |
| `-c CONFIG`, `--config CONFIG`                     | Path to a config file with task- or model-specific parameters. |
| `-e`, `--evaluate`                                 | Optional. Evaluate the model on the whole dataset after optimization. |
| `--output-dir OUTPUT_DIR`                          | Optional. A directory where results are saved. Default: `./results`. |
| `-sm`, `--save-model`                              | Optional. Save the original full-precision model. |
| `-d`, `--direct-dump`                              | Optional. Save results to the "optimized" subfolder within the specified output directory with no additional subpaths added at the end. |
| `--log-level {CRITICAL,ERROR,WARNING,INFO,DEBUG}`  | Optional. Log level to print. Default: INFO. |
| `--progress-bar`                                   | Optional. Disable CL logging and enable the progress bar. |
| `--stream-output`                                  | Optional. Switch model quantization progress display to a multiline mode. Use with third-party components. |
| `--keep-uncompressed-weights`                      | Optional. Keep Convolution, Deconvolution and FullyConnected weights uncompressed. Use with third-party components.|

## See Also

* [Installation Guide](@ref pot_InstallationGuide)
* [Post-Training Optimization Best Practices](@ref pot_docs_BestPractices)
200
tools/pot/docs/E2eExample.md
Normal file
@ -0,0 +1,200 @@
# End-to-end Command-line Interface Example {#pot_configs_examples_README}

This tutorial describes an example of running post-training quantization for the **MobileNet v2 model from the PyTorch** framework,
specifically with the DefaultQuantization algorithm.
The example covers the following steps:
- Environment setup
- Model preparation and conversion to the OpenVINO™ Intermediate Representation (IR) format
- Performance benchmarking of the original full-precision model and of the one converted to the IR
- Dataset preparation
- Accuracy validation of the full-precision model in the IR format
- Model quantization with the DefaultQuantization algorithm and accuracy validation of the quantized model
- Performance benchmarking of the quantized model

All the steps are based on the tools and sample configuration files
distributed with the Intel® Distribution of OpenVINO™ toolkit.

The example has been verified on Ubuntu 18.04 with Python 3.6 installed.

In case of issues while running the example, refer to [POT Frequently Asked Questions](@ref pot_docs_FrequentlyAskedQuestions) for help.

## Environment Setup

1. Install the OpenVINO™ toolkit and the Model Optimizer, Accuracy Checker, and Post-training Optimization Tool components following the [Installation Guide](@ref pot_InstallationGuide).
2. Activate the Python* environment and the OpenVINO environment as described in the [Installation Guide](@ref pot_InstallationGuide).
3. Create a separate working directory and navigate to it.

In the instructions below, the Post-Training Optimization Tool directory `<POT_DIR>` refers to:
- `<ENV>/lib/python<version>/site-packages/` in the case of PyPI installation, where `<ENV>` is a Python*
environment where OpenVINO is installed and `<version>` is a Python* version, e.g. `3.6`.
- `<INSTALL_DIR>/deployment_tools/tools/post_training_optimization_toolkit` in the case of the OpenVINO distribution package,
where `<INSTALL_DIR>` is the directory where the Intel® Distribution of OpenVINO™ toolkit is installed.

## Model Preparation

1. Navigate to `<EXAMPLE_DIR>`.

2. Download the MobileNet v2 PyTorch model using the [Model Downloader](@ref omz_tools_downloader) tool from the Open Model Zoo repository:
   ```sh
   python3 ./downloader.py --name mobilenet-v2-pytorch
   ```
   After that, the original full-precision model is located in `<EXAMPLE_DIR>/public/mobilenet-v2-pytorch/`.

3. Convert the model to the OpenVINO™ Intermediate Representation (IR) format using the [Model Converter](@ref omz_tools_downloader) tool:
   ```sh
   python3 ./converter.py --name mobilenet-v2-pytorch
   ```
   After that, the full-precision model in the IR format is located in `<EXAMPLE_DIR>/public/mobilenet-v2-pytorch/FP32/`.

For more information about the Model Optimizer, refer to its [documentation](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide).

## Performance Benchmarking of Full-Precision Models

1. Check the performance of the original model using the [Deep Learning Benchmark](@ref openvino_inference_engine_tools_benchmark_tool_README) tool:
   ```sh
   python3 ./benchmark_app.py -m <EXAMPLE_DIR>/public/mobilenet-v2-pytorch/mobilenet-v2.onnx
   ```
   Note that the results might differ depending on the characteristics of your machine. On a machine with an Intel® Core™ i9-10920X CPU @ 3.50GHz they look like:
   ```sh
   Latency: 4.09 ms
   Throughput: 1456.84 FPS
   ```

2. Check the performance of the full-precision model in the IR format using the [Deep Learning Benchmark](@ref openvino_inference_engine_tools_benchmark_tool_README) tool:
   ```sh
   python3 ./benchmark_app.py -m <EXAMPLE_DIR>/public/mobilenet-v2-pytorch/FP32/mobilenet-v2-pytorch.xml
   ```
   Note that the results might differ depending on the characteristics of your machine. On a machine with an Intel® Core™ i9-10920X CPU @ 3.50GHz they look like:
   ```sh
   Latency: 4.14 ms
   Throughput: 1436.55 FPS
   ```

## Dataset Preparation

To perform accuracy validation as well as quantization of a model, a dataset should be prepared. This example uses a real dataset called ImageNet.

To download images:

1. Go to the [ImageNet](http://www.image-net.org/) homepage.
2. If you do not have an account, click the `Signup` button in the upper right corner, provide your data, and wait for a confirmation email.
3. Log in after receiving the confirmation email or if you already have an account. Go to the `Download` tab.
4. Select `Download Original Images`.
5. You will be redirected to the `Terms of Access` page. If you agree to the Terms, continue by clicking `Agree and Sign`.
6. Click one of the links in the `Download as one tar file` section.
7. Unpack the downloaded archive into `<EXAMPLE_DIR>/ImageNet/`.

Note that the registration process might be quite long.
Note that the ImageNet validation set consists of 50,000 images and takes around 6.5 GB of disk space.

To download the annotation file:

1. Download the [archive](http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz).
2. Unpack `val.txt` from the archive into `<EXAMPLE_DIR>/ImageNet/`.

After that, the `<EXAMPLE_DIR>/ImageNet/` dataset folder should contain many image files like `ILSVRC2012_val_00000001.JPEG` and the `val.txt` annotation file.

## Accuracy Validation of Full-Precision Model in IR Format

1. Create a new file in `<EXAMPLE_DIR>` and name it `mobilenet_v2_pytorch.yaml`. This is the Accuracy Checker configuration file.

2. Put the following text into `mobilenet_v2_pytorch.yaml`:
   ```yaml
   models:
     - name: mobilenet-v2-pytorch

       launchers:
         - framework: dlsdk
           device: CPU
           adapter: classification

       datasets:
         - name: classification_dataset
           data_source: ./ImageNet
           annotation_conversion:
             converter: imagenet
             annotation_file: ./ImageNet/val.txt
           reader: pillow_imread

           preprocessing:
             - type: resize
               size: 256
               aspect_ratio_scale: greater
               use_pillow: True
             - type: crop
               size: 224
               use_pillow: True
             - type: bgr_to_rgb

           metrics:
             - name: accuracy@top1
               type: accuracy
               top_k: 1

             - name: accuracy@top5
               type: accuracy
               top_k: 5
   ```
   where `data_source: ./ImageNet` is the dataset and `annotation_file: ./ImageNet/val.txt` is the annotation file prepared in the previous step.
   For more information about the Accuracy Checker configuration file, refer to the [Accuracy Checker Tool documentation](@ref omz_tools_accuracy_checker_README).

3. Evaluate the accuracy of the full-precision model in the IR format by executing the following command in `<EXAMPLE_DIR>`:
   ```sh
   accuracy_check -c mobilenet_v2_pytorch.yaml -m ./public/mobilenet-v2-pytorch/FP32/
   ```
   The result should be about **71.81**% for the accuracy top-1 metric on a VNNI-based CPU.
   Note that the results might differ on CPUs with different instruction sets.

## Model Quantization

1. Create a new file in `<EXAMPLE_DIR>` and name it `mobilenet_v2_pytorch_int8.json`. This is the POT configuration file.

2. Put the following text into `mobilenet_v2_pytorch_int8.json`:
   ```json
   {
       "model": {
           "model_name": "mobilenet-v2-pytorch",
           "model": "./public/mobilenet-v2-pytorch/FP32/mobilenet-v2-pytorch.xml",
           "weights": "./public/mobilenet-v2-pytorch/FP32/mobilenet-v2-pytorch.bin"
       },
       "engine": {
           "config": "./mobilenet_v2_pytorch.yaml"
       },
       "compression": {
           "algorithms": [
               {
                   "name": "DefaultQuantization",
                   "params": {
                       "preset": "mixed",
                       "stat_subset_size": 300
                   }
               }
           ]
       }
   }
   ```
   where `"model": "./public/mobilenet-v2-pytorch/FP32/mobilenet-v2-pytorch.xml"` and `"weights": "./public/mobilenet-v2-pytorch/FP32/mobilenet-v2-pytorch.bin"`
   specify the full-precision model in the IR format, `"config": "./mobilenet_v2_pytorch.yaml"` is the Accuracy Checker configuration file, and
   `"name": "DefaultQuantization"` is the algorithm name.

3. Perform model quantization by executing the following command in `<EXAMPLE_DIR>`:
   ```sh
   pot -c mobilenet_v2_pytorch_int8.json -e
   ```
   The quantized model is placed into a subfolder with your current date and time in its name under the `./results/mobilenetv2_DefaultQuantization/` directory.
   The accuracy validation of the quantized model is performed right after the quantization. The result should be about **71.556**% for the accuracy top-1 metric on a VNNI-based CPU.
   Note that the results might differ on CPUs with different instruction sets.

## Performance Benchmarking of Quantized Model

Check the performance of the quantized model using the [Deep Learning Benchmark](@ref openvino_inference_engine_tools_benchmark_tool_README) tool:
```sh
python3 ./benchmark_app.py -m <INT8_MODEL>
```
where `<INT8_MODEL>` is the path to the quantized model.
Note that the results might differ depending on the characteristics of your machine. On a machine with an Intel® Core™ i9-10920X CPU @ 3.50GHz they look like:
```sh
Latency: 1.54 ms
Throughput: 3814.18 FPS
```
106
tools/pot/docs/FrequentlyAskedQuestions.md
Normal file
@ -0,0 +1,106 @@
# Post-training Optimization Tool Frequently Asked Questions {#pot_docs_FrequentlyAskedQuestions}

If your question is not covered below, use the [OpenVINO™ Community Forum page](https://community.intel.com/t5/Intel-Distribution-of-OpenVINO/bd-p/distribution-openvino-toolkit),
where you can participate freely.

- <a href="#opensourced">Is the Post-training Optimization Tool open-sourced?</a>
- <a href="#dataset">Can I quantize my model without a dataset?</a>
- <a href="#framework">Can a model in any framework be quantized by the POT?</a>
- <a href="#tradeoff">What is the tradeoff when you go to low precision?</a>
- <a href="#noac">I'd like to quantize a model and I've converted it to IR but I don't have the Accuracy Checker config. What can I do?</a>
- <a href="#nncf">I tried all recommendations from "Post-Training Optimization Best Practices" but either have a high accuracy drop or bad performance after quantization. What else can I do?</a>
- <a href="#memory">I get “RuntimeError: Cannot get memory” and “RuntimeError: Output data was not allocated” when I quantize my model with the POT.</a>
- <a href="#quality">I have successfully quantized my model with a low accuracy drop and improved performance but the output video generated from the low-precision model is much worse than from the full-precision model. What could be the root cause?</a>
- <a href="#longtime">The quantization process of my model takes a lot of time. Can it be decreased somehow?</a>
- <a href="#import">I get "Import Error:... No such file or directory". How can I avoid it?</a>
- <a href="#python">When I execute POT CLI, I get "File "/workspace/venv/lib/python3.6/site-packages/nevergrad/optimization/base.py", line 35... SyntaxError: invalid syntax". What is wrong?</a>
- <a href="#nomodule">What does the message "ModuleNotFoundError: No module named 'some\_module\_name'" mean?</a>


### <a name="opensourced">Is the Post-training Optimization Tool (POT) open-sourced?</a>

Yes. As of this release, the POT sources are available as a part of the open-source [OpenVINO™](@ref index) repository (the `tools/pot` directory), and the tool continues to be distributed with the Intel® Distribution of OpenVINO™ toolkit.

### <a name="dataset">Can I quantize my model without a dataset?</a>

In general, you should have a dataset. The dataset should be annotated if you want to validate the accuracy.
If your dataset is not annotated, you can still quantize the model in the Simplified mode, but you will not be able to measure the accuracy.
See [Post-Training Optimization Best Practices](BestPractices.md) for more details.
You can also use the [POT API](../openvino/tools/pot/api/README.md) to integrate post-training quantization into a custom inference pipeline.

### <a name="framework">Can a model in any framework be quantized by the POT?</a>

The POT accepts models in the OpenVINO™ Intermediate Representation (IR) format only. To get an IR, convert your model using the
[Model Optimizer](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide).

### <a name="noac">I'd like to quantize a model and I've converted it to IR but I don't have the Accuracy Checker config. What can I do?</a>

To create the Accuracy Checker configuration file, refer to the [Accuracy Checker documentation](@ref omz_tools_accuracy_checker_README) and
try to find a configuration file for your model among the ones available in the Accuracy Checker examples. An alternative is to quantize the model
in the Simplified mode, but you will not be able to measure the accuracy. See [Post-Training Optimization Best Practices](BestPractices.md) for more details.
You can also use the [POT API](../openvino/tools/pot/api/README.md) to integrate post-training quantization into your pipeline without the Accuracy Checker.

### <a name="tradeoff">What is the tradeoff when you go to low precision?</a>

The tradeoff is between accuracy and performance. A model in low precision usually performs faster
than the same model in full precision, but its accuracy might be worse. You can find some benchmarking results in
[INT8 vs FP32 Comparison on Select Networks and Platforms](@ref openvino_docs_performance_int8_vs_fp32).
The other benefit of having a model in low precision is its smaller size.

### <a name="nncf">I tried all recommendations from "Post-Training Optimization Best Practices" but either have a high accuracy drop or bad performance after quantization. What else can I do?</a>

First of all, you should validate the POT compression pipeline you are running, which can be done with the following steps:
1. Make sure the accuracy of the original uncompressed model has the value you expect. Run your POT pipeline with an empty compression config and evaluate the resulting model metric. Compare this uncompressed model accuracy metric value with your reference.
2. Run your compression pipeline with a single compression algorithm ([DefaultQuantization](../openvino/tools/pot/algorithms/quantization/default/README.md) or [AccuracyAwareQuantization](../openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md)) without any parameter values specified in the config (except for `preset` and `stat_subset_size`). Make sure you still observe the undesirable accuracy drop or performance gain in this case.

Finally, if you have done the steps above and the problem persists, you could try to compress your model using the [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf_pytorch).
Note that NNCF usage requires a PyTorch-based training pipeline for your model in order to perform compression-aware fine-tuning. See the [Low Precision Optimization Guide](LowPrecisionOptimizationGuide.md) for more details.

### <a name="memory">I get “RuntimeError: Cannot get memory” and “RuntimeError: Output data was not allocated” when I quantize my model with the POT.</a>

These issues happen due to an insufficient amount of available memory for statistics collection during the quantization of a large model, or
due to a very high resolution of the input images in the quantization dataset. If you cannot increase your RAM size, one of the following options can help:
- Set the `inplace_statistic` parameter to `true`. In this case the POT changes the statistics collection method and uses less memory. Note that this might increase the time required for quantization.
- Set the `eval_requests_number` and `stat_requests_number` parameters to 1 (see the config sketch after this list). In this case the POT limits the number of infer requests to 1 and uses less memory.
Note that this might increase the time required for quantization.
- Set the `use_fast_bias` parameter to `false`. In this case the POT switches from the FastBiasCorrection algorithm to the full BiasCorrection algorithm,
which is usually more accurate and takes more time but requires less memory. See [Post-Training Optimization Best Practices](BestPractices.md) for more details.
- Reshape your model to a lower resolution and resize the images in the dataset. Note that this might impact the accuracy.
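
For reference, the parameters mentioned above map onto the POT configuration roughly as in the sketch below, written as a Python dict that mirrors the JSON config file: `stat_requests_number` and `eval_requests_number` belong to the `engine` section (as in `tpe_spec.json`), and `use_fast_bias` to the algorithm parameters. Treat this as a sketch; see the `*_spec.json` files in the POT `configs` directory for the full schema.

```python
# Sketch only: the memory-saving settings from this answer as POT config
# fragments (the JSON configuration mirrored as a Python dict).
memory_saving_overrides = {
    "engine": {
        "stat_requests_number": 1,  # single infer request during statistics collection
        "eval_requests_number": 1,  # single infer request during evaluation
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300,
                    "use_fast_bias": False,  # full BiasCorrection: slower but needs less memory
                },
            }
        ],
    },
}
```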

### <a name="quality">I have successfully quantized my model with a low accuracy drop and improved performance but the output video generated from the low-precision model is much worse than from the full-precision model. What could be the root cause?</a>

It can happen due to the following reasons:
- A wrong or non-representative dataset was used during the quantization and accuracy validation. Please make sure that your data and labels are correct and that they sufficiently reflect the use case.
- A wrong Accuracy Checker configuration file was used during the quantization. Refer to the [Accuracy Checker documentation](@ref omz_tools_accuracy_checker_README) for more information.

### <a name="longtime">The quantization process of my model takes a lot of time. Can it be decreased somehow?</a>

Quantization time depends on multiple factors, such as the size of the model and the dataset. It also depends on the algorithm:
the [DefaultQuantization](../openvino/tools/pot/algorithms/quantization/default/README.md) algorithm takes less time than the [AccuracyAwareQuantization](../openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md) algorithm.
The [Tree-Structured Parzen Estimator (TPE)](../openvino/tools/pot/optimization/tpe/README.md) algorithm might take even more time.
The following configuration parameters also impact the quantization time
(see details in [Post-Training Optimization Best Practices](BestPractices.md)):
- `use_fast_bias`: when set to `false`, it increases the quantization time
- `stat_subset_size`: the higher the value of this parameter, the more time is required for the quantization
- `tune_hyperparams`: if set to `true` when the AccuracyAwareQuantization algorithm is used, it increases the quantization time
- `stat_requests_number`: the lower the number, the more time might be required for the quantization
- `eval_requests_number`: the lower the number, the more time might be required for the quantization
Note that higher values of `stat_requests_number` and `eval_requests_number` increase memory consumption by POT.

### <a name="import">I get "Import Error:... No such file or directory". How can I avoid it?</a>

This happens when some needed library is not available in your environment. To avoid it, execute the following command:
```sh
source <INSTALL_DIR>/bin/setupvars.sh
```
where `<INSTALL_DIR>` is the directory where the OpenVINO™ toolkit is installed.

### <a name="python">When I execute POT CLI, I get "File "/workspace/venv/lib/python3.6/site-packages/nevergrad/optimization/base.py", line 35... SyntaxError: invalid syntax". What is wrong?</a>

This error is reported when the Python version in your environment is older than 3.6. Upgrade your Python version. Refer to the details about the prerequisites
on the [Post-Training Optimization Tool](../README.md) page.

### <a name="nomodule">What does the message "ModuleNotFoundError: No module named 'some\_module\_name'" mean?</a>

It means that some required Python module is not installed in your environment. To install it, run `pip install some_module_name`.
68
tools/pot/docs/InstallationGuide.md
Normal file
@ -0,0 +1,68 @@
# Post-Training Optimization Tool Installation Guide {#pot_InstallationGuide}

## Prerequisites

* Python* 3.6 or higher
* [OpenVINO™](@ref index)

The minimum and the recommended requirements to run the Post-training Optimization Tool (POT) are the same as for [OpenVINO™](@ref index).

There are two ways to install the POT on your system:
- Installation from the PyPI repository
- Installation from the Intel® Distribution of OpenVINO™ toolkit package

## Install POT from PyPI
The simplest way to get the Post-training Optimization Tool and OpenVINO™ installed is to use PyPI. Follow the steps below to do that:
1. Create a separate [Python* environment](https://docs.python.org/3/tutorial/venv.html) and activate it
2. To install OpenVINO™, run `pip install openvino`
3. To install POT and other OpenVINO™ developer tools, run `pip install openvino-dev`

Now the Post-training Optimization Tool is available in the command line under the `pot` alias. To verify it, run `pot -h`.

## Install and Set Up POT from the Intel® Distribution of OpenVINO™ toolkit package

In the instructions below, `<INSTALL_DIR>` is the directory where the Intel® Distribution of OpenVINO™ toolkit
is installed. The Post-training Optimization Tool is distributed as a part of the OpenVINO™ release package, and to use the POT as a command-line tool
you need to install OpenVINO™ as well as the POT dependencies, namely the [Model Optimizer](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide)
and [Accuracy Checker](@ref omz_tools_accuracy_checker_README). It is recommended to create a separate [Python* environment](https://docs.python.org/3/tutorial/venv.html)
before installing OpenVINO™ and its components.
POT source files are available in the `<INSTALL_DIR>/deployment_tools/tools/post_training_optimization_toolkit` directory after the OpenVINO™ installation.

To set up the Post-training Optimization Tool in your environment, follow the steps below.

### Set up the Model Optimizer and Accuracy Checker components

- To set up the [Model Optimizer](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide):
   1. Go to `<INSTALL_DIR>/deployment_tools/model_optimizer/install_prerequisites`.
   2. Run the following script to configure the Model Optimizer:
      * Linux:
      ```sh
      sudo ./install_prerequisites.sh
      ```
      * Windows:
      ```bat
      install_prerequisites.bat
      ```
   3. To verify that the Model Optimizer is installed, run `<INSTALL_DIR>/deployment_tools/model_optimizer/mo.py -h`.

- To set up the [Accuracy Checker](@ref omz_tools_accuracy_checker_README):
   1. Go to `<INSTALL_DIR>/deployment_tools/open_model_zoo/tools/accuracy_checker`.
   2. Run the following script to configure the Accuracy Checker:
      ```sh
      python setup.py install
      ```
   3. Now the Accuracy Checker is available in the command line under the `accuracy_check` alias. To verify it, run `accuracy_check -h`.

### Set up the POT

1. Go to `<INSTALL_DIR>/deployment_tools/tools/post_training_optimization_toolkit`.
2. Run the following script to configure the POT:
   ```sh
   python setup.py install
   ```
   In order to enable advanced algorithms such as the Tree-Structured Parzen Estimator (TPE) based optimization, add the following flag to the installation command:
   ```sh
   python setup.py install --install-extras
   ```
3. Now the POT is available in the command line under the `pot` alias. To verify it, run `pot -h`.
52
tools/pot/docs/LowPrecisionOptimizationGuide.md
Normal file
@ -0,0 +1,52 @@
# Low Precision Optimization Guide {#pot_docs_LowPrecisionOptimizationGuide}

## Introduction
This document provides the best-known methods for using the low-precision capabilities of the OpenVINO™ toolkit to transform models
into a more hardware-friendly representation using such methods as quantization.

Currently, these capabilities are represented by several components:
- Low-precision runtime
- Post-training Optimization Tool (POT)
- [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf)

The first two components are part of the OpenVINO toolkit itself, while the latter is a separate tool built on top of the PyTorch* framework
and highly aligned with OpenVINO™.

This document covers the high-level aspects of the model optimization flow in OpenVINO™.

## General Information

By low precision we mean inference of Deep Learning models in a precision lower than the 32-bit or 16-bit floating-point precisions (*FLOAT32* and *FLOAT16*). For example, the most popular
bit-width for low-precision inference is *INT8* (*UINT8*), because it is possible to get accurate 8-bit models which substantially speed up inference.
Such models are represented by quantized models, i.e. models that were trained in floating-point precision and then transformed to an integer
representation with floating/fixed-point quantization operations between the layers. This transformation can be done using post-training methods or
with additional retraining/fine-tuning.

Starting from the OpenVINO 2020.1 release, all quantized models are represented using the so-called `FakeQuantize` layer, which is
a very expressive primitive able to represent such operations as `Quantize`, `Dequantize`, `Requantize`, and more. This operation is
inserted into the model during the quantization procedure and is aimed at storing the quantization parameters for the layers. For more details about this operation,
please refer to the following [description](https://docs.openvinotoolkit.org/latest/_docs_ops_quantization_FakeQuantize_1.html).

In order to execute such "fake-quantized" models, OpenVINO has a low-precision runtime, which is a part of the Inference Engine and consists of a
generic component translating the model to a real integer representation, and a HW-specific part implemented in the corresponding HW plug-ins.

## Model Optimization Workflow
We propose a common workflow which aligns with what other DL frameworks have. It contains two main components: post-training quantization and Quantization-Aware Training (QAT).
The first component is the easiest way to get optimized models; the latter can be considered an alternative or an addition when the first does not give
accurate results.

The diagram below shows the optimization flow for a new model with OpenVINO and the related tools.



- **Step 0: Model enabling**. In this step we should ensure that the model trained on the target dataset can be successfully inferred with the OpenVINO
[Inference Engine](https://docs.openvinotoolkit.org/latest/_docs_IE_DG_inference_engine_intro.html) in floating-point precision.
This process involves using the [Model Optimizer](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) tool to convert the model from the source framework
to the OpenVINO Intermediate Representation (IR) and running it on CPU with the Inference Engine.
> **NOTE**: This step presumes that the model has the same accuracy as in the original training framework and is enabled in the [Accuracy Checker](@ref omz_tools_accuracy_checker_README) tool or through a custom validation sample.
- **Step 1: Post-training quantization**. As the first step for optimization, we suggest using INT8 quantization from POT, where in most cases it is possible to get an accurate quantized model. At this step you do not need model re-training. The only thing required is a representative dataset, usually several hundred images, which is used to collect statistics during the quantization process.
Post-training quantization is also really fast and usually takes several minutes, depending on the model size and the HW used. Generally, a regular desktop system is enough to quantize most of the [OpenVINO Model Zoo](https://github.com/opencv/open_model_zoo).
For more information on best practices of post-training optimization, please refer to the [Post-training Optimization Best Practices](BestPractices.md).
- **Step 2: Quantization-Aware Training**. If the accuracy of the quantized model does not satisfy the accuracy criteria, there is a second step, which implies QAT using the [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf) for [PyTorch*](https://pytorch.org/) and [TensorFlow*](https://www.tensorflow.org/) models.
At this step, we assume the user has an original training pipeline of the model written in TensorFlow or PyTorch, and that NNCF is integrated into it.
After this step, you can get an accurate optimized model that can be converted to the OpenVINO Intermediate Representation (IR) using the Model Optimizer component and inferred with the OpenVINO Inference Engine.
35
tools/pot/docs/ModelRepresentation.md
Normal file
@ -0,0 +1,35 @@
# Representation of low-precision models {#pot_docs_ModelRepresentation}
The goal of this document is to describe how optimized models are represented in the OpenVINO Intermediate Representation (IR) and to provide guidance on the interpretation rules for such models at runtime.
Currently, there are two groups of optimization methods that can influence the IR after being applied to the full-precision model:
- **Sparsity**. It is represented by zeros inside the weights, and it is up to the hardware plugin how to interpret these zeros (use the weights as-is or apply special compression algorithms and sparse arithmetic). No additional mask is provided with the model.
- **Quantization**. The rest of this document is dedicated to the representation of quantized models.

## Representation of quantized models
The OpenVINO Toolkit represents all quantized models using the so-called [FakeQuantize](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_prepare_model_convert_model_Legacy_IR_Layers_Catalog_Spec.html#FakeQuantize) operation. This operation is very expressive and allows mapping values from arbitrary input and output ranges. The idea behind it is quite simple: we project (discretize) the input values onto the low-precision data type using an affine transformation (with clamp and rounding) and then reproject the discrete values back to the original range and data type. It can be considered an emulation of the quantization process which happens at runtime.
In order to be able to execute a particular DL operation in low precision, all its inputs should be quantized, i.e. there should be a FakeQuantize between the operation and the data blobs. The figure below shows an example of a quantized Convolution, which contains two FakeQuantize nodes: one for weights and one for activations (the bias is quantized using the same parameters).
<div align="center"><img src="./images/quantized_convolution.png" alt="This browser does not support PNG" width=70% height=70%></div>
<div align="center">Figure 1. Example of quantized Convolution operation.</div><br/>

Starting from the OpenVINO 2020.2 release, all quantized models are represented in the compressed form. It means that the weights of low-precision operations are converted into the target precision (e.g. INT8), which helps to substantially reduce the model size. The rest of the parameters can be represented in FLOAT32 or FLOAT16 precision, depending on the input full-precision model used in the quantization process. Fig. 2 below shows an example of a part of a compressed IR.
<div align="center"><img src="./images/quantized_model_example.png" alt="This browser does not support PNG" width=70% height=70%></div>
<div align="center">Figure 2. Example of compressed quantized model.</div>

### Interpreting FakeQuantize at runtime
One important question that arises at inference time is how to correctly interpret quantized models and, specifically, FakeQuantize operations. The OpenVINO Deep Learning Deployment Toolkit has a special component called Low-Precision Transformations (LPT). It is responsible for the translation of "fake-quantized" models into models with low-precision operations. For more information about the low-precision flow, please refer to the following [document](https://docs.openvinotoolkit.org/latest/_docs_IE_DG_Int8Inference.html). Here we provide only a high-level overview of the interpretation rules of the FakeQuantize operation.
At runtime, each FakeQuantize can be split into two independent operations: **Quantize** and **Dequantize**. The former transforms the input data into the target precision, while the latter transforms the resulting values back to the original range and precision. In practice, *Dequantize* operations can be propagated forward through the linear low-precision layers, such as *Convolution* or *Fully-Connected*, and in some cases fused with the following *Quantize* operation for the next layer into the so-called *Requantize* operation (see Fig. 3).
<div align="center"><img src="./images/qdq_propagation.png" alt="This browser does not support PNG" width=70% height=70%></div>
<div align="center">Figure 3. Quantization operations propagation at runtime. Q, DQ, RQ stand for Quantize, Dequantize, and Requantize correspondingly.</div><br/>

From the calculation standpoint, the FakeQuantize formula is also split into two parts accordingly:
`output = round((x - input_low) / (input_high - input_low) * (levels-1)) / (levels-1) * (output_high - output_low) + output_low`
The first part of this formula represents the *Quantize* operation:
`q = round((x - input_low) / (input_high - input_low) * (levels-1))`
The second is responsible for the dequantization:
`r = q / (levels-1) * (output_high - output_low) + output_low`
From the scale/zero-point notation standpoint, the latter formula can be written as follows:
`r = (output_high - output_low) / (levels-1) * (q + output_low / (output_high - output_low) * (levels-1))`
|
||||
Thus we can define:
|
||||
- **Scale** as `(output_high - output_low) / (levels-1)`
|
||||
- **Zero-point** as `-output_low / (output_high - output_low) * (levels-1)`
|
||||
|
||||
**Note**: During the quantization process the values `input_low`, `input_high`, `output_low`, `output_high` are selected so that to map a floating-point zero exactly to an integer value (zero-point) and vice versa.
|
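To make these rules concrete, below is a minimal NumPy sketch (illustrative only, not part of the toolkit) that emulates FakeQuantize as an explicit *Quantize*/*Dequantize* pair and derives the scale and zero-point. The 255-level, [-1, 1] range is chosen so that the zero-point is an integer, as the note above requires:

```python
import numpy as np

def fake_quantize(x, input_low, input_high, output_low, output_high, levels):
    """Emulate FakeQuantize as an explicit Quantize -> Dequantize pair."""
    x = np.clip(x, input_low, input_high)
    # Quantize: project onto the discrete levels
    q = np.round((x - input_low) / (input_high - input_low) * (levels - 1))
    # Dequantize: project the discrete values back to the original range
    return q / (levels - 1) * (output_high - output_low) + output_low

output_low, output_high, levels = -1.0, 1.0, 255
scale = (output_high - output_low) / (levels - 1)                     # 2/254
zero_point = -output_low / (output_high - output_low) * (levels - 1)  # 127, an integer
x = np.array([-1.5, -0.5, 0.0, 0.25, 2.0], dtype=np.float32)
print(fake_quantize(x, output_low, output_high, output_low, output_high, levels))
```

With this choice, the floating-point zero quantizes to level `q = 127` and dequantizes back to exactly `0.0`, i.e. `r = scale * (q - zero_point)`.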
3
tools/pot/docs/images/aa_quantization_pipeline.png
Normal file
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4958239932616705f208607c20f63f92a6cdb219f8a5e9ff6046ff7835c451dc
size 47028
3
tools/pot/docs/images/convolution_quantization.png
Normal file
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0e564f28b20af9c92511a59389bb42934bc2e19dcaca593c435968d76f5ff7a6
size 28899
3
tools/pot/docs/images/default_quantization_pipeline.png
Normal file
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bcaa0c75dab08dc03343b2bce069148e27141da1abc92fc5fde2fce3a5d8f5e8
size 19411
3
tools/pot/docs/images/low_precision_flow.png
Normal file
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2612fc6169dec150907d79b174c46c9b2f6428b5a20cf462c57a5ea2fc97f56a
size 97895
3
tools/pot/docs/images/qdq_propagation.png
Normal file
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e0bab657bf979494cb84459e29024e5b8b9cd320388c62c6a91b74b897b19718
size 18108
3
tools/pot/docs/images/quantized_convolution.png
Normal file
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:71365e85be040eb01ed524e568b332d9bb6222c760686c54db4e754f587082c2
size 31032
3
tools/pot/docs/images/quantized_model_example.png
Normal file
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3f68e826cfac63d8e6f8d77aa5b7fc61957a872dfb09b38695fb481044a6ddd5
size 48327
3
tools/pot/docs/images/workflow.png
Normal file
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6b9a68861a65526203b56a897f0d6cec0ef860619e9aaf275bc0d7483bc34329
size 92994
34
tools/pot/docs/pot_docs.xml
Normal file
@ -0,0 +1,34 @@
<doxygenlayout xmlns:xi="http://www.w3.org/2001/XInclude" version="1.0">
    <!-- POT Developer Guide -->
    <navindex>
        <tab id="pot" type="usergroup" title="Post-Training Optimization Tool" url="@ref pot_README">
            <tab type="user" title="Installation Guide" url="@ref pot_InstallationGuide"/>
            <tab type="user" title="Low Precision Optimization Guide" url="@ref pot_docs_LowPrecisionOptimizationGuide"/>
            <tab type="usergroup" title="Quantization" url="@ref pot_compression_algorithms_quantization_README">
                <tab type="user" title="DefaultQuantization Algorithm" url="@ref pot_compression_algorithms_quantization_default_README"/>
                <tab type="user" title="AccuracyAwareQuantization Algorithm" url="@ref pot_compression_algorithms_quantization_accuracy_aware_README"/>
                <tab type="usergroup" title="TunableQuantization Algorithm" url="@ref pot_compression_algorithms_quantization_tunable_quantization_README">
                    <tab type="usergroup" title="Tree-Structured Parzen Estimator (TPE)" url="@ref pot_compression_optimization_tpe_README">
                        <tab type="user" title="TPE Multiple Node Configuration Based on MongoDB Database" url="@ref pot_compression_optimization_tpe_multinode"/>
                    </tab>
                </tab>
            </tab>
            <tab type="user" title="Best Practices" url="@ref pot_docs_BestPractices"/>
            <tab type="user" title="Command-line Interface" url="@ref pot_compression_cli_README">
                <tab type="user" title="End-to-end CLI example" url="@ref pot_configs_examples_README"/>
            </tab>
            <tab type="user" title="API" url="@ref pot_compression_api_README">
                <tab type="user" title="API samples" url="@ref pot_sample_README">
                    <tab type="user" title="Image Classification quantization sample" url="@ref pot_sample_classification_README"/>
                    <tab type="user" title="Accuracy-Aware quantization sample" url="@ref pot_sample_object_detection_README"/>
                    <tab type="user" title="Cascaded model quantization sample" url="@ref pot_sample_face_detection_README"/>
                    <tab type="user" title="Semantic segmentation quantization sample" url="@ref pot_sample_segmentation_README"/>
                    <tab type="user" title="3D Segmentation quantization sample" url="@ref pot_sample_3d_segmentation_README"/>
                    <tab type="user" title="GNA speech sample" url="@ref pot_sample_speech_README"/>
                </tab>
            </tab>
            <tab type="user" title="Configuration File Description" url="@ref pot_configs_README"/>
            <tab type="user" title="Frequently Asked Questions" url="@ref pot_docs_FrequentlyAskedQuestions"/>
        </tab>
    </navindex>
</doxygenlayout>
3
tools/pot/docs/ranger/images/img_combined_2.png
Normal file
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:271ba164a9726a5cf8d577f02db258c76df94e9ff79c3bebf95371ebdaa7d82d
size 1719169
3
tools/pot/docs/ranger/images/scheme3.png
Normal file
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d2c7919ea4b1fec95e1b15648194f7e2227793a661c55a242705e44fdc4c0f2f
size 25732
7
tools/pot/main.py
Normal file
@ -0,0 +1,7 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from openvino.tools.pot.app.run import main


if __name__ == '__main__':
    main()
4
tools/pot/openvino/__init__.py
Normal file
@ -0,0 +1,4 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

__path__ = __import__('pkgutil').extend_path(__path__, __name__)
3
tools/pot/openvino/tools/__init__.py
Normal file
@ -0,0 +1,3 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
63
tools/pot/openvino/tools/pot/__init__.py
Normal file
@ -0,0 +1,63 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from .algorithms.quantization.accuracy_aware.algorithm import AccuracyAwareQuantization
from .algorithms.quantization.accuracy_aware_gna.algorithm import AccuracyAwareGNA
from .algorithms.quantization.accuracy_aware_common.algorithm import AccuracyAwareCommon
from .algorithms.quantization.accuracy_aware_common.mixed_precision import (
    INT4MixedQuantization,
)
from .algorithms.quantization.fast_bias_correction.algorithm import FastBiasCorrection
from .algorithms.quantization.bias_correction.algorithm import BiasCorrection
from .algorithms.quantization.channel_alignment.algorithm import (
    ActivationChannelAlignment,
)
from .algorithms.quantization.datafree.algorithm import DataFreeQuantization
from .algorithms.quantization.default.algorithm import DefaultQuantization
from .algorithms.quantization.minmax.algorithm import MinMaxQuantization
from .algorithms.quantization.optimization.rangeopt import RangeOptimization
from .algorithms.quantization.optimization.params_tuning import (
    ParamsGridSearchAlgorithm,
)
from .algorithms.quantization.qnoise_estimator.algorithm import QuantNoiseEstimator
from .algorithms.quantization.tunable_quantization.algorithm import TunableQuantization
from .algorithms.quantization.outlier_channel_splitting.algorithm import (
    OutlierChannelSplitting,
)
from .algorithms.quantization.weight_bias_correction.algorithm import (
    WeightBiasCorrection,
)
from .algorithms.sparsity.magnitude_sparsity.algorithm import MagnitudeSparsity
from .algorithms.sparsity.default.algorithm import WeightSparsity
from .algorithms.sparsity.default.base_algorithm import BaseWeightSparsity
from .optimization.tpe.base_algorithm import Tpe
from .algorithms.quantization.overflow_correction.algorithm import OverflowCorrection
from .algorithms.quantization.ranger.algorithm import Ranger


QUANTIZATION_ALGORITHMS = [
    'MinMaxQuantization',
    'RangeOptimization',
    'FastBiasCorrection',
    'BiasCorrection',
    'ActivationChannelAlignment',
    'DataFreeQuantization',
    'DefaultQuantization',
    'AccuracyAwareQuantization',
    'AccuracyAwareGNA',
    'AccuracyAwareCommon',
    'INT4MixedQuantization',
    'TunableQuantization',
    'Tpe',
    'QuantNoiseEstimator',
    'OutlierChannelSplitting',
    'WeightBiasCorrection',
    'ParamsGridSearchAlgorithm',
    'OverflowCorrection',
    'Ranger',
]

SPARSITY_ALGORITHMS = ['WeightSparsity',
                       'MagnitudeSparsity',
                       'BaseWeightSparsity']

__all__ = QUANTIZATION_ALGORITHMS + SPARSITY_ALGORITHMS
2
tools/pot/openvino/tools/pot/algorithms/__init__.py
Normal file
@ -0,0 +1,2 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
70
tools/pot/openvino/tools/pot/algorithms/algorithm.py
Normal file
@ -0,0 +1,70 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from abc import ABC, abstractmethod
from copy import deepcopy

from ..api.engine import Engine


class Algorithm(ABC):

    algo_type = 'quantization'

    def __init__(self, config, engine: Engine):
        """ Constructor
        :param config: algorithm specific config
        :param engine: model inference engine
        """
        self._config, self._engine = deepcopy(config), engine
        self._stats_collector = None
        self.params = {}
        self.default_steps_size = 0.05
        self.total_exec_steps = 0

    @property
    def config(self):
        return self._config

    @property
    def algo_collector(self):
        return self._stats_collector

    @algo_collector.setter
    def algo_collector(self, collector):
        self._stats_collector = collector

    @abstractmethod
    def run(self, model):
        """ Run algorithm on model
        :param model: model to apply algorithm
        :return: optimized model
        """

    def statistics(self):
        """ Returns a dictionary of printable statistics """
        return {}

    def register_statistics(self, model, stats_collector):
        """
        :param model: FP32 original model
        :param stats_collector: object of StatisticsCollector class
        :return: None
        """

    def get_parameter_meta(self, _model, _optimizer_state):
        """ Get parameters metadata
        :param _model: model to get parameters for
        :param _optimizer_state: dictionary describing the optimizer state that allows tuning
        the created search space differently for different optimizer states
        :return params_meta: metadata of optional parameters
        """
        return []

    def compute_total_exec_steps(self, model=None):
        """ Compute execution steps based on stat_subset_size, algorithm, model """

    def update_config(self, config):
        """ Update Algorithm configuration based on input config """
        self._config = deepcopy(config)
@ -0,0 +1,27 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

from ..utils.registry import Registry, RegistryStorage

COMPRESSION_ALGORITHMS = Registry('QuantizationAlgos')
REGISTRY_STORAGE = RegistryStorage(globals())


def get_registry(name):
    return REGISTRY_STORAGE.get_registry(name)


def get_algorithm(name):
    if name.startswith('.') or name.endswith('.'):
        raise Exception('The algorithm name cannot start or end with "."')

    if '.' in name:
        # A dotted name addresses a specific registry: '<RegistryName>.<AlgorithmName>'
        ind = name.find('.')
        reg_name = name[:ind]
        algo_name = name[ind + 1:]
    else:
        # A plain name defaults to the quantization registry
        reg_name = 'QuantizationAlgos'
        algo_name = name

    reg = get_registry(reg_name)
    return reg.get(algo_name)
@ -0,0 +1,2 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
378
tools/pot/openvino/tools/pot/algorithms/finetuning/algorithm.py
Normal file
@ -0,0 +1,378 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import random
from copy import deepcopy
import numpy as np
import torch

from .utils import get_optimization_params
from ..quantization.accuracy_aware_common.utils import evaluate_model, create_metric_config
from ...algorithms.algorithm import Algorithm
from ...engines.simplified_engine import SimplifiedEngine
from ...graph import model_utils as mu, node_utils as nu
from ...graph.special_operations import OPERATIONS_WITH_WEIGHTS
from ...samplers.batch_sampler import BatchSampler
from ...statistics.collector import collect_statistics
from ...statistics.statistics import TensorStatistic
from ...utils.logger import get_logger

logger = get_logger(__name__)


# pylint: disable=E1102,C0415,R0902,R0912
class LayerwiseModelFinetuning(Algorithm):
    name = 'LayerwiseModelFinetuning'

    @property
    def change_original_model(self):
        return True

    def __init__(self, config, engine):
        super().__init__(config, engine)
        self._tconf = {
            'optimizer': 'Adam',
            'loss': 'l2',
            'seed': 0,
            'weight_decay': 0,
            'loss_logging_freq': 10,
            'calibration_indices_pool': 300,
            'use_only_fp_inputs': True,
            'calculate_grads_on_loss_increase_only': True,
            'update_every_batch': False,
            'use_ranking_subset': False,
            'tuning_ignored_scope': self._config.ignored.get('scope', []),
            'batch_size': 1
        }
        for key, value in self._tconf.items():
            self._tconf[key] = self._config.get(key, value)

        self._device = 'cpu'
        self._current_best_loss = 0.0
        self._iteration = 0
        self._safety_eps = 1e-8
        self._dataset_size = len(self._engine.data_loader)
        self._samples_indices_pool = range(self._dataset_size)
        self._weighted_operations = [op['type'] for op in OPERATIONS_WITH_WEIGHTS]
        self._is_variable_resolution_model = False
        self._optimization_dataset_size = self._dataset_size
        self._metric_subset_ratio = (10 * self._tconf['calibration_indices_pool'] / self._optimization_dataset_size)
        self._ranking_subset_size = self._tconf['calibration_indices_pool']

        self._original_model = None
        self._initial_losses = {}
        self._nodes_to_tune = {}
        self._nodes_to_tune_input = {}
        self._nodes_to_tune_output = {}
        self._layer_ops_wrapped = {}
        self._is_simplified_evaluation = isinstance(self._engine, SimplifiedEngine)
        self._base_algo_config = deepcopy(self._config)
        self._base_algo = None
        self._base_algo_args = None
        self._metrics_config = None

        self.set_seed(self._tconf['seed'], self._device)
        self.set_default_parameters()

    def set_default_parameters(self):
        if self._tconf['use_ranking_subset']:
            if self._is_simplified_evaluation:
                logger.info('Cannot use ranking subset in simplified mode')
                self._tconf['use_ranking_subset'] = False
            else:
                self._metrics_config = create_metric_config(
                    self._engine,
                    self._config,
                    force_logit_comparison=True,
                    logit_distance_type='mse',
                )

        if (self._tconf['calibration_indices_pool'] is not None
                and self._tconf['calibration_indices_pool'] < self._optimization_dataset_size):
            self._samples_indices_pool = random.sample(
                range(self._optimization_dataset_size), self._tconf['calibration_indices_pool'])

    def run(self, model):
        raise NotImplementedError

    def _collect_nodes_to_tune(self, modified_model):
        raise NotImplementedError

    def _wrap_nodes(self, modified_model, nodes_to_tune):
        raise NotImplementedError

    def _calculate_gradients(self, losses):
        pass

    def _get_optimizer_and_criterion(self, wrapped_ops_parameters):
        criterion, optimizer_algorithm = get_optimization_params(self._tconf['loss'], self._tconf['optimizer'])
        optimizers = {
            name: optimizer_algorithm(params=param, weight_decay=self._tconf['weight_decay'])
            for name, param in wrapped_ops_parameters.items()
        }
        return optimizers, criterion

    def _wrap_node(self, op_node, wrapper, op_info):
        params = []
        wrapped_op = None
        if wrapper.is_able_to_wrap(op_node):
            wrapped_op = wrapper(op_node, device=self._device, **op_info)
            for name, param in wrapped_op.named_parameters():
                lr_name = name + '_lr'
                if lr_name in self._tconf.keys():
                    params.append({'lr': self._tconf[lr_name], 'params': [param]})
                else:
                    logger.warning('Undefined parameter found: {}'.format(name))
                    continue
        else:
            logger.warning('Was not able to wrap layer {} with PyTorch'.format(op_node.name))
        return wrapped_op, params

    def _fine_tuning_loop(
            self,
            modified_model,
            optimizers,
            criterion,
            n_batches,
            fp_model_callbacks,
            modified_model_callbacks=None
    ):
        for layer in self._layer_ops_wrapped.values():
            layer.to(self._device)

        for optimizer in optimizers.values():
            optimizer.zero_grad()

        try:
            # Calculate feature maps for the original model beforehand on the used batch
            batch_indices_sample = self._random_samples()
            fp_activations = self._update_batch_from_model(self._original_model,
                                                           batch_indices_sample,
                                                           fp_model_callbacks)

            for batch_idx in range(n_batches):
                if batch_idx != 0 and self._tconf['update_every_batch']:
                    logger.debug('Batch update')
                    batch_indices_sample = self._random_samples()
                    fp_activations = self._update_batch_from_model(self._original_model,
                                                                   batch_indices_sample,
                                                                   fp_model_callbacks)

                modified_activations = fp_activations
                if modified_model_callbacks:
                    modified_activations = self._update_batch_from_model(modified_model,
                                                                         batch_indices_sample,
                                                                         modified_model_callbacks)

                self._fine_tuning_step(
                    optimizers,
                    criterion,
                    batch_idx,
                    fp_activations,
                    modified_activations,
                    n_batches
                )
            return 0

        except MemoryError:
            return -1

    def _random_samples(self):
        batch_indices_sample = random.sample(self._samples_indices_pool, self._tconf['batch_size'])
        if self._is_simplified_evaluation:
            batch_indices_sample = BatchSampler(batch_indices_sample)
        return batch_indices_sample

    def _update_batch_from_model(self, model, batch_indices_sample, model_callbacks):
        self._engine.set_model(model)
        _, output_activations = self._engine.predict(model_callbacks, batch_indices_sample)
        return self._activation_maps_to_torch(output_activations)

    def _fine_tuning_step(
            self,
            optimizers,
            criterion,
            batch_idx,
            fp_activations,
            modified_activations,
            n_batches
    ):
        accumulated_losses = {op_name: 0.0 for op_name in self._layer_ops_wrapped}
        losses = {}
        for op_name in self._layer_ops_wrapped:
            torch_wrapped_op = self._layer_ops_wrapped[op_name]
            input_name = self._nodes_to_tune_input[op_name]
            output_name = self._nodes_to_tune_output[op_name]

            in_blobs = modified_activations[input_name]['output']
            if self._tconf['use_only_fp_inputs']:
                in_blobs = fp_activations[input_name]['output']
            fp_out_blobs = fp_activations[output_name]['output']

            if not self._is_variable_resolution_model:
                modified_out_blobs = torch_wrapped_op(in_blobs)
                losses[op_name] = criterion(modified_out_blobs, fp_out_blobs)
            else:
                # Initialize before accumulating per-sample losses
                losses[op_name] = 0.0
                for blob_idx, modified_in_blob in enumerate(in_blobs):
                    modified_out_blob = torch_wrapped_op(torch.unsqueeze(modified_in_blob, 0))
                    losses[op_name] += criterion(
                        modified_out_blob, torch.unsqueeze(fp_out_blobs[blob_idx], 0)
                    )

        for name, loss in losses.items():
            accumulated_losses[name] = loss.data

        if batch_idx == 0 and self._iteration == 0:
            self._initial_losses = deepcopy(accumulated_losses)
            self._initial_losses = {
                name: val + self._safety_eps
                for name, val in self._initial_losses.items()
            }

        weighted_loss = 0
        for op_name in self._layer_ops_wrapped:
            init_loss = self._initial_losses[op_name]
            accumulated_loss = accumulated_losses[op_name]
            weighted_loss += accumulated_loss / init_loss / len(self._initial_losses)

        if batch_idx % self._tconf['loss_logging_freq'] == 0:
            printable_loss = weighted_loss.to('cpu').numpy()
            logger.info(
                'Batch #%s/%s, weighted_loss: %s',
                batch_idx + 1,
                n_batches,
                printable_loss,
            )

        if self._tconf['calculate_grads_on_loss_increase_only']:
            if weighted_loss >= self._current_best_loss:
                self._current_best_loss = weighted_loss
                self._calculate_gradients(losses)
            for op_name, optimizer in optimizers.items():
                optimizer.step()
                if self._current_best_loss == weighted_loss:
                    optimizer.zero_grad()
            self._current_best_loss = weighted_loss
        else:
            self._calculate_gradients(losses)
            for op_name, optimizer in optimizers.items():
                optimizer.step()
                optimizer.zero_grad()
        if self._tconf['update_every_batch']:
            for layer in self._layer_ops_wrapped.values():
                layer.update_node_params()

    def _activation_maps_to_torch(self, activations):
        for layer_name in activations:
            activations[layer_name]['output'] = [
                torch.tensor(activations[layer_name]['output'][index][0]).to(self._device) for index in
                range(len(activations[layer_name]['output']))]
            if len({feature_map.shape for feature_map in activations[layer_name]['output']}) > 1:
                self._is_variable_resolution_model = True
        if not self._is_variable_resolution_model:
            for layer_name in activations:
                activations[layer_name]['output'] = torch.stack(activations[layer_name]['output'])
        return activations

    def _get_ranking_subset(self):
        """
        Find a subset of samples with the highest distance between
        the outputs of the original and compressed models (a ranking subset)
        :return: ranking data subset indices
        """
        base_algo = self._base_algo(**self._base_algo_args)
        base_algo.register_statistics(self._original_model, self.algo_collector)
        collect_statistics(self._engine, self._original_model, [base_algo])
        base_model = base_algo.run(deepcopy(self._original_model))
        output_node_name = nu.get_node_input(self._original_model.get_final_output_nodes()[0], 0).name

        stats_layout = {output_node_name: {'output_logits': TensorStatistic(lambda logits: logits)}}
        metric_subset_size = int(self._dataset_size * self._metric_subset_ratio)
        diff_subset_indices = (
            sorted(random.sample(range(self._dataset_size), metric_subset_size))
            if metric_subset_size < self._dataset_size
            else list(range(self._dataset_size))
        )

        _, original_per_sample_metrics = evaluate_model(
            self._original_model,
            self._engine,
            self._dataset_size,
            subset_indices=diff_subset_indices,
            metrics_config=self._metrics_config,
            output_node_name=output_node_name,
            stats_layout=stats_layout,
        )
        _, base_model_per_sample_metrics = evaluate_model(
            base_model,
            self._engine,
            self._dataset_size,
            subset_indices=diff_subset_indices,
            metrics_config=self._metrics_config,
            output_node_name=output_node_name,
            stats_layout=stats_layout,
        )

        persample_metric = list(self._metrics_config.values())[0].persample
        sorted_sample_importance = persample_metric.sort_fn(
            original_per_sample_metrics[persample_metric.name],
            base_model_per_sample_metrics[persample_metric.name],
            reverse=True,
        )
        ranking_indices = sorted_sample_importance[: self._ranking_subset_size]
        ranking_subset = list(np.array(diff_subset_indices)[ranking_indices])
        return ranking_subset

    def _create_layer_callbacks(self, modified_model):
        fp_model_callbacks = {}
        modified_model_callbacks = {}

        for op_name in self._nodes_to_tune:
            modified_node = mu.get_node_by_name(modified_model, op_name)

            input_node = self._get_input_node(modified_node)
            output_node = input_node
            if modified_node.type in self._weighted_operations:
                bias_node = nu.get_bias_for_node(modified_node)
                output_node = modified_node
                if bias_node is not None:
                    output_node = nu.get_node_output(bias_node, 0)[0]
            input_node_name = self._get_input_node_name(modified_node)

            if self._tconf['use_only_fp_inputs']:
                fp_model_callbacks[input_node_name] = {'output': lambda tensor: tensor}
            else:
                modified_model_callbacks[input_node_name] = {'output': lambda tensor: tensor}
            fp_model_callbacks[output_node.name] = {'output': lambda tensor: tensor}
            self._nodes_to_tune_input[op_name] = input_node_name
            self._nodes_to_tune_output[op_name] = output_node.name

        return fp_model_callbacks, modified_model_callbacks

    def register_statistics(self, model, stats_collector):
        self.algo_collector = stats_collector

    def _check_batch_size(self):
        if self._tconf['batch_size'] > self._dataset_size:
            logger.debug('Batch size changed from - {} to dataset size - {}.'.format(
                self._tconf['batch_size'], self._dataset_size))
            self._tconf['batch_size'] = self._dataset_size

    @staticmethod
    def set_seed(seed, device):
        np.random.seed(seed)
        random.seed(seed)
        torch.manual_seed(seed)
        if device != 'cpu':
            import torch.backends.cudnn as cudnn
            cudnn.deterministic = True
            cudnn.benchmark = False

    @staticmethod
    def _get_input_node(node):
        return nu.get_node_input(node, 0)

    @staticmethod
    def _get_input_node_name(node):
        return nu.get_quantized_input_key(node)
235
tools/pot/openvino/tools/pot/algorithms/finetuning/layers.py
Normal file
@ -0,0 +1,235 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import numpy as np
import torch

from openvino.tools.pot.graph import node_utils as nu
from openvino.tools.pot.utils.logger import get_logger
from .utils import get_weight_node

logger = get_logger(__name__)


# pylint: disable=W0221
class STERound(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input_data, val_min, val_max):
        ctx.save_for_backward(input_data)
        ctx.val_min = val_min
        ctx.val_max = val_max
        return input_data.round()

    @staticmethod
    def backward(ctx, grad_output):
        (input_data,) = ctx.saved_tensors
        alpha = 0.01
        mask = (input_data <= ctx.val_max) & (input_data >= ctx.val_min)
        mask = mask.type(input_data.dtype)
        grad_input = grad_output * (mask * (1 - alpha) + alpha)
        return grad_input, None, None


# pylint: disable=E1102,W0223
class FakeQuantize(torch.nn.Module):
    """
    A PyTorch wrapper for a single FakeQuantize node.
    """

    @staticmethod
    def is_able_to_wrap(node):
        if node.type != 'FakeQuantize':
            return False
        is_const = [
            node.in_port(i).get_source().node.type == 'Const' for i in range(1, 5)
        ]
        if not all(is_const):
            return False
        data = [node.in_port(i).data.get_value() for i in range(1, 5)]
        diff = [np.max(np.abs(data[i] - data[i + 2])) for i in [0, 1]]
        diff = max(diff)
        if diff > 10 ** -8:
            logger.info('FakeQuantize {} has different input and output scales'.format(node.name))
            return False

        return True

    def __init__(self, node, device='cpu', asymmetric=False):
        super(FakeQuantize, self).__init__()
        self.node = node
        self.device = device
        input_0 = nu.get_node_input(self.node, 0)
        self.is_weight_fq = input_0.type == 'Const'
        self.asymmetric = asymmetric

        min_val = nu.get_node_value(nu.get_node_input(self.node, 1))
        max_val = nu.get_node_value(nu.get_node_input(self.node, 2))
        min_val = np.array(min_val, dtype=np.float32)
        self.min = torch.tensor(min_val).to(self.device)
        self.min = torch.nn.Parameter(self.min) if self.asymmetric else self.min

        ranges = np.array(max_val - min_val, dtype=np.float32)
        self.scale = torch.tensor(ranges).log()
        self.scale = self.scale.to(self.device)
        self.scale = torch.nn.Parameter(self.scale)

        self.val_h = int(self.node.levels - 1)
        self.val_l = 0

    def update_node_params(self):
        scale = self.scale.exp()
        max_level = scale.detach().cpu().numpy()
        max_level = np.reshape(max_level, nu.get_input_shape(self.node, 2))
        min_level = self.min.detach().cpu().numpy()
        min_level = np.reshape(min_level, nu.get_input_shape(self.node, 1))
        max_level = min_level + max_level

        self.node.in_port(1).data.set_value(min_level)
        self.node.in_port(2).data.set_value(max_level)
        self.node.in_port(3).data.set_value(min_level)
        self.node.in_port(4).data.set_value(max_level)

    def forward(self, x):
        scale = self.scale.exp()
        s = self.val_h * scale.reciprocal()
        x = x - self.min
        x = x.clamp(max=self.val_h, min=self.val_l)
        x = x * s
        x = STERound.apply(x, self.val_l, self.val_h)
        x = x * s.reciprocal() + self.min
        return x


# pylint: disable=E1102,W0223
class LinearModule(torch.nn.Module):
    """
    A PyTorch wrapper for a single Conv2d/Linear node.
    """

    @staticmethod
    def is_able_to_wrap(node):
        if node.type not in ['Convolution', 'MatMul', 'GroupConvolution']:
            return False

        node_weight = nu.get_node_input(node, 1)
        if node_weight.type == 'FakeQuantize':
            node_weight = nu.get_node_input(node_weight, 0)
        if node_weight.type != 'Const':
            return False

        if node.type != 'MatMul':
            weights = nu.get_node_value(node_weight)
            if len(weights.shape) != 4:
                return False

            s = node.stride
            stride_check = (s[2] == s[3])

            d = node.dilation
            dilation_check = (d[2] == d[3])

            if not dilation_check or not stride_check:
                return False

        bias_node = nu.get_bias_for_node(node)
        if bias_node is not None:
            bias_value = nu.get_node_value(bias_node)
            if bias_value.shape[0] != 1:
                return False
        return True

    def __init__(self,
                 node,
                 input_fq=None,
                 wrap_weight_fq=False,
                 device='cpu',
                 set_quantized_values_to_weight_parameter=False,
                 asymmetric=False):
        super().__init__()

        self.node = node
        self.device = device

        self.set_quantized_values_to_weight_parameter = set_quantized_values_to_weight_parameter
        self.weight_fq, self.input_fq = None, input_fq

        if wrap_weight_fq:
            weight_fq = nu.get_node_input(self.node, 1)
            weight_fq_wrapper = FakeQuantize
            if not weight_fq_wrapper.is_able_to_wrap(weight_fq):
                logger.warning('Was not able to wrap layer %s with PyTorch', weight_fq.name)
                self.weight_fq = None
            else:
                self.weight_fq = weight_fq_wrapper(weight_fq, device=device,
                                                   asymmetric=asymmetric)

        node_weight = get_weight_node(node)
        weights = nu.get_node_value(node_weight)
        self.weights_dtype = weights.dtype
        weights = torch.from_numpy(weights).to(torch.float32)
        weights = weights.to(device)
        self.weights = torch.nn.Parameter(weights)

        self.bias = None
        bias_node = nu.get_bias_for_node(self.node)
        if bias_node is not None:
            bias = nu.get_node_value(bias_node)
            self.bias_dtype = bias.dtype
            bias = torch.from_numpy(bias).to(torch.float32).squeeze()
            bias = bias if bias.shape else bias.reshape(1)
            bias = bias.to(device)
            self.bias = torch.nn.Parameter(bias)

        if self.node.type != 'MatMul':
            self.stride = (int(node.stride[2]), int(node.stride[3]))
            self.pads_begin, self.pads_end = node.pad[2], node.pad[3]
            self.dilation = (int(node.dilation[2]), int(node.dilation[3]))
            self.group = 1 if 'group' not in node else int(node.group)

    def update_node_params(self):
        weights = self.weights.detach()
        weights = weights.cpu() if self.device != 'cpu' else weights
        weights = weights.numpy().astype(self.weights_dtype)
        weight_node = get_weight_node(self.node)
        nu.set_node_value(weight_node, weights)

        if self.weight_fq is not None:
            self.weight_fq.update_node_params()
        if self.input_fq is not None:
            self.input_fq.update_node_params()

        if self.bias is not None:
            bias_node = nu.get_bias_for_node(self.node)
            bias_shape = nu.get_node_value(bias_node).shape
            bias = self.bias.data.reshape(bias_shape)

            bias = bias.detach()
            bias = bias.cpu() if self.device != 'cpu' else bias
            bias = bias.numpy().astype(self.bias_dtype)
            nu.set_node_value(bias_node, bias)

    def forward(self, x):
        w = self.weight_fq(self.weights) if self.weight_fq is not None else self.weights
        x = self.input_fq(x) if self.input_fq is not None else x
        if self.set_quantized_values_to_weight_parameter and self.weight_fq is not None:
            self.weights.data = w

        if self.node.type == 'MatMul':
            x = torch.nn.functional.linear(x,
                                           self.weights,
                                           bias=self.bias)
        else:
            pad_top, pad_bottom = int(self.pads_begin[0]), int(self.pads_begin[1])
            pad_left, pad_right = int(self.pads_end[0]), int(self.pads_end[1])
            x = torch.nn.functional.pad(x, (pad_left, pad_right, pad_top, pad_bottom))
            x = torch.nn.functional.conv2d(
                x,
                self.weights,
                bias=self.bias,
                stride=self.stride,
                dilation=self.dilation,
                groups=self.group
            )

        return x
31
tools/pot/openvino/tools/pot/algorithms/finetuning/utils.py
Normal file
@ -0,0 +1,31 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import torch

from openvino.tools.pot.graph import node_utils as nu
from openvino.tools.pot.utils.logger import get_logger


logger = get_logger(__name__)


def get_optimization_params(loss_name, optimizer_name):
    loss_fn_map = {
        'l2': torch.nn.MSELoss(),
    }

    optimizer_map = {
        'Adam': torch.optim.Adam,
        'SGD': torch.optim.SGD,
    }
    return loss_fn_map[loss_name], optimizer_map[optimizer_name]


def get_weight_node(node, port_id=1):
    node_weight = nu.get_node_input(node, port_id)
    if node_weight.type == 'FakeQuantize':
        node_weight = nu.get_node_input(node_weight, 0)
    if node_weight.type != 'Const':
        raise ValueError('Provided weight node is not Const!')
    return node_weight
151
tools/pot/openvino/tools/pot/algorithms/quantization/README.md
Normal file
@ -0,0 +1,151 @@
# Quantization {#pot_compression_algorithms_quantization_README}

The primary optimization feature of the Post-training Optimization Tool (POT) is uniform quantization. In general,
this method supports an arbitrary number of bits, greater than or equal to two, for representing weights and activations.
During the quantization process, the method inserts [FakeQuantize](@ref openvino_docs_ops_quantization_FakeQuantize_1)
operations into the model graph automatically, based on a predefined hardware target, in order to produce the most
hardware-friendly optimized model:


After that, different quantization algorithms can tune the `FakeQuantize` parameters or remove some of them in order to
meet the accuracy criteria. The resulting *fakequantized* models are interpreted and transformed into real low-precision
models during inference at the OpenVINO™ Inference Engine runtime, giving a real performance improvement.

## Quantization Algorithms

Currently, the POT provides the following algorithms for 8-bit quantization, which are verified and provide stable results on a
wide range of DNN models:
* **DefaultQuantization** is a default method that provides fast and, in most cases, accurate results for 8-bit
quantization. For details, see the [DefaultQuantization Algorithm](@ref pot_compression_algorithms_quantization_default_README) documentation.

* **AccuracyAwareQuantization** enables remaining within a predefined range of accuracy drop after quantization, at the cost
of a smaller performance improvement. It may require more time for quantization. For details, see the
[AccuracyAwareQuantization Algorithm](@ref pot_compression_algorithms_quantization_accuracy_aware_README) documentation.

* **Tree-Structured Parzen Estimator (TPE)**, similarly to **AccuracyAwareQuantization**, enables remaining within a predefined range of accuracy drop,
but additionally tries to provide the best possible performance improvement. It requires even more time for quantization than **AccuracyAwareQuantization**,
but may lead to a better performance improvement. For details, see the [Tree-Structured Parzen Estimator (TPE)](@ref pot_compression_optimization_tpe_README) documentation.

## Quantization Formula

Quantization is parametrized by the clamping range and the number of quantization levels:

\f[
output = \frac{\left\lfloor (clamp(input; input\_low, input\_high)-input\_low) *s\right \rceil}{s} + input\_low\\
\f]

\f[
clamp(input; input\_low, input\_high) = min(max(input, input\_low), input\_high)
\f]

\f[
s=\frac{levels-1}{input\_high - input\_low}
\f]

In the formulas:
* `input_low` and `input_high` represent the quantization range
* \f[\left\lfloor\cdot\right \rceil\f] denotes rounding to the nearest integer

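To make the formula concrete, here is a minimal NumPy sketch of the quantize-dequantize round trip (illustrative only; the function name and the example range are assumptions, not POT API):

```python
import numpy as np

def quantize_dequantize(x, input_low, input_high, levels=256):
    """The uniform quantization round trip from the formula above."""
    s = (levels - 1) / (input_high - input_low)   # quantization scale
    x = np.clip(x, input_low, input_high)         # clamp(input; input_low, input_high)
    q = np.round((x - input_low) * s)             # discrete level in [0, levels-1]
    return q / s + input_low                      # map back to the original range

x = np.array([-1.2, -0.33, 0.0, 0.7, 1.5], dtype=np.float32)
print(quantize_dequantize(x, input_low=-1.0, input_high=1.0))
```

Out-of-range inputs are clamped to the endpoints, and in-range inputs are snapped to the nearest of the `levels` evenly spaced values.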
The POT supports symmetric and asymmetric quantization of weights and activations, which are controlled by the `preset`.
The main difference between them is that in the symmetric mode the floating-point zero is mapped directly to the integer
zero, while in the asymmetric mode it can be an arbitrary integer number. In either mode, the floating-point zero is mapped
directly to a quant without rounding error. See this [tutorial](@ref pot_docs_BestPractices) for details.

Below is a detailed description of the quantization formulas for both modes. These formulas are used both in the POT to
quantize weights of the model and in the OpenVINO™ Inference Engine runtime when quantizing activations during
inference.

#### Symmetric Quantization

The formula is parametrized by the `scale` parameter that is tuned during the quantization process:

\f[
input\_low=scale*\frac{level\_low}{level\_high}
\f]

\f[
input\_high=scale
\f]


Where `level_low` and `level_high` represent the range of the discrete signal.
* For weights:

\f[
level\_low=-2^{bits-1}+1
\f]

\f[
level\_high=2^{bits-1}-1
\f]

\f[
levels=255
\f]

* For unsigned activations:

\f[
level\_low=0
\f]

\f[
level\_high=2^{bits}-1
\f]

\f[
levels=256
\f]

* For signed activations:

\f[
level\_low=-2^{bits-1}
\f]

\f[
level\_high=2^{bits-1}-1
\f]


\f[
levels=256
\f]

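For clarity, the per-case parameters above can be computed with a small Python sketch; this is illustrative only (the function name and the `bits=8` default are assumptions, not POT API):

```python
def symmetric_range(scale, bits=8, kind='weights'):
    """Derive (input_low, input_high, levels) for symmetric quantization."""
    if kind == 'weights':
        level_low, level_high, levels = -2**(bits - 1) + 1, 2**(bits - 1) - 1, 255
    elif kind == 'unsigned_activations':
        level_low, level_high, levels = 0, 2**bits - 1, 256
    elif kind == 'signed_activations':
        level_low, level_high, levels = -2**(bits - 1), 2**(bits - 1) - 1, 256
    else:
        raise ValueError(kind)
    input_low = scale * level_low / level_high
    input_high = scale
    return input_low, input_high, levels

# For 8-bit weights with scale = 1.0: input_low = -127/127 = -1.0, input_high = 1.0, 255 levels
print(symmetric_range(1.0, kind='weights'))
```

Note how the weights case uses a symmetric level range (-127..127, 255 levels), which keeps the floating-point zero exactly on the integer zero.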
#### Asymmetric Quantization

The quantization formula is parametrized by `input_low` and `input_range`, which are tunable parameters:

\f[
input\_high=input\_low + input\_range
\f]

\f[
levels=256
\f]

For weights and activations the following quantization mode is applied:

\f[
{input\_low}' = min(input\_low, 0)
\f]

\f[
{input\_high}' = max(input\_high, 0)
\f]

\f[
ZP= \left\lfloor \frac{-{input\_low}'*(levels-1)}{{input\_high}'-{input\_low}'} \right \rceil
\f]

\f[
{input\_high}''=\frac{ZP-levels+1}{ZP}*{input\_low}'
\f]

\f[
{input\_low}''=\frac{ZP}{ZP-levels+1}*{input\_high}'
\f]

\f[
{input\_low,input\_high} = \begin{cases} {input\_low}',{input\_high}', & ZP \in \{0,levels-1\} \\ {input\_low}',{input\_high}'', & {input\_high}'' - {input\_low}' > {input\_high}' - {input\_low}'' \\ {input\_low}'',{input\_high}', & {input\_high}'' - {input\_low}' <= {input\_high}' - {input\_low}''\\ \end{cases}
\f]
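The case analysis above selects the smaller range extension that puts the zero-point exactly on an integer level. A small illustrative Python sketch of this adjustment (not POT source; the names and the example range are assumptions):

```python
def align_asymmetric_range(input_low, input_range, levels=256):
    """Adjust (input_low, input_high) so that 0.0 maps exactly to an integer zero-point."""
    input_high = input_low + input_range
    low = min(input_low, 0.0)    # input_low'
    high = max(input_high, 0.0)  # input_high'
    zp = round(-low * (levels - 1) / (high - low))  # candidate zero-point
    if zp in (0, levels - 1):
        return low, high  # zero already sits on a boundary level
    # Candidate ranges that make zp exact by stretching one endpoint
    high2 = (zp - levels + 1) / zp * low  # input_high''
    low2 = zp / (zp - levels + 1) * high  # input_low''
    if high2 - low > high - low2:
        return low, high2
    return low2, high

# Example: a slightly off-center range is stretched so the zero-point becomes exact
print(align_asymmetric_range(-0.9, 2.0))
```

For the example range [-0.9, 1.1], the candidate zero-point rounds to 115, so `input_low` is stretched to about -0.9036, after which `-input_low * 255 / (input_high - input_low)` equals 115 exactly.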
@ -0,0 +1,2 @@
# Copyright (C) 2020-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0