* Drop POT

* Removed POT transformations
Ilya Lavrenov 2023-12-21 16:46:37 +04:00 committed by GitHub
parent c79ae17bbf
commit 80618b0498
GPG Key ID: 4AEE18F83AFDEB23
496 changed files with 0 additions and 29536 deletions

14 .ci/pot/Jenkinsfile vendored
View File

@ -1,14 +0,0 @@
#!groovy
properties([
parameters([
string(defaultValue: '',
description: 'Pipeline shared library version (branch/tag/commit). Determined automatically if empty',
name: 'library_version')
])
])
loadOpenVinoLibrary {
potEntrypoint(this)
}

View File

@ -11,7 +11,6 @@ __version__ = get_version()
from openvino._pyopenvino._offline_transformations import apply_fused_names_cleanup
from openvino._pyopenvino._offline_transformations import apply_moc_transformations
from openvino._pyopenvino._offline_transformations import apply_moc_legacy_transformations
from openvino._pyopenvino._offline_transformations import apply_pot_transformations
from openvino._pyopenvino._offline_transformations import apply_low_latency_transformation
from openvino._pyopenvino._offline_transformations import apply_pruning_transformation
from openvino._pyopenvino._offline_transformations import apply_make_stateful_transformation

View File

@ -9,7 +9,6 @@
#include <compress_quantize_weights.hpp>
#include <openvino/pass/make_stateful.hpp>
#include <openvino/pass/serialize.hpp>
#include <pot_transformations.hpp>
#include <pruning.hpp>
#include <transformations/common_optimizations/compress_float_constants.hpp>
#include <transformations/common_optimizations/fused_names_cleanup.hpp>
@ -55,16 +54,6 @@ void regmodule_offline_transformations(py::module m) {
py::arg("model"),
py::arg("params_with_custom_types"));
m_offline_transformations.def(
"apply_pot_transformations",
[](std::shared_ptr<ov::Model> model, std::string device) {
ov::pass::Manager manager;
manager.register_pass<ov::pass::POTTransformations>(std::move(device));
manager.run_passes(model);
},
py::arg("model"),
py::arg("device"));
m_offline_transformations.def(
"apply_low_latency_transformation",
[](std::shared_ptr<ov::Model> model, bool use_const_initializer = true) {

View File

@ -7,7 +7,6 @@ import pytest
import numpy as np
from openvino._offline_transformations import (
apply_moc_transformations,
apply_pot_transformations,
apply_low_latency_transformation,
apply_pruning_transformation,
apply_make_stateful_transformation,
@ -113,15 +112,6 @@ def test_moc_with_smart_reshape():
assert len(model.get_ops()) == 3
def test_pot_transformations():
model = get_relu_model()
apply_pot_transformations(model, "GNA")
assert model is not None
assert len(model.get_ops()) == 3
def test_low_latency_transformation():
model = get_relu_model()

View File

@ -1,33 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <string>
#include "openvino/pass/graph_rewrite.hpp"
namespace ov {
namespace pass {
class POTTransformations;
} // namespace pass
} // namespace ov
/**
* @brief This transformation is an entry point for OpenVINO transformations that will be
* executed inside POT.
*/
class ov::pass::POTTransformations : public ov::pass::ModelPass {
std::string m_device;
public:
OPENVINO_RTTI("POTTransformations", "0");
explicit POTTransformations(std::string device) : m_device(std::move(device)) {}
bool run_on_model(const std::shared_ptr<ov::Model>&) override;
};

View File

@ -1,23 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "pot_transformations.hpp"
#include <memory>
#include "openvino/pass/manager.hpp"
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
#include "transformations/op_conversions/gru_cell_decomposition.hpp"
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
bool ov::pass::POTTransformations::run_on_model(const std::shared_ptr<ov::Model>& f) {
ov::pass::Manager manager(get_pass_config());
manager.register_pass<ov::pass::BidirectionalSequenceDecomposition>();
manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
manager.register_pass<ov::pass::GRUCellDecomposition>();
manager.register_pass<ov::pass::LSTMCellDecomposition>();
manager.run_passes(f);
return false;
}
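For reference, here is a short sketch (built with standard `openvino.runtime` model-construction helpers; the model itself is illustrative) of how this pass was reachable from Python before the removal, mirroring the deleted binding and test shown above:
```python
# Illustrative pre-removal usage; `apply_pot_transformations` no longer exists after this commit.
from openvino.runtime import Model, opset8 as ops
from openvino._offline_transformations import apply_pot_transformations  # removed by this commit

param = ops.parameter([1, 3, 32, 32], name="input")        # Parameter -> ReLU -> Result
model = Model([ops.relu(param)], [param], "relu_model")

# Registered ov::pass::POTTransformations("GNA") and ran the sequence/GRU/LSTM
# decomposition passes listed in the C++ source above on the model, in place.
apply_pot_transformations(model, "GNA")
```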

View File

@ -9,12 +9,6 @@
add_subdirectory(mo)
# POT
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/pot/openvino/tools/pot/version.txt.in"
"${CMAKE_CURRENT_SOURCE_DIR}/pot/openvino/tools/pot/version.txt" @ONLY)
if(ENABLE_PYTHON)
# Benchmark Tool
add_subdirectory(benchmark_tool)

View File

@ -79,7 +79,6 @@ endfunction()
set(INIT_FILES_TOOLS
"${OpenVINO_SOURCE_DIR}/tools/mo/openvino/__init__.py"
"${OpenVINO_SOURCE_DIR}/tools/pot/openvino/__init__.py"
"${OpenVINO_SOURCE_DIR}/tools/openvino_dev/src/openvino/__init__.py")
ov_check_init_files_alignment("${INIT_FILES_TOOLS}")

View File

@ -52,13 +52,6 @@ PKG_INSTALL_CFG = {
'extract_entry_points': True,
'extract_extras': True,
},
"pot": {
'src_dir': OPENVINO_DIR / 'tools' / 'pot',
'black_list': ['*tests*'],
'prefix': 'pot',
'extract_entry_points': True,
'extract_requirements': True,
},
}

110 tools/pot/.gitignore vendored
View File

@ -1,110 +0,0 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# PyCharm
.idea
# snapshots
*.tar
# version file
version.txt

View File

@ -1,29 +0,0 @@
[MASTER]
disable = fixme,
invalid-name,
missing-docstring,
no-self-use,
too-few-public-methods,
too-many-arguments,
too-many-locals
max-attributes=20
max-line-length = 120
ignore-docstrings = yes
ignored-modules = mo,accuracy_checker,extensions,openvino.inference_engine,cv2,open_model_zoo.model_tools._configuration,open_model_zoo.model_tools._common
ignore-patterns = ac_imports.py
extension-pkg-whitelist = numpy
[SIMILARITIES]
min-similarity-lines = 19
ignore-imports = yes
[BASIC]
good-names=logger,fn
[DESIGN]
max-statements=120
max-branches=14
max-nested-blocks=7
[OPTIONS]
generated-members=torch.*

View File

@ -1,5 +0,0 @@
# See help here: https://docs.gitlab.com/ee/user/project/code_owners.html
# Control 3d party dependencies
**/*requirements*.* openvino.configuration.mgmt@intel.com
**/setup.py openvino.configuration.mgmt@intel.com

View File

@ -1,58 +0,0 @@
# Post-Training Optimization Tool
## Introduction
Post-training Optimization Tool (POT) is designed to accelerate the inference of deep learning models by applying
special methods without model retraining or fine-tuning, for example, post-training 8-bit quantization. Therefore, the tool does not
require a training dataset or a pipeline. To apply post-training algorithms from the POT, you need:
* A floating-point precision model, FP32 or FP16, converted into the OpenVINO&trade; Intermediate Representation (IR) format
that can be run on CPU with OpenVINO&trade; Runtime.
* A representative calibration dataset that reflects a use case scenario, for example, 300 samples.
The figure below shows the optimization workflow:
![](docs/images/workflow_simple.svg)
To get started with the POT tool, refer to the corresponding OpenVINO&trade; [documentation](https://docs.openvino.ai/2023.2/openvino_docs_model_optimization_guide.html).
## Installation
### From PyPI
POT is distributed as part of the OpenVINO&trade; Development Tools package. For installation instructions, please refer to this [document](https://docs.openvino.ai/2023.2/openvino_docs_install_guides_install_dev_tools.html).
### From GitHub
As prerequisites, you should install [OpenVINO&trade; Runtime](https://docs.openvino.ai/2023.2/openvino_docs_install_guides_overview.html) and other dependencies such as [Model Optimizer](https://docs.openvino.ai/2023.2/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) and [Accuracy Checker](https://docs.openvino.ai/2023.2/omz_tools_accuracy_checker.html).
To install POT from source:
- Clone OpenVINO repository
```sh
git clone --recursive https://github.com/openvinotoolkit/openvino.git
```
- Navigate to `openvino/tools/pot/` folder
- Install POT package:
```sh
python3 setup.py install
```
After installation, POT is available as a Python library under `openvino.tools.pot.*` and on the command line via the `pot` alias. To verify the installation, run `pot -h`.
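Beyond the CLI check, here is a minimal, illustrative sketch of the Python API flow. The random data loader, file paths, and parameter values below are placeholders rather than files from this repository; a real pipeline would read a calibration dataset:
```python
# Hedged sketch of a basic DefaultQuantization flow with the POT Python API.
import numpy as np
from openvino.tools.pot import DataLoader, IEEngine, load_model, save_model, create_pipeline

class RandomDataLoader(DataLoader):
    """Feeds random tensors; replace with a loader over a real calibration dataset."""
    def __len__(self):
        return 300

    def __getitem__(self, index):
        data = np.random.rand(1, 3, 224, 224).astype(np.float32)
        return data, None  # (data, annotation); DefaultQuantization needs no annotations

model_config = {"model_name": "model", "model": "<MODEL_PATH>.xml", "weights": "<MODEL_PATH>.bin"}
engine_config = {"device": "CPU"}
algorithms = [{"name": "DefaultQuantization",
               "params": {"target_device": "ANY", "preset": "performance", "stat_subset_size": 300}}]

model = load_model(model_config)                                   # read the IR
engine = IEEngine(config=engine_config, data_loader=RandomDataLoader(config={}))
pipeline = create_pipeline(algorithms, engine)                     # build the optimization pipeline
compressed_model = pipeline.run(model)                             # run calibration and quantization
save_model(compressed_model, save_path="./optimized")              # write the quantized IR
```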
## Examples
OpenVINO provides several examples to demonstrate the POT optimization workflow:
* Command-line example:
* [Quantization of Image Classification model](https://docs.openvino.ai/2023.2/pot_configs_examples_README.html)
* API tutorials:
* [Quantization of Image Classification model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/301-tensorflow-training-openvino)
* [Quantization of Object Detection model from Model Zoo](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/111-yolov5-quantization-migration)
* [Quantization of Segmentation model for medical data](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/110-ct-segmentation-quantize)
* [Quantization of BERT for Text Classification](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/105-language-quantize-bert)
* API examples:
* [Quantization of 3D segmentation model](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/3d_segmentation)
* [Quantization of Face Detection model](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/face_detection)
* [Quantization of Object Detection model with controllable accuracy](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/object_detection)
* [Quantization of a speech model for the GNA device](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/speech)
## See Also
* [Performance Benchmarks](https://docs.openvino.ai/2023.2/openvino_docs_performance_benchmarks.html)

View File

@ -1,53 +0,0 @@
# Post-training Optimization Tool
Starting with the 2020.1 release, the OpenVINO&trade; toolkit delivers the Post-Training Optimization Tool, designed to accelerate the inference of DL models by converting them into a more hardware-friendly representation using methods that do not require re-training, for example, post-training quantization.
For more details about the low-precision flow in OpenVINO&trade;, refer to the [Low Precision Optimization Guide](docs/LowPrecisionOptimizationGuide.md).
The Post-Training Optimization Tool includes a standalone command-line tool and a Python* API that provide the following key features:
## Key features:
* Two supported post-training quantization algorithms: fast [DefaultQuantization](openvino/tools/pot/algorithms/quantization/default/README.md) and precise [AccuracyAwareQuantization](openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md), as well as multiple experimental methods.
* Symmetric and asymmetric quantization schemes. For more details, see the [Quantization](openvino/tools/pot/algorithms/quantization/README.md) section.
* Per-channel quantization for Convolutional and Fully-Connected layers.
* Multiple domains: Computer Vision, Recommendation Systems.
* Ability to implement custom calibration pipeline via supported [API](openvino/tools/pot/api/README.md).
* Compression for different HW targets such as CPU, GPU, NPU.
* Post-training sparsity.
## Usage
### System requirements
- Ubuntu 18.04 or later (64-bit)
- Python 3.8 or later
- OpenVINO
### Installation (Temporary)
1) Clone the openvino repo: `git clone https://github.com/openvinotoolkit/openvino`
2) Download submodules:
```
git submodule init
git submodule update
```
3) Set up the model conversion API.
You can set up the model conversion API required by POT in one of two ways:
1. Install the model conversion API by running `python setup.py install` in the mo folder (`<openvino_path>/tools/mo/setup.py`)
2. Set up the model conversion API for Python via the PYTHONPATH environment variable by adding `<openvino_path>/tools/mo` to PYTHONPATH.
4) Install requirements for accuracy checker:
- From POT root: `cd ./thirdparty/open_model_zoo/tools/accuracy_checker`
- Call setup script: `python3 setup.py install`
- Get back to root POT dir: `cd <PATH_TO_POT_DIR>`
5) Install requirements for the tool:
- Call setup script: `python3 setup.py develop`
### Run
1) Prepare a configuration file for the tool based on the examples in the `configs` folder
2) Navigate to the compression tool directory
3) Launch the tool by running the following command:
`python3 main.py -c <path to config file> -e`
To test the tool, you can use the PyTorch Mobilenet_v2 model from `tests/data/models/mobilenetv2_example/mobilenetv2_example.onnx`
- If there are errors with imports in Model Optimizer, first take the following steps:
- If you installed Model Optimizer by setting the _PYTHONPATH_ variable, check the path. It should be `<openvino_path>/tools/mo`. The whole command can be found in step 3 of the Installation (Temporary) guide above.

View File

@ -1,2 +0,0 @@
# Copyright (C) 2020-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

View File

@ -1,23 +0,0 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import sys
import openvino.tools.pot.api
import openvino.tools.pot.engines
import openvino.tools.pot.graph
import openvino.tools.pot.pipeline
from openvino.tools.pot.utils.logger import get_logger
logger = get_logger(__name__)
logger.warning('Import compression is deprecated. Please use openvino.tools.pot instead')
sys.modules["compression.api"] = openvino.tools.pot.api
sys.modules["compression.engines"] = openvino.tools.pot.engines
sys.modules["compression.engines.ie_engine"] = openvino.tools.pot.engines.ie_engine
sys.modules["compression.graph"] = openvino.tools.pot.graph
sys.modules["compression.graph.model_utils"] = openvino.tools.pot.graph.model_utils
sys.modules["compression.pipeline"] = openvino.tools.pot.pipeline
sys.modules["compression.pipeline.initializer"] = openvino.tools.pot.pipeline.initializer

View File

@ -1,204 +0,0 @@
{
/* Model parameters */
"model": {
"model_name": "model_name", // Model name
"model": "<MODEL_PATH>", // Path to model (.xml format)
"weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
},
/* Parameters of the engine used for model inference */
// The Post-Training Optimization Tool supports an engine based on Accuracy Checker as well as a custom engine.
// For a custom engine, you should specify your own set of parameters.
// The engine based on Accuracy Checker uses Accuracy Checker parameters.
// You can specify the parameters via the Accuracy Checker config file or directly in the engine section.
// More information about accuracy checker parameters can be found here:
// https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker
"engine": {
"stat_requests_number": 8, // Number of requests during statistcs collection
"eval_requests_number": 8, // Number of requests during evaluation
"config": "<CONFIG_PATH>",
/* OR */
"name": "model_name",
"launchers": [
{
"framework": "dlsdk",
"device": "CPU",
"adapter": "classification"
}
],
"datasets": [
{
"name": "dataset_name",
"data_source": "<DATASET_PATH>",
"annotation": "<ANNOTATION_PATH>",
"preprocessing": [
{
"type": "resize",
"interpolation": "BILINEAR",
"aspect_ratio_scale": "greater",
"size": 224
}
],
"metrics": [
{
"name": "accuracy@top1",
"type": "accuracy",
"top_k": 1
}
]
}
]
},
/* Optimization hyperparameters */
"compression": {
"target_device": "ANY", // Target device, the specificity of which will be taken
// into account during optimization
"inplace_statistics": true, // An optional parameter, change method collect statistics,
// reduces the amount of memory consumed,
// but increases the calibration time
// the default value is true
"model_type": "None", // An optional parameter, needed for additional patterns in the model,
// default value is None (supported only transformer now)
"algorithms": [
{
"name": "AccuracyAwareQuantization", // Optimization algorithm name
"params": {
"ranking_subset_size": 300, // A size of a subset which is used to rank layers by their
// contribution to the accuracy drop
"max_iter_num": 20, // Maximum number of iterations of the algorithm (maximum of layers
// that may be reverted back to full-precision)
"maximal_drop": 0.005, // Maximum accuracy drop which has to be achieved after the quantization
"drop_type": "absolute", // Drop type of the accuracy metric: relative or absolute (default)
"use_prev_if_drop_increase": false, // Whether to use NN snapshot from the previous algorithm
// iteration in case if drop increases
"base_algorithm": "DefaultQuantization", // Base algorithm that is used to quantize model
// at the beginning
"annotation_free": false, // Whether to compute accuracy drop on a dataset without annotation
"annotation_conf_threshold": 0.6, // Threshold for annotation creation in case of annotation free
// algorithm execution. Images on which original model predicts
// with confidence below this threshold will be skipped during
// evaluation
"convert_to_mixed_preset": false, // Whether to convert the model to mixed mode if
// the accuracy criteria of the symmetrically quantized
// model are not satisfied
// An optional list of metrics that are taken into account during optimization.
// If not specified, all metrics defined in engine config are used
"metrics": [
{
"name": "accuracy", // Metric name to optimize
"baseline_value": 0.72 // Baseline metric value of the original model
}
],
"metric_subset_ratio": 0.5, // A part of the validation set that is used to compare element-wise
// full-precision and quantized models in case of predefined metric
// values of the original model
"tune_hyperparams": false, // Whether to search the best quantization parameters for model.
// This algo uses grid search engine based on a special subset of samples from the dataset
"ignored": {
// List of nodes that are excluded from optimization
"scope": [
"<NODE_NAME>"
],
// List of types that are excluded from optimization
"operations": [
{
"type": "<NODE_TYPE>",
// Includes excluding by attributes
"attributes": {
"<NAME>": "<VALUE>" // Lists of values is not included
}
},
{
"type": "<NODE_TYPE>" // Excluding only by type
}
]
},
"preset": "mixed", // A preset is a collection of optimization algorithm parameters
// that will specify to the algorithm to improve which metric
// the algorithm needs to concentrate. Each optimization algorithm
// supports [performance, accuracy, mixed] presets
"stat_subset_size": 100, // Size of subset to calculate activations statistics that can be used
// for quantization parameters calculation
/* Manual specification of quantization parameters */
/* Quantization parameters for weights */
"weights": {
"bits": 8, // Number of quantization bits
"mode": "symmetric", // Quantization mode
"granularity": "perchannel", // Granularity: a scale for each output channel
"level_low": -127, // Low quantization level
"level_high": 127, // High quantization level
/* Parameters specify how to calculate the minimum and maximum of quantization range */
"range_estimator": {
"max": {
"type": "quantile",
"outlier_prob": 0.0001
}
}
},
/* Quantization parameters for activations */
"activations": {
"bits": 8, // Number of quantization bits
"mode": "asymmetric", // Quantization mode
"granularity": "pertensor", // Granularity: one scale for output tensor
/* Parameters specify how to calculate the minimum and maximum of quantization range */
"range_estimator": {
"preset": "quantile",
/* OR */
/* Minimum of quantization range */
"min": {
"aggregator": "mean", // Batch aggregation type [mean, max, min, median,
// mean_no_outliers, median_no_outliers, hl_estimator]
"type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
"outlier_prob": 0.0001 // Outlier probability: estimator consider samples which
},
/* Maximum of quantization range */
"max": {
"aggregator": "mean", // Batch aggregation type [mean, max, min, median,
// mean_no_outliers, median_no_outliers, hl_estimator]
"type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
"outlier_prob": 0.0001 // Outlier probability: estimator consider samples which
}
}
}
}
}
]
}
}

View File

@ -1,84 +0,0 @@
{
"model": {
"model_name": "model_name",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"launchers": [
{
"framework": "dlsdk",
"device": "CPU",
"adapter": "classification"
}
],
"datasets": {
/* Global dataset preprocessing that will be used for all datasets
if no local configuration is specified */
"preprocessing": [
{
"type": "crop",
"central_fraction": 0.875
},
{
"type": "resize",
"size": 224
}
],
/* Dataset for statistics collection */
"optimization": {
"name": "classification_dataset",
"data_source": "<DATASET_PATH>"
},
/* Dataset for final evaluation */
"evaluation": {
"name": "classification_dataset",
"data_source": "<DATASET_PATH>",
"annotation_conversion": {
"converter": "imagenet",
"annotation_file": "<ANNOTATION_FILE_PATH>"
},
/* Local preprocessing config. Overwrites global config */
"preprocessing": [
{
"type": "crop",
"central_fraction": 0.875
},
{
"type": "resize",
"size": 224
}
],
"metrics": [
{
"name": "accuracy@top1",
"type": "accuracy",
"top_k": 1
}
]
}
},
"stat_requests_number": 8,
"eval_requests_number": 8
},
"compression": {
"target_device": "ANY", // Target device, the specificity of which will be taken
// into account during optimization
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,251 +0,0 @@
{
/* Model parameters */
"model": {
"model_name": "model_name", // Model name (name of whole cascade)
/* List of models in cascade */
"cascade": [
{
/* The first model of cascade */
"name": "<FIRST_MODEL_NAME>", // Name of the first model of cascade (should be taken from engine section)
"model": "<MODEL_PATH>", // Path to the first model (.xml format)
"weights": "<PATH_TO_WEIGHTS>" // Path to the first model weights (.bin format)
},
/* ... */
{
/* The last model of cascade */
"name": "<LAST_MODEL_NAME>", // Name of the last model of cascade (should be taken from engine section)
"model": "<MODEL_PATH>", // Path to the last model (.xml format)
"weights": "<PATH_TO_WEIGHTS>" // Path to the last model weights (.bin format)
}
]
},
/* Parameters of the engine used for model inference */
// The Post-Training Optimization Tool supports an engine based on Accuracy Checker as well as a custom engine.
// For a custom engine, you should specify your own set of parameters.
// The engine based on Accuracy Checker uses Accuracy Checker parameters.
// You can specify the parameters via the Accuracy Checker config file or directly in the engine section.
// More information about accuracy checker parameters can be found here:
// https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker
"engine": {
/* Accuracy checker mode (default) */
"config": "<CONFIG_PATH>",
/* OR */
"module": "<EVALUATOR_CORRESPONDING_TO_CURRENT_CASCADE>",
"module_config": {
"network_info": {
"<FIRST_MODEL_NAME>": { // Name of the first model of cascade (this name should be used in model section)
"outputs": {
"probability_out": "prob1",
"region_out": "conv4-2"
},
"inputs": [
{
"name": "data",
"type": "INPUT",
"layout": "NCWH"
}
],
"preprocessing": [
{
"type": "bgr_to_rgb"
},
{
"type": "pyramid",
"min_size": 10,
"factor": 0.79
}
]
},
/* ... */
"<LAST_MODEL_NAME>": { // Name of the last model of cascade (this name should be used in model section)
"outputs": {
"probability_out": "prob1",
"region_out": "conv6-2"
},
"inputs": [
{
"name": "data",
"type": "INPUT",
"layout": "NCWH"
}
],
"preprocessing": [
{
"type": "bgr_to_rgb"
}
]
}
},
"launchers": [
{
"framework": "dlsdk"
}
],
"datasets": [
{
"name": "<DATASET_NAME>",
"data_source": "<DATASET_PATH>",
"postprocessing": [
{
"type": "filter",
"apply_to": "prediction",
"is_empty": true
},
{
"type": "filter",
"height_range": 60,
"apply_to": "annotation"
}
],
"metrics": [
{
"type": "map",
"ignore_difficult": true,
"include_boundaries": true,
"allow_multiple_matches_per_ignored": true,
"distinct_conf": false
}
]
}
]
}
},
/* Optimization hyperparameters */
"compression": {
"target_device": "ANY", // Target device, the specificity of which will be taken
// into account during optimization
"algorithms": [
{
"name": "DefaultQuantization", // Optimization algorithm name
"params": {
"ignored": {
/* Ignored block for the first model of cascade */
"<FIRST_MODEL_NAME>": {
// List of nodes that are excluded from optimization
"scope": [
"<NODE_NAME>"
],
// List of types that are excluded from optimization
"operations": [
{
"type": "<NODE_TYPE>",
// Includes excluding by attributes
"attributes": {
"<NAME>": "<VALUE>" // Lists of values is not included
}
},
{
"type": "<NODE_TYPE>" // Excluding only by type
}
]
},
/* ... */
/* Ignored block for the last model of cascade */
"<LAST_MODEL_NAME>": {
// List of nodes that are excluded from optimization
"scope": [
"<NODE_NAME>"
],
// List of types that are excluded from optimization
"operations": [
{
"type": "<NODE_TYPE>",
// Includes excluding by attributes
"attributes": {
"<NAME>": "<VALUE>" // Lists of values is not included
}
},
{
"type": "<NODE_TYPE>" // Excluding only by type
}
]
}
},
"preset": "accuracy", // A preset is a collection of optimization algorithm parameters
// that will specify to the algorithm to improve which metric
// the algorithm needs to concentrate. Each optimization algorithm
// supports [performance, mixed, accuracy] presets
"stat_subset_size": 100, // Size of subset to calculate activations statistics that can be used
// for quantization parameters calculation
/* Manual specification of quantization parameters */
/* Quantization parameters for weights */
"weights": {
"bits": 8, // Number of quantization bits
"mode": "symmetric", // Quantization mode
"granularity": "perchannel", // Granularity: a scale for each output channel
"level_low": -127, // Low quantization level
"level_high": 127, // High quantization level
/* Parameters specify how to calculate the minimum and maximum of quantization range */
"range_estimator": {
"max": {
"type": "quantile",
"outlier_prob": 0.0001
}
}
},
/* Quantization parameters for activations */
"activations": {
"bits": 8, // Number of quantization bits
"mode": "asymmetric", // Quantization mode
"granularity": "pertensor", // Granularity: one scale for output tensor
/* Parameters specify how to calculate the minimum and maximum of quantization range */
"range_estimator": {
"preset": "quantile",
/* OR */
/* Minimum of quantization range */
"min": {
"clipping_value": 0, // Threshold for min statistic value clipping (lower bound)
"aggregator": "mean", // Batch aggregation type [mean, max, min, median,
// mean_no_outliers, median_no_outliers, hl_estimator]
"type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
"outlier_prob": 0.0001 // Outlier probability: estimator consider samples which
},
/* Maximum of quantization range */
"max": {
"clipping_value": 6, // Threshold for max statistic value clipping (upper bound)
"aggregator": "mean", // Batch aggregation type [mean, max, min, median,
// mean_no_outliers, median_no_outliers, hl_estimator]
"type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
"outlier_prob": 0.0001 // Outlier probability: estimator consider samples which
}
}
}
}
}
]
}
}

View File

@ -1,182 +0,0 @@
{
/* Model parameters */
"model": {
"model_name": "model_name", // Model name
"model": "<MODEL_PATH>", // Path to model (.xml format)
"weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
},
/* Parameters of the engine used for model inference */
// The Post-Training Optimization Tool supports an engine based on Accuracy Checker as well as a custom engine.
// For a custom engine, you should specify your own set of parameters.
// The engine based on Accuracy Checker uses Accuracy Checker parameters.
// You can specify the parameters via the Accuracy Checker config file or directly in the engine section.
// More information about accuracy checker parameters can be found here:
// https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker
"engine": {
/* Accuracy checker mode (default) */
"stat_requests_number": 8, // Number of requests during statistcs collection
"eval_requests_number": 8, // Number of requests during evaluation
"config": "<CONFIG_PATH>",
/* OR */
"name": "model_name",
"launchers": [
{
"framework": "dlsdk",
"device": "CPU",
"adapter": "classification"
}
],
"datasets": [
{
"name": "dataset_name",
"data_source": "<DATASET_PATH>",
"annotation": "<ANNOTATION_PATH>",
"preprocessing": [
{
"type": "resize",
"interpolation": "BILINEAR",
"aspect_ratio_scale": "greater",
"size": 224
}
],
"metrics": [
{
"name": "accuracy@top1",
"type": "accuracy",
"top_k": 1
}
]
}
],
/* OR */
/* Simplified mode */
"type": "simplified", // OR default value "type": "accuracy_checker" for non simplified mode
"data_source": "PATH_TO_SOURCE" // You can specify path to directory with images. Also you can
// specify template for file names to filter images to load.
// Templates are unix style (This option valid only in simplified mode)
},
/* Optimization hyperparameters */
"compression": {
"target_device": "ANY", // Target device, the specificity of which will be taken
// into account during optimization
"model_type": "None", // An optional parameter, needed for additional patterns in the model,
// default value is None (supported only transformer now)
"dump_intermediate_model": false, // Save intermediate models for DefaultAlgorithm
"inplace_statistics": true, // An optional parameter, change method collect statistics,
// reduces the amount of memory consumed,
// but increases the calibration time
// the default value is true
"algorithms": [
{
"name": "DefaultQuantization", // Optimization algorithm name
"params": {
"ignored": {
// List of nodes that are excluded from optimization
"scope": [
"<NODE_NAME>"
],
// List of types that are excluded from optimization
"operations": [
{
"type": "<NODE_TYPE>",
// Includes excluding by attributes
"attributes": {
"<NAME>": "<VALUE>" // Lists of values is not included
}
},
{
"type": "<NODE_TYPE>" // Excluding only by type
}
]
},
"preset": "mixed", // A preset is a collection of optimization algorithm parameters
// that will specify to the algorithm to improve which metric
// the algorithm needs to concentrate. Each optimization algorithm
// supports [performance, mixed, accuracy] presets
"stat_subset_size": 100, // Size of subset to calculate activations statistics that can be used
// for quantization parameters calculation
"shuffle_data": false, // Shuffle data before selecting the subset to calculate activation
// statistics. An optional parameter, the default value is false
"seed": 0, // Seed for data shuffle. An optional parameter, the default value is 0
/* Manual specification of quantization parameters */
/* Quantization parameters for weights */
"weights": {
"bits": 8, // Number of quantization bits
"mode": "symmetric", // Quantization mode
"granularity": "perchannel", // Granularity: a scale for each output channel
"level_low": -127, // Low quantization level
"level_high": 127, // High quantization level
/* Parameters specify how to calculate the minimum and maximum of quantization range */
"range_estimator": {
"max": {
"type": "quantile",
"outlier_prob": 0.0001
}
}
},
/* Quantization parameters for activations */
"activations": {
"bits": 8, // Number of quantization bits
"mode": "symmetric", // Quantization mode
"granularity": "pertensor", // Granularity: one scale for output tensor
/* Parameters specify how to calculate the minimum and maximum of quantization range */
"range_estimator": {
"preset": "quantile",
/* OR */
/* Minimum of quantization range */
"min": {
"clipping_value": 0, // Threshold for min statistic value clipping (lower bound)
"aggregator": "mean", // Batch aggregation type [mean, max, min, median,
// mean_no_outliers, median_no_outliers, hl_estimator]
"type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
"outlier_prob": 0.0001 // Outlier probability: estimator consider samples which
},
/* Maximum of quantization range */
"max": {
"clipping_value": 6, // Threshold for max statistic value clipping (upper bound)
"aggregator": "mean", // Batch aggregation type [mean, max, min, median,
// mean_no_outliers, median_no_outliers, hl_estimator]
"type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
"outlier_prob": 0.0001 // Outlier probability: estimator consider samples which
}
}
}
}
}
]
}
}

View File

@ -1,54 +0,0 @@
models:
- name: bert_base_squad1_1
launchers:
- framework: dlsdk
device: cpu
batch: 1
adapter:
type: bert_question_answering
start_token_logits_output: "unstack/Squeeze_"
end_token_logits_output: "unstack/Split.1"
mo_params:
data_type: FP32
input: input_ids_1,input_mask_1,segment_ids_1
input_shape: "[1,384], [1,384], [1,384]"
output: unstack/Squeeze_,unstack/Split.1
mo_flags:
- disable_nhwc_to_nchw
inputs:
- name: "input_ids_1"
type: INPUT
value: "input_ids"
precision: I32
- name: "input_mask_1"
type: INPUT
value: 'input_mask'
- name: "segment_ids_1"
type: INPUT
value: 'segment_ids'
precision: I32
datasets:
- name: squad
data_source: <PATH_TO_DATASET>/squad1.1/
annotation: <PATH_TO_DATASET>/squad.pickle
reader:
type: annotation_features_extractor
features:
- input_ids
- input_mask
- segment_ids
postprocessing:
- type: extract_answers_tokens
max_answer: 30
n_best_size: 20
metrics:
- name: 'F1'
type: 'f1'
reference: 88.57
threshold: 0.01
- name: 'EM'
type: 'exact_match'
reference: 81.25
threshold: 0.01

View File

@ -1,54 +0,0 @@
models:
- name: bert_large_squad1_1
launchers:
- framework: dlsdk
device: cpu
batch: 1
adapter:
type: bert_question_answering
start_token_logits_output: "unstack/Squeeze_"
end_token_logits_output: "unstack/Split.1"
mo_params:
data_type: FP32
input: input_ids_1,input_mask_1,segment_ids_1
input_shape: "[1,384], [1,384], [1,384]"
output: unstack/Squeeze_,unstack/Split.1
mo_flags:
- disable_nhwc_to_nchw
inputs:
- name: "input_ids_1"
type: INPUT
value: "input_ids"
precision: I32
- name: "input_mask_1"
type: INPUT
value: 'input_mask'
- name: "segment_ids_1"
type: INPUT
value: 'segment_ids'
precision: I32
datasets:
- name: squad
data_source: <PATH_TO_DATASET>/squad1.1/
annotation: <PATH_TO_DATASET>/squad.pickle
reader:
type: annotation_features_extractor
features:
- input_ids
- input_mask
- segment_ids
postprocessing:
- type: extract_answers_tokens
max_answer: 30
n_best_size: 20
metrics:
- name: 'F1'
type: 'f1'
reference: 90.63
threshold: 0.05
- name: 'EM'
type: 'exact_match'
reference: 83.59
threshold: 0.05

View File

@ -1,29 +0,0 @@
models:
- name: DensNet-121
launchers:
- framework: dlsdk
device: CPU
adapter: classification
datasets:
- name: classification_dataset
data_source: <PATH TO VALIDATION DATASET>
annotation_conversion:
converter: imagenet
annotation_file: <PATH TO ANNOTATION FILE>
preprocessing:
- type: resize
size: 256
- type: crop
size: 224
metrics:
- name: accuracy@top1
type: accuracy
top_k: 1
- name: accuracy@top5
type: accuracy
top_k: 5

View File

@ -1,29 +0,0 @@
models:
- name: east
launchers:
- framework: dlsdk
device: CPU
allow_reshape_input: True
adapter:
type: east_text_detection
score_map_out: feature_fusion/Conv_7/Sigmoid
geometry_map_out: feature_fusion/concat_3
datasets:
- name: ICDAR2015
data_source: <PATH_TO_DATASET>
annotation_conversion:
converter: icdar_detection
data_dir: <PATH_TO_GROUND_TRUTH>
preprocessing:
- type: resize
dst_height: 2400
dst_width: 32
aspect_ratio_scale: east_keep_aspect_ratio
- type: bgr_to_rgb
metrics:
- type: incidental_text_hmean
name: f-measure
ignore_difficult: True

View File

@ -1,37 +0,0 @@
models:
- name: faster_rcnn_resnet101_coco
launchers:
- framework: dlsdk
device: CPU
adapter: ssd
inputs:
- name: image_info
type: CONST_INPUT
value: [[600, 1024, 1]]
datasets:
- name: ms_coco_detection_91_classes
annotation_conversion:
converter: mscoco_detection
annotation_file: <ANNOTATION_PATH>/instances_val2017.json
has_background: True
sort_annotations: True
use_full_label_map: True
data_source: <DATA_PATH>/val2017
preprocessing:
- type: resize
aspect_ratio_scale: fit_to_window
dst_height: 600
dst_width: 1024
- type: padding
dst_height: 600
dst_width: 1024
pad_type: right_bottom
postprocessing:
- type: faster_rcnn_postprocessing_resize
dst_height: 600
dst_width: 1024
metrics:
- type: coco_precision
max_detections: 100

View File

@ -1,37 +0,0 @@
models:
- name: faster_rcnn_resnet50_coco
launchers:
- framework: dlsdk
device: CPU
adapter: ssd
inputs:
- name: image_info
type: CONST_INPUT
value: [[600, 1024, 1]]
datasets:
- name: ms_coco_detection_91_classes
annotation_conversion:
converter: mscoco_detection
annotation_file: <ANNOTATION_PATH>/instances_val2017.json
has_background: True
sort_annotations: True
use_full_label_map: True
data_source: <DATA_PATH>/val2017
preprocessing:
- type: resize
aspect_ratio_scale: fit_to_window
dst_height: 600
dst_width: 1024
- type: padding
dst_height: 600
dst_width: 1024
pad_type: right_bottom
postprocessing:
- type: faster_rcnn_postprocessing_resize
dst_height: 600
dst_width: 1024
metrics:
- type: coco_precision
max_detections: 100

View File

@ -1,40 +0,0 @@
models:
- name: mask_rcnn_resnet50_atrous_coco
launchers:
- framework: dlsdk
tags:
- FP32
adapter:
type: mask_rcnn
detection_out: reshape_do_2d
raw_masks_out: masks
inputs:
- name: image_info
type: CONST_INPUT
value: [[800, 1365, 1]]
datasets:
- name: ms_coco_mask_rcnn_short_91_classes
annotation_conversion:
converter: mscoco_mask_rcnn
annotation_file: <ANNOTATION_PATH>/instances_val2017.json
has_background: True
sort_annotations: True
use_full_label_map: True
data_source: <DATA_PATH>/val2017
preprocessing:
- type: resize
aspect_ratio_scale: fit_to_window
dst_height: 800
dst_width: 1365
- type: padding
dst_height: 800
dst_width: 1365
pad_type: right_bottom
postprocessing:
- type: faster_rcnn_postprocessing_resize
dst_height: 800
dst_width: 1365
metrics:
- type: coco_orig_segm_precision
- type: coco_orig_precision

View File

@ -1,31 +0,0 @@
models:
- name: MobileNet_v1
launchers:
- framework: dlsdk
device: CPU
adapter: classification
datasets:
- name: classification_dataset
data_source: <PATH TO VALIDATION DATASET>
annotation_conversion:
converter: imagenet
annotation_file: <PATH TO ANNOTATION FILE>
has_background: true
preprocessing:
- type: resize
size: 256
aspect_ratio_scale: greater
- type: crop
size: 224
metrics:
- name: accuracy@top1
type: accuracy
top_k: 1
- name: accuracy@top5
type: accuracy
top_k: 5

View File

@ -1,34 +0,0 @@
models:
- name: MobileNet_v2
launchers:
- framework: dlsdk
device: CPU
adapter: classification
datasets:
- name: classification_dataset
data_source: <PATH TO VALIDATION DATASET>
annotation_conversion:
converter: imagenet
annotation_file: <PATH TO ANNOTATION FILE>
reader: pillow_imread
preprocessing:
- type: resize
size: 256
aspect_ratio_scale: greater
use_pillow: True
- type: crop
size: 224
use_pillow: True
- type: bgr_to_rgb
metrics:
- name: accuracy@top1
type: accuracy
top_k: 1
- name: accuracy@top5
type: accuracy
top_k: 5

View File

@ -1,73 +0,0 @@
evaluations:
- name: mtcnn
module: custom_evaluators.mtcnn_evaluator.MTCNNEvaluator
module_config:
network_info:
pnet:
outputs:
probability_out: prob1
region_out: conv4-2
inputs:
- name: data
type: INPUT
layout: NCWH
preprocessing:
- type: bgr_to_rgb
- type: pyramid
min_size: 10
factor: 0.79
rnet:
outputs:
probability_out: prob1
region_out: conv5-2
inputs:
- name: data
type: INPUT
layout: NCWH
preprocessing:
- type: bgr_to_rgb
onet:
outputs:
probability_out: prob1
region_out: conv6-2
inputs:
- name: data
type: INPUT
layout: NCWH
preprocessing:
- type: bgr_to_rgb
launchers:
- framework: dlsdk
device: CPU
datasets:
- name: wider
data_source: <PATH_TO_DATASET>/WIDER_val/images
annotation_conversion:
converter: wider
annotation_file: <PATH_TO_DATASET>/wider_face_split/wider_face_val_bbx_gt.txt
postprocessing:
- type: filter
apply_to: prediction
is_empty: True
- type: filter
height_range: 60
apply_to: annotation
metrics:
- type: recall
ignore_difficult: True
include_boundaries: True
allow_multiple_matches_per_ignored: True
distinct_conf: False
- type: map
ignore_difficult: True
include_boundaries: True
allow_multiple_matches_per_ignored: True
distinct_conf: False

View File

@ -1,38 +0,0 @@
models:
- name: NCF
launchers:
- framework: dlsdk
device: CPU
adapter: hit_ratio_adapter
batch: 2048
inputs:
- type: INPUT
value: "u"
name: embedding/embedding_lookup/placeholder_port_1
- type: INPUT
value: "i"
name: embedding_1/embedding_lookup/placeholder_port_1
- type: INPUT
value: "u"
name: embedding_2/embedding_lookup/placeholder_port_1
- type: INPUT
value: "i"
name: embedding_3/embedding_lookup/placeholder_port_1
allow_reshape_input: True
datasets:
- name: ncf_dataset
data_source: ncf
annotation: <EXISTING FOLDER PATH TO STORE ANNOTATION FILES>/ncf_converter.pickle
dataset_meta: <EXISTING FOLDER PATH TO STORE ANNOTATION FILES>/ncf_converter.json
annotation_conversion:
converter: movie_lens_converter
rating_file: <PATH TO RATING FILE>/ml-20m-test-ratings.csv
negative_file: <PATH TO NEGATIVE FILE>/ml-20m-test-negative.csv
users_max_number: 2048
reader: ncf_data_reader
metrics:
- type: hit_ratio
- type: ndcg

View File

@ -1,31 +0,0 @@
models:
- name: ssd-mobilenetv1
launchers:
- framework: dlsdk
device: CPU
adapter: ssd
batch: 1
datasets:
- name: classification_dataset
data_source: <PATH TO VALIDATION DATASET>/VOC2007/JPEGImages
annotation_conversion:
converter: "voc_detection"
annotations_dir: <PATH TO VALIDATION DATASET>/VOC2007/Annotations
images_dir: <PATH TO VALIDATION DATASET>/VOC2007/JPEGImages
imageset_file: <PATH TO VALIDATION DATASET>/VOC2007/ImageSets/Main/test.txt
has_background: true
preprocessing:
- type: resize
size: 300
postprocessing:
- type: resize_prediction_boxes
metrics:
- type: map
integral: 11point
ignore_difficult: true
presenter: print_scalar

View File

@ -1,37 +0,0 @@
models:
- name: SSD_ResNet34
launchers:
- framework: dlsdk
adapter:
type: ssd_onnx
scores_out: .*scores.*
labels_out: .*labels.*
bboxes_out: .*bboxes.*
datasets:
- name: COCO2017_80cl_bkgr
data_source: <PATH TO VALIDATION DATASET>/val2017
annotation_conversion:
converter: mscoco_detection
annotation_file: <PATH TO VALIDATION DATASET>/annotations/instances_val2017.json
has_background: True
use_full_label_map: False
reader: pillow_imread
preprocessing:
- type: resize
size: 1200
use_pillow: true
interpolation: BILINEAR
postprocessing:
- type: resize_prediction_boxes
metrics:
- type: map
integral: 11point
ignore_difficult: true
presenter: print_scalar
- type: coco_precision
- type: coco_orig_precision

View File

@ -1,30 +0,0 @@
models:
- name: ssd_resnet_50_512
launchers:
- framework: dlsdk
device: CPU
adapter: ssd
datasets:
- name: VOC2007_bkgr
data_source: <PATH TO VALIDATION DATASET>/VOC2007/JPEGImages
annotation_conversion:
converter: voc_detection
has_background: False
annotations_dir: <PATH TO VALIDATION DATASET>/VOC2007/Annotations
images_dir: <PATH TO VALIDATION DATASET>/VOC2007/JPEGImages
imageset_file: <PATH TO VALIDATION DATASET>/VOC2007/ImageSets/Main/test.txt
preprocessing:
- type: resize
size: 512
postprocessing:
- type: resize_prediction_boxes
metrics:
- type: map
integral: 11point
ignore_difficult: True
presenter: print_scalar

View File

@ -1,21 +0,0 @@
{
"model": {
"model_name": "densnet-121",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/densnet_121.yaml"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "mixed",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,60 +0,0 @@
{
"model": {
"model_name": "inceptionv3",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"datasets": [
{
"name": "imagenet_1001_classes",
"data_source": "PATH_TO_DATASET",
"annotation_conversion": {
"annotation_file": "PATH_TO_ANNOTATION_FILE",
"has_background": true,
"converter": "imagenet"
},
"preprocessing": [
{
"type": "crop",
"central_fraction": 0.875
},
{
"type": "resize",
"size": 299
}
],
"metrics": [
{
"type": "accuracy",
"name": "accuracy@top1",
"top_k": 1
},
{
"type": "accuracy",
"name": "accuracy@top5",
"top_k": 5
}
]
}
],
"launchers": [
{
"framework": "dlsdk",
"device": "CPU",
"adapter": "classification"
}
]
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,21 +0,0 @@
{
"model": {
"model_name": "mobilenetv1",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/mobilenet_v1_tf.yaml"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "mixed",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,68 +0,0 @@
{
"model": {
"model_name": "mobilenet_v2_1.0_224",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"launchers":
[
{
"framework": "dlsdk",
"adapter": "classification"
}
],
"datasets":
[
{
"name": "imagenet_1000_classes",
"reader": "pillow_imread",
"annotation_conversion": {
"converter": "imagenet",
"annotation_file": "PATH_TO_ANNOTATION_FILE"
},
"data_source": "PATH_TO_VALIDATION_IMAGES",
"preprocessing": [
{
"type": "bgr_to_rgb"
},
{
"type": "resize",
"size": 256,
"aspect_ratio_scale": "greater",
"use_pillow": true,
"interpolation": "BILINEAR"
},
{
"type": "crop",
"size": 224,
"use_pillow": true
}
],
"metrics": [
{
"name": "accuracy@top1",
"type": "accuracy",
"top_k": 1
},
{
"name": "accuracy@top5",
"type": "accuracy",
"top_k": 5
}
]
}
]
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,21 +0,0 @@
{
"model": {
"model_name": "mobilenetv2",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "mixed",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,21 +0,0 @@
{
"model": {
"model_name": "mobilenet_v2_1.0_224",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "<CONFIG_PATH>"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,21 +0,0 @@
{
"model": {
"model_name": "mobilenet_v2_1.0_224",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "<CONFIG_PATH>"
},
"compression": {
"algorithms": [
{
"name": "AccuracyAwareQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,25 +0,0 @@
{
"model": {
"model_name": "mobilenet_v2_1.0_224",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"type": "simplified",
// You can specify a path to a directory with images or a video file.
// You can also specify a template for file names to filter the images to load.
// Templates are unix style.
"data_source": "PATH_TO_IMAGES"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,30 +0,0 @@
{
"model": {
"model_name": "mobilenet_v2_1.0_224",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "<CONFIG_PATH>"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300
}
},
{
"name": "QuantNoiseEstimator",
"params": {
"stat_subset_size": 100,
"mode": "full_fq_noise",
"type": "sqnr",
"results_dump_filename": "./mobilenetv2_sqnr_data.csv"
}
}
]
}
}

View File

@ -1,29 +0,0 @@
{
"model": {
"model_name": "mobilenet_v2_1.0_224",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "<CONFIG_PATH>"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300
}
},
{
"name": "INT4MixedQuantization",
"params": {
"stat_subset_size": 300,
"ranking_subset_size": 300,
"maximal_drop": 0.01
}
}
]
}
}

View File

@ -1,102 +0,0 @@
{
"model": {
"model_name": "se_resnet50",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"launchers": [
{
"framework": "dlsdk",
"device": "CPU",
"adapter": "classification"
}
],
"datasets": [{
"name": "classification_dataset",
"data_source": "<PATH_TO_DATASET>",
"annotation_conversion": {
"converter": "imagenet",
"annotation_file": "<PATH_TO_ANNOTATION_FILE>"
},
"reader": "pillow_imread",
"preprocessing":[
{
"type": "bgr_to_rgb"
},
{
"use_pillow": true,
"type": "resize",
"size": 256,
"interpolation": "BILINEAR",
"aspect_ratio_scale": "greater"
},
{
"type": "crop",
"size": 224,
"use_pillow": true
}
],
"metrics": [
{
"name": "accuracy@top1",
"type": "accuracy",
"top_k": 1
},
{
"name": "accuracy@top5",
"type": "accuracy",
"top_k": 5
}
]
}]
},
"compression": {
"algorithms": [
{
"name": "MinMaxQuantization",
"params": {
"target_device": "CPU",
"preset": "mixed",
"stat_subset_size": 1000,
"ignored": {
"scope": [
"400", "402",
"416", "418",
"432", "434",
"450", "452",
"466", "468",
"482", "484",
"498", "500",
"516", "518",
"532", "534",
"548", "550",
"564", "566",
"580", "582",
"596", "598",
"614", "616",
"630", "632",
"646", "648"
]
},
"weights": {
"bits": 8,
"mode": "symmetric",
"granularity": "perchannel"
},
"activations": {
"bits": 8,
"mode": "symmetric",
"granularity": "pertensor"
}
}
},
{
"name": "FastBiasCorrection",
"params": {
"stat_subset_size": 1000
}
}
]
}
}

View File

@ -1,77 +0,0 @@
{
"model": {
"model_name": "squeezenet1_1",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"launchers": [
{
"framework": "dlsdk",
"device": "CPU",
"adapter": "classification"
}
],
"datasets": [
{
"name": "classification_dataset",
"data_source": "<PATH_TO_DATASET>",
"annotation_conversion": {
"converter": "imagenet",
"annotation_file": "<PATH_TO_ANNOTATION_FILE>",
"has_background": false
},
"reader": "pillow_imread",
"preprocessing":[
{
"type": "bgr_to_rgb"
},
{
"use_pillow": true,
"type": "resize",
"size": 256,
"interpolation": "BILINEAR",
"aspect_ratio_scale": "greater"
},
{
"type": "crop",
"size": 224,
"use_pillow": true
}
],
"metrics": [
{
"name": "accuracy@top1",
"type": "accuracy",
"top_k": 1
},
{
"name": "accuracy@top5",
"type": "accuracy",
"top_k": 5
}
]
}
]
},
"compression": {
"algorithms": [{
"name": "DefaultQuantization",
"params": {
"preset": "mixed",
"stat_subset_size": 1000,
"weights": {
"bits": 8,
"mode": "symmetric",
"granularity": "perchannel"
},
"activations": {
"bits": 8,
"mode": "symmetric",
"granularity": "pertensor"
}
}
}]
}
}

View File

@ -1,72 +0,0 @@
{
"model": {
"name": "bert_base_squad_1_1",
"model": "<PATH_TO_MODEL>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/bert_base_squad_1_1_tf_int8.yml"
},
"compression": {
"model_type": "transformer",
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "accuracy",
"stat_subset_size": 1000,
"weights": {
"bits": 8,
"mode": "symmetric",
"granularity": "perchannel",
"level_low": -127,
"level_high": 127
},
"activations": {
"bits": 8,
"mode": "symmetric",
"granularity": "pertensor"
},
"ignored": {
"scope" : [
"bert/encoder/layer_0/output/dense/MatMul",
"bert/encoder/layer_0/intermediate/dense/MatMul",
"bert/encoder/layer_0/attention/self/key/MatMul",
"bert/encoder/layer_0/attention/output/dense/MatMul",
"bert/encoder/layer_0/attention/self/MatMul",
"bert/encoder/layer_0/attention/self/MatMul_1",
"bert/encoder/layer_1/attention/self/key/MatMul",
"bert/encoder/layer_1/attention/self/MatMul",
"bert/encoder/layer_1/attention/self/MatMul_1",
"bert/encoder/layer_2/attention/self/MatMul",
"bert/encoder/layer_2/attention/self/MatMul_1",
"bert/encoder/layer_3/output/dense/MatMul",
"bert/encoder/layer_3/intermediate/dense/MatMul",
"bert/encoder/layer_3/attention/output/dense/MatMul",
"bert/encoder/layer_3/attention/self/MatMul",
"bert/encoder/layer_3/attention/self/MatMul_1",
"bert/encoder/layer_4/attention/self/value/MatMul",
"bert/encoder/layer_4/attention/self/MatMul",
"bert/encoder/layer_4/attention/self/MatMul_1",
"bert/encoder/layer_5/attention/self/MatMul",
"bert/encoder/layer_5/attention/self/MatMul_1",
"bert/encoder/layer_6/attention/self/MatMul",
"bert/encoder/layer_6/attention/self/MatMul_1",
"bert/encoder/layer_7/attention/self/MatMul",
"bert/encoder/layer_7/attention/self/MatMul_1",
"bert/encoder/layer_8/attention/self/MatMul",
"bert/encoder/layer_8/attention/self/MatMul_1",
"bert/encoder/layer_9/attention/self/MatMul",
"bert/encoder/layer_9/attention/self/MatMul_1",
"bert/encoder/layer_10/attention/self/MatMul",
"bert/encoder/layer_10/attention/self/MatMul_1",
"bert/encoder/layer_11/attention/self/MatMul",
"bert/encoder/layer_11/attention/self/MatMul_1",
"loss/MatMul"
]
}
}
}
]
}
}

View File

@ -1,69 +0,0 @@
{
"model": {
"name": "bert_base_squad_1_1",
"model": "<PATH_TO_MODEL>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/bert_base_squad_1_1_tf_int8.yml"
},
"compression": {
"model_type": "transformer",
"algorithms": [
{
"name": "AccuracyAwareQuantization",
"params": {
"metric_subset_ratio": 1,
"ranking_subset_size": 300,
"max_iter_num": 500,
"maximal_drop": 0.01,
"drop_type": "relative",
"base_algorithm": "DefaultQuantization",
"use_prev_if_drop_increase": true,
"range_estimator": {
"preset": "default"
},
"stat_subset_size": 1000,
"ignored": {
"scope" : [
"bert/encoder/layer_0/output/dense/MatMul",
"bert/encoder/layer_0/intermediate/dense/MatMul",
"bert/encoder/layer_0/attention/self/key/MatMul",
"bert/encoder/layer_0/attention/output/dense/MatMul",
"bert/encoder/layer_0/attention/self/MatMul",
"bert/encoder/layer_0/attention/self/MatMul_1",
"bert/encoder/layer_1/attention/self/key/MatMul",
"bert/encoder/layer_1/attention/self/MatMul",
"bert/encoder/layer_1/attention/self/MatMul_1",
"bert/encoder/layer_2/attention/self/MatMul",
"bert/encoder/layer_2/attention/self/MatMul_1",
"bert/encoder/layer_3/output/dense/MatMul",
"bert/encoder/layer_3/intermediate/dense/MatMul",
"bert/encoder/layer_3/attention/output/dense/MatMul",
"bert/encoder/layer_3/attention/self/MatMul",
"bert/encoder/layer_3/attention/self/MatMul_1",
"bert/encoder/layer_4/attention/self/value/MatMul",
"bert/encoder/layer_4/attention/self/MatMul",
"bert/encoder/layer_4/attention/self/MatMul_1",
"bert/encoder/layer_5/attention/self/MatMul",
"bert/encoder/layer_5/attention/self/MatMul_1",
"bert/encoder/layer_6/attention/self/MatMul",
"bert/encoder/layer_6/attention/self/MatMul_1",
"bert/encoder/layer_7/attention/self/MatMul",
"bert/encoder/layer_7/attention/self/MatMul_1",
"bert/encoder/layer_8/attention/self/MatMul",
"bert/encoder/layer_8/attention/self/MatMul_1",
"bert/encoder/layer_9/attention/self/MatMul",
"bert/encoder/layer_9/attention/self/MatMul_1",
"bert/encoder/layer_10/attention/self/MatMul",
"bert/encoder/layer_10/attention/self/MatMul_1",
"bert/encoder/layer_11/attention/self/MatMul",
"bert/encoder/layer_11/attention/self/MatMul_1",
"loss/MatMul"
]
}
}
}
]
}
}

View File

@ -1,86 +0,0 @@
{
"model": {
"name": "bert_large_squad_1_1",
"model": "<PATH_TO_MODEL>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/bert_large_squad_1_1_tf_int8.yml"
},
"compression": {
"model_type": "transformer",
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 100,
"ignored": {
"scope": [
"bert/encoder/layer_0/attention/self/MatMul",
"bert/encoder/layer_1/attention/self/MatMul",
"bert/encoder/layer_2/attention/self/MatMul",
"bert/encoder/layer_3/attention/self/MatMul",
"bert/encoder/layer_4/attention/self/MatMul",
"bert/encoder/layer_5/attention/self/MatMul",
"bert/encoder/layer_6/attention/self/MatMul",
"bert/encoder/layer_7/attention/self/MatMul",
"bert/encoder/layer_8/attention/self/MatMul",
"bert/encoder/layer_9/attention/self/MatMul",
"bert/encoder/layer_10/attention/self/MatMul",
"bert/encoder/layer_11/attention/self/MatMul",
"bert/encoder/layer_12/attention/self/MatMul",
"bert/encoder/layer_13/attention/self/MatMul",
"bert/encoder/layer_14/attention/self/MatMul",
"bert/encoder/layer_15/attention/self/MatMul",
"bert/encoder/layer_16/attention/self/MatMul",
"bert/encoder/layer_17/attention/self/MatMul",
"bert/encoder/layer_18/attention/self/MatMul",
"bert/encoder/layer_19/attention/self/MatMul",
"bert/encoder/layer_20/attention/self/MatMul",
"bert/encoder/layer_21/attention/self/MatMul",
"bert/encoder/layer_22/attention/self/MatMul",
"bert/encoder/layer_23/attention/self/MatMul",
"bert/encoder/layer_23/attention/self/MatMul_1",
"bert/encoder/layer_22/attention/self/MatMul_1",
"bert/encoder/layer_21/attention/self/MatMul_1",
"bert/encoder/layer_20/attention/self/MatMul_1",
"bert/encoder/layer_19/attention/self/MatMul_1",
"bert/encoder/layer_18/attention/self/MatMul_1",
"bert/encoder/layer_17/attention/self/MatMul_1",
"bert/encoder/layer_16/attention/self/MatMul_1",
"bert/encoder/layer_15/attention/self/MatMul_1",
"bert/encoder/layer_14/attention/self/MatMul_1",
"bert/encoder/layer_13/attention/self/MatMul_1",
"bert/encoder/layer_12/attention/self/MatMul_1",
"bert/encoder/layer_11/attention/self/MatMul_1",
"bert/encoder/layer_10/attention/self/MatMul_1",
"bert/encoder/layer_9/attention/self/MatMul_1",
"bert/encoder/layer_8/attention/self/MatMul_1",
"bert/encoder/layer_7/attention/self/MatMul_1",
"bert/encoder/layer_6/attention/self/MatMul_1",
"bert/encoder/layer_5/attention/self/MatMul_1",
"bert/encoder/layer_4/attention/self/MatMul_1",
"bert/encoder/layer_3/attention/self/MatMul_1",
"bert/encoder/layer_2/attention/self/MatMul_1",
"bert/encoder/layer_1/attention/self/MatMul_1",
"bert/encoder/layer_0/attention/self/MatMul_1",
"bert/encoder/layer_0/attention/output/dense/MatMul",
"bert/encoder/layer_3/attention/output/dense/MatMul",
"bert/encoder/layer_4/attention/self/value/MatMul",
"bert/encoder/layer_0/attention/self/key/MatMul",
"bert/encoder/layer_1/attention/self/key/MatMul",
"bert/encoder/layer_0/intermediate/dense/MatMul",
"bert/encoder/layer_0/output/dense/MatMul",
"bert/encoder/layer_3/intermediate/dense/MatMul",
"bert/encoder/layer_3/output/dense/MatMul",
"bert/encoder/layer_7/attention/self/key/MatMul"
]
}
}
}
]
}
}

View File

@ -1,87 +0,0 @@
{
"model": {
"name": "bert_large_squad_1_1",
"model": "<PATH_TO_MODEL>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/bert_large_squad_1_1_tf_int8.yml"
},
"compression": {
"target_device": "CPU",
"model_type": "transformer",
"algorithms": [
{
"name": "AccuracyAwareQuantization",
"params": {
"max_iter_num": 500,
"stat_subset_size": 100,
"ignored": {
"scope": [
"bert/encoder/layer_0/attention/self/MatMul",
"bert/encoder/layer_1/attention/self/MatMul",
"bert/encoder/layer_2/attention/self/MatMul",
"bert/encoder/layer_3/attention/self/MatMul",
"bert/encoder/layer_4/attention/self/MatMul",
"bert/encoder/layer_5/attention/self/MatMul",
"bert/encoder/layer_6/attention/self/MatMul",
"bert/encoder/layer_7/attention/self/MatMul",
"bert/encoder/layer_8/attention/self/MatMul",
"bert/encoder/layer_9/attention/self/MatMul",
"bert/encoder/layer_10/attention/self/MatMul",
"bert/encoder/layer_11/attention/self/MatMul",
"bert/encoder/layer_12/attention/self/MatMul",
"bert/encoder/layer_13/attention/self/MatMul",
"bert/encoder/layer_14/attention/self/MatMul",
"bert/encoder/layer_15/attention/self/MatMul",
"bert/encoder/layer_16/attention/self/MatMul",
"bert/encoder/layer_17/attention/self/MatMul",
"bert/encoder/layer_18/attention/self/MatMul",
"bert/encoder/layer_19/attention/self/MatMul",
"bert/encoder/layer_20/attention/self/MatMul",
"bert/encoder/layer_21/attention/self/MatMul",
"bert/encoder/layer_22/attention/self/MatMul",
"bert/encoder/layer_23/attention/self/MatMul",
"bert/encoder/layer_23/attention/self/MatMul_1",
"bert/encoder/layer_22/attention/self/MatMul_1",
"bert/encoder/layer_21/attention/self/MatMul_1",
"bert/encoder/layer_20/attention/self/MatMul_1",
"bert/encoder/layer_19/attention/self/MatMul_1",
"bert/encoder/layer_18/attention/self/MatMul_1",
"bert/encoder/layer_17/attention/self/MatMul_1",
"bert/encoder/layer_16/attention/self/MatMul_1",
"bert/encoder/layer_15/attention/self/MatMul_1",
"bert/encoder/layer_14/attention/self/MatMul_1",
"bert/encoder/layer_13/attention/self/MatMul_1",
"bert/encoder/layer_12/attention/self/MatMul_1",
"bert/encoder/layer_11/attention/self/MatMul_1",
"bert/encoder/layer_10/attention/self/MatMul_1",
"bert/encoder/layer_9/attention/self/MatMul_1",
"bert/encoder/layer_8/attention/self/MatMul_1",
"bert/encoder/layer_7/attention/self/MatMul_1",
"bert/encoder/layer_6/attention/self/MatMul_1",
"bert/encoder/layer_5/attention/self/MatMul_1",
"bert/encoder/layer_4/attention/self/MatMul_1",
"bert/encoder/layer_3/attention/self/MatMul_1",
"bert/encoder/layer_2/attention/self/MatMul_1",
"bert/encoder/layer_1/attention/self/MatMul_1",
"bert/encoder/layer_0/attention/self/MatMul_1",
"bert/encoder/layer_0/attention/output/dense/MatMul",
"bert/encoder/layer_3/attention/output/dense/MatMul",
"bert/encoder/layer_4/attention/self/value/MatMul",
"bert/encoder/layer_0/attention/self/key/MatMul",
"bert/encoder/layer_1/attention/self/key/MatMul",
"bert/encoder/layer_0/intermediate/dense/MatMul",
"bert/encoder/layer_0/output/dense/MatMul",
"bert/encoder/layer_3/intermediate/dense/MatMul",
"bert/encoder/layer_3/output/dense/MatMul",
"bert/encoder/layer_7/attention/self/key/MatMul"
]
}
}
}
]
}
}

View File

@ -1,34 +0,0 @@
{
"model": {
"model_name": "faster_rcnn_resnet101_coco",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/faster_rcnn_resnet101_coco.yaml"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300,
"ignored": {
"scope": [
"proposals/conv"
]
},
"activations": {
"range_estimator": {
"max": {
"aggregator": "max",
"type": "abs_max"
}
}
}
}
}
]
}
}

View File

@ -1,34 +0,0 @@
{
"model": {
"model_name": "faster_rcnn_resnet50_coco",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/faster_rcnn_resnet50_coco.yaml"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300,
"ignored": {
"scope": [
"proposals/conv"
]
},
"activations": {
"range_estimator": {
"max": {
"aggregator": "max",
"type": "abs_max"
}
}
}
}
}
]
}
}

View File

@ -1,32 +0,0 @@
{
"model": {
"model_name": "mask_rcnn_resnet50_atrous_coco",
"model": "<MODEL_PATH>",
"weights": "<WEIGHTS_PATH>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/mask_rcnn_resnet50_atrous_coco.yaml"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300,
"ignored": {
"scope": [
"FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/block1/unit_1/bottleneck_v1/add",
"proposals/conv",
"proposals/reshape_4d",
"SecondStageFeatureExtractor_1/resnet_v1_50/block4/unit_1/bottleneck_v1/shortcut/Conv2D",
"SecondStageFeatureExtractor_1/resnet_v1_50/block4/unit_1/bottleneck_v1/conv1/Conv2D",
"SecondStageFeatureExtractor/resnet_v1_50/block4/unit_1/bottleneck_v1/shortcut/Conv2D",
"SecondStageFeatureExtractor/resnet_v1_50/block4/unit_1/bottleneck_v1/conv1/Conv2D"
]
}
}
}
]
}
}

View File

@ -1,36 +0,0 @@
{
"model": {
"model_name": "mtcnn",
"cascade": [
{
"name": "pnet",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
{
"name": "rnet",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
{
"name": "onet",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
}
]
},
"engine": {
"config": "./configs/examples/accuracy_checker/mtcnn.yaml"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "mixed",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,21 +0,0 @@
{
"model": {
"model_name": "mobilenet_ssd",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "<CONFIG_PATH>"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,59 +0,0 @@
{
"model": {
"model_name": "mobilenet-ssd",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"launchers": [
{
"framework": "dlsdk",
"device": "CPU",
"adapter": "ssd"
}
],
"datasets": [
{
"name": "VOC2007",
"data_source": "<PATH TO VALIDATION DATASET>/VOC2007/JPEGImages",
"annotation_conversion": {
"converter": "voc_detection",
"annotations_dir": "<PATH TO VALIDATION DATASET>/VOC2007/Annotations",
"images_dir": "<PATH TO VALIDATION DATASET>/VOC2007/JPEGImages",
"imageset_file": "<PATH TO VALIDATION DATASET>/VOC2007/ImageSets/Main/test.txt"
},
"preprocessing":[
{
"type": "resize",
"size": 300
}
],
"postprocessing":[{
"type": "resize_prediction_boxes"
}],
"metrics": [
{
"type": "map",
"integral": "11point",
"ignore_difficult": true,
"presenter": "print_scalar"
}
]
}
]
},
"compression": {
"algorithms": [
{
"name": "MinMaxQuantization",
"params": {
"preset": "mixed",
"range_estimator": {
"preset": "quantile"
},
"stat_subset_size": 1000
}
}
]
}
}

View File

@ -1,42 +0,0 @@
{
"model": {
"model_name": "ssd_resnet34_1200",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/ssd_resnet34.yaml"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"stat_subset_size": 300,
"preset": "performance",
"ignored":{
"scope": [
"Mul_490",
"Mul_509",
"Add_511",
"Mul_507",
"Exp_512",
"Mul_514",
"Mul_548/Fused_Mul_",
"Mul_583/Fused_Mul_",
"Mul_618",
"Mul_653",
"Sub_549/add_",
"Sub_584/add_",
"Add_619",
"Add_654",
"Mul_703",
"Add_704",
"Add_labels"
]
}
}
}
]
}
}

View File

@ -1,57 +0,0 @@
{
"model": {
"model_name": "ssd_resnet50_512_mxnet",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"launchers": [
{
"framework": "dlsdk",
"device": "CPU",
"adapter": "ssd"
}
],
"datasets": [
{
"name": "VOC2007",
"data_source": "<PATH TO VALIDATION DATASET>/VOC2007/JPEGImages",
"annotation_conversion": {
"converter": "voc_detection",
"has_background": false,
"annotations_dir": "<PATH TO VALIDATION DATASET>/VOC2007/Annotations",
"images_dir": "<PATH TO VALIDATION DATASET>/VOC2007/JPEGImages",
"imageset_file": "<PATH TO VALIDATION DATASET>/VOC2007/ImageSets/Main/test.txt"
},
"preprocessing":[
{
"type": "resize",
"size": 512
}
],
"postprocessing":[{
"type": "resize_prediction_boxes"
}],
"metrics": [
{
"type": "map",
"integral": "11point",
"ignore_difficult": true,
"presenter": "print_scalar"
}
]
}
]
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,56 +0,0 @@
{
"model": {
"model_name": "mobilenetv1",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/mobilenet_v1_tf.yaml"
},
"optimizer": {
"name": "Tpe",
"params": {
"max_trials": 200,
"max_minutes": 1440,
"trials_load_method": "cold_start",
"accuracy_loss": 0.1,
"latency_reduce": 1.5,
"accuracy_weight": 1.0,
"latency_weight": 1.0,
"benchmark": {
"performance_count": false,
"batch_size": 1,
"nthreads": 8,
"nstreams": 1,
"nireq": 1,
"api_type": "async",
"niter": 1,
"duration_seconds": 30
}
}
},
"compression": {
"algorithms": [
{
"name": "ActivationChannelAlignment",
"params": {
"stat_subset_size": 1000
}
},
{
"name": "TunableQuantization",
"params": {
"stat_subset_size": 1000,
"preset": "performance",
"tuning_scope": ["layer"]
}
},
{
"name": "FastBiasCorrection",
"params": {
"stat_subset_size": 1000
}
}
]
}
}

View File

@ -1,47 +0,0 @@
{
"model": {
"model_name": "mobilenetv2",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml"
},
"compression": {
"algorithms": [
{
"name": "MinMaxQuantization",
"params": {
"preset": "mixed",
"stat_subset_size": 1000,
"weights": {
"bits": 8,
"mode": "asymmetric",
"granularity": "perchannel"
},
"activations": {
"bits": 8,
"mode": "asymmetric",
"granularity": "pertensor"
}
}
},
{
"name": "RangeOptimization",
"params": {
"stat_subset_size": 5000,
"result_filename": "rangeopt_results.csv",
"lower_boxsize": 0.1,
"upper_boxsize": 0.1,
"maxiter": 1500,
"optimization_scope": ["317", "315"],
"metric_name": "accuracy@top1",
"opt_backend": "nevergrad",
"optimizer_name": "CMA"
// use the below option to validate range values
// "activation_ranges_to_set": {"317": [-0.0380698, 2.35978142]}
}
}
]
}
}

View File

@ -1,56 +0,0 @@
{
"model": {
"model_name": "mobilenetv2",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml"
},
"optimizer": {
"name": "Tpe",
"params": {
"max_trials": 200,
"max_minutes": 1440,
"trials_load_method": "cold_start",
"accuracy_loss": 0.1,
"latency_reduce": 1.5,
"accuracy_weight": 1.0,
"latency_weight": 1.0,
"benchmark": {
"performance_count": false,
"batch_size": 1,
"nthreads": 8,
"nstreams": 1,
"nireq": 1,
"api_type": "async",
"niter": 1,
"duration_seconds": 30
}
}
},
"compression": {
"algorithms": [
{
"name": "ActivationChannelAlignment",
"params": {
"stat_subset_size": 1000
}
},
{
"name": "TunableQuantization",
"params": {
"stat_subset_size": 1000,
"preset": "performance",
"tuning_scope": ["layer"]
}
},
{
"name": "FastBiasCorrection",
"params": {
"stat_subset_size": 1000
}
}
]
}
}

View File

@ -1,56 +0,0 @@
{
"model": {
"model_name": "mobilenetv2",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml"
},
"compression": {
"algorithms": [
{
"name": "QuantileTuningAlgorithm",
"params": {
"opt_backend": "skopt",
"maxiter": 100,
"optimization_subset_size": 1000,
"preset": "mixed",
"stat_subset_size": 1000,
"weights": {
"bits": 8,
"mode": "asymmetric",
"granularity": "perchannel",
"range_estimator": {
"min": {
"type": "quantile",
"outlier_prob": 0.0015
},
"max": {
"type": "quantile",
"outlier_prob": 0.0015
}
}
},
"activations": {
"bits": 8,
"mode": "asymmetric",
"granularity": "pertensor",
"range_estimator": {
"min": {
"aggregator": "mean",
"type": "quantile",
"outlier_prob": 1e-3
},
"max": {
"aggregator": "mean",
"type": "quantile",
"outlier_prob": 1e-3
}
}
}
}
}
]
}
}

View File

@ -1,59 +0,0 @@
{
"model": {
"model_name": "mobilenet-ssd",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/ssd_mobilenet_v1.yaml"
},
"optimizer": {
"name": "Tpe",
"params": {
"max_trials": 200,
"max_minutes": 1440,
"trials_load_method": "cold_start",
"accuracy_loss": 0.1,
"latency_reduce": 1.5,
"accuracy_weight": 1.0,
"latency_weight": 0.0,
"benchmark": {
"performance_count": false,
"batch_size": 1,
"nthreads": 8,
"nstreams": 1,
"nireq": 1,
"api_type": "async",
"niter": 1,
"duration_seconds": 30
}
}
},
"compression": {
"algorithms": [
{
"name": "ActivationChannelAlignment",
"params": {
"stat_subset_size": 1000
}
},
{
"name": "TunableQuantization",
"params": {
"stat_subset_size": 1000,
"preset": "performance",
"tuning_scope": ["range_estimator"],
"estimator_tuning_scope": ["preset", "outlier_prob"],
"outlier_prob_choices": [1e-3, 1e-4, 1e-5]
}
},
{
"name": "FastBiasCorrection",
"params": {
"stat_subset_size": 1000
}
}
]
}
}

View File

@ -1,33 +0,0 @@
{
"model": {
"model_name": "ncf",
"model": "<PATH_TO_MODEL>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/ncf.yaml"
},
"compression": {
"algorithms": [
{
"name": "MinMaxQuantization",
"params": {
"preset": "mixed",
"stat_subset_size": 1000,
"weights": {
"bits": 8,
"mode": "symmetric",
"granularity": "perchannel",
"level_low": -127,
"level_high": 127
},
"activations": {
"bits": 8,
"mode": "symmetric",
"granularity": "pertensor"
}
}
}
]
}
}

View File

@ -1,60 +0,0 @@
{
"model": {
"model_name": "ncf",
"model": "<PATH_TO_MODEL>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/ncf.yaml"
},
"optimizer": {
"name": "Tpe",
"params": {
"max_trials": 100,
"max_minutes": 10,
"trials_load_method": "cold_start",
"accuracy_loss": 0.1,
"latency_reduce": 1.5,
"expected_quantization_ratio": 0.5,
"accuracy_weight": 1.0,
"latency_weight": 1.0,
"quantization_ratio_weight": 1.0,
"benchmark": {
"cpu_bind_thread": "YES",
"nthreads": 4,
"nstreams": 0,
"nireq": 0,
"api_type": "async",
"duration_seconds": 30,
"benchmark_app_dir":""
}
}
},
"compression": {
"algorithms": [
{
"name": "ActivationChannelAlignment",
"params": {
"stat_subset_size": 1000
}
},
{
"name": "TunableQuantization",
"params": {
"stat_subset_size": 1000,
"preset": "performance",
"tuning_scope": ["range_estimator"],
"estimator_tuning_scope": ["preset", "aggregator", "type", "outlier_prob"],
"outlier_prob_choices": [1e-3, 1e-4, 1e-5]
}
},
{
"name": "FastBiasCorrection",
"params": {
"stat_subset_size": 1000
}
}
]
}
}

View File

@ -1,49 +0,0 @@
{
"model": {
"model_name": "brain-tumor-segmentation-0001",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"launchers": [
{
"framework": "dlsdk",
"adapter": {
"type": "brain_tumor_segmentation"
}
}
],
"datasets": [
{
"name": "BraTS",
"data_source": "<PATH_TO_DATASET>",
"annotation_conversion": {
"converter": "brats_numpy",
"data_dir": "<PATH_TO_DATASET>",
"ids_file": "<PATH_TO_IDS_FILE>",
"labels_file": "<PATH_TO_LABELS_FILE>"
},
"reader": "numpy_reader",
"metrics": [
{
"type": "dice_index",
"median": true,
"presenter": "print_vector"
}
]
}
]
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"stat_subset_size": 72,
"preset": "performance"
}
}
]
}
}

View File

@ -1,26 +0,0 @@
{
"model": {
"model_name": "east",
"model": "<MODEL_PATH>",
"weights": "<WEIGHTS_PATH>"
},
"engine": {
"config": "./configs/examples/accuracy_checker/east_resnet_v1_50.yaml"
},
"compression": {
"algorithms": [
{
"name": "DefaultQuantization",
"params": {
"ignored": {
"scope": [
"feature_fusion/Conv_4/Conv2D"
]
},
"preset": "performance",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,30 +0,0 @@
{
"model": {
"model_name": "resnet-50-pytorch",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "<CONFIG_PATH>"
},
"compression": {
"algorithms": [
{
"name": "WeightSparsity",
"params": {
"sparsity_level": 0.5,
"apply_for_all_nodes": true,
"stat_subset_size": 300,
"use_layerwise_tuning": true,
}
},
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,29 +0,0 @@
{
"model": {
"model_name": "ssd_resnet50_512",
"model": "<MODEL_PATH>",
"weights": "<PATH_TO_WEIGHTS>"
},
"engine": {
"config": "<CONFIG_PATH>"
},
"compression": {
"algorithms": [
{
"name": "WeightSparsity",
"params": {
"sparsity_level": 0.5,
"apply_for_all_nodes": true,
"stat_subset_size": 300
}
},
{
"name": "DefaultQuantization",
"params": {
"preset": "performance",
"stat_subset_size": 300
}
}
]
}
}

View File

@ -1,45 +0,0 @@
/* This configuration file is the fastest way to get started with the default
quantization algorithm in simplified mode. It contains only mandatory options
with commonly used values. All other options can be considered advanced and
require deep knowledge of the quantization process. An overall description
of all possible parameters can be found in the default_quantization_spec.json */
{
/* Model parameters */
"model": {
"model_name": "model_name", // Model name
"model": "<MODEL_PATH>", // Path to model (.xml format)
"weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
},
/* Parameters of the engine used for model inference */
"engine": {
"type": "simplified",
"layout": "NCHW", // Layout of input data. Supported ["NCHW", "NHWC", "CHW", "CWH"] layout
"data_source": "PATH_TO_SOURCE" // You can specify path to directory with images. Also you can
// specify template for file names to filter images to load.
// Templates are unix style (This option valid only in simplified mode)
},
/* Optimization hyperparameters */
"compression": {
"target_device": "ANY", // Target device, the specificity of which will be taken
// into account during optimization
"algorithms": [
{
"name": "DefaultQuantization", // Optimization algorithm name
"params": {
"preset": "performance", // Preset [performance, mixed, accuracy] which control the quantization
// mode (symmetric, mixed (weights symmetric and activations asymmetric)
// and fully asymmetric respectively)
"stat_subset_size": 300 // Size of subset to calculate activations statistics that can be used
// for quantization parameters calculation
}
}
]
}
}

View File

@ -1,44 +0,0 @@
/* This configuration file is the fastest way to get started with the default
sparsity and default quantization algorithm. It contains only mandatory options
with commonly used values. All other options can be considered advanced and
require deep knowledge of the quantization process. An overall description
of all possible parameters can be found in the default_quantization_spec.json */
{
/* Model parameters */
"model": {
"model_name": "model_name", // Model name
"model": "<MODEL_PATH>", // Path to model (.xml format)
"weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
},
/* Parameters of the engine used for model inference */
"engine": {
"config": "<CONFIG_PATH>" // Path to Accuracy Checker config
},
/* Optimization hyperparameters */
"compression": {
"target_device": "ANY", // Target device, the specificity of which will be taken
// into account during optimization
"algorithms": [
{
"name": "WeightSparsity",
"params": {
"sparsity_level": 0.3,
"stat_subset_size": 300 // Size of subset to calculate activations statistics that can be used
// for quantization parameters calculation
}
},
{
"name": "DefaultQuantization", // Optimization algorithm name
"params": {
// Preset [performance, mixed, accuracy] which controls the quantization mode
// (symmetric, mixed (weights symmetric and activations asymmetric) and fully
// asymmetric respectively)
"preset": "performance",
"stat_subset_size": 300 // Size of subset to calculate activations statistics that can be used
// for quantization parameters calculation
}
}
]
}
}

View File

@ -1,133 +0,0 @@
{
/* Model parameters */
"model": {
"model_name": "model_name", // Model name
"model": "<MODEL_PATH>", // Path to a model (.xml format)
"weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
},
/* Parameters of the engine used for model inference. */
/* The Post-Training Optimization Tool supports an engine based on the Accuracy Checker and a custom engine.
For a custom engine, specify your own set of parameters.
The engine based on the Accuracy Checker uses Accuracy Checker parameters. You can specify the parameters
via the Accuracy Checker config file or directly in the engine section.
Find more information about Accuracy Checker parameters at
https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker */
"engine": {
"stat_requests_number": 8, // Number of requests during statistcs collection
"eval_requests_number": 8, // Number of requests during evaluation
"config": "<CONFIG_PATH>",
/* OR */
"name": "model_name",
"launchers": [
{
"framework": "dlsdk",
"device": "CPU",
"adapter": "classification"
}
],
"datasets": [
{
"name": "dataset_name",
"data_source": "<DATASET_PATH>",
"annotation": "<ANNOTATION_PATH>",
"preprocessing": [
{
"type": "resize",
"interpolation": "BILINEAR",
"aspect_ratio_scale": "greater",
"size": 224
}
],
"metrics": [
{
"name": "accuracy@top1",
"type": "accuracy",
"top_k": 1
}
]
}
]
},
/* Global optimizer used to find "optimal" hyperparameters */
"optimizer": {
"name": "Tpe", // Global optimizer name
"params": {
"max_trials": 100, // Maximum number of trails
"max_minutes": 10, // [Optional] Trials time limit. When it expires, the last trial is completed and the best result is returned.
"stop_on_target": true, // [Optional] Flag to stop TPE trials when accuracy_loss and latency_reduce targets are reached.
// If false or not specified TPE will continue until max_trials or max_minutes is reached even if targets are reached earlier.
"eval_subset_size": 2000, // [Optional] subset of test data used to evaluate hyperparameters. The whole dataset is used if no parameter specified.
"trials_load_method": "cold_start", // Start from scratch or reuse previous results, supported options [cold_start, warm_start, fine_tune, eval]
"accuracy_loss": 0.1, // Accuracy threshold (%)
"latency_reduce": 1.5, // Target latency improvement versus original model
"accuracy_weight": 1.0, // Accuracy weight in loss function
"latency_weight": 1.0, // Latency weight in loss function
// An optional list of reference metrics values.
// If not specified, all metrics will be calculated from the original model.
"metrics": [
{
"name": "accuracy", // Metric name
"baseline_value": 0.72 // Baseline metric value of the original model
}
],
"benchmark": {
// Latency measurement benchmark configuration (https://docs.openvinotoolkit.org/latest/_inference_engine_samples_benchmark_app_README.html)
"performance_count": false,
"batch_size": 0,
"nthreads": 4,
"nstreams": 0,
"nireq": 0,
"api_type": "sync",
"niter": 4,
"duration_seconds": 30,
"benchmark_app_dir": "<path to benchmark_app>" // Path to benchmark_app If not specified, Python base benchmark will be used. Use benchmark_app to reduce jitter in results.
}
}
},
/* Optimization hyperparameters */
"compression": {
"target_device": "ANY", // Target device, the specificity of which will be taken
// into account during optimization
"algorithms": [
{
"name": "ActivationChannelAlignment",
"params": {
"stat_subset_size": 300 // Size of subset to calculate activations statistics that can be used
// for quantization parameters calculation.
}
},
{
"name": "TunableQuantization",
"params": {
/* Preset is a collection of optimization algorithm parameters that specifies which metric
the algorithm should concentrate on improving. Each optimization algorithm supports
[performance, mixed, accuracy] presets which control the quantization mode
(symmetric, mixed (weights symmetric and activations asymmetric), and fully asymmetric respectively)*/
"preset": "performance",
"stat_subset_size": 300, // Size of subset to calculate activations statistics that can be used
// for quantization parameters calculation.
"tuning_scope": ["layer"], // List of quantization parameters that will be tuned,
// available options: [bits, mode, granularity, layer, range_estimator]
"estimator_tuning_scope": ["preset", "aggregator", "type", "outlier_prob"], // List of range_estimator parameters that will be tuned,
// available options: [preset, aggregator, type, outlier_prob]
"outlier_prob_choices": [1e-3, 1e-4, 1e-5] // List of outlier_prob values to use when tuning outlier_prob parameter
}
},
{
"name": "FastBiasCorrection",
"params": {
"stat_subset_size": 300 // Size of subset to calculate activations statistics that can be used
// for quantization parameters calculation.
}
}
]
}
}

View File

@ -1,84 +0,0 @@
/* This configuration file is the fastest way to get started with the TPE
optimization algorithm. It contains only mandatory options with commonly used
values. All other options can be considered advanced and require
deep knowledge of the quantization process. Find overall description of all possible
parameters in tpe_spec.json */
{
/* Model parameters */
"model": {
"model_name": "model_name", // Model name
"model": "<MODEL_PATH>", // Path to a model (.xml format)
"weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
},
/* Parameters of the engine used for model inference. */
"engine": {
"config": "<CONFIG_PATH>" // Path to Accuracy Checker config
},
/* Optimizer used to find "optimal" hyperparameters */
"optimizer": {
"name": "Tpe", // Global optimizer name
"params": {
"max_trials": 200, // Maximum number of trails
"trials_load_method": "cold_start", // Start from scratch or reuse previous results, supported options [cold_start, warm_start, fine_tune, eval]
"accuracy_loss": 0.1, // Accuracy threshold (%)
"latency_reduce": 1.5, // Target latency improvement versus original model
"accuracy_weight": 1.0, // Accuracy weight in loss function
"latency_weight": 1.0, // Latency weight in loss function
"benchmark": {
// Latency measurement benchmark configuration (https://docs.openvinotoolkit.org/latest/_inference_engine_samples_benchmark_app_README.html)
"performance_count": false,
"batch_size": 0,
"nthreads": 4,
"nstreams": 0,
"nireq": 0,
"api_type": "sync",
"niter": 4,
"duration_seconds": 30,
"benchmark_app_dir": "<path to benchmark_app>" // Path to benchmark_app If not specified, Python base benchmark will be used. Use benchmark_app to reduce jitter in results.
}
}
},
/* Optimization hyperparameters */
"compression": {
"target_device": "ANY", // Target device, the specificity of which will be taken
// into account during optimization
"algorithms": [
{
"name": "ActivationChannelAlignment",
"params": {
"stat_subset_size": 300 // Size of subset to calculate activations statistics that can be used
// for quantization parameters calculation.
}
},
{
"name": "TunableQuantization",
"params": {
/* Preset is a collection of optimization algorithm parameters that specifies which metric
the algorithm should concentrate on improving. Each optimization algorithm supports
[performance, mixed, accuracy] presets which control the quantization mode
(symmetric, mixed (weights symmetric and activations asymmetric), and fully asymmetric respectively)*/
"preset": "performance",
"stat_subset_size": 300, // Size of subset to calculate activations statistics that can be used
// for quantization parameters calculation.
"tuning_scope": ["layer"] // List of quantization parameters that will be tuned,
// available options: [bits, mode, granularity, layer, range_estimator]
}
},
{
"name": "FastBiasCorrection",
"params": {
"stat_subset_size": 300 // Size of subset to calculate activations statistics that can be used
// for quantization parameters calculation.
}
}
]
}
}

View File

@ -1,15 +0,0 @@
# Installation Guide
## Install POT from PyPI
POT is distributed as a part of the OpenVINO&trade; Development Tools package. For installation instructions, refer to this [document](@ref openvino_docs_install_guides_install_dev_tools).
## Install POT from GitHub
The latest version of the Post-training Optimization Tool is available on [GitHub](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot) and can be installed from source. As prerequisites, you need to install [OpenVINO&trade; Runtime](@ref openvino_docs_install_guides_install_runtime) and other dependencies such as [Model Optimizer](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide) and [Accuracy Checker](@ref omz_tools_accuracy_checker).
To install POT from source:
- Clone the OpenVINO repository
```sh
git clone --recursive https://github.com/openvinotoolkit/openvino.git
```
After installation, POT is available as a Python library under `openvino.tools.pot.*` and in the command line by the `pot` alias. To verify it, run `pot -h`.

View File

@ -1,51 +0,0 @@
# Low Precision Optimization Guide
## Introduction
This document provides the best-known methods for using the low-precision capabilities of the OpenVINO™ toolkit to transform models
into a more hardware-friendly representation using methods such as quantization.
Currently, these capabilities are represented by several components:
- Low-precision runtime
- Post-training Optimization Tool (POT)
- [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf)
The first two components are part of the OpenVINO toolkit itself, while the latter is a separate tool built on top of the PyTorch* framework
and highly aligned with OpenVINO™.
This document covers high level aspects of model optimization flow in OpenVINO™.
## General Information
By low precision we mean inference of Deep Learning models in a precision lower than 32-bit or 16-bit floating point (*FLOAT32* and *FLOAT16*). For example, the most popular
bit-width for low-precision inference is *INT8* (*UINT8*) because it is possible to get accurate 8-bit models which substantially speed up inference.
Such models are represented by quantized models, i.e. models that were trained in floating-point precision and then transformed to an integer
representation with floating/fixed-point quantization operations between the layers. This transformation can be done using post-training methods or
with additional retraining/fine-tuning.
Starting from the OpenVINO 2020.1 release, all quantized models are represented using the so-called `FakeQuantize` layer, which is
a very expressive primitive able to represent such operations as `Quantize`, `Dequantize`, `Requantize`, and more. This operation is
inserted into the model during the quantization procedure and stores the quantization parameters for the layers. For more details about this operation,
please refer to the following [description](@ref openvino_docs_ops_quantization_FakeQuantize_1).
In order to execute such "fake-quantized" models, OpenVINO has a low-precision runtime which is a part of Inference Engine and consists of a
generic component translating the model to real integer representation and HW-specific part implemented in the corresponding HW plug-ins.
## Model Optimization Workflow
We propose a common workflow which aligns with what other DL frameworks have. It contains two main components: post-training quantization and Quantization-Aware Training (QAT).
The first component is the easiest way to get optimized models, while the latter can be considered an alternative or an addition when the first does not give
accurate results.
The diagram below shows the optimization flow for a new model with OpenVINO and related tools.
![](images/low_precision_flow.png)
- **Step 0: Model enabling**. In this step we should ensure that the model trained on the target dataset can be successfully inferred with [OpenVINO™ Runtime](@ref openvino_docs_OV_UG_OV_Runtime_User_Guide) in floating-point precision.
This process involves using the [model conversion API](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide) tool to convert the model from the source framework
to the OpenVINO Intermediate Representation (IR) and run it on CPU with Inference Engine.
> **NOTE**: This step presumes that the model has the same accuracy as in the original training framework and is enabled in the [Accuracy Checker](@ref omz_tools_accuracy_checker) tool or through a custom validation sample.
- **Step 1: Post-training quantization**. As the first optimization step, we suggest using INT8 quantization from POT, since in most cases it is possible to get an accurate quantized model. This step does not require model re-training; the only thing required is a representative dataset, usually several hundred images, which is used to collect statistics during the quantization process (a minimal API sketch is shown after this list).
Post-training quantization is also fast and usually takes several minutes, depending on the model size and the hardware used. Generally, a regular desktop system is enough to quantize most of the [OpenVINO Model Zoo](https://github.com/opencv/open_model_zoo).
For more information on best practices of post-training optimization, refer to the [Post-training Optimization Best practices](BestPractices.md).
- **Step 2: Quantization-Aware Training**. If the accuracy of the quantized model does not satisfy the accuracy criteria, the second step is QAT using the [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf) for [PyTorch*](https://pytorch.org/) and [TensorFlow*](https://www.tensorflow.org/) models.
At this step, we assume the user has an original training pipeline of the model written in TensorFlow or PyTorch, with NNCF integrated into it.
After this step, you can get an accurate optimized model that can be converted to OpenVINO Intermediate Representation (IR) using the model conversion API and inferred with the OpenVINO Inference Engine.
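The sketch below illustrates the Step 1 flow with the POT Python API: load the FP32 IR, wrap a representative dataset in a `DataLoader`, and run `DefaultQuantization`. It is a minimal sketch, not a full recipe: the paths, the `ImageLoader` class (any `openvino.tools.pot.DataLoader` implementation), and the algorithm parameters are placeholders.
```python
from openvino.tools.pot import IEEngine, load_model, save_model, \
    compress_model_weights, create_pipeline

# Placeholders: point these at your IR files and your DataLoader implementation
model_config = {"model_name": "model", "model": "<MODEL_PATH>", "weights": "<PATH_TO_WEIGHTS>"}
engine_config = {"device": "CPU"}
algorithms = [{"name": "DefaultQuantization",
               "params": {"target_device": "ANY", "preset": "performance", "stat_subset_size": 300}}]

data_loader = ImageLoader("<PATH_TO_DATASET>")  # any openvino.tools.pot.DataLoader subclass

model = load_model(model_config)                            # read the FP32 IR
engine = IEEngine(config=engine_config, data_loader=data_loader)
pipeline = create_pipeline(algorithms, engine)              # build the optimization pipeline
compressed_model = pipeline.run(model)                      # run INT8 quantization
compress_model_weights(compressed_model)                    # store weights in INT8 to reduce IR size
save_model(compressed_model, save_path="optimized")         # write the quantized IR
```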

View File

@ -1,33 +0,0 @@
# Low-precision model representation
## Introduction
The goal of this document is to describe how optimized models are represented in OpenVINO Intermediate Representation (IR) and provide guidance on interpretation rules for such models at runtime.
Currently, there are two groups of optimization methods that can change the IR after applying them to the full-precision model:
- **Sparsity**. It is represented by zeros inside the weights, and it is up to the hardware plugin how to interpret these zeros (use the weights as-is or apply special compression algorithms and sparse arithmetic). No additional mask is provided with the model.
- **Quantization**. The rest of this document is dedicated to the representation of quantized models.
## Representation of quantized models
The OpenVINO Toolkit represents all the quantized models using the so-called [FakeQuantize](https://docs.openvino.ai/2021.4/openvino_docs_MO_DG_prepare_model_convert_model_Legacy_IR_Layers_Catalog_Spec.html#fakequantize-layer) operation. This operation is very expressive and allows mapping values from arbitrary input and output ranges. We project (discretize) the input values to the low-precision data type using affine transformation (with clamp and rounding) and then re-project discrete values back to the original range and data type. It can be considered as an emulation of the quantization/dequantization process which happens at runtime. The figure below shows a part of the DL model, namely the Convolutional layer, that undergoes various transformations, from being a floating-point model to an integer model executed in the OpenVINO runtime. Column 2 of this figure below shows a model quantized with [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf).
![](images/model_flow.png)
To reduce the memory footprint, weights of quantized models are transformed to a target data type, e.g. int8 in the case of 8-bit quantization. During this transformation, the floating-point weights tensor and one of the FakeQuantize operations that corresponds to it are replaced with an 8-bit weight tensor and a sequence of Convert, Subtract, Multiply operations that represent the typecast and the dequantization parameters (scale and zero-point), as shown in column 3 of the figure.
## Interpreting FakeQuantize at runtime
At inference time, the quantized model undergoes the second set of transformations that allows interpreting floating-point operations with quantization rules as integer operations. OpenVINO Toolkit has Low-Precision Transformations (LPT) component for that purpose.
At runtime each FakeQuantize can be split into two independent operations: **Quantize** and **Dequantize** (column 4). **Quantize** transforms the input data into the target precision while **Dequantize** transforms the resulting values back to the original range. *Dequantize* operations can be propagated forward through the linear layers, such as *Convolution* or *Fully-Connected*, and, in some cases, fused with the following *Quantize* operation for the next layer into the so-called *Requantize* operation (column 5).
From the computation standpoint, the FakeQuantize formula is split into two parts:
`output = round((x - input_low) / (input_high - input_low) * (levels-1)) / (levels-1) * (output_high - output_low) + output_low`
The first part of this formula represents the *Quantize* operation:
`q = round((x - input_low) / (input_high - input_low) * (levels-1))`
The second is responsible for the dequantization:
`r = q / (levels-1) * (output_high - output_low) + output_low`
From the scale/zero-point notation standpoint the latter formula can be written as follows:
`r = (output_high - output_low) / (levels-1) * (q + output_low / (output_high - output_low) * (levels-1))`
Thus we can define:
- **Scale** as `(output_high - output_low) / (levels-1)`
- **Zero-point** as `-output_low / (output_high - output_low) * (levels-1)`
**Note**: During the quantization process, the values `input_low`, `input_high`, `output_low`, `output_high` are selected so that a floating-point zero is mapped exactly to an integer value (the zero-point) and vice versa.
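For concreteness, the small sketch below evaluates the formulas above with illustrative 8-bit FakeQuantize ranges (the range values are assumptions, not taken from a real model) and checks that the scale/zero-point form produces the same dequantized values:
```python
import numpy as np

# Illustrative FakeQuantize parameters (assumed values for an 8-bit case)
levels = 256
input_low, input_high = 0.0, 2.55
output_low, output_high = 0.0, 2.55

scale = (output_high - output_low) / (levels - 1)
zero_point = -output_low / (output_high - output_low) * (levels - 1)

x = np.array([0.0, 0.1, 1.0, 2.0, 2.55])
q = np.round((x - input_low) / (input_high - input_low) * (levels - 1))   # Quantize
r = q / (levels - 1) * (output_high - output_low) + output_low            # Dequantize
assert np.allclose(r, scale * (q - zero_point))                           # scale/zero-point form matches
```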

View File

@ -1,122 +0,0 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#! [image_loader]
import os
import numpy as np
import cv2 as cv
from openvino.tools.pot import DataLoader
class ImageLoader(DataLoader):
""" Loads images from a folder """
def __init__(self, dataset_path):
# Use OpenCV to gather image files
# Collect names of image files
self._files = []
all_files_in_dir = os.listdir(dataset_path)
for name in all_files_in_dir:
file = os.path.join(dataset_path, name)
if cv.haveImageReader(file):
self._files.append(file)
# Define shape of the model
self._shape = (224,224)
def __len__(self):
""" Returns the length of the dataset """
return len(self._files)
def __getitem__(self, index):
""" Returns image data by index in the NCHW layout
Note: model-specific preprocessing is omitted, consider adding it here
"""
if index >= len(self):
raise IndexError("Index out of dataset size")
image = cv.imread(self._files[index]) # read image with OpenCV
image = cv.resize(image, self._shape) # resize to a target input size
image = np.expand_dims(image, 0) # add batch dimension
image = image.transpose(0, 3, 1, 2) # convert to NCHW layout
return image, None # annotation is set to None
#! [image_loader]
#! [text_loader]
import os
from pathlib import Path
from datasets import load_dataset #pip install datasets
from transformers import AutoTokenizer #pip install transformers
from openvino.tools.pot import DataLoader
class TextLoader(DataLoader):
""" Loads content of .txt files from a folder """
def __init__(self, dataset_path):
# HuggingFace dataset API is used to process text files
# Collect names of text files
extension = ".txt"
files = sorted(str(p.stem) for p in
Path(dataset_path).glob("*" + extension))
files = [os.path.join(dataset_path, file + extension) for file in files]
self._dataset = load_dataset('text', data_files=files)
# replace with your tokenizer
self._tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
self._dataset = self._dataset.map(self._encode, batched=False)
# replace with names of model inputs
self._dataset.set_format(type='numpy',
columns=['input_ids', 'token_type_ids', 'attention_mask'])
def _encode(self, examples):
""" Tokenization of the input text """
return self._tokenizer(examples['text'], truncation=True, padding='max_length')
def __len__(self):
""" Returns the length of the dataset """
return len(self._dataset['train'])
def __getitem__(self, index):
""" Returns data by index as a (dict[str, np.array], None) """
if index >= len(self):
raise IndexError("Index out of dataset size")
data = self._dataset['train'][index]
return {'input_ids': data['input_ids'],
'token_type_ids': data['token_type_ids'],
'attention_mask': data['attention_mask']}, None # annotation is set to None
#! [text_loader]
#! [audio_loader]
import os
from pathlib import Path
import torchaudio # pip install torch torchaudio
from openvino.tools.pot import DataLoader
class AudioLoader(DataLoader):
""" Loads content of .wav files from a folder """
def __init__(self, dataset_path):
# Collect names of wav files
self._extension = ".wav"
self._dataset_path = dataset_path
self._files = sorted(str(p.stem) for p in
Path(self._dataset_path).glob("*" + self._extension))
def __len__(self):
""" Returns the length of the dataset """
return len(self._files)
def __getitem__(self, index):
""" Returns wav data by index
Note: model-specific preprocessing is omitted, consider adding it here
"""
if index >= len(self):
raise IndexError("Index out of dataset size")
file_name = self._files[index] + self._extension
file_path = os.path.join(self._dataset_path, file_name)
waveform, _ = torchaudio.load(file_path) # use a helper from torchaudio to load data
return waveform.numpy(), None # annotation is set to None
#! [audio_loader]
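# Illustrative usage sketch (not part of the original snippets): any of the loaders above
# can be indexed directly or passed to IEEngine in a POT pipeline. The dataset path below
# is a placeholder.
if __name__ == '__main__':
    loader = ImageLoader("<PATH_TO_IMAGE_FOLDER>")   # placeholder path to a folder with images
    print(len(loader))                               # number of images found in the folder
    image, annotation = loader[0]                    # NCHW numpy array and None annotation
    print(image.shape, annotation)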

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4958239932616705f208607c20f63f92a6cdb219f8a5e9ff6046ff7835c451dc
size 47028

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2a5bd3b61d61b7eecb51fa0e932bc8215659d8f5b92f96abba927d9d3f94f277
size 38993

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0e564f28b20af9c92511a59389bb42934bc2e19dcaca593c435968d76f5ff7a6
size 28899

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bcaa0c75dab08dc03343b2bce069148e27141da1abc92fc5fde2fce3a5d8f5e8
size 19411

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2612fc6169dec150907d79b174c46c9b2f6428b5a20cf462c57a5ea2fc97f56a
size 97895

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5183c57dc825af40051782818d9bf40236bd6be8fbee3ae4e7a982000e4d6af8
size 89875

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e0bab657bf979494cb84459e29024e5b8b9cd320388c62c6a91b74b897b19718
size 18108

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:71365e85be040eb01ed524e568b332d9bb6222c760686c54db4e754f587082c2
size 31032

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3f68e826cfac63d8e6f8d77aa5b7fc61957a872dfb09b38695fb481044a6ddd5
size 48327

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:79ef392200a6d9ecad6be9cab7b1ecd4af7b88b4fd55f8f8884a02b16b435f68
size 36036

View File

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6b9a68861a65526203b56a897f0d6cec0ef860619e9aaf275bc0d7483bc34329
size 92994

View File

@ -1,33 +0,0 @@
<doxygenlayout xmlns:xi="http://www.w3.org/2001/XInclude" version="1.0">
<!-- POT Developer Guide -->
<navindex>
<tab id="pot" type="usergroup" title="Post-Training Optimization Tool" url="@ref pot_README">
<tab type="user" title="Installation Guide" url="@ref pot_InstallationGuide"/>
<tab type="user" title="Low Precision Optimization Guide" url="@ref pot_docs_LowPrecisionOptimizationGuide"/>
<tab type="usergroup" title="Quantization" url="@ref pot_compression_algorithms_quantization_README">
<tab type="user" title="DefaultQuantization Algorithm" url="@ref pot_compression_algorithms_quantization_default_README"/>
<tab type="user" title="AccuracyAwareQuantization Algorithm" url="@ref accuracy_aware_README"/>
<tab type="user" title="Saturation issue workaround" url="@ref pot_saturation_issue"/>
<tab type="user" title="Low-precision model representation" url="@ref pot_docs_model_representation"/>
</tab>
<tab type="user" title="Best Practices" url="@ref pot_docs_BestPractices"/>
<tab type="user" title="Command-line Interface" url="@ref pot_compression_cli_README">
<tab type="user" title="Simplified mode" url="@ref pot_docs_simplified_mode"/>
<tab type="user" title="End-to-end CLI example" url="@ref pot_configs_examples_README"/>
</tab>
<tab type="user" title="API" url="@ref pot_compression_api_README">
<tab type="user" title="API samples" url="@ref pot_sample_README">
<tab type="user" title="Image Classification quantization sample" url="@ref pot_sample_classification_README"/>
<tab type="user" title="Accuracy-Aware quantization sample" url="@ref pot_sample_object_detection_README"/>
<tab type="user" title="Cascaded model quantization sample" url="@ref pot_sample_face_detection_README"/>
<tab type="user" title="Semantic segmentation quantization sample" url="@ref pot_sample_segmentation_README"/>
<tab type="user" title="3D Segmentation quantization sample" url="@ref pot_sample_3d_segmentation_README"/>
<tab type="user" title="GNA speech sample" url="@ref pot_sample_speech_README"/>
</tab>
</tab>
<tab type="user" title="Configuration File Description" url="@ref pot_configs_README"/>
<tab type="user" title="Deep neural network protection through range supervision" url="@ref pot_ranger_README"/>
<tab type="user" title="Frequently Asked Questions" url="@ref pot_docs_FrequentlyAskedQuestions"/>
</tab>
</navindex>
</doxygenlayout>

View File

@ -1,7 +0,0 @@
# Copyright (C) 2020-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from openvino.tools.pot.app.run import main
if __name__ == '__main__':
main()

View File

@ -1,62 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
__path__ = __import__("pkgutil").extend_path(__path__, __name__)
# Required for Windows OS platforms
# Note: always top-level
try:
from openvino.utils import _add_openvino_libs_to_search_path
_add_openvino_libs_to_search_path()
except ImportError:
pass
# API 2.0
try:
# Import all public modules
from openvino import runtime as runtime
from openvino import frontend as frontend
from openvino import helpers as helpers
from openvino import preprocess as preprocess
from openvino import utils as utils
from openvino.runtime import properties as properties
# Import most important classes and functions from openvino.runtime
from openvino.runtime import Model
from openvino.runtime import Core
from openvino.runtime import CompiledModel
from openvino.runtime import InferRequest
from openvino.runtime import AsyncInferQueue
from openvino.runtime import Dimension
from openvino.runtime import Strides
from openvino.runtime import PartialShape
from openvino.runtime import Shape
from openvino.runtime import Layout
from openvino.runtime import Type
from openvino.runtime import Tensor
from openvino.runtime import OVAny
from openvino.runtime import compile_model
from openvino.runtime import get_batch
from openvino.runtime import set_batch
from openvino.runtime import serialize
from openvino.runtime import shutdown
from openvino.runtime import tensor_from_file
from openvino.runtime import save_model
from openvino.runtime import layout_helpers
# Set version for openvino package
from openvino.runtime import get_version
__version__ = get_version()
except ImportError:
import warnings
warnings.warn("openvino package has problems with imports!", ImportWarning, stacklevel=2)
# Tools
try:
# Model Conversion API - ovc should reside in the main namespace
from openvino.tools.ovc import convert_model
except ImportError:
pass

View File

@ -1,4 +0,0 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
__path__ = __import__("pkgutil").extend_path(__path__, __name__)

View File

@ -1,69 +0,0 @@
# Copyright (C) 2020-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from .algorithms.quantization.accuracy_aware.algorithm import AccuracyAwareQuantization
from .algorithms.quantization.accuracy_aware_gna.algorithm import AccuracyAwareGNA
from .algorithms.quantization.accuracy_aware_common.algorithm import AccuracyAwareCommon
from .algorithms.quantization.accuracy_aware_common.mixed_precision import (
INT4MixedQuantization,
)
from .algorithms.quantization.fast_bias_correction.algorithm import FastBiasCorrection
from .algorithms.quantization.bias_correction.algorithm import BiasCorrection
from .algorithms.quantization.channel_alignment.algorithm import (
ActivationChannelAlignment,
)
from .algorithms.quantization.datafree.algorithm import DataFreeQuantization
from .algorithms.quantization.default.algorithm import DefaultQuantization
from .algorithms.quantization.minmax.algorithm import MinMaxQuantization
from .algorithms.quantization.optimization.rangeopt import RangeOptimization
from .algorithms.quantization.optimization.params_tuning import (
ParamsGridSearchAlgorithm,
)
from .algorithms.quantization.qnoise_estimator.algorithm import QuantNoiseEstimator
from .algorithms.quantization.tunable_quantization.algorithm import TunableQuantization
from .algorithms.quantization.outlier_channel_splitting.algorithm import (
OutlierChannelSplitting,
)
from .algorithms.quantization.weight_bias_correction.algorithm import (
WeightBiasCorrection,
)
from .algorithms.sparsity.magnitude_sparsity.algorithm import MagnitudeSparsity
from .algorithms.sparsity.default.algorithm import WeightSparsity
from .algorithms.sparsity.default.base_algorithm import BaseWeightSparsity
from .algorithms.quantization.overflow_correction.algorithm import OverflowCorrection
from .algorithms.quantization.range_supervision.algorithm import RangeSupervision
from .api.data_loader import DataLoader
from .api.metric import Metric
from .api.engine import Engine
from .engines.ie_engine import IEEngine
from .graph import load_model, save_model
from .graph.model_utils import compress_model_weights
from .pipeline.initializer import create_pipeline
QUANTIZATION_ALGORITHMS = [
'MinMaxQuantization',
'RangeOptimization',
'FastBiasCorrection',
'BiasCorrection',
'ActivationChannelAlignment',
'DataFreeQuantization',
'DefaultQuantization',
'AccuracyAwareQuantization',
'AccuracyAwareGNA',
'AccuracyAwareCommon',
'INT4MixedQuantization',
'TunableQuantization',
'QuantNoiseEstimator',
'OutlierChannelSplitting',
'WeightBiasCorrection',
'ParamsGridSearchAlgorithm',
'OverflowCorrection',
'RangeSupervision',
]
SPARSITY_ALGORITHMS = ['WeightSparsity',
'MagnitudeSparsity',
'BaseWeightSparsity']
__all__ = QUANTIZATION_ALGORITHMS + SPARSITY_ALGORITHMS

View File

@ -1,7 +0,0 @@
# Copyright (C) 2020-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import sys
from openvino.tools.pot.app.run import app
app(sys.argv[1:])

View File

@ -1,2 +0,0 @@
# Copyright (C) 2020-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

View File

@ -1,72 +0,0 @@
# Copyright (C) 2020-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from abc import ABC, abstractmethod
from copy import deepcopy
from .utils import process_ignored_scope
from ..api.engine import Engine
class Algorithm(ABC):
algo_type = 'quantization'
def __init__(self, config, engine: Engine):
""" Constructor
:param config: algorithm specific config
:param engine: model inference engine
:param sampler: Sampler class inheritor instance to read dataset
"""
self._config, self._engine = deepcopy(config), engine
self._stats_collector = None
self.params = {}
self.default_steps_size = 0.05
self.total_exec_steps = 0
if isinstance(self._config.ignored, dict) and 'scope' in self._config.ignored:
self._config.ignored.scope = process_ignored_scope(self._config.ignored.scope)
@property
def config(self):
return self._config
@property
def algo_collector(self):
return self._stats_collector
@algo_collector.setter
def algo_collector(self, collector):
self._stats_collector = collector
@abstractmethod
def run(self, model):
""" Run algorithm on model
:param model: model to apply algorithm
:return optimized model
"""
def statistics(self):
""" Returns a dictionary of printable statistics"""
return {}
def register_statistics(self, model, stats_collector):
"""
:param model: FP32 original model
:param stats_collector: object of StatisticsCollector class
:return: None
"""
def get_parameter_meta(self, _model):
""" Get parameters metadata
:param _model: model to get parameters for
:return params_meta: metadata of optional parameters
"""
return []
def compute_total_exec_steps(self, model=None):
""" Compute executions steps based on stat_subset_size, algorithm, model """
def update_config(self, config):
""" Update Algorithm configuration based on input config """
self._config = deepcopy(config)
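# Illustrative subclass sketch (not part of the original file): the minimal surface a
# concrete Algorithm has to provide is a `run` method. This no-op variant simply returns
# the model unchanged; a real algorithm would collect statistics via self._stats_collector
# and transform the model here.
class IdentityAlgorithm(Algorithm):
    name = 'IdentityAlgorithm'
    def run(self, model):
        """ Return the model unchanged """
        return model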

View File

@ -1,27 +0,0 @@
# Copyright (C) 2020-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from ..utils.registry import Registry, RegistryStorage
COMPRESSION_ALGORITHMS = Registry('QuantizationAlgos')
REGISTRY_STORAGE = RegistryStorage(globals())
def get_registry(name):
return REGISTRY_STORAGE.get_registry(name)
def get_algorithm(name):
if name.startswith('.') or name.endswith('.'):
raise Exception('The algorithm name cannot start or end with "."')
if '.' in name:
ind = name.find('.')
reg_name = name[:ind]
algo_name = name[ind + 1:]
else:
reg_name = 'QuantizationAlgos'
algo_name = name
reg = get_registry(reg_name)
return reg.get(algo_name)
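# Illustrative lookup sketch (not part of the original file). The algorithm must already be
# registered in the corresponding registry; the names below are examples:
#   algo_cls = get_algorithm('DefaultQuantization')                     # default 'QuantizationAlgos' registry
#   algo_cls = get_algorithm('QuantizationAlgos.DefaultQuantization')   # explicit "<registry>.<algorithm>" form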

View File

@ -1,2 +0,0 @@
# Copyright (C) 2020-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

View File

@ -1,378 +0,0 @@
# Copyright (C) 2020-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import random
from copy import deepcopy
import numpy as np
import torch
from .utils import get_optimization_params
from ..quantization.accuracy_aware_common.utils import evaluate_model, create_metric_config
from ...algorithms.algorithm import Algorithm
from ...engines.simplified_engine import SimplifiedEngine
from ...graph import model_utils as mu, node_utils as nu
from ...graph.special_operations import OPERATIONS_WITH_WEIGHTS
from ...samplers.batch_sampler import BatchSampler
from ...statistics.collector import collect_statistics
from ...statistics.statistics import TensorStatistic
from ...utils.logger import get_logger
logger = get_logger(__name__)
# pylint: disable=E1102,C0415,R0902,R0912
class LayerwiseModelFinetuning(Algorithm):
name = 'LayerwiseModelFinetuning'
@property
def change_original_model(self):
return True
def __init__(self, config, engine):
super().__init__(config, engine)
self._tconf = {
'optimizer': 'Adam',
'loss': 'l2',
'seed': 0,
'weight_decay': 0,
'loss_logging_freq': 10,
'calibration_indices_pool': 300,
'use_only_fp_inputs': True,
'calculate_grads_on_loss_increase_only': True,
'update_every_batch': False,
'use_ranking_subset': False,
'tuning_ignored_scope': self._config.ignored.get('scope', []),
'batch_size': 1
}
for key, value in self._tconf.items():
self._tconf[key] = self._config.get(key, value)
self._device = 'cpu'
self._current_best_loss = 0.0
self._iteration = 0
self._safety_eps = 1e-8
self._dataset_size = len(self._engine.data_loader)
self._samples_indices_pool = range(self._dataset_size)
self._weighted_operations = [op['type'] for op in OPERATIONS_WITH_WEIGHTS]
self._is_variable_resolution_model = False
self._optimization_dataset_size = self._dataset_size
self._metric_subset_ratio = (10 * self._tconf['calibration_indices_pool'] / self._optimization_dataset_size)
self._ranking_subset_size = self._tconf['calibration_indices_pool']
self._original_model = None
self._initial_losses = {}
self._nodes_to_tune = {}
self._nodes_to_tune_input = {}
self._nodes_to_tune_output = {}
self._layer_ops_wrapped = {}
self._is_simplified_evaluation = isinstance(self._engine, SimplifiedEngine)
self._base_algo_config = deepcopy(self._config)
self._base_algo = None
self._base_algo_args = None
self._metrics_config = None
self.set_seed(self._tconf['seed'], self._device)
self.set_default_parameters()
def set_default_parameters(self):
if self._tconf['use_ranking_subset']:
if self._is_simplified_evaluation:
logger.info('Cannot use ranking subset in simplified mode')
self._tconf['use_ranking_subset'] = False
else:
self._metrics_config = create_metric_config(
self._engine,
self._config,
force_logit_comparison=True,
logit_distance_type='mse',
)
if (self._tconf['calibration_indices_pool'] is not None
and self._tconf['calibration_indices_pool'] < self._optimization_dataset_size):
self._samples_indices_pool = random.sample(
range(self._optimization_dataset_size), self._tconf['calibration_indices_pool'])
def run(self, model):
raise NotImplementedError
def _collect_nodes_to_tune(self, modified_model):
raise NotImplementedError
def _wrap_nodes(self, modified_model, nodes_to_tune):
raise NotImplementedError
def _calculate_gradients(self, losses):
pass
def _get_optimizer_and_criterion(self, wrapped_ops_parameters):
criterion, optimizer_algorithm = get_optimization_params(self._tconf['loss'], self._tconf['optimizer'])
optimizers = {
name: optimizer_algorithm(params=param, weight_decay=self._tconf['weight_decay'])
for name, param in wrapped_ops_parameters.items()
}
return optimizers, criterion
def _wrap_node(self, op_node, wrapper, op_info):
params = []
wrapped_op = None
if wrapper.is_able_to_wrap(op_node):
wrapped_op = wrapper(op_node, device=self._device, **op_info)
for name, param in wrapped_op.named_parameters():
lr_name = name + '_lr'
if lr_name in self._tconf.keys():
params.append({'lr': self._tconf[lr_name], 'params': [param]})
else:
logger.warning('Undefined parameter found: {}'.format(name))
continue
else:
logger.warning('Was not able to wrap layer {} with PyTorch'.format(op_node.fullname))
return wrapped_op, params
def _fine_tuning_loop(
self,
modified_model,
optimizers,
criterion,
n_batches,
fp_model_callbacks,
modified_model_callbacks=None
):
for layer in self._layer_ops_wrapped.values():
layer.to(self._device)
for optimizer in optimizers.values():
optimizer.zero_grad()
try:
# Calculate feature maps for the original model beforehand on the used batch
batch_indices_sample = self._random_samples()
fp_activations = self._update_batch_from_model(self._original_model,
batch_indices_sample,
fp_model_callbacks)
for batch_idx in range(n_batches):
if batch_idx != 0 and self._tconf['update_every_batch']:
logger.debug('Batch update')
batch_indices_sample = self._random_samples()
fp_activations = self._update_batch_from_model(self._original_model,
batch_indices_sample,
fp_model_callbacks)
modified_activations = fp_activations
if modified_model_callbacks:
modified_activations = self._update_batch_from_model(modified_model,
batch_indices_sample,
modified_model_callbacks)
self._fine_tuning_step(
optimizers,
criterion,
batch_idx,
fp_activations,
modified_activations,
n_batches
)
return 0
except MemoryError:
return -1
def _random_samples(self):
batch_indices_sample = random.sample(self._samples_indices_pool, self._tconf['batch_size'])
if self._is_simplified_evaluation:
batch_indices_sample = BatchSampler(batch_indices_sample)
return batch_indices_sample
def _update_batch_from_model(self, model, batch_indices_sample, model_callbacks):
self._engine.set_model(model)
_, output_activations = self._engine.predict(model_callbacks, batch_indices_sample)
return self._activation_maps_to_torch(output_activations)
def _fine_tuning_step(
self,
optimizers,
criterion,
batch_idx,
fp_activations,
modified_activations,
n_batches
):
accumulated_losses = {op_name: 0.0 for op_name in self._layer_ops_wrapped}
losses = {op_name: 0.0 for op_name in self._layer_ops_wrapped}  # start at zero so the variable-resolution branch can accumulate with +=
for op_name in self._layer_ops_wrapped:
torch_wrapped_op = self._layer_ops_wrapped[op_name]
input_name = self._nodes_to_tune_input[op_name]
output_name = self._nodes_to_tune_output[op_name]
in_blobs = modified_activations[input_name]['output']
if self._tconf['use_only_fp_inputs']:
in_blobs = fp_activations[input_name]['output']
fp_out_blobs = fp_activations[output_name]['output']
if not self._is_variable_resolution_model:
modified_out_blobs = torch_wrapped_op(in_blobs)
losses[op_name] = criterion(modified_out_blobs, fp_out_blobs)
else:
for blob_idx, modified_in_blob in enumerate(in_blobs):
modified_out_blob = torch_wrapped_op(torch.unsqueeze(modified_in_blob, 0))
losses[op_name] += criterion(
modified_out_blob, torch.unsqueeze(fp_out_blobs[blob_idx], 0)
)
for name, loss in losses.items():
accumulated_losses[name] = loss.data
if batch_idx == 0 and self._iteration == 0:
self._initial_losses = deepcopy(accumulated_losses)
self._initial_losses = {
name: val + self._safety_eps
for name, val in self._initial_losses.items()
}
weighted_loss = 0
for op_name in self._layer_ops_wrapped:
init_loss = self._initial_losses[op_name]
accumulated_loss = accumulated_losses[op_name]
weighted_loss += accumulated_loss / init_loss / len(self._initial_losses)
if batch_idx % self._tconf['loss_logging_freq'] == 0:
printable_loss = weighted_loss.to('cpu').numpy()
logger.info(
'Batch #%s/%s, weighted_loss: %s',
batch_idx + 1,
n_batches,
printable_loss,
)
if self._tconf['calculate_grads_on_loss_increase_only']:
if weighted_loss >= self._current_best_loss:
self._current_best_loss = weighted_loss
self._calculate_gradients(losses)
for op_name, optimizer in optimizers.items():
optimizer.step()
if self._current_best_loss == weighted_loss:
optimizer.zero_grad()
self._current_best_loss = weighted_loss
else:
self._calculate_gradients(losses)
for op_name, optimizer in optimizers.items():
optimizer.step()
optimizer.zero_grad()
if self._tconf['update_every_batch']:
for layer in self._layer_ops_wrapped.values():
layer.update_node_params()
def _activation_maps_to_torch(self, activations):
for layer_name in activations:
activations[layer_name]['output'] = [
torch.tensor(activations[layer_name]['output'][index][0]).to(self._device) for index in
range(len(activations[layer_name]['output']))]
if len({feature_map.shape for feature_map in activations[layer_name]['output']}) > 1:
self._is_variable_resolution_model = True
if not self._is_variable_resolution_model:
for layer_name in activations:
activations[layer_name]['output'] = torch.stack(activations[layer_name]['output'])
return activations
def _get_ranking_subset(self):
"""
Find a subset of samples with the highest distance between
outputs of original and compressed model (a ranking subset)
:return: ranking data subset indices
"""
base_algo = self._base_algo(**self._base_algo_args)
base_algo.register_statistics(self._original_model, self.algo_collector)
collect_statistics(self._engine, self._original_model, [base_algo])
base_model = base_algo.run(deepcopy(self._original_model))
output_node_name = nu.get_node_input(self._original_model.get_final_output_nodes()[0], 0).fullname
stats_layout = {output_node_name: {'output_logits': TensorStatistic(lambda logits: logits)}}
metric_subset_size = int(self._dataset_size * self._metric_subset_ratio)
diff_subset_indices = (
sorted(random.sample(range(self._dataset_size), metric_subset_size))
if metric_subset_size < self._dataset_size
else list(range(self._dataset_size))
)
_, original_per_sample_metrics = evaluate_model(
self._original_model,
self._engine,
self._dataset_size,
subset_indices=diff_subset_indices,
metrics_config=self._metrics_config,
output_node_name=output_node_name,
stats_layout=stats_layout,
)
_, base_model_per_sample_metrics = evaluate_model(
base_model,
self._engine,
self._dataset_size,
subset_indices=diff_subset_indices,
metrics_config=self._metrics_config,
output_node_name=output_node_name,
stats_layout=stats_layout,
)
persample_metric = list(self._metrics_config.values())[0].persample
sorted_sample_importance = persample_metric.sort_fn(
original_per_sample_metrics[persample_metric.name],
base_model_per_sample_metrics[persample_metric.name],
reverse=True,
)
ranking_indices = sorted_sample_importance[: self._ranking_subset_size]
ranking_subset = list(np.array(diff_subset_indices)[ranking_indices])
return ranking_subset
def _create_layer_callbacks(self, modified_model):
fp_model_callbacks = {}
modified_model_callbacks = {}
for op_name in self._nodes_to_tune:
modified_node = mu.get_node_by_name(modified_model, op_name)
input_node = self._get_input_node(modified_node)
output_node = input_node
if modified_node.type in self._weighted_operations:
bias_node = nu.get_bias_for_node(modified_node)
output_node = modified_node
if bias_node is not None:
output_node = nu.get_node_output(bias_node, 0)[0]
input_node_name = self._get_input_node_name(modified_node)
if self._tconf['use_only_fp_inputs']:
fp_model_callbacks[input_node_name] = {'output': lambda tensor: tensor}
else:
modified_model_callbacks[input_node_name] = {'output': lambda tensor: tensor}
fp_model_callbacks[output_node.fullname] = {'output': lambda tensor: tensor}
self._nodes_to_tune_input[op_name] = input_node_name
self._nodes_to_tune_output[op_name] = output_node.fullname
return fp_model_callbacks, modified_model_callbacks
def register_statistics(self, model, stats_collector):
self.algo_collector = stats_collector
def _check_batch_size(self):
if self._tconf['batch_size'] > self._dataset_size:
logger.debug('Batch size changed from {} to the dataset size {}.'.format(
self._tconf['batch_size'], self._dataset_size))
self._tconf['batch_size'] = self._dataset_size
@staticmethod
def set_seed(seed, device):
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
if device != 'cpu':
import torch.backends.cudnn as cudnn
cudnn.deterministic = True
cudnn.benchmark = False
@staticmethod
def _get_input_node(node):
return nu.get_node_input(node, 0)
@staticmethod
def _get_input_node_name(node):
return nu.get_quantized_input_key(node)
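The weighted loss accumulated in _fine_tuning_step normalizes each wrapped layer's loss by its value on the first batch, so layers with very different loss magnitudes contribute comparably. A standalone sketch of that normalization; the function and the example values are illustrative, not part of the original module:

import torch

def weighted_finetuning_loss(losses, initial_losses, eps=1e-8):
    # Mirrors _fine_tuning_step: each layer's current loss is divided by its
    # initial loss (stabilized by eps) and the contributions are averaged.
    total = 0.0
    for name, loss in losses.items():
        total += loss / (initial_losses[name] + eps) / len(initial_losses)
    return total

loss = weighted_finetuning_loss(
    {'conv1': torch.tensor(0.5), 'fc': torch.tensor(2.0)},
    {'conv1': torch.tensor(1.0), 'fc': torch.tensor(4.0)},
)
# loss is 0.5 here: both layers are at half of their initial loss.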

View File

@ -1,235 +0,0 @@
# Copyright (C) 2020-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import numpy as np
import torch
from openvino.tools.pot.graph import node_utils as nu
from openvino.tools.pot.utils.logger import get_logger
from .utils import get_weight_node
logger = get_logger(__name__)
# pylint: disable=W0221
class STERound(torch.autograd.Function):
@staticmethod
def forward(ctx, input_data, val_min, val_max):
ctx.save_for_backward(input_data)
ctx.val_min = val_min
ctx.val_max = val_max
return input_data.round()
@staticmethod
def backward(ctx, grad_output):
(input_data,) = ctx.saved_tensors
alpha = 0.01
mask = (input_data <= ctx.val_max) & (input_data >= ctx.val_min)
mask = mask.type(input_data.dtype)
grad_input = grad_output * (mask * (1 - alpha) + alpha)
return grad_input, None, None
# pylint: disable=E1102,W0223
class FakeQuantize(torch.nn.Module):
"""
A pytorch wrapper for a single FakeQuantize node.
"""
@staticmethod
def is_able_to_wrap(node):
if node.type != 'FakeQuantize':
return False
is_const = [
node.in_port(i).get_source().node.type == 'Const' for i in range(1, 5)
]
if not all(is_const):
return False
data = [node.in_port(i).data.get_value() for i in range(1, 5)]
diff = [np.max(np.abs(data[i] - data[i + 2])) for i in [0, 1]]
diff = max(diff)
if diff > 10 ** -8:
logger.info('FakeQuantize {} has different input and output scales'.format(node.name))
return False
return True
def __init__(self, node, device='cpu', asymmetric=False):
super(FakeQuantize, self).__init__()
self.node = node
self.device = device
input_0 = nu.get_node_input(self.node, 0)
self.is_weight_fq = input_0.type == 'Const'
self.asymmetric = asymmetric
min_val = nu.get_node_value(nu.get_node_input(self.node, 1))
max_val = nu.get_node_value(nu.get_node_input(self.node, 2))
min_val = np.array(min_val, dtype=np.float32)
self.min = torch.tensor(min_val).to(self.device)
self.min = torch.nn.Parameter(self.min) if self.asymmetric else self.min
ranges = np.array(max_val - min_val, dtype=np.float32)
self.scale = torch.tensor(ranges).log()
self.scale = self.scale.to(self.device)
self.scale = torch.nn.Parameter(self.scale)
self.val_h = int(self.node.levels - 1)
self.val_l = 0
def update_node_params(self):
scale = self.scale.exp()
max_level = scale.detach().cpu().numpy()
max_level = np.reshape(max_level, nu.get_input_shape(self.node, 2))
min_level = self.min.detach().cpu().numpy()
min_level = np.reshape(min_level, nu.get_input_shape(self.node, 1))
max_level = min_level + max_level
self.node.in_port(1).data.set_value(min_level)
self.node.in_port(2).data.set_value(max_level)
self.node.in_port(3).data.set_value(min_level)
self.node.in_port(4).data.set_value(max_level)
def forward(self, x):
scale = self.scale.exp()
s = self.val_h * scale.reciprocal()
x = x - self.min
x = x * s
x = x.clamp(max=self.val_h, min=self.val_l)
x = STERound.apply(x, self.val_l, self.val_h)
x = x * s.reciprocal() + self.min
return x
# pylint: disable=E1102,W0223
class LinearModule(torch.nn.Module):
"""
A pytorch wrapper for a single Conv2d/Linear node.
"""
@staticmethod
def is_able_to_wrap(node):
if node.type not in ['Convolution', 'MatMul', 'GroupConvolution']:
return False
node_weight = nu.get_node_input(node, 1)
if node_weight.type == 'FakeQuantize':
node_weight = nu.get_node_input(node_weight, 0)
if node_weight.type != 'Const':
return False
if node.type != 'MatMul':
weights = nu.get_node_value(node_weight)
if len(weights.shape) != 4:
return False
s = node.stride
stride_check = (s[2] == s[3])
d = node.dilation
dilation_check = (d[2] == d[3])
if not dilation_check or not stride_check:
return False
bias_node = nu.get_bias_for_node(node)
if bias_node is not None:
bias_value = nu.get_node_value(bias_node)
if bias_value.shape[0] != 1:
return False
return True
def __init__(self,
node,
input_fq=None,
wrap_weight_fq=False,
device='cpu',
set_quantized_values_to_weight_parameter=False,
asymmetric=False):
super().__init__()
self.node = node
self.device = device
self.set_quantized_values_to_weight_parameter = set_quantized_values_to_weight_parameter
self.weight_fq, self.input_fq = None, input_fq
if wrap_weight_fq:
weight_fq = nu.get_node_input(self.node, 1)
weight_fq_wrapper = FakeQuantize
if not weight_fq_wrapper.is_able_to_wrap(weight_fq):
logger.warning('Was not able to wrap layer %s with pytorch', weight_fq.name)
self.weight_fq = None
else:
self.weight_fq = weight_fq_wrapper(weight_fq, device=device,
asymmetric=asymmetric)
node_weight = get_weight_node(node)
weights = nu.get_node_value(node_weight)
self.weights_dtype = weights.dtype
weights = torch.from_numpy(weights).to(torch.float32)
weights = weights.to(device)
self.weights = torch.nn.Parameter(weights)
self.bias = None
bias_node = nu.get_bias_for_node(self.node)
if bias_node is not None:
bias = nu.get_node_value(bias_node)
self.bias_dtype = bias.dtype
bias = torch.from_numpy(bias).to(torch.float32).squeeze()
bias = bias if bias.shape else bias.reshape(1)
bias = bias.to(device)
self.bias = torch.nn.Parameter(bias)
if self.node.type != 'MatMul':
self.stride = (int(node.stride[2]), int(node.stride[3]))
self.pads_begin, self.pads_end = node.pad[2], node.pad[3]
self.dilation = (int(node.dilation[2]), int(node.dilation[3]))
self.group = 1 if 'group' not in node else int(node.group)
def update_node_params(self):
weights = self.weights.detach()
weights = weights.cpu() if self.device != 'cpu' else weights
weights = weights.numpy().astype(self.weights_dtype)
weight_node = get_weight_node(self.node)
nu.set_node_value(weight_node, weights)
if self.weight_fq is not None:
self.weight_fq.update_node_params()
if self.input_fq is not None:
self.input_fq.update_node_params()
if self.bias is not None:
bias_node = nu.get_bias_for_node(self.node)
bias_shape = nu.get_node_value(bias_node).shape
bias = self.bias.data.reshape(bias_shape)
bias = bias.detach()
bias = bias.cpu() if self.device != 'cpu' else bias
bias = bias.numpy().astype(self.bias_dtype)
nu.set_node_value(bias_node, bias)
def forward(self, x):
w = self.weight_fq(self.weights) if self.weight_fq is not None else self.weights
x = self.input_fq(x) if self.input_fq is not None else x
if self.set_quantized_values_to_weight_parameter and self.weight_fq is not None:
self.weights.data = w
if self.node.type == 'MatMul':
x = torch.nn.functional.linear(x,
self.weights,
bias=self.bias)
else:
pad_top, pad_bottom = int(self.pads_begin[0]), int(self.pads_begin[1])
pad_left, pad_right = int(self.pads_end[0]), int(self.pads_end[1])
x = torch.nn.functional.pad(x, (pad_left, pad_right, pad_top, pad_bottom))
x = torch.nn.functional.conv2d(
x,
self.weights,
bias=self.bias,
stride=self.stride,
dilation=self.dilation,
groups=self.group
)
return x
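For reference, FakeQuantize.forward above implements the usual uniform fake quantization: shift by the lower bound, scale to [0, levels - 1], clamp, round, and map back. A self-contained sketch with plain rounding (the real wrapper stores the range as a learnable log-scale parameter and routes rounding through STERound so gradients can flow):

import torch

def fake_quantize(x, min_val, max_val, levels=256):
    # Uniform quantization to `levels` steps over [min_val, max_val];
    # out-of-range values saturate at the clamp.
    scale = (levels - 1) / (max_val - min_val)
    q = ((x - min_val) * scale).clamp(0, levels - 1).round()
    return q / scale + min_val

x = torch.tensor([-0.3, 0.0, 0.49, 1.2])
print(fake_quantize(x, min_val=0.0, max_val=1.0))
# tensor([0.0000, 0.0000, 0.4902, 1.0000])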

Some files were not shown because too many files have changed in this diff.