parent c79ae17bbf
commit 80618b0498
14  .ci/pot/Jenkinsfile  vendored
@@ -1,14 +0,0 @@
#!groovy


properties([
    parameters([
        string(defaultValue: '',
               description: 'Pipeline shared library version (branch/tag/commit). Determined automatically if empty',
               name: 'library_version')
    ])
])

loadOpenVinoLibrary {
    potEntrypoint(this)
}
@@ -11,7 +11,6 @@ __version__ = get_version()
 from openvino._pyopenvino._offline_transformations import apply_fused_names_cleanup
 from openvino._pyopenvino._offline_transformations import apply_moc_transformations
 from openvino._pyopenvino._offline_transformations import apply_moc_legacy_transformations
-from openvino._pyopenvino._offline_transformations import apply_pot_transformations
 from openvino._pyopenvino._offline_transformations import apply_low_latency_transformation
 from openvino._pyopenvino._offline_transformations import apply_pruning_transformation
 from openvino._pyopenvino._offline_transformations import apply_make_stateful_transformation
@@ -9,7 +9,6 @@
 #include <compress_quantize_weights.hpp>
 #include <openvino/pass/make_stateful.hpp>
 #include <openvino/pass/serialize.hpp>
-#include <pot_transformations.hpp>
 #include <pruning.hpp>
 #include <transformations/common_optimizations/compress_float_constants.hpp>
 #include <transformations/common_optimizations/fused_names_cleanup.hpp>
@@ -55,16 +54,6 @@ void regmodule_offline_transformations(py::module m) {
         py::arg("model"),
         py::arg("params_with_custom_types"));

-    m_offline_transformations.def(
-        "apply_pot_transformations",
-        [](std::shared_ptr<ov::Model> model, std::string device) {
-            ov::pass::Manager manager;
-            manager.register_pass<ov::pass::POTTransformations>(std::move(device));
-            manager.run_passes(model);
-        },
-        py::arg("model"),
-        py::arg("device"));
-
     m_offline_transformations.def(
         "apply_low_latency_transformation",
         [](std::shared_ptr<ov::Model> model, bool use_const_initializer = true) {
@@ -7,7 +7,6 @@ import pytest
 import numpy as np
 from openvino._offline_transformations import (
     apply_moc_transformations,
-    apply_pot_transformations,
     apply_low_latency_transformation,
     apply_pruning_transformation,
     apply_make_stateful_transformation,
@@ -113,15 +112,6 @@ def test_moc_with_smart_reshape():
     assert len(model.get_ops()) == 3


-def test_pot_transformations():
-    model = get_relu_model()
-
-    apply_pot_transformations(model, "GNA")
-
-    assert model is not None
-    assert len(model.get_ops()) == 3
-
-
 def test_low_latency_transformation():
     model = get_relu_model()

@@ -1,33 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <string>

#include "openvino/pass/graph_rewrite.hpp"

namespace ov {
namespace pass {

class POTTransformations;

}  // namespace pass
}  // namespace ov

/**
 * @brief This transformation is an entry point for OpenVINO transformations that will be
 * executed inside POT.
 */

class ov::pass::POTTransformations : public ov::pass::ModelPass {
    std::string m_device;

public:
    OPENVINO_RTTI("POTTransformations", "0");
    explicit POTTransformations(std::string device) : m_device(std::move(device)) {}

    bool run_on_model(const std::shared_ptr<ov::Model>&) override;
};
@@ -1,23 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "pot_transformations.hpp"

#include <memory>

#include "openvino/pass/manager.hpp"
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
#include "transformations/op_conversions/gru_cell_decomposition.hpp"
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"

bool ov::pass::POTTransformations::run_on_model(const std::shared_ptr<ov::Model>& f) {
    ov::pass::Manager manager(get_pass_config());
    manager.register_pass<ov::pass::BidirectionalSequenceDecomposition>();
    manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
    manager.register_pass<ov::pass::GRUCellDecomposition>();
    manager.register_pass<ov::pass::LSTMCellDecomposition>();
    manager.run_passes(f);
    return false;
}
@@ -9,12 +9,6 @@

 add_subdirectory(mo)

-# POT
-
-configure_file("${CMAKE_CURRENT_SOURCE_DIR}/pot/openvino/tools/pot/version.txt.in"
-               "${CMAKE_CURRENT_SOURCE_DIR}/pot/openvino/tools/pot/version.txt" @ONLY)
-
 if(ENABLE_PYTHON)
     # Benchmark Tool
     add_subdirectory(benchmark_tool)
@@ -79,7 +79,6 @@ endfunction()

 set(INIT_FILES_TOOLS
     "${OpenVINO_SOURCE_DIR}/tools/mo/openvino/__init__.py"
-    "${OpenVINO_SOURCE_DIR}/tools/pot/openvino/__init__.py"
     "${OpenVINO_SOURCE_DIR}/tools/openvino_dev/src/openvino/__init__.py")

 ov_check_init_files_alignment("${INIT_FILES_TOOLS}")
@@ -52,13 +52,6 @@ PKG_INSTALL_CFG = {
         'extract_entry_points': True,
         'extract_extras': True,
     },
-    "pot": {
-        'src_dir': OPENVINO_DIR / 'tools' / 'pot',
-        'black_list': ['*tests*'],
-        'prefix': 'pot',
-        'extract_entry_points': True,
-        'extract_requirements': True,
-    },
 }

110  tools/pot/.gitignore  vendored
@@ -1,110 +0,0 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# PyCharm
.idea

# snapshots
*.tar

# version file
version.txt
@@ -1,29 +0,0 @@
[MASTER]
disable = fixme,
          invalid-name,
          missing-docstring,
          no-self-use,
          too-few-public-methods,
          too-many-arguments,
          too-many-locals
max-attributes=20
max-line-length = 120
ignore-docstrings = yes
ignored-modules = mo,accuracy_checker,extensions,openvino.inference_engine,cv2,open_model_zoo.model_tools._configuration,open_model_zoo.model_tools._common
ignore-patterns = ac_imports.py
extension-pkg-whitelist = numpy

[SIMILARITIES]
min-similarity-lines = 19
ignore-imports = yes

[BASIC]
good-names=logger,fn

[DESIGN]
max-statements=120
max-branches=14
max-nested-blocks=7

[OPTIONS]
generated-members=torch.*
@@ -1,5 +0,0 @@
# See help here: https://docs.gitlab.com/ee/user/project/code_owners.html

# Control 3rd-party dependencies
**/*requirements*.* openvino.configuration.mgmt@intel.com
**/setup.py openvino.configuration.mgmt@intel.com
@@ -1,58 +0,0 @@
# Post-Training Optimization Tool

## Introduction

Post-training Optimization Tool (POT) is designed to accelerate the inference of deep learning models by applying
special methods without model retraining or fine-tuning, for example, post-training 8-bit quantization. Therefore, the tool does not
require a training dataset or a training pipeline. To apply post-training algorithms from the POT, you need:
* A floating-point precision model, FP32 or FP16, converted into the OpenVINO™ Intermediate Representation (IR) format
and runnable on CPU with OpenVINO™.
* A representative calibration dataset that reflects a use-case scenario, for example, 300 samples.

The figure below shows the optimization workflow:


To get started with POT, refer to the corresponding OpenVINO™ [documentation](https://docs.openvino.ai/2023.2/openvino_docs_model_optimization_guide.html).

## Installation
### From PyPI
POT is distributed as a part of the OpenVINO™ Development Tools package. For installation instructions, refer to this [document](https://docs.openvino.ai/2023.2/openvino_docs_install_guides_install_dev_tools.html).

### From GitHub
As prerequisites, you should install [OpenVINO™ Runtime](https://docs.openvino.ai/2023.2/openvino_docs_install_guides_overview.html) and other dependencies such as [Model Optimizer](https://docs.openvino.ai/2023.2/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) and [Accuracy Checker](https://docs.openvino.ai/2023.2/omz_tools_accuracy_checker.html).

To install POT from source:
- Clone the OpenVINO repository
```sh
git clone --recursive https://github.com/openvinotoolkit/openvino.git
```
- Navigate to the `openvino/tools/pot/` folder
- Install the POT package:
```sh
python3 setup.py install
```

After installation, POT is available as a Python library under `openvino.tools.pot.*` and on the command line via the `pot` alias. To verify it, run `pot -h`.
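As an illustration of the library entry points, below is a minimal `DefaultQuantization` sketch based on the documented `openvino.tools.pot` API. The model paths, the random calibration samples, and the `ImageLoader` helper are placeholders for this example, not part of the toolkit:

```python
# Minimal DefaultQuantization sketch using the openvino.tools.pot API.
# Paths, input shape, and the ImageLoader helper are hypothetical placeholders.
import numpy as np
from openvino.tools.pot import DataLoader, IEEngine, load_model, save_model, create_pipeline

class ImageLoader(DataLoader):
    """Feeds calibration samples to the engine; no annotation is needed for DefaultQuantization."""
    def __init__(self, samples):
        self._samples = samples
    def __len__(self):
        return len(self._samples)
    def __getitem__(self, index):
        return self._samples[index], None  # (data, annotation)

# 300 synthetic calibration samples just to keep the sketch self-contained.
samples = [np.random.rand(1, 3, 224, 224).astype(np.float32) for _ in range(300)]

model_config = {"model_name": "model", "model": "model.xml", "weights": "model.bin"}
algorithms = [{"name": "DefaultQuantization",
               "params": {"target_device": "ANY", "preset": "performance", "stat_subset_size": 300}}]

model = load_model(model_config)
engine = IEEngine(config={"device": "CPU"}, data_loader=ImageLoader(samples))
pipeline = create_pipeline(algorithms, engine)
compressed_model = pipeline.run(model)
save_model(compressed_model, save_path="./optimized")
```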

## Examples

OpenVINO provides several examples to demonstrate the POT optimization workflow:

* Command-line example:
  * [Quantization of Image Classification model](https://docs.openvino.ai/2023.2/pot_configs_examples_README.html)
* API tutorials:
  * [Quantization of Image Classification model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/301-tensorflow-training-openvino)
  * [Quantization of Object Detection model from Model Zoo](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/111-yolov5-quantization-migration)
  * [Quantization of Segmentation model for medical data](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/110-ct-segmentation-quantize)
  * [Quantization of BERT for Text Classification](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/105-language-quantize-bert)
* API examples:
  * [Quantization of 3D segmentation model](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/3d_segmentation)
  * [Quantization of Face Detection model](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/face_detection)
  * [Quantization of Object Detection model with controllable accuracy](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/object_detection)
  * [Quantization of speech model for GNA device](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/speech)

## See Also

* [Performance Benchmarks](https://docs.openvino.ai/2023.2/openvino_docs_performance_benchmarks.html)
@@ -1,53 +0,0 @@
# Post-training Optimization Tool

Starting with the 2020.1 version, OpenVINO™ toolkit delivers the Post-Training Optimization Tool, designed to accelerate the inference of DL models by converting them into a more hardware-friendly representation using methods that do not require re-training, for example, post-training quantization.
For more details about the low-precision flow in OpenVINO™, refer to the [Low Precision Optimization Guide](docs/LowPrecisionOptimizationGuide.md).

Post-Training Optimization Tool includes a standalone command-line tool and a Python* API that provide the following key features:

## Key features:

* Two supported post-training quantization algorithms: fast [DefaultQuantization](openvino/tools/pot/algorithms/quantization/default/README.md) and precise [AccuracyAwareQuantization](openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md), as well as multiple experimental methods.

* Symmetric and asymmetric quantization schemes. For more details, see the [Quantization](openvino/tools/pot/algorithms/quantization/README.md) section.
* Per-channel quantization for Convolutional and Fully-Connected layers.
* Multiple domains: Computer Vision, Recommendation Systems.
* Ability to implement a custom calibration pipeline via the supported [API](openvino/tools/pot/api/README.md) (a metric sketch follows this section).
* Compression for different HW targets such as CPU, GPU, NPU.
* Post-training sparsity.

## Usage

### System requirements
- Ubuntu 18.04 or later (64-bit)
- Python 3.8 or later
- OpenVINO

### Installation (Temporary)
1) Clone the openvino repo: `git clone https://github.com/openvinotoolkit/openvino`
2) Download submodules:
   ```
   git submodule init
   git submodule update
   ```
3) Set up the model conversion API.
   The model conversion API needed by POT can be set up in one of two ways:
   1. Install it with `python setup.py install` from the mo folder (`<openvino_path>/tools/mo/setup.py`)
   2. Expose it to Python by adding `<openvino_path>/tools/mo` to the PYTHONPATH environment variable.
4) Install requirements for the accuracy checker:
   - From the POT root: `cd ./thirdparty/open_model_zoo/tools/accuracy_checker`
   - Call the setup script: `python3 setup.py install`
   - Get back to the POT root dir: `cd <PATH_TO_POT_DIR>`
5) Install requirements for the tool:
   - Call the setup script: `python3 setup.py develop`

### Run
1) Prepare a configuration file for the tool based on the examples in the `configs` folder
2) Navigate to the compression tool directory
3) Launch the tool with the following command:
   `python3 main.py -c <path to config file> -e`

To test the tool, you can use the PyTorch Mobilenet_v2 model from `tests/data/models/mobilenetv2_example/mobilenetv2_example.onnx`

- If you hit import errors in ModelOptimizer, check the following first:
  - If you installed ModelOptimizer by setting the _PYTHONPATH_ variable, check the path: it should be `<openvino_path>/tools/mo`. The full command can be found in step 3 of the Installation (Temporary) guide above.
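To support a custom calibration pipeline (see the key features above), the API expects user implementations of `DataLoader` and `Metric`. Below is a minimal top-1 accuracy `Metric` sketch following the documented `openvino.tools.pot.Metric` interface; the accuracy logic itself is illustrative, not taken from this repository:

```python
# Sketch of a custom POT Metric (top-1 accuracy); interface per openvino.tools.pot docs.
import numpy as np
from openvino.tools.pot import Metric

class Accuracy(Metric):
    def __init__(self):
        super().__init__()
        self._matches = []

    @property
    def value(self):
        """Metric value for the most recently processed sample."""
        return {"accuracy": [self._matches[-1]]}

    @property
    def avg_value(self):
        """Average metric value over all processed samples."""
        return {"accuracy": float(np.mean(self._matches))}

    def update(self, output, target):
        predicted = np.argmax(output[0])
        self._matches.append(float(predicted == target[0]))

    def reset(self):
        self._matches = []

    def get_attributes(self):
        return {"accuracy": {"direction": "higher-better", "type": "accuracy"}}
```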
@@ -1,2 +0,0 @@
# Copyright (C) 2020-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -1,23 +0,0 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import sys

import openvino.tools.pot.api
import openvino.tools.pot.engines
import openvino.tools.pot.graph
import openvino.tools.pot.pipeline
from openvino.tools.pot.utils.logger import get_logger


logger = get_logger(__name__)
logger.warning('Import compression is deprecated. Please use openvino.tools.pot instead')


sys.modules["compression.api"] = openvino.tools.pot.api
sys.modules["compression.engines"] = openvino.tools.pot.engines
sys.modules["compression.engines.ie_engine"] = openvino.tools.pot.engines.ie_engine
sys.modules["compression.graph"] = openvino.tools.pot.graph
sys.modules["compression.graph.model_utils"] = openvino.tools.pot.graph.model_utils
sys.modules["compression.pipeline"] = openvino.tools.pot.pipeline
sys.modules["compression.pipeline.initializer"] = openvino.tools.pot.pipeline.initializer
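This shim kept legacy `compression.*` imports working by pointing the old module names at their `openvino.tools.pot` counterparts via `sys.modules`. A minimal sketch of the effect, assuming the package above is installed:

```python
# Sketch: after `import compression`, old and new names resolve to the same modules.
import compression                                    # executes the sys.modules aliasing above
import compression.engines.ie_engine as legacy_engine
import openvino.tools.pot.engines.ie_engine as pot_engine

assert legacy_engine is pot_engine                    # identical module objects under both names
```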
@@ -1,204 +0,0 @@
{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
    },

    /* Parameters of the engine used for model inference */

    // The Post-Training Optimization Tool supports an engine based on the accuracy checker, as well as a custom engine.
    // For a custom engine, you should specify your own set of parameters.
    // The engine based on the accuracy checker uses accuracy checker parameters.
    // You can specify the parameters via an accuracy checker config file or directly in the engine section.
    // More information about accuracy checker parameters can be found here:
    // https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker

    "engine": {
        "stat_requests_number": 8, // Number of requests during statistics collection
        "eval_requests_number": 8, // Number of requests during evaluation
        "config": "<CONFIG_PATH>",

        /* OR */

        "name": "model_name",
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ],
        "datasets": [
            {
                "name": "dataset_name",
                "data_source": "<DATASET_PATH>",
                "annotation": "<ANNOTATION_PATH>",
                "preprocessing": [
                    {
                        "type": "resize",
                        "interpolation": "BILINEAR",
                        "aspect_ratio_scale": "greater",
                        "size": 224
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    }
                ]
            }
        ]
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "inplace_statistics": true, // An optional parameter that changes the statistics collection method;
                                    // it reduces the amount of memory consumed,
                                    // but increases the calibration time.
                                    // The default value is true
        "model_type": "None", // An optional parameter needed for additional patterns in the model;
                              // the default value is None (only "transformer" is supported for now)
        "algorithms": [
            {
                "name": "AccuracyAwareQuantization", // Optimization algorithm name
                "params": {
                    "ranking_subset_size": 300, // Size of the subset used to rank layers by their
                                                // contribution to the accuracy drop

                    "max_iter_num": 20, // Maximum number of iterations of the algorithm (maximum number of layers
                                        // that may be reverted back to full precision)

                    "maximal_drop": 0.005, // Maximum accuracy drop which has to be achieved after the quantization

                    "drop_type": "absolute", // Drop type of the accuracy metric: relative or absolute (default)

                    "use_prev_if_drop_increase": false, // Whether to use the NN snapshot from the previous algorithm
                                                        // iteration in case the drop increases

                    "base_algorithm": "DefaultQuantization", // Base algorithm that is used to quantize the model
                                                             // at the beginning

                    "annotation_free": false, // Whether to compute the accuracy drop on a dataset without annotation

                    "annotation_conf_threshold": 0.6, // Threshold for annotation creation in case of annotation-free
                                                      // algorithm execution. Images on which the original model predicts
                                                      // with confidence below this threshold will be skipped during
                                                      // evaluation

                    "convert_to_mixed_preset": false, // Whether to convert the model to mixed mode if
                                                      // the accuracy criteria of the symmetrically quantized
                                                      // model are not satisfied

                    // An optional list of metrics that are taken into account during optimization.
                    // If not specified, all metrics defined in the engine config are used
                    "metrics": [
                        {
                            "name": "accuracy", // Metric name to optimize
                            "baseline_value": 0.72 // Baseline metric value of the original model
                        }
                    ],

                    "metric_subset_ratio": 0.5, // Part of the validation set that is used to compare element-wise
                                                // full-precision and quantized models in case of predefined metric
                                                // values of the original model

                    "tune_hyperparams": false, // Whether to search for the best quantization parameters for the model.
                                               // This algorithm uses a grid-search engine based on a special subset of samples from the dataset

                    "ignored": {
                        // List of nodes that are excluded from optimization
                        "scope": [
                            "<NODE_NAME>"
                        ],
                        // List of types that are excluded from optimization
                        "operations": [
                            {
                                "type": "<NODE_TYPE>",
                                // Includes excluding by attributes
                                "attributes": {
                                    "<NAME>": "<VALUE>" // Lists of values are not supported
                                }
                            },
                            {
                                "type": "<NODE_TYPE>" // Excluding only by type
                            }
                        ]
                    },

                    "preset": "mixed", // A preset is a collection of optimization algorithm parameters
                                       // that specifies which metric the algorithm
                                       // should concentrate on improving. Each optimization algorithm
                                       // supports the [performance, accuracy, mixed] presets

                    "stat_subset_size": 100, // Size of the subset used to calculate activation statistics
                                             // for quantization parameters calculation

                    /* Manual specification of quantization parameters */

                    /* Quantization parameters for weights */

                    "weights": {
                        "bits": 8, // Number of quantization bits
                        "mode": "symmetric", // Quantization mode
                        "granularity": "perchannel", // Granularity: a scale for each output channel
                        "level_low": -127, // Low quantization level
                        "level_high": 127, // High quantization level

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "max": {
                                "type": "quantile",
                                "outlier_prob": 0.0001
                            }
                        }
                    },

                    /* Quantization parameters for activations */

                    "activations": {
                        "bits": 8, // Number of quantization bits
                        "mode": "asymmetric", // Quantization mode
                        "granularity": "pertensor", // Granularity: one scale for the output tensor

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "preset": "quantile",

                            /* OR */

                            /* Minimum of quantization range */

                            "min": {
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability for the estimator
                            },

                            /* Maximum of quantization range */

                            "max": {
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability for the estimator
                            }
                        }
                    }
                }
            }
        ]
    }
}
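For reference, the same algorithm section can be driven from the Python API instead of a JSON file. A minimal sketch, assuming `model` and an `IEEngine` instance `engine` (with a metric attached) have been created as in the POT API examples; only the parameters shown in the JSON above are used:

```python
# Sketch: AccuracyAwareQuantization parameters from the JSON above, passed via the POT API.
from openvino.tools.pot import create_pipeline

algorithms = [{
    "name": "AccuracyAwareQuantization",
    "params": {
        "target_device": "ANY",
        "preset": "mixed",
        "stat_subset_size": 100,
        "maximal_drop": 0.005,   # maximum allowed accuracy drop
        "max_iter_num": 20,      # at most 20 layers reverted to full precision
        "drop_type": "absolute",
    },
}]

pipeline = create_pipeline(algorithms, engine)  # `engine` must carry an accuracy metric
compressed_model = pipeline.run(model)
```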
@@ -1,84 +0,0 @@
{
    "model": {
        "model_name": "model_name",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ],
        "datasets": {
            /* Global dataset preprocessing that will be used for all datasets
               if no local configuration is specified */

            "preprocessing": [
                {
                    "type": "crop",
                    "central_fraction": 0.875
                },
                {
                    "type": "resize",
                    "size": 224
                }
            ],

            /* Dataset for statistics collection */

            "optimization": {
                "name": "classification_dataset",
                "data_source": "<DATASET_PATH>"
            },

            /* Dataset for final evaluation */

            "evaluation": {
                "name": "classification_dataset",
                "data_source": "<DATASET_PATH>",
                "annotation_conversion": {
                    "converter": "imagenet",
                    "annotation_file": "<ANNOTATION_FILE_PATH>"
                },

                /* Local preprocessing config. Overwrites the global config */

                "preprocessing": [
                    {
                        "type": "crop",
                        "central_fraction": 0.875
                    },
                    {
                        "type": "resize",
                        "size": 224
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    }
                ]
            }
        },
        "stat_requests_number": 8,
        "eval_requests_number": 8
    },
    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@@ -1,251 +0,0 @@
{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name (name of the whole cascade)

        /* List of models in the cascade */
        "cascade": [
            {
                /* The first model of the cascade */
                "name": "<FIRST_MODEL_NAME>", // Name of the first model of the cascade (should be taken from the engine section)
                "model": "<MODEL_PATH>", // Path to the first model (.xml format)
                "weights": "<PATH_TO_WEIGHTS>" // Path to the first model weights (.bin format)
            },

            /* ... */

            {
                /* The last model of the cascade */
                "name": "<LAST_MODEL_NAME>", // Name of the last model of the cascade (should be taken from the engine section)
                "model": "<MODEL_PATH>", // Path to the last model (.xml format)
                "weights": "<PATH_TO_WEIGHTS>" // Path to the last model weights (.bin format)
            }
        ]
    },

    /* Parameters of the engine used for model inference */

    // The Post-Training Optimization Tool supports an engine based on the accuracy checker, as well as a custom engine.
    // For a custom engine, you should specify your own set of parameters.
    // The engine based on the accuracy checker uses accuracy checker parameters.
    // You can specify the parameters via an accuracy checker config file or directly in the engine section.
    // More information about accuracy checker parameters can be found here:
    // https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker

    "engine": {
        /* Accuracy checker mode (default) */

        "config": "<CONFIG_PATH>",

        /* OR */

        "module": "<EVALUATOR_CORRESPONDING_TO_CURRENT_CASCADE>",
        "module_config": {
            "network_info": {
                "<FIRST_MODEL_NAME>": { // Name of the first model of the cascade (this name should be used in the model section)
                    "outputs": {
                        "probability_out": "prob1",
                        "region_out": "conv4-2"
                    },
                    "inputs": [
                        {
                            "name": "data",
                            "type": "INPUT",
                            "layout": "NCWH"
                        }
                    ],
                    "preprocessing": [
                        {
                            "type": "bgr_to_rgb"
                        },
                        {
                            "type": "pyramid",
                            "min_size": 10,
                            "factor": 0.79
                        }
                    ]
                },

                /* ... */

                "<LAST_MODEL_NAME>": { // Name of the last model of the cascade (this name should be used in the model section)
                    "outputs": {
                        "probability_out": "prob1",
                        "region_out": "conv6-2"
                    },
                    "inputs": [
                        {
                            "name": "data",
                            "type": "INPUT",
                            "layout": "NCWH"
                        }
                    ],
                    "preprocessing": [
                        {
                            "type": "bgr_to_rgb"
                        }
                    ]
                }
            },
            "launchers": [
                {
                    "framework": "dlsdk"
                }
            ],
            "datasets": [
                {
                    "name": "<DATASET_NAME>",
                    "data_source": "<DATASET_PATH>",
                    "postprocessing": [
                        {
                            "type": "filter",
                            "apply_to": "prediction",
                            "is_empty": true
                        },
                        {
                            "type": "filter",
                            "height_range": 60,
                            "apply_to": "annotation"
                        }
                    ],
                    "metrics": [
                        {
                            "type": "map",
                            "ignore_difficult": true,
                            "include_boundaries": true,
                            "allow_multiple_matches_per_ignored": true,
                            "distinct_conf": false
                        }
                    ]
                }
            ]
        }
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "DefaultQuantization", // Optimization algorithm name
                "params": {
                    "ignored": {
                        /* Ignored block for the first model of the cascade */
                        "<FIRST_MODEL_NAME>": {
                            // List of nodes that are excluded from optimization
                            "scope": [
                                "<NODE_NAME>"
                            ],
                            // List of types that are excluded from optimization
                            "operations": [
                                {
                                    "type": "<NODE_TYPE>",
                                    // Includes excluding by attributes
                                    "attributes": {
                                        "<NAME>": "<VALUE>" // Lists of values are not supported
                                    }
                                },
                                {
                                    "type": "<NODE_TYPE>" // Excluding only by type
                                }
                            ]
                        },

                        /* ... */

                        /* Ignored block for the last model of the cascade */
                        "<LAST_MODEL_NAME>": {
                            // List of nodes that are excluded from optimization
                            "scope": [
                                "<NODE_NAME>"
                            ],
                            // List of types that are excluded from optimization
                            "operations": [
                                {
                                    "type": "<NODE_TYPE>",
                                    // Includes excluding by attributes
                                    "attributes": {
                                        "<NAME>": "<VALUE>" // Lists of values are not supported
                                    }
                                },
                                {
                                    "type": "<NODE_TYPE>" // Excluding only by type
                                }
                            ]
                        }
                    },

                    "preset": "accuracy", // A preset is a collection of optimization algorithm parameters
                                          // that specifies which metric the algorithm
                                          // should concentrate on improving. Each optimization algorithm
                                          // supports the [performance, mixed, accuracy] presets

                    "stat_subset_size": 100, // Size of the subset used to calculate activation statistics
                                             // for quantization parameters calculation

                    /* Manual specification of quantization parameters */

                    /* Quantization parameters for weights */

                    "weights": {
                        "bits": 8, // Number of quantization bits
                        "mode": "symmetric", // Quantization mode
                        "granularity": "perchannel", // Granularity: a scale for each output channel
                        "level_low": -127, // Low quantization level
                        "level_high": 127, // High quantization level

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "max": {
                                "type": "quantile",
                                "outlier_prob": 0.0001
                            }
                        }
                    },

                    /* Quantization parameters for activations */

                    "activations": {
                        "bits": 8, // Number of quantization bits
                        "mode": "asymmetric", // Quantization mode
                        "granularity": "pertensor", // Granularity: one scale for the output tensor

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "preset": "quantile",

                            /* OR */

                            /* Minimum of quantization range */

                            "min": {
                                "clipping_value": 0, // Threshold for min statistic value clipping (lower bound)
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability for the estimator
                            },

                            /* Maximum of quantization range */

                            "max": {
                                "clipping_value": 6, // Threshold for max statistic value clipping (upper bound)
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability for the estimator
                            }
                        }
                    }
                }
            }
        ]
    }
}
@@ -1,182 +0,0 @@
{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
    },

    /* Parameters of the engine used for model inference */

    // The Post-Training Optimization Tool supports an engine based on the accuracy checker, as well as a custom engine.
    // For a custom engine, you should specify your own set of parameters.
    // The engine based on the accuracy checker uses accuracy checker parameters.
    // You can specify the parameters via an accuracy checker config file or directly in the engine section.
    // More information about accuracy checker parameters can be found here:
    // https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker

    "engine": {
        /* Accuracy checker mode (default) */

        "stat_requests_number": 8, // Number of requests during statistics collection
        "eval_requests_number": 8, // Number of requests during evaluation
        "config": "<CONFIG_PATH>",

        /* OR */

        "name": "model_name",
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ],
        "datasets": [
            {
                "name": "dataset_name",
                "data_source": "<DATASET_PATH>",
                "annotation": "<ANNOTATION_PATH>",
                "preprocessing": [
                    {
                        "type": "resize",
                        "interpolation": "BILINEAR",
                        "aspect_ratio_scale": "greater",
                        "size": 224
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    }
                ]
            }
        ],

        /* OR */

        /* Simplified mode */

        "type": "simplified", // OR the default value "type": "accuracy_checker" for non-simplified mode

        "data_source": "PATH_TO_SOURCE" // You can specify a path to a directory with images. You can also
                                        // specify a template for file names to filter the images to load.
                                        // Templates are Unix style (this option is valid only in simplified mode)
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization

        "model_type": "None", // An optional parameter needed for additional patterns in the model;
                              // the default value is None (only "transformer" is supported for now)
        "dump_intermediate_model": false, // Save intermediate models for the Default algorithm
        "inplace_statistics": true, // An optional parameter that changes the statistics collection method;
                                    // it reduces the amount of memory consumed,
                                    // but increases the calibration time.
                                    // The default value is true
        "algorithms": [
            {
                "name": "DefaultQuantization", // Optimization algorithm name
                "params": {
                    "ignored": {
                        // List of nodes that are excluded from optimization
                        "scope": [
                            "<NODE_NAME>"
                        ],
                        // List of types that are excluded from optimization
                        "operations": [
                            {
                                "type": "<NODE_TYPE>",
                                // Includes excluding by attributes
                                "attributes": {
                                    "<NAME>": "<VALUE>" // Lists of values are not supported
                                }
                            },
                            {
                                "type": "<NODE_TYPE>" // Excluding only by type
                            }
                        ]
                    },

                    "preset": "mixed", // A preset is a collection of optimization algorithm parameters
                                       // that specifies which metric the algorithm
                                       // should concentrate on improving. Each optimization algorithm
                                       // supports the [performance, mixed, accuracy] presets

                    "stat_subset_size": 100, // Size of the subset used to calculate activation statistics
                                             // for quantization parameters calculation

                    "shuffle_data": false, // Shuffle data before selecting the subset to calculate activation
                                           // statistics. An optional parameter; the default value is false

                    "seed": 0, // Seed for data shuffle. An optional parameter; the default value is 0

                    /* Manual specification of quantization parameters */

                    /* Quantization parameters for weights */

                    "weights": {
                        "bits": 8, // Number of quantization bits
                        "mode": "symmetric", // Quantization mode
                        "granularity": "perchannel", // Granularity: a scale for each output channel
                        "level_low": -127, // Low quantization level
                        "level_high": 127, // High quantization level

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "max": {
                                "type": "quantile",
                                "outlier_prob": 0.0001
                            }
                        }
                    },

                    /* Quantization parameters for activations */

                    "activations": {
                        "bits": 8, // Number of quantization bits
                        "mode": "symmetric", // Quantization mode
                        "granularity": "pertensor", // Granularity: one scale for the output tensor

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "preset": "quantile",

                            /* OR */

                            /* Minimum of quantization range */

                            "min": {
                                "clipping_value": 0, // Threshold for min statistic value clipping (lower bound)
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability for the estimator
                            },

                            /* Maximum of quantization range */

                            "max": {
                                "clipping_value": 6, // Threshold for max statistic value clipping (upper bound)
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability for the estimator
                            }
                        }
                    }
                }
            }
        ]
    }
}
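The `engine` section above corresponds to the `IEEngine` configuration in the Python API. A minimal sketch, assuming a `data_loader` implemented as in the POT API examples; the key names mirror the engine parameters shown above:

```python
# Sketch: the engine section above, expressed as an IEEngine config in the Python API.
from openvino.tools.pot import IEEngine

engine_config = {
    "device": "CPU",
    "stat_requests_number": 8,  # requests used during statistics collection
    "eval_requests_number": 8,  # requests used during evaluation
}
# `data_loader` as in the ImageLoader sketch earlier; `metric` is optional and only
# needed by accuracy-aware algorithms.
engine = IEEngine(config=engine_config, data_loader=data_loader, metric=None)
```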
@@ -1,54 +0,0 @@
models:
  - name: bert_base_squad1_1
    launchers:
      - framework: dlsdk
        device: cpu
        batch: 1
        adapter:
          type: bert_question_answering
          start_token_logits_output: "unstack/Squeeze_"
          end_token_logits_output: "unstack/Split.1"
        mo_params:
          data_type: FP32
          input: input_ids_1,input_mask_1,segment_ids_1
          input_shape: "[1,384], [1,384], [1,384]"
          output: unstack/Squeeze_,unstack/Split.1
        mo_flags:
          - disable_nhwc_to_nchw
        inputs:
          - name: "input_ids_1"
            type: INPUT
            value: "input_ids"
            precision: I32
          - name: "input_mask_1"
            type: INPUT
            value: 'input_mask'
          - name: "segment_ids_1"
            type: INPUT
            value: 'segment_ids'
            precision: I32

    datasets:
      - name: squad
        data_source: <PATH_TO_DATASET>/squad1.1/
        annotation: <PATH_TO_DATASET>/squad.pickle
        reader:
          type: annotation_features_extractor
          features:
            - input_ids
            - input_mask
            - segment_ids
        postprocessing:
          - type: extract_answers_tokens
            max_answer: 30
            n_best_size: 20
        metrics:
          - name: 'F1'
            type: 'f1'
            reference: 88.57
            threshold: 0.01

          - name: 'EM'
            type: 'exact_match'
            reference: 81.25
            threshold: 0.01
@@ -1,54 +0,0 @@
models:
  - name: bert_large_squad1_1
    launchers:
      - framework: dlsdk
        device: cpu
        batch: 1
        adapter:
          type: bert_question_answering
          start_token_logits_output: "unstack/Squeeze_"
          end_token_logits_output: "unstack/Split.1"
        mo_params:
          data_type: FP32
          input: input_ids_1,input_mask_1,segment_ids_1
          input_shape: "[1,384], [1,384], [1,384]"
          output: unstack/Squeeze_,unstack/Split.1
        mo_flags:
          - disable_nhwc_to_nchw
        inputs:
          - name: "input_ids_1"
            type: INPUT
            value: "input_ids"
            precision: I32
          - name: "input_mask_1"
            type: INPUT
            value: 'input_mask'
          - name: "segment_ids_1"
            type: INPUT
            value: 'segment_ids'
            precision: I32

    datasets:
      - name: squad
        data_source: <PATH_TO_DATASET>/squad1.1/
        annotation: <PATH_TO_DATASET>/squad.pickle
        reader:
          type: annotation_features_extractor
          features:
            - input_ids
            - input_mask
            - segment_ids
        postprocessing:
          - type: extract_answers_tokens
            max_answer: 30
            n_best_size: 20
        metrics:
          - name: 'F1'
            type: 'f1'
            reference: 90.63
            threshold: 0.05

          - name: 'EM'
            type: 'exact_match'
            reference: 83.59
            threshold: 0.05
@@ -1,29 +0,0 @@
models:
  - name: DensNet-121

    launchers:
      - framework: dlsdk
        device: CPU
        adapter: classification

    datasets:
      - name: classification_dataset
        data_source: <PATH TO VALIDATION DATASET>
        annotation_conversion:
          converter: imagenet
          annotation_file: <PATH TO ANNOTATION FILE>

        preprocessing:
          - type: resize
            size: 256
          - type: crop
            size: 224

        metrics:
          - name: accuracy@top1
            type: accuracy
            top_k: 1

          - name: accuracy@top5
            type: accuracy
            top_k: 5
@@ -1,29 +0,0 @@
models:
  - name: east
    launchers:
      - framework: dlsdk
        device: CPU
        allow_reshape_input: True
        adapter:
          type: east_text_detection
          score_map_out: feature_fusion/Conv_7/Sigmoid
          geometry_map_out: feature_fusion/concat_3

    datasets:
      - name: ICDAR2015
        data_source: <PATH_TO_DATASET>
        annotation_conversion:
          converter: icdar_detection
          data_dir: <PATH_TO_GROUND_TRUTH>

        preprocessing:
          - type: resize
            dst_height: 2400
            dst_width: 32
            aspect_ratio_scale: east_keep_aspect_ratio
          - type: bgr_to_rgb

        metrics:
          - type: incidental_text_hmean
            name: f-measure
            ignore_difficult: True
@@ -1,37 +0,0 @@
models:
  - name: faster_rcnn_resnet101_coco
    launchers:
      - framework: dlsdk
        device: CPU
        adapter: ssd
        inputs:
          - name: image_info
            type: CONST_INPUT
            value: [[600, 1024, 1]]
    datasets:
      - name: ms_coco_detection_91_classes
        annotation_conversion:
          converter: mscoco_detection
          annotation_file: <ANNOTATION_PATH>/instances_val2017.json
          has_background: True
          sort_annotations: True
          use_full_label_map: True
        data_source: <DATA_PATH>/val2017
        preprocessing:
          - type: resize
            aspect_ratio_scale: fit_to_window
            dst_height: 600
            dst_width: 1024
          - type: padding
            dst_height: 600
            dst_width: 1024
            pad_type: right_bottom

        postprocessing:
          - type: faster_rcnn_postprocessing_resize
            dst_height: 600
            dst_width: 1024

        metrics:
          - type: coco_precision
            max_detections: 100
@@ -1,37 +0,0 @@
models:
  - name: faster_rcnn_resnet50_coco
    launchers:
      - framework: dlsdk
        device: CPU
        adapter: ssd
        inputs:
          - name: image_info
            type: CONST_INPUT
            value: [[600, 1024, 1]]
    datasets:
      - name: ms_coco_detection_91_classes
        annotation_conversion:
          converter: mscoco_detection
          annotation_file: <ANNOTATION_PATH>/instances_val2017.json
          has_background: True
          sort_annotations: True
          use_full_label_map: True
        data_source: <DATA_PATH>/val2017
        preprocessing:
          - type: resize
            aspect_ratio_scale: fit_to_window
            dst_height: 600
            dst_width: 1024
          - type: padding
            dst_height: 600
            dst_width: 1024
            pad_type: right_bottom

        postprocessing:
          - type: faster_rcnn_postprocessing_resize
            dst_height: 600
            dst_width: 1024

        metrics:
          - type: coco_precision
            max_detections: 100
@@ -1,40 +0,0 @@
models:
  - name: mask_rcnn_resnet50_atrous_coco

    launchers:
      - framework: dlsdk
        tags:
          - FP32
        adapter:
          type: mask_rcnn
          detection_out: reshape_do_2d
          raw_masks_out: masks
        inputs:
          - name: image_info
            type: CONST_INPUT
            value: [[800, 1365, 1]]
    datasets:
      - name: ms_coco_mask_rcnn_short_91_classes
        annotation_conversion:
          converter: mscoco_mask_rcnn
          annotation_file: <ANNOTATION_PATH>/instances_val2017.json
          has_background: True
          sort_annotations: True
          use_full_label_map: True
        data_source: <DATA_PATH>/val2017
        preprocessing:
          - type: resize
            aspect_ratio_scale: fit_to_window
            dst_height: 800
            dst_width: 1365
          - type: padding
            dst_height: 800
            dst_width: 1365
            pad_type: right_bottom
        postprocessing:
          - type: faster_rcnn_postprocessing_resize
            dst_height: 800
            dst_width: 1365
        metrics:
          - type: coco_orig_segm_precision
          - type: coco_orig_precision
@@ -1,31 +0,0 @@
models:
  - name: MobileNet_v1

    launchers:
      - framework: dlsdk
        device: CPU
        adapter: classification

    datasets:
      - name: classification_dataset
        data_source: <PATH TO VALIDATION DATASET>
        annotation_conversion:
          converter: imagenet
          annotation_file: <PATH TO ANNOTATION FILE>
          has_background: true

        preprocessing:
          - type: resize
            size: 256
            aspect_ratio_scale: greater
          - type: crop
            size: 224

        metrics:
          - name: accuracy@top1
            type: accuracy
            top_k: 1

          - name: accuracy@top5
            type: accuracy
            top_k: 5
@@ -1,34 +0,0 @@
models:
  - name: MobileNet_v2

    launchers:
      - framework: dlsdk
        device: CPU
        adapter: classification

    datasets:
      - name: classification_dataset
        data_source: <PATH TO VALIDATION DATASET>
        annotation_conversion:
          converter: imagenet
          annotation_file: <PATH TO ANNOTATION FILE>
        reader: pillow_imread

        preprocessing:
          - type: resize
            size: 256
            aspect_ratio_scale: greater
            use_pillow: True
          - type: crop
            size: 224
            use_pillow: True
          - type: bgr_to_rgb

        metrics:
          - name: accuracy@top1
            type: accuracy
            top_k: 1

          - name: accuracy@top5
            type: accuracy
            top_k: 5
@@ -1,73 +0,0 @@
evaluations:
  - name: mtcnn
    module: custom_evaluators.mtcnn_evaluator.MTCNNEvaluator
    module_config:
      network_info:
        pnet:
          outputs:
            probability_out: prob1
            region_out: conv4-2
          inputs:
            - name: data
              type: INPUT
              layout: NCWH
          preprocessing:
            - type: bgr_to_rgb
            - type: pyramid
              min_size: 10
              factor: 0.79

        rnet:
          outputs:
            probability_out: prob1
            region_out: conv5-2
          inputs:
            - name: data
              type: INPUT
              layout: NCWH
          preprocessing:
            - type: bgr_to_rgb

        onet:
          outputs:
            probability_out: prob1
            region_out: conv6-2
          inputs:
            - name: data
              type: INPUT
              layout: NCWH
          preprocessing:
            - type: bgr_to_rgb

      launchers:
        - framework: dlsdk
          device: CPU

      datasets:
        - name: wider
          data_source: <PATH_TO_DATASET>/WIDER_val/images
          annotation_conversion:
            converter: wider
            annotation_file: <PATH_TO_DATASET>/wider_face_split/wider_face_val_bbx_gt.txt

          postprocessing:
            - type: filter
              apply_to: prediction
              is_empty: True
            - type: filter
              height_range: 60
              apply_to: annotation

          metrics:
            - type: recall
              ignore_difficult: True
              include_boundaries: True
              allow_multiple_matches_per_ignored: True
              distinct_conf: False

            - type: map
              ignore_difficult: True
              include_boundaries: True
              allow_multiple_matches_per_ignored: True
              distinct_conf: False
@@ -1,38 +0,0 @@
models:
  - name: NCF

    launchers:
      - framework: dlsdk
        device: CPU
        adapter: hit_ratio_adapter
        batch: 2048
        inputs:
          - type: INPUT
            value: "u"
            name: embedding/embedding_lookup/placeholder_port_1
          - type: INPUT
            value: "i"
            name: embedding_1/embedding_lookup/placeholder_port_1
          - type: INPUT
            value: "u"
            name: embedding_2/embedding_lookup/placeholder_port_1
          - type: INPUT
            value: "i"
            name: embedding_3/embedding_lookup/placeholder_port_1
        allow_reshape_input: True

    datasets:
      - name: ncf_dataset
        data_source: ncf
        annotation: <EXISTING FOLDER PATH TO STORE ANNOTATION FILES>/ncf_converter.pickle
        dataset_meta: <EXISTING FOLDER PATH TO STORE ANNOTATION FILES>/ncf_converter.json
        annotation_conversion:
          converter: movie_lens_converter
          rating_file: <PATH TO RATING FILE>/ml-20m-test-ratings.csv
          negative_file: <PATH TO NEGATIVE FILE>/ml-20m-test-negative.csv
          users_max_number: 2048
        reader: ncf_data_reader

        metrics:
          - type: hit_ratio
          - type: ndcg
@@ -1,31 +0,0 @@
models:
  - name: ssd-mobilenetv1

    launchers:
      - framework: dlsdk
        device: CPU
        adapter: ssd
        batch: 1

    datasets:
      - name: classification_dataset
        data_source: <PATH TO VALIDATION DATASET>/VOC2007/JPEGImages
        annotation_conversion:
          converter: "voc_detection"
          annotations_dir: <PATH TO VALIDATION DATASET>/VOC2007/Annotations
          images_dir: <PATH TO VALIDATION DATASET>/VOC2007/JPEGImages
          imageset_file: <PATH TO VALIDATION DATASET>/VOC2007/ImageSets/Main/test.txt
          has_background: true

        preprocessing:
          - type: resize
            size: 300

        postprocessing:
          - type: resize_prediction_boxes

        metrics:
          - type: map
            integral: 11point
            ignore_difficult: true
            presenter: print_scalar
@@ -1,37 +0,0 @@
models:
  - name: SSD_ResNet34
    launchers:
      - framework: dlsdk
        adapter:
          type: ssd_onnx
          scores_out: .*scores.*
          labels_out: .*labels.*
          bboxes_out: .*bboxes.*

    datasets:
      - name: COCO2017_80cl_bkgr
        data_source: <PATH TO VALIDATION DATASET>/val2017
        annotation_conversion:
          converter: mscoco_detection
          annotation_file: <PATH TO VALIDATION DATASET>/annotations/instances_val2017.json
          has_background: True
          use_full_label_map: False

        reader: pillow_imread
        preprocessing:
          - type: resize
            size: 1200
            use_pillow: true
            interpolation: BILINEAR

        postprocessing:
          - type: resize_prediction_boxes

        metrics:
          - type: map
            integral: 11point
            ignore_difficult: true
            presenter: print_scalar
          - type: coco_precision
          - type: coco_orig_precision
@@ -1,30 +0,0 @@
models:
  - name: ssd_resnet_50_512

    launchers:
      - framework: dlsdk
        device: CPU
        adapter: ssd

    datasets:
      - name: VOC2007_bkgr
        data_source: <PATH TO VALIDATION DATASET>/VOC2007/JPEGImages
        annotation_conversion:
          converter: voc_detection
          has_background: False
          annotations_dir: <PATH TO VALIDATION DATASET>/VOC2007/Annotations
          images_dir: <PATH TO VALIDATION DATASET>/VOC2007/JPEGImages
          imageset_file: <PATH TO VALIDATION DATASET>/VOC2007/ImageSets/Main/test.txt

        preprocessing:
          - type: resize
            size: 512

        postprocessing:
          - type: resize_prediction_boxes

        metrics:
          - type: map
            integral: 11point
            ignore_difficult: True
            presenter: print_scalar
@@ -1,21 +0,0 @@
{
    "model": {
        "model_name": "densnet-121",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/densnet_121.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@@ -1,60 +0,0 @@
{
    "model": {
        "model_name": "inceptionv3",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "datasets": [
            {
                "name": "imagenet_1001_classes",
                "data_source": "PATH_TO_DATASET",
                "annotation_conversion": {
                    "annotation_file": "PATH_TO_ANNOTATION_FILE",
                    "has_background": true,
                    "converter": "imagenet"
                },
                "preprocessing": [
                    {
                        "type": "crop",
                        "central_fraction": 0.875
                    },
                    {
                        "type": "resize",
                        "size": 299
                    }
                ],
                "metrics": [
                    {
                        "type": "accuracy",
                        "name": "accuracy@top1",
                        "top_k": 1
                    },
                    {
                        "type": "accuracy",
                        "name": "accuracy@top5",
                        "top_k": 5
                    }
                ]
            }
        ],
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -1,21 +0,0 @@
{
    "model": {
        "model_name": "mobilenetv1",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mobilenet_v1_tf.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -1,68 +0,0 @@
{
    "model": {
        "model_name": "mobilenet_v2_1.0_224",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "adapter": "classification"
            }
        ],
        "datasets": [
            {
                "name": "imagenet_1000_classes",
                "reader": "pillow_imread",
                "annotation_conversion": {
                    "converter": "imagenet",
                    "annotation_file": "PATH_TO_ANNOTATION_FILE"
                },
                "data_source": "PATH_TO_VALIDATION_IMAGES",
                "preprocessing": [
                    {
                        "type": "bgr_to_rgb"
                    },
                    {
                        "type": "resize",
                        "size": 256,
                        "aspect_ratio_scale": "greater",
                        "use_pillow": true,
                        "interpolation": "BILINEAR"
                    },
                    {
                        "type": "crop",
                        "size": 224,
                        "use_pillow": true
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    },
                    {
                        "name": "accuracy@top5",
                        "type": "accuracy",
                        "top_k": 5
                    }
                ]
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -1,21 +0,0 @@
{
    "model": {
        "model_name": "mobilenetv2",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -1,21 +0,0 @@
{
    "model": {
        "model_name": "mobilenet_v2_1.0_224",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -1,21 +0,0 @@
{
    "model": {
        "model_name": "mobilenet_v2_1.0_224",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "AccuracyAwareQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -1,25 +0,0 @@
{
    "model": {
        "model_name": "mobilenet_v2_1.0_224",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "type": "simplified",
        // You can specify a path to a directory with images or to a video file.
        // You can also specify a template for file names to filter the images to load;
        // templates are unix-style.
        "data_source": "PATH_TO_IMAGES"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -1,30 +0,0 @@
{
    "model": {
        "model_name": "mobilenet_v2_1.0_224",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            },
            {
                "name": "QuantNoiseEstimator",
                "params": {
                    "stat_subset_size": 100,
                    "mode": "full_fq_noise",
                    "type": "sqnr",
                    "results_dump_filename": "./mobilenetv2_sqnr_data.csv"
                }
            }
        ]
    }
}
@ -1,29 +0,0 @@
{
    "model": {
        "model_name": "mobilenet_v2_1.0_224",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            },
            {
                "name": "INT4MixedQuantization",
                "params": {
                    "stat_subset_size": 300,
                    "ranking_subset_size": 300,
                    "maximal_drop": 0.01
                }
            }
        ]
    }
}
@ -1,102 +0,0 @@
{
    "model": {
        "model_name": "se_resnet50",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ],
        "datasets": [
            {
                "name": "classification_dataset",
                "data_source": "<PATH_TO_DATASET>",
                "annotation_conversion": {
                    "converter": "imagenet",
                    "annotation_file": "<PATH_TO_ANNOTATION_FILE>"
                },
                "reader": "pillow_imread",
                "preprocessing": [
                    {
                        "type": "bgr_to_rgb"
                    },
                    {
                        "use_pillow": true,
                        "type": "resize",
                        "size": 256,
                        "interpolation": "BILINEAR",
                        "aspect_ratio_scale": "greater"
                    },
                    {
                        "type": "crop",
                        "size": 224,
                        "use_pillow": true
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    },
                    {
                        "name": "accuracy@top5",
                        "type": "accuracy",
                        "top_k": 5
                    }
                ]
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "MinMaxQuantization",
                "params": {
                    "target_device": "CPU",
                    "preset": "mixed",
                    "stat_subset_size": 1000,
                    "ignored": {
                        "scope": [
                            "400", "402",
                            "416", "418",
                            "432", "434",
                            "450", "452",
                            "466", "468",
                            "482", "484",
                            "498", "500",
                            "516", "518",
                            "532", "534",
                            "548", "550",
                            "564", "566",
                            "580", "582",
                            "596", "598",
                            "614", "616",
                            "630", "632",
                            "646", "648"
                        ]
                    },
                    "weights": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "perchannel"
                    },
                    "activations": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "pertensor"
                    }
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 1000
                }
            }
        ]
    }
}
@ -1,77 +0,0 @@
{
    "model": {
        "model_name": "squeezenet1_1",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ],
        "datasets": [
            {
                "name": "classification_dataset",
                "data_source": "<PATH_TO_DATASET>",
                "annotation_conversion": {
                    "converter": "imagenet",
                    "annotation_file": "<PATH_TO_ANNOTATION_FILE>",
                    "has_background": false
                },
                "reader": "pillow_imread",
                "preprocessing": [
                    {
                        "type": "bgr_to_rgb"
                    },
                    {
                        "use_pillow": true,
                        "type": "resize",
                        "size": 256,
                        "interpolation": "BILINEAR",
                        "aspect_ratio_scale": "greater"
                    },
                    {
                        "type": "crop",
                        "size": 224,
                        "use_pillow": true
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    },
                    {
                        "name": "accuracy@top5",
                        "type": "accuracy",
                        "top_k": 5
                    }
                ]
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 1000,
                    "weights": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "perchannel"
                    },
                    "activations": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "pertensor"
                    }
                }
            }
        ]
    }
}
@ -1,72 +0,0 @@
{
    "model": {
        "name": "bert_base_squad_1_1",
        "model": "<PATH_TO_MODEL>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/bert_base_squad_1_1_tf_int8.yml"
    },
    "compression": {
        "model_type": "transformer",
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "accuracy",
                    "stat_subset_size": 1000,
                    "weights": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "perchannel",
                        "level_low": -127,
                        "level_high": 127
                    },
                    "activations": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "pertensor"
                    },
                    "ignored": {
                        "scope": [
                            "bert/encoder/layer_0/output/dense/MatMul",
                            "bert/encoder/layer_0/intermediate/dense/MatMul",
                            "bert/encoder/layer_0/attention/self/key/MatMul",
                            "bert/encoder/layer_0/attention/output/dense/MatMul",
                            "bert/encoder/layer_0/attention/self/MatMul",
                            "bert/encoder/layer_0/attention/self/MatMul_1",
                            "bert/encoder/layer_1/attention/self/key/MatMul",
                            "bert/encoder/layer_1/attention/self/MatMul",
                            "bert/encoder/layer_1/attention/self/MatMul_1",
                            "bert/encoder/layer_2/attention/self/MatMul",
                            "bert/encoder/layer_2/attention/self/MatMul_1",
                            "bert/encoder/layer_3/output/dense/MatMul",
                            "bert/encoder/layer_3/intermediate/dense/MatMul",
                            "bert/encoder/layer_3/attention/output/dense/MatMul",
                            "bert/encoder/layer_3/attention/self/MatMul",
                            "bert/encoder/layer_3/attention/self/MatMul_1",
                            "bert/encoder/layer_4/attention/self/value/MatMul",
                            "bert/encoder/layer_4/attention/self/MatMul",
                            "bert/encoder/layer_4/attention/self/MatMul_1",
                            "bert/encoder/layer_5/attention/self/MatMul",
                            "bert/encoder/layer_5/attention/self/MatMul_1",
                            "bert/encoder/layer_6/attention/self/MatMul",
                            "bert/encoder/layer_6/attention/self/MatMul_1",
                            "bert/encoder/layer_7/attention/self/MatMul",
                            "bert/encoder/layer_7/attention/self/MatMul_1",
                            "bert/encoder/layer_8/attention/self/MatMul",
                            "bert/encoder/layer_8/attention/self/MatMul_1",
                            "bert/encoder/layer_9/attention/self/MatMul",
                            "bert/encoder/layer_9/attention/self/MatMul_1",
                            "bert/encoder/layer_10/attention/self/MatMul",
                            "bert/encoder/layer_10/attention/self/MatMul_1",
                            "bert/encoder/layer_11/attention/self/MatMul",
                            "bert/encoder/layer_11/attention/self/MatMul_1",
                            "loss/MatMul"
                        ]
                    }
                }
            }
        ]
    }
}
@ -1,69 +0,0 @@
{
    "model": {
        "name": "bert_base_squad_1_1",
        "model": "<PATH_TO_MODEL>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/bert_base_squad_1_1_tf_int8.yml"
    },
    "compression": {
        "model_type": "transformer",
        "algorithms": [
            {
                "name": "AccuracyAwareQuantization",
                "params": {
                    "metric_subset_ratio": 1,
                    "ranking_subset_size": 300,
                    "max_iter_num": 500,
                    "maximal_drop": 0.01,
                    "drop_type": "relative",
                    "base_algorithm": "DefaultQuantization",
                    "use_prev_if_drop_increase": true,
                    "range_estimator": {
                        "preset": "default"
                    },
                    "stat_subset_size": 1000,
                    "ignored": {
                        "scope": [
                            "bert/encoder/layer_0/output/dense/MatMul",
                            "bert/encoder/layer_0/intermediate/dense/MatMul",
                            "bert/encoder/layer_0/attention/self/key/MatMul",
                            "bert/encoder/layer_0/attention/output/dense/MatMul",
                            "bert/encoder/layer_0/attention/self/MatMul",
                            "bert/encoder/layer_0/attention/self/MatMul_1",
                            "bert/encoder/layer_1/attention/self/key/MatMul",
                            "bert/encoder/layer_1/attention/self/MatMul",
                            "bert/encoder/layer_1/attention/self/MatMul_1",
                            "bert/encoder/layer_2/attention/self/MatMul",
                            "bert/encoder/layer_2/attention/self/MatMul_1",
                            "bert/encoder/layer_3/output/dense/MatMul",
                            "bert/encoder/layer_3/intermediate/dense/MatMul",
                            "bert/encoder/layer_3/attention/output/dense/MatMul",
                            "bert/encoder/layer_3/attention/self/MatMul",
                            "bert/encoder/layer_3/attention/self/MatMul_1",
                            "bert/encoder/layer_4/attention/self/value/MatMul",
                            "bert/encoder/layer_4/attention/self/MatMul",
                            "bert/encoder/layer_4/attention/self/MatMul_1",
                            "bert/encoder/layer_5/attention/self/MatMul",
                            "bert/encoder/layer_5/attention/self/MatMul_1",
                            "bert/encoder/layer_6/attention/self/MatMul",
                            "bert/encoder/layer_6/attention/self/MatMul_1",
                            "bert/encoder/layer_7/attention/self/MatMul",
                            "bert/encoder/layer_7/attention/self/MatMul_1",
                            "bert/encoder/layer_8/attention/self/MatMul",
                            "bert/encoder/layer_8/attention/self/MatMul_1",
                            "bert/encoder/layer_9/attention/self/MatMul",
                            "bert/encoder/layer_9/attention/self/MatMul_1",
                            "bert/encoder/layer_10/attention/self/MatMul",
                            "bert/encoder/layer_10/attention/self/MatMul_1",
                            "bert/encoder/layer_11/attention/self/MatMul",
                            "bert/encoder/layer_11/attention/self/MatMul_1",
                            "loss/MatMul"
                        ]
                    }
                }
            }
        ]
    }
}
@ -1,86 +0,0 @@
{
    "model": {
        "name": "bert_large_squad_1_1",
        "model": "<PATH_TO_MODEL>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/bert_large_squad_1_1_tf_int8.yml"
    },
    "compression": {
        "model_type": "transformer",
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 100,
                    "ignored": {
                        "scope": [
                            "bert/encoder/layer_0/attention/self/MatMul",
                            "bert/encoder/layer_1/attention/self/MatMul",
                            "bert/encoder/layer_2/attention/self/MatMul",
                            "bert/encoder/layer_3/attention/self/MatMul",
                            "bert/encoder/layer_4/attention/self/MatMul",
                            "bert/encoder/layer_5/attention/self/MatMul",
                            "bert/encoder/layer_6/attention/self/MatMul",
                            "bert/encoder/layer_7/attention/self/MatMul",
                            "bert/encoder/layer_8/attention/self/MatMul",
                            "bert/encoder/layer_9/attention/self/MatMul",
                            "bert/encoder/layer_10/attention/self/MatMul",
                            "bert/encoder/layer_11/attention/self/MatMul",
                            "bert/encoder/layer_12/attention/self/MatMul",
                            "bert/encoder/layer_13/attention/self/MatMul",
                            "bert/encoder/layer_14/attention/self/MatMul",
                            "bert/encoder/layer_15/attention/self/MatMul",
                            "bert/encoder/layer_16/attention/self/MatMul",
                            "bert/encoder/layer_17/attention/self/MatMul",
                            "bert/encoder/layer_18/attention/self/MatMul",
                            "bert/encoder/layer_19/attention/self/MatMul",
                            "bert/encoder/layer_20/attention/self/MatMul",
                            "bert/encoder/layer_21/attention/self/MatMul",
                            "bert/encoder/layer_22/attention/self/MatMul",
                            "bert/encoder/layer_23/attention/self/MatMul",
                            "bert/encoder/layer_23/attention/self/MatMul_1",
                            "bert/encoder/layer_22/attention/self/MatMul_1",
                            "bert/encoder/layer_21/attention/self/MatMul_1",
                            "bert/encoder/layer_20/attention/self/MatMul_1",
                            "bert/encoder/layer_19/attention/self/MatMul_1",
                            "bert/encoder/layer_18/attention/self/MatMul_1",
                            "bert/encoder/layer_17/attention/self/MatMul_1",
                            "bert/encoder/layer_16/attention/self/MatMul_1",
                            "bert/encoder/layer_15/attention/self/MatMul_1",
                            "bert/encoder/layer_14/attention/self/MatMul_1",
                            "bert/encoder/layer_13/attention/self/MatMul_1",
                            "bert/encoder/layer_12/attention/self/MatMul_1",
                            "bert/encoder/layer_11/attention/self/MatMul_1",
                            "bert/encoder/layer_10/attention/self/MatMul_1",
                            "bert/encoder/layer_9/attention/self/MatMul_1",
                            "bert/encoder/layer_8/attention/self/MatMul_1",
                            "bert/encoder/layer_7/attention/self/MatMul_1",
                            "bert/encoder/layer_6/attention/self/MatMul_1",
                            "bert/encoder/layer_5/attention/self/MatMul_1",
                            "bert/encoder/layer_4/attention/self/MatMul_1",
                            "bert/encoder/layer_3/attention/self/MatMul_1",
                            "bert/encoder/layer_2/attention/self/MatMul_1",
                            "bert/encoder/layer_1/attention/self/MatMul_1",
                            "bert/encoder/layer_0/attention/self/MatMul_1",
                            "bert/encoder/layer_0/attention/output/dense/MatMul",
                            "bert/encoder/layer_3/attention/output/dense/MatMul",
                            "bert/encoder/layer_4/attention/self/value/MatMul",
                            "bert/encoder/layer_0/attention/self/key/MatMul",
                            "bert/encoder/layer_1/attention/self/key/MatMul",
                            "bert/encoder/layer_0/intermediate/dense/MatMul",
                            "bert/encoder/layer_0/output/dense/MatMul",
                            "bert/encoder/layer_3/intermediate/dense/MatMul",
                            "bert/encoder/layer_3/output/dense/MatMul",
                            "bert/encoder/layer_7/attention/self/key/MatMul"
                        ]
                    }
                }
            }
        ]
    }
}
@ -1,87 +0,0 @@
{
    "model": {
        "name": "bert_large_squad_1_1",
        "model": "<PATH_TO_MODEL>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/bert_large_squad_1_1_tf_int8.yml"
    },
    "compression": {
        "target_device": "CPU",
        "model_type": "transformer",
        "algorithms": [
            {
                "name": "AccuracyAwareQuantization",
                "params": {
                    "max_iter_num": 500,
                    "stat_subset_size": 100,
                    "ignored": {
                        "scope": [
                            "bert/encoder/layer_0/attention/self/MatMul",
                            "bert/encoder/layer_1/attention/self/MatMul",
                            "bert/encoder/layer_2/attention/self/MatMul",
                            "bert/encoder/layer_3/attention/self/MatMul",
                            "bert/encoder/layer_4/attention/self/MatMul",
                            "bert/encoder/layer_5/attention/self/MatMul",
                            "bert/encoder/layer_6/attention/self/MatMul",
                            "bert/encoder/layer_7/attention/self/MatMul",
                            "bert/encoder/layer_8/attention/self/MatMul",
                            "bert/encoder/layer_9/attention/self/MatMul",
                            "bert/encoder/layer_10/attention/self/MatMul",
                            "bert/encoder/layer_11/attention/self/MatMul",
                            "bert/encoder/layer_12/attention/self/MatMul",
                            "bert/encoder/layer_13/attention/self/MatMul",
                            "bert/encoder/layer_14/attention/self/MatMul",
                            "bert/encoder/layer_15/attention/self/MatMul",
                            "bert/encoder/layer_16/attention/self/MatMul",
                            "bert/encoder/layer_17/attention/self/MatMul",
                            "bert/encoder/layer_18/attention/self/MatMul",
                            "bert/encoder/layer_19/attention/self/MatMul",
                            "bert/encoder/layer_20/attention/self/MatMul",
                            "bert/encoder/layer_21/attention/self/MatMul",
                            "bert/encoder/layer_22/attention/self/MatMul",
                            "bert/encoder/layer_23/attention/self/MatMul",
                            "bert/encoder/layer_23/attention/self/MatMul_1",
                            "bert/encoder/layer_22/attention/self/MatMul_1",
                            "bert/encoder/layer_21/attention/self/MatMul_1",
                            "bert/encoder/layer_20/attention/self/MatMul_1",
                            "bert/encoder/layer_19/attention/self/MatMul_1",
                            "bert/encoder/layer_18/attention/self/MatMul_1",
                            "bert/encoder/layer_17/attention/self/MatMul_1",
                            "bert/encoder/layer_16/attention/self/MatMul_1",
                            "bert/encoder/layer_15/attention/self/MatMul_1",
                            "bert/encoder/layer_14/attention/self/MatMul_1",
                            "bert/encoder/layer_13/attention/self/MatMul_1",
                            "bert/encoder/layer_12/attention/self/MatMul_1",
                            "bert/encoder/layer_11/attention/self/MatMul_1",
                            "bert/encoder/layer_10/attention/self/MatMul_1",
                            "bert/encoder/layer_9/attention/self/MatMul_1",
                            "bert/encoder/layer_8/attention/self/MatMul_1",
                            "bert/encoder/layer_7/attention/self/MatMul_1",
                            "bert/encoder/layer_6/attention/self/MatMul_1",
                            "bert/encoder/layer_5/attention/self/MatMul_1",
                            "bert/encoder/layer_4/attention/self/MatMul_1",
                            "bert/encoder/layer_3/attention/self/MatMul_1",
                            "bert/encoder/layer_2/attention/self/MatMul_1",
                            "bert/encoder/layer_1/attention/self/MatMul_1",
                            "bert/encoder/layer_0/attention/self/MatMul_1",
                            "bert/encoder/layer_0/attention/output/dense/MatMul",
                            "bert/encoder/layer_3/attention/output/dense/MatMul",
                            "bert/encoder/layer_4/attention/self/value/MatMul",
                            "bert/encoder/layer_0/attention/self/key/MatMul",
                            "bert/encoder/layer_1/attention/self/key/MatMul",
                            "bert/encoder/layer_0/intermediate/dense/MatMul",
                            "bert/encoder/layer_0/output/dense/MatMul",
                            "bert/encoder/layer_3/intermediate/dense/MatMul",
                            "bert/encoder/layer_3/output/dense/MatMul",
                            "bert/encoder/layer_7/attention/self/key/MatMul"
                        ]
                    }
                }
            }
        ]
    }
}
@ -1,34 +0,0 @@
{
    "model": {
        "model_name": "faster_rcnn_resnet101_coco",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/faster_rcnn_resnet101_coco.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300,
                    "ignored": {
                        "scope": [
                            "proposals/conv"
                        ]
                    },
                    "activations": {
                        "range_estimator": {
                            "max": {
                                "aggregator": "max",
                                "type": "abs_max"
                            }
                        }
                    }
                }
            }
        ]
    }
}
@ -1,34 +0,0 @@
{
    "model": {
        "model_name": "faster_rcnn_resnet50_coco",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/faster_rcnn_resnet50_coco.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300,
                    "ignored": {
                        "scope": [
                            "proposals/conv"
                        ]
                    },
                    "activations": {
                        "range_estimator": {
                            "max": {
                                "aggregator": "max",
                                "type": "abs_max"
                            }
                        }
                    }
                }
            }
        ]
    }
}
@ -1,32 +0,0 @@
{
    "model": {
        "model_name": "mask_rcnn_resnet50_atrous_coco",
        "model": "<MODEL_PATH>",
        "weights": "<WEIGHTS_PATH>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mask_rcnn_resnet50_atrous_coco.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300,
                    "ignored": {
                        "scope": [
                            "FirstStageFeatureExtractor/resnet_v1_50/resnet_v1_50/block1/unit_1/bottleneck_v1/add",
                            "proposals/conv",
                            "proposals/reshape_4d",
                            "SecondStageFeatureExtractor_1/resnet_v1_50/block4/unit_1/bottleneck_v1/shortcut/Conv2D",
                            "SecondStageFeatureExtractor_1/resnet_v1_50/block4/unit_1/bottleneck_v1/conv1/Conv2D",
                            "SecondStageFeatureExtractor/resnet_v1_50/block4/unit_1/bottleneck_v1/shortcut/Conv2D",
                            "SecondStageFeatureExtractor/resnet_v1_50/block4/unit_1/bottleneck_v1/conv1/Conv2D"
                        ]
                    }
                }
            }
        ]
    }
}
@ -1,36 +0,0 @@
{
    "model": {
        "model_name": "mtcnn",
        "cascade": [
            {
                "name": "pnet",
                "model": "<MODEL_PATH>",
                "weights": "<PATH_TO_WEIGHTS>"
            },
            {
                "name": "rnet",
                "model": "<MODEL_PATH>",
                "weights": "<PATH_TO_WEIGHTS>"
            },
            {
                "name": "onet",
                "model": "<MODEL_PATH>",
                "weights": "<PATH_TO_WEIGHTS>"
            }
        ]
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mtcnn.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -1,21 +0,0 @@
{
    "model": {
        "model_name": "mobilenet_ssd",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -1,59 +0,0 @@
{
    "model": {
        "model_name": "mobilenet-ssd",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "ssd"
            }
        ],
        "datasets": [
            {
                "name": "VOC2007",
                "data_source": "<PATH TO VALIDATION DATASET>/VOC2007/JPEGImages",
                "annotation_conversion": {
                    "converter": "voc_detection",
                    "annotations_dir": "<PATH TO VALIDATION DATASET>/VOC2007/Annotations",
                    "images_dir": "<PATH TO VALIDATION DATASET>/VOC2007/JPEGImages",
                    "imageset_file": "<PATH TO VALIDATION DATASET>/VOC2007/ImageSets/Main/test.txt"
                },
                "preprocessing": [
                    {
                        "type": "resize",
                        "size": 300
                    }
                ],
                "postprocessing": [
                    {
                        "type": "resize_prediction_boxes"
                    }
                ],
                "metrics": [
                    {
                        "type": "map",
                        "integral": "11point",
                        "ignore_difficult": true,
                        "presenter": "print_scalar"
                    }
                ]
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "MinMaxQuantization",
                "params": {
                    "preset": "mixed",
                    "range_estimator": {
                        "preset": "quantile"
                    },
                    "stat_subset_size": 1000
                }
            }
        ]
    }
}
@ -1,42 +0,0 @@
{
    "model": {
        "model_name": "ssd_resnet34_1200",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/ssd_resnet34.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "stat_subset_size": 300,
                    "preset": "performance",
                    "ignored": {
                        "scope": [
                            "Mul_490",
                            "Mul_509",
                            "Add_511",
                            "Mul_507",
                            "Exp_512",
                            "Mul_514",
                            "Mul_548/Fused_Mul_",
                            "Mul_583/Fused_Mul_",
                            "Mul_618",
                            "Mul_653",
                            "Sub_549/add_",
                            "Sub_584/add_",
                            "Add_619",
                            "Add_654",
                            "Mul_703",
                            "Add_704",
                            "Add_labels"
                        ]
                    }
                }
            }
        ]
    }
}
@ -1,57 +0,0 @@
{
    "model": {
        "model_name": "ssd_resnet50_512_mxnet",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "ssd"
            }
        ],
        "datasets": [
            {
                "name": "VOC2007",
                "data_source": "<PATH TO VALIDATION DATASET>/VOC2007/JPEGImages",
                "annotation_conversion": {
                    "converter": "voc_detection",
                    "has_background": false,
                    "annotations_dir": "<PATH TO VALIDATION DATASET>/VOC2007/Annotations",
                    "images_dir": "<PATH TO VALIDATION DATASET>/VOC2007/JPEGImages",
                    "imageset_file": "<PATH TO VALIDATION DATASET>/VOC2007/ImageSets/Main/test.txt"
                },
                "preprocessing": [
                    {
                        "type": "resize",
                        "size": 512
                    }
                ],
                "postprocessing": [
                    {
                        "type": "resize_prediction_boxes"
                    }
                ],
                "metrics": [
                    {
                        "type": "map",
                        "integral": "11point",
                        "ignore_difficult": true,
                        "presenter": "print_scalar"
                    }
                ]
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -1,56 +0,0 @@
{
    "model": {
        "model_name": "mobilenetv1",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mobilenet_v1_tf.yaml"
    },
    "optimizer": {
        "name": "Tpe",
        "params": {
            "max_trials": 200,
            "max_minutes": 1440,
            "trials_load_method": "cold_start",
            "accuracy_loss": 0.1,
            "latency_reduce": 1.5,
            "accuracy_weight": 1.0,
            "latency_weight": 1.0,
            "benchmark": {
                "performance_count": false,
                "batch_size": 1,
                "nthreads": 8,
                "nstreams": 1,
                "nireq": 1,
                "api_type": "async",
                "niter": 1,
                "duration_seconds": 30
            }
        }
    },
    "compression": {
        "algorithms": [
            {
                "name": "ActivationChannelAlignment",
                "params": {
                    "stat_subset_size": 1000
                }
            },
            {
                "name": "TunableQuantization",
                "params": {
                    "stat_subset_size": 1000,
                    "preset": "performance",
                    "tuning_scope": ["layer"]
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 1000
                }
            }
        ]
    }
}
@ -1,47 +0,0 @@
{
    "model": {
        "model_name": "mobilenetv2",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "MinMaxQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 1000,
                    "weights": {
                        "bits": 8,
                        "mode": "asymmetric",
                        "granularity": "perchannel"
                    },
                    "activations": {
                        "bits": 8,
                        "mode": "asymmetric",
                        "granularity": "pertensor"
                    }
                }
            },
            {
                "name": "RangeOptimization",
                "params": {
                    "stat_subset_size": 5000,
                    "result_filename": "rangeopt_results.csv",
                    "lower_boxsize": 0.1,
                    "upper_boxsize": 0.1,
                    "maxiter": 1500,
                    "optimization_scope": ["317", "315"],
                    "metric_name": "accuracy@top1",
                    "opt_backend": "nevergrad",
                    "optimizer_name": "CMA"
                    // use the option below to validate range values
                    // "activation_ranges_to_set": {"317": [-0.0380698, 2.35978142]}
                }
            }
        ]
    }
}
@ -1,56 +0,0 @@
{
    "model": {
        "model_name": "mobilenetv2",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml"
    },
    "optimizer": {
        "name": "Tpe",
        "params": {
            "max_trials": 200,
            "max_minutes": 1440,
            "trials_load_method": "cold_start",
            "accuracy_loss": 0.1,
            "latency_reduce": 1.5,
            "accuracy_weight": 1.0,
            "latency_weight": 1.0,
            "benchmark": {
                "performance_count": false,
                "batch_size": 1,
                "nthreads": 8,
                "nstreams": 1,
                "nireq": 1,
                "api_type": "async",
                "niter": 1,
                "duration_seconds": 30
            }
        }
    },
    "compression": {
        "algorithms": [
            {
                "name": "ActivationChannelAlignment",
                "params": {
                    "stat_subset_size": 1000
                }
            },
            {
                "name": "TunableQuantization",
                "params": {
                    "stat_subset_size": 1000,
                    "preset": "performance",
                    "tuning_scope": ["layer"]
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 1000
                }
            }
        ]
    }
}
@ -1,56 +0,0 @@
{
    "model": {
        "model_name": "mobilenetv2",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "QuantileTuningAlgorithm",
                "params": {
                    "opt_backend": "skopt",
                    "maxiter": 100,
                    "optimization_subset_size": 1000,
                    "preset": "mixed",
                    "stat_subset_size": 1000,
                    "weights": {
                        "bits": 8,
                        "mode": "asymmetric",
                        "granularity": "perchannel",
                        "range_estimator": {
                            "min": {
                                "type": "quantile",
                                "outlier_prob": 0.0015
                            },
                            "max": {
                                "type": "quantile",
                                "outlier_prob": 0.0015
                            }
                        }
                    },
                    "activations": {
                        "bits": 8,
                        "mode": "asymmetric",
                        "granularity": "pertensor",
                        "range_estimator": {
                            "min": {
                                "aggregator": "mean",
                                "type": "quantile",
                                "outlier_prob": 1e-3
                            },
                            "max": {
                                "aggregator": "mean",
                                "type": "quantile",
                                "outlier_prob": 1e-3
                            }
                        }
                    }
                }
            }
        ]
    }
}
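For intuition, the `quantile` range estimators configured above pick quantization bounds by clipping the `outlier_prob` tails of the observed value distribution instead of taking the raw min/max. A minimal NumPy sketch of that statistic (an illustration of the idea, not the tool's actual implementation):

```python
import numpy as np

def quantile_range(samples, outlier_prob=1e-3):
    """Estimate quantization bounds by clipping the outlier_prob tails."""
    # With outlier_prob = 1e-3, the lowest and highest 0.1% of values are
    # treated as outliers and excluded from the quantization range.
    low = np.quantile(samples, outlier_prob)
    high = np.quantile(samples, 1.0 - outlier_prob)
    return low, high

acts = np.random.randn(10_000)
print(quantile_range(acts, outlier_prob=0.0015))  # tighter than (acts.min(), acts.max())
```

The `"aggregator": "mean"` option in the activation estimator additionally averages these per-batch quantiles over the statistics subset rather than keeping a single extreme value.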
@ -1,59 +0,0 @@
{
    "model": {
        "model_name": "mobilenet-ssd",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/ssd_mobilenet_v1.yaml"
    },
    "optimizer": {
        "name": "Tpe",
        "params": {
            "max_trials": 200,
            "max_minutes": 1440,
            "trials_load_method": "cold_start",
            "accuracy_loss": 0.1,
            "latency_reduce": 1.5,
            "accuracy_weight": 1.0,
            "latency_weight": 0.0,
            "benchmark": {
                "performance_count": false,
                "batch_size": 1,
                "nthreads": 8,
                "nstreams": 1,
                "nireq": 1,
                "api_type": "async",
                "niter": 1,
                "duration_seconds": 30
            }
        }
    },
    "compression": {
        "algorithms": [
            {
                "name": "ActivationChannelAlignment",
                "params": {
                    "stat_subset_size": 1000
                }
            },
            {
                "name": "TunableQuantization",
                "params": {
                    "stat_subset_size": 1000,
                    "preset": "performance",
                    "tuning_scope": ["range_estimator"],
                    "estimator_tuning_scope": ["preset", "outlier_prob"],
                    "outlier_prob_choices": [1e-3, 1e-4, 1e-5]
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 1000
                }
            }
        ]
    }
}
@ -1,33 +0,0 @@
{
    "model": {
        "model_name": "ncf",
        "model": "<PATH_TO_MODEL>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/ncf.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "MinMaxQuantization",
                "params": {
                    "preset": "mixed",
                    "stat_subset_size": 1000,
                    "weights": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "perchannel",
                        "level_low": -127,
                        "level_high": 127
                    },
                    "activations": {
                        "bits": 8,
                        "mode": "symmetric",
                        "granularity": "pertensor"
                    }
                }
            }
        ]
    }
}
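The `symmetric` weight mode with `level_low`/`level_high` set to -127/127 above restricts weights to a signed integer range that is symmetric around zero. A small NumPy sketch of what per-tensor symmetric quantization with that restricted range does to a weight tensor (a simplification for illustration; with `"granularity": "perchannel"` the tool computes one scale per output channel rather than one per tensor):

```python
import numpy as np

def fake_quantize_symmetric(w, level_low=-127, level_high=127):
    """Symmetric fake-quantization: snap to integer levels, then dequantize."""
    scale = np.max(np.abs(w)) / level_high  # max |w| maps to level_high
    q = np.clip(np.round(w / scale), level_low, level_high)
    return q * scale  # the dequantized values the network actually computes with

w = np.random.randn(4, 4).astype(np.float32)
print(np.max(np.abs(w - fake_quantize_symmetric(w))))  # worst-case rounding error
```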
@ -1,60 +0,0 @@
{
    "model": {
        "model_name": "ncf",
        "model": "<PATH_TO_MODEL>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/ncf.yaml"
    },
    "optimizer": {
        "name": "Tpe",
        "params": {
            "max_trials": 100,
            "max_minutes": 10,
            "trials_load_method": "cold_start",
            "accuracy_loss": 0.1,
            "latency_reduce": 1.5,
            "expected_quantization_ratio": 0.5,
            "accuracy_weight": 1.0,
            "latency_weight": 1.0,
            "quantization_ratio_weight": 1.0,
            "benchmark": {
                "cpu_bind_thread": "YES",
                "nthreads": 4,
                "nstreams": 0,
                "nireq": 0,
                "api_type": "async",
                "duration_seconds": 30,
                "benchmark_app_dir": ""
            }
        }
    },
    "compression": {
        "algorithms": [
            {
                "name": "ActivationChannelAlignment",
                "params": {
                    "stat_subset_size": 1000
                }
            },
            {
                "name": "TunableQuantization",
                "params": {
                    "stat_subset_size": 1000,
                    "preset": "performance",
                    "tuning_scope": ["range_estimator"],
                    "estimator_tuning_scope": ["preset", "aggregator", "type", "outlier_prob"],
                    "outlier_prob_choices": [1e-3, 1e-4, 1e-5]
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 1000
                }
            }
        ]
    }
}
@ -1,49 +0,0 @@
{
    "model": {
        "model_name": "brain-tumor-segmentation-0001",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "launchers": [
            {
                "framework": "dlsdk",
                "adapter": {
                    "type": "brain_tumor_segmentation"
                }
            }
        ],
        "datasets": [
            {
                "name": "BraTS",
                "data_source": "<PATH_TO_DATASET>",
                "annotation_conversion": {
                    "converter": "brats_numpy",
                    "data_dir": "<PATH_TO_DATASET>",
                    "ids_file": "<PATH_TO_IDS_FILE>",
                    "labels_file": "<PATH_TO_LABELS_FILE>"
                },
                "reader": "numpy_reader",
                "metrics": [
                    {
                        "type": "dice_index",
                        "median": true,
                        "presenter": "print_vector"
                    }
                ]
            }
        ]
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "stat_subset_size": 72,
                    "preset": "performance"
                }
            }
        ]
    }
}
@ -1,26 +0,0 @@
{
    "model": {
        "model_name": "east",
        "model": "<MODEL_PATH>",
        "weights": "<WEIGHTS_PATH>"
    },
    "engine": {
        "config": "./configs/examples/accuracy_checker/east_resnet_v1_50.yaml"
    },
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "ignored": {
                        "scope": [
                            "feature_fusion/Conv_4/Conv2D"
                        ]
                    },
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -1,30 +0,0 @@
{
    "model": {
        "model_name": "resnet-50-pytorch",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "WeightSparsity",
                "params": {
                    "sparsity_level": 0.5,
                    "apply_for_all_nodes": true,
                    "stat_subset_size": 300,
                    "use_layerwise_tuning": true
                }
            },
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -1,29 +0,0 @@
{
    "model": {
        "model_name": "ssd_resnet50_512",
        "model": "<MODEL_PATH>",
        "weights": "<PATH_TO_WEIGHTS>"
    },
    "engine": {
        "config": "<CONFIG_PATH>"
    },
    "compression": {
        "algorithms": [
            {
                "name": "WeightSparsity",
                "params": {
                    "sparsity_level": 0.5,
                    "apply_for_all_nodes": true,
                    "stat_subset_size": 300
                }
            },
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
@ -1,45 +0,0 @@
/* This configuration file is the fastest way to get started with the default
quantization algorithm in simplified mode. It contains only mandatory options
with commonly used values. All other options can be considered as an advanced
mode and require deep knowledge of the quantization process. An overall description
of all possible parameters can be found in the default_quantization_spec.json */

{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
    },

    /* Parameters of the engine used for model inference */

    "engine": {
        "type": "simplified",
        "layout": "NCHW", // Layout of input data. Supported layouts: ["NCHW", "NHWC", "CHW", "CWH"]
        "data_source": "PATH_TO_SOURCE" // You can specify a path to a directory with images. You can also
                                        // specify a template for file names to filter the images to load.
                                        // Templates are unix-style (this option is valid only in simplified mode)
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "DefaultQuantization", // Optimization algorithm name
                "params": {
                    "preset": "performance", // Preset [performance, mixed, accuracy] which controls the quantization
                                             // mode (symmetric, mixed (weights symmetric and activations asymmetric),
                                             // and fully asymmetric, respectively)

                    "stat_subset_size": 300 // Size of the subset used to collect activation statistics
                                            // for computing quantization parameters
                }
            }
        ]
    }
}
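A template like this is saved to a JSON file and passed to the `pot` command line. A typical invocation might look as follows (the config path is a placeholder):

```sh
pot -c ./simplified_mode_template.json --output-dir ./results
```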
@ -1,44 +0,0 @@
/* This configuration file is the fastest way to get started with the default
sparsity and default quantization algorithms. It contains only mandatory options
with commonly used values. All other options can be considered as an advanced
mode and require deep knowledge of the quantization process. An overall description
of all possible parameters can be found in the default_quantization_spec.json */

{
    /* Model parameters */
    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
    },
    /* Parameters of the engine used for model inference */
    "engine": {
        "config": "<CONFIG_PATH>" // Path to Accuracy Checker config
    },
    /* Optimization hyperparameters */
    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "WeightSparsity",
                "params": {
                    "sparsity_level": 0.3,
                    "stat_subset_size": 300 // Size of the subset used to collect activation statistics
                                            // for computing quantization parameters
                }
            },
            {
                "name": "DefaultQuantization", // Optimization algorithm name
                "params": {
                    // Preset [performance, mixed, accuracy] which controls the quantization mode
                    // (symmetric, mixed (weights symmetric and activations asymmetric), and fully
                    // asymmetric, respectively)
                    "preset": "performance",
                    "stat_subset_size": 300 // Size of the subset used to collect activation statistics
                                            // for computing quantization parameters
                }
            }
        ]
    }
}
@ -1,133 +0,0 @@
{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to a model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
    },

    /* Parameters of the engine used for model inference. */

    /* The Post-Training Optimization Tool supports an engine based on the Accuracy Checker, as well as a custom engine.
       For a custom engine, specify your own set of parameters.
       The engine based on the Accuracy Checker uses Accuracy Checker parameters. You can specify the parameters
       via the Accuracy Checker config file or directly in the engine section.
       Find more information about Accuracy Checker parameters at
       https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker */

    "engine": {
        "stat_requests_number": 8, // Number of requests during statistics collection
        "eval_requests_number": 8, // Number of requests during evaluation
        "config": "<CONFIG_PATH>",
        /* OR */
        "name": "model_name",
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ],
        "datasets": [
            {
                "name": "dataset_name",
                "data_source": "<DATASET_PATH>",
                "annotation": "<ANNOTATION_PATH>",
                "preprocessing": [
                    {
                        "type": "resize",
                        "interpolation": "BILINEAR",
                        "aspect_ratio_scale": "greater",
                        "size": 224
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    }
                ]
            }
        ]
    },

    /* Global optimizer used to find "optimal" hyperparameters */

    "optimizer": {
        "name": "Tpe", // Global optimizer name
        "params": {
            "max_trials": 100, // Maximum number of trials
            "max_minutes": 10, // [Optional] Trial time limit. When it expires, the last trial is completed and the best result is returned.
            "stop_on_target": true, // [Optional] Flag to stop TPE trials when the accuracy_loss and latency_reduce targets are reached.
                                    // If false or not specified, TPE continues until max_trials or max_minutes is reached, even if the targets are reached earlier.
            "eval_subset_size": 2000, // [Optional] Subset of test data used to evaluate hyperparameters. The whole dataset is used if this parameter is not specified.
            "trials_load_method": "cold_start", // Start from scratch or reuse previous results, supported options [cold_start, warm_start, fine_tune, eval]
            "accuracy_loss": 0.1, // Accuracy threshold (%)
            "latency_reduce": 1.5, // Target latency improvement versus the original model
            "accuracy_weight": 1.0, // Accuracy weight in the loss function
            "latency_weight": 1.0, // Latency weight in the loss function
            // An optional list of reference metric values.
            // If not specified, all metrics will be calculated from the original model.
            "metrics": [
                {
                    "name": "accuracy", // Metric name
                    "baseline_value": 0.72 // Baseline metric value of the original model
                }
            ],
            "benchmark": {
                // Latency measurement benchmark configuration (https://docs.openvinotoolkit.org/latest/_inference_engine_samples_benchmark_app_README.html)
                "performance_count": false,
                "batch_size": 0,
                "nthreads": 4,
                "nstreams": 0,
                "nireq": 0,
                "api_type": "sync",
                "niter": 4,
                "duration_seconds": 30,
                "benchmark_app_dir": "<path to benchmark_app>" // Path to benchmark_app. If not specified, the Python-based benchmark is used. Use benchmark_app to reduce jitter in results.
            }
        }
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "ActivationChannelAlignment",
                "params": {
                    "stat_subset_size": 300 // Size of the subset used to collect activation statistics
                                            // for computing quantization parameters.
                }
            },
            {
                "name": "TunableQuantization",
                "params": {
                    /* A preset is a collection of optimization algorithm parameters that tells the algorithm
                       which metric to concentrate on improving. Each optimization algorithm supports the
                       [performance, mixed, accuracy] presets, which control the quantization mode
                       (symmetric, mixed (weights symmetric and activations asymmetric), and fully asymmetric, respectively) */
                    "preset": "performance",
                    "stat_subset_size": 300, // Size of the subset used to collect activation statistics
                                             // for computing quantization parameters.
                    "tuning_scope": ["layer"], // List of quantization parameters that will be tuned,
                                               // available options: [bits, mode, granularity, layer, range_estimator]
                    "estimator_tuning_scope": ["preset", "aggregator", "type", "outlier_prob"], // List of range_estimator parameters that will be tuned,
                                                                                                // available options: [preset, aggregator, type, outlier_prob]
                    "outlier_prob_choices": [1e-3, 1e-4, 1e-5] // List of outlier_prob values to use when tuning the outlier_prob parameter
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 300 // Size of the subset used to collect activation statistics
                                            // for computing quantization parameters.
                }
            }
        ]
    }
}
@ -1,84 +0,0 @@
/* This configuration file is the fastest way to get started with the TPE
optimization algorithm. It contains only mandatory options with commonly used
values. All other options can be considered as an advanced mode and require
deep knowledge of the quantization process. Find an overall description of all possible
parameters in tpe_spec.json */

{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to a model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
    },

    /* Parameters of the engine used for model inference. */

    "engine": {
        "config": "<CONFIG_PATH>" // Path to Accuracy Checker config
    },

    /* Optimizer used to find "optimal" hyperparameters */

    "optimizer": {
        "name": "Tpe", // Global optimizer name
        "params": {
            "max_trials": 200, // Maximum number of trials
            "trials_load_method": "cold_start", // Start from scratch or reuse previous results, supported options [cold_start, warm_start, fine_tune, eval]
            "accuracy_loss": 0.1, // Accuracy threshold (%)
            "latency_reduce": 1.5, // Target latency improvement versus the original model
            "accuracy_weight": 1.0, // Accuracy weight in the loss function
            "latency_weight": 1.0, // Latency weight in the loss function
            "benchmark": {
                // Latency measurement benchmark configuration (https://docs.openvinotoolkit.org/latest/_inference_engine_samples_benchmark_app_README.html)
                "performance_count": false,
                "batch_size": 0,
                "nthreads": 4,
                "nstreams": 0,
                "nireq": 0,
                "api_type": "sync",
                "niter": 4,
                "duration_seconds": 30,
                "benchmark_app_dir": "<path to benchmark_app>" // Path to benchmark_app. If not specified, the Python-based benchmark is used. Use benchmark_app to reduce jitter in results.
            }
        }
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "algorithms": [
            {
                "name": "ActivationChannelAlignment",
                "params": {
                    "stat_subset_size": 300 // Size of the subset used to collect activation statistics
                                            // for computing quantization parameters.
                }
            },
            {
                "name": "TunableQuantization",
                "params": {
                    /* A preset is a collection of optimization algorithm parameters that tells the algorithm
                       which metric to concentrate on improving. Each optimization algorithm supports the
                       [performance, mixed, accuracy] presets, which control the quantization mode
                       (symmetric, mixed (weights symmetric and activations asymmetric), and fully asymmetric, respectively) */
                    "preset": "performance",
                    "stat_subset_size": 300, // Size of the subset used to collect activation statistics
                                             // for computing quantization parameters.
                    "tuning_scope": ["layer"] // List of quantization parameters that will be tuned,
                                              // available options: [bits, mode, granularity, layer, range_estimator]
                }
            },
            {
                "name": "FastBiasCorrection",
                "params": {
                    "stat_subset_size": 300 // Size of the subset used to collect activation statistics
                                            // for computing quantization parameters.
                }
            }
        ]
    }
}
@ -1,15 +0,0 @@
# Installation Guide

## Install POT from PyPI
POT is distributed as a part of the OpenVINO™ Development Tools package. For installation instructions, refer to this [document](@ref openvino_docs_install_guides_install_dev_tools).

## Install POT from GitHub
The latest version of the Post-training Optimization Tool is available on [GitHub](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot) and can be installed from source. As prerequisites, you need to install [OpenVINO™ Runtime](@ref openvino_docs_install_guides_install_runtime) and other dependencies such as [Model Optimizer](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide) and [Accuracy Checker](@ref omz_tools_accuracy_checker).

To install POT from source:
- Clone the OpenVINO repository
```sh
git clone --recursive https://github.com/openvinotoolkit/openvino.git
```

After installation, POT is available as a Python library under `openvino.tools.pot.*` and on the command line via the `pot` alias. To verify it, run `pot -h`.
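A quick way to check both entry points after installation (assuming the package installed successfully):

```sh
pot -h                                    # CLI entry point prints its usage
python3 -c "import openvino.tools.pot"    # Python API import check
```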
@ -1,51 +0,0 @@
|
||||
# Low Precision Optimization Guide
|
||||
|
||||
## Introduction
|
||||
This document provides the best-known methods on how to use low-precision capabilities of the OpenVINO™ toolkit to transform models
|
||||
to more hardware-friendly representation using such methods as quantization.
|
||||
|
||||
Currently, these capabilities are represented by several components:
|
||||
- Low-precision runtime
|
||||
- Post-training Optimization Tool (POT)
|
||||
- [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf)
|
||||
|
||||
The first two components are part of the OpenVINO toolkit itself, while the latter is a separate tool built on top of the PyTorch* framework
|
||||
and closely aligned with OpenVINO™.
|
||||
|
||||
This document covers the high-level aspects of the model optimization flow in OpenVINO™.
|
||||
|
||||
## General Information
|
||||
|
||||
By low precision we mean inference of Deep Learning models in a precision lower than the 32-bit or 16-bit floating-point formats (*FLOAT32* and *FLOAT16*). For example, the most popular
|
||||
bit-width for low-precision inference is *INT8* (*UINT8*) because it is possible to get accurate 8-bit models that substantially speed up inference.
|
||||
Such models are called quantized models: they were trained in floating-point precision and then transformed to an integer
|
||||
representation with floating/fixed-point quantization operations between the layers. This transformation can be done using post-training methods or
|
||||
with additional retraining/fine-tuning.
|
||||
|
||||
Starting from the OpenVINO 2020.1 release, all quantized models are represented using the so-called `FakeQuantize` layer, which is
|
||||
a very expressive primitive able to represent operations such as `Quantize`, `Dequantize`, `Requantize`, and more. This operation is
|
||||
inserted into the model during the quantization procedure and stores the quantization parameters of the layers. For more details about this operation,
|
||||
refer to the following [description](@ref openvino_docs_ops_quantization_FakeQuantize_1).
|
||||
|
||||
To execute such "fake-quantized" models, OpenVINO has a low-precision runtime, which is a part of the Inference Engine and consists of a
|
||||
generic component that translates the model to a real integer representation and a HW-specific part implemented in the corresponding HW plug-ins.
|
||||
|
||||
## Model Optimization Workflow
|
||||
We propose a common workflow that aligns with what other DL frameworks provide. It contains two main components: post-training quantization and Quantization-Aware Training (QAT).
|
||||
The first is the easiest way to get optimized models, while the latter can be considered an alternative or an addition when the first does not give
|
||||
accurate results.
|
||||
|
||||
The diagram below shows the optimization flow for a new model with OpenVINO and related tools.
|
||||
|
||||

|
||||
|
||||
- **Step 0: Model enabling**. In this step, we ensure that the model trained on the target dataset can be successfully inferred with [OpenVINO™ Runtime](@ref openvino_docs_OV_UG_OV_Runtime_User_Guide) in floating-point precision.
|
||||
This process involves using the [model conversion API](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide) to convert the model from the source framework
|
||||
to the OpenVINO Intermediate Representation (IR) and running it on CPU with the Inference Engine.
|
||||
> **NOTE**: This step presumes that the model has the same accuracy as in the original training framework and is enabled in the [Accuracy Checker](@ref omz_tools_accuracy_checker) tool or through a custom validation sample.
|
||||
- **Step 1: Post-training quantization**. As the first optimization step, we suggest using INT8 quantization from POT, where in most cases it is possible to get an accurate quantized model. No model re-training is needed at this step. The only requirement is a representative dataset, usually several hundred images, which is used to collect statistics during the quantization process (see the API sketch after this list).
|
||||
Post-training quantization is also very fast and usually takes several minutes, depending on the model size and the hardware used. Generally, a regular desktop system is enough to quantize most of the models in the [OpenVINO Model Zoo](https://github.com/opencv/open_model_zoo).
|
||||
For more information on best practices of post-training optimization, refer to the [Post-training Optimization Best Practices](BestPractices.md).
|
||||
- **Step 2: Quantization-Aware Training**. If the accuracy of the quantized model does not satisfy the accuracy criteria, the second step applies QAT using the [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf) for [PyTorch*](https://pytorch.org/) and [TensorFlow*](https://www.tensorflow.org/) models.
|
||||
At this step, we assume the user has an original training pipeline of the model written in TensorFlow or PyTorch with NNCF integrated into it.
|
||||
After this step, you can get an accurate optimized model that can be converted to the OpenVINO Intermediate Representation (IR) using the model conversion API and inferred with the OpenVINO Inference Engine.
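
For reference, the post-training quantization flow from Step 1 with the POT Python API looks roughly like the following (a minimal sketch: the file names and the data loader are placeholders, and the algorithm parameters mirror the configuration example above):

```python
from openvino.tools.pot import IEEngine, load_model, save_model, create_pipeline

# Placeholders: point these at your IR files and implement a DataLoader
model_config = {"model_name": "model", "model": "model.xml", "weights": "model.bin"}
engine_config = {"device": "CPU"}
algorithms = [{
    "name": "DefaultQuantization",
    "params": {"target_device": "ANY", "preset": "performance", "stat_subset_size": 300},
}]

data_loader = ...  # an openvino.tools.pot.DataLoader implementation (see the API samples)

model = load_model(model_config)                       # read the FP32 IR
engine = IEEngine(engine_config, data_loader=data_loader)
pipeline = create_pipeline(algorithms, engine)         # build the optimization pipeline
compressed_model = pipeline.run(model)                 # run INT8 quantization
save_model(compressed_model, save_path="./optimized")  # write the quantized IR
```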
|
@ -1,33 +0,0 @@
|
||||
# Low-precision model representation
|
||||
|
||||
## Introduction
|
||||
The goal of this document is to describe how optimized models are represented in the OpenVINO Intermediate Representation (IR) and to provide guidance on the interpretation rules for such models at runtime.
|
||||
Currently, there are two groups of optimization methods that can change the IR after applying them to the full-precision model:
|
||||
- **Sparsity**. It is represented by zeros inside the weights, and it is up to the hardware plugin how to interpret these zeros (use the weights as-is or apply special compression algorithms and sparse arithmetic). No additional mask is provided with the model.
|
||||
- **Quantization**. The rest of this document is dedicated to the representation of quantized models.
|
||||
|
||||
## Representation of quantized models
|
||||
|
||||
The OpenVINO Toolkit represents all quantized models using the so-called [FakeQuantize](https://docs.openvino.ai/2021.4/openvino_docs_MO_DG_prepare_model_convert_model_Legacy_IR_Layers_Catalog_Spec.html#fakequantize-layer) operation. This operation is very expressive and allows mapping values between arbitrary input and output ranges. We project (discretize) the input values to the low-precision data type using an affine transformation (with clamp and rounding) and then re-project the discrete values back to the original range and data type. It can be considered an emulation of the quantization/dequantization process which happens at runtime. The figure below shows a part of a DL model, namely the Convolutional layer, as it undergoes various transformations, from a floating-point model to an integer model executed in the OpenVINO runtime. Column 2 of the figure shows a model quantized with the [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf).
|
||||

|
||||
|
||||
To reduce the memory footprint, the weights of quantized models are transformed to a target data type, e.g. int8 in the case of 8-bit quantization. During this transformation, the floating-point weight tensor and the FakeQuantize operation that corresponds to it are replaced with an 8-bit weight tensor and a sequence of Convert, Subtract, and Multiply operations that represent the type cast and the dequantization parameters (scale and zero-point), as shown in column 3 of the figure.
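
As an illustrative sketch (the numbers below are made up, not taken from a real model), the Convert, Subtract, Multiply chain restores floating-point weights from the stored 8-bit tensor as follows:

```python
import numpy as np

w_i8 = np.array([-128, 0, 127], dtype=np.int8)  # stored 8-bit weight tensor
zero_point = np.float32(0.0)                    # dequantization zero-point
scale = np.float32(0.02)                        # dequantization scale

w_fp = (w_i8.astype(np.float32) - zero_point) * scale  # Convert -> Subtract -> Multiply
```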
|
||||
|
||||
## Interpreting FakeQuantize at runtime
|
||||
At inference time, the quantized model undergoes the second set of transformations that allows interpreting floating-point operations with quantization rules as integer operations. OpenVINO Toolkit has Low-Precision Transformations (LPT) component for that purpose.
|
||||
At runtime, each FakeQuantize can be split into two independent operations: **Quantize** and **Dequantize** (column 4). **Quantize** transforms the input data into the target precision, while **Dequantize** transforms the resulting values back to the original range. *Dequantize* operations can be propagated forward through linear layers, such as *Convolution* or *Fully-Connected*, and, in some cases, fused with the following *Quantize* operation of the next layer into the so-called *Requantize* operation (column 5).
|
||||
|
||||
From the computation standpoint, the FakeQuantize formula is split into two parts:
|
||||
`output = round((x - input_low) / (input_high - input_low) * (levels-1)) / (levels-1) * (output_high - output_low) + output_low`
|
||||
The first part of this formula represents the *Quantize* operation:
|
||||
`q = round((x - input_low) / (input_high - input_low) * (levels-1))`
|
||||
The second is responsible for the dequantization:
|
||||
`r = q / (levels-1) * (output_high - output_low) + output_low`
|
||||
In scale/zero-point notation, the latter formula can be written as follows:
|
||||
`r = (output_high - output_low) / (levels-1) * (q + output_low / (output_high - output_low) * (levels-1))`
|
||||
|
||||
Thus we can define:
|
||||
- **Scale** as `(output_high - output_low) / (levels-1)`
|
||||
- **Zero-point** as `-output_low / (output_high - output_low) * (levels-1)`
|
||||
|
||||
**Note**: During the quantization process, the values `input_low`, `input_high`, `output_low`, and `output_high` are selected so as to map a floating-point zero exactly to an integer value (the zero-point) and vice versa.
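
For illustration, here is a minimal NumPy sketch of the split above (the ranges and inputs are made up; it only checks that the two notations agree):

```python
import numpy as np

def quantize(x, input_low, input_high, levels):
    """First part of FakeQuantize: project onto the integer grid."""
    q = np.round((x - input_low) / (input_high - input_low) * (levels - 1))
    return np.clip(q, 0, levels - 1)

def dequantize(q, output_low, output_high, levels):
    """Second part: re-project discrete values back to the original range."""
    return q / (levels - 1) * (output_high - output_low) + output_low

levels = 256
in_low = out_low = -1.0
in_high = out_high = 1.0

scale = (out_high - out_low) / (levels - 1)
zero_point = -out_low / (out_high - out_low) * (levels - 1)

x = np.array([-0.5, 0.0, 0.7], dtype=np.float32)
q = quantize(x, in_low, in_high, levels)
assert np.allclose(dequantize(q, out_low, out_high, levels), scale * (q - zero_point))
```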
|
@ -1,122 +0,0 @@
|
||||
# Copyright (C) 2018-2023 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
#! [image_loader]
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import cv2 as cv
|
||||
|
||||
from openvino.tools.pot import DataLoader
|
||||
|
||||
class ImageLoader(DataLoader):
|
||||
""" Loads images from a folder """
|
||||
def __init__(self, dataset_path):
|
||||
# Use OpenCV to gather image files
|
||||
# Collect names of image files
|
||||
self._files = []
|
||||
all_files_in_dir = os.listdir(dataset_path)
|
||||
for name in all_files_in_dir:
|
||||
file = os.path.join(dataset_path, name)
|
||||
if cv.haveImageReader(file):
|
||||
self._files.append(file)
|
||||
|
||||
# Define shape of the model
|
||||
self._shape = (224, 224)
|
||||
|
||||
def __len__(self):
|
||||
""" Returns the length of the dataset """
|
||||
return len(self._files)
|
||||
|
||||
def __getitem__(self, index):
|
||||
""" Returns image data by index in the NCHW layout
|
||||
Note: model-specific preprocessing is omitted, consider adding it here
|
||||
"""
|
||||
if index >= len(self):
|
||||
raise IndexError("Index out of dataset size")
|
||||
|
||||
image = cv.imread(self._files[index]) # read image with OpenCV
|
||||
image = cv.resize(image, self._shape) # resize to a target input size
|
||||
image = np.expand_dims(image, 0) # add batch dimension
|
||||
image = image.transpose(0, 3, 1, 2) # convert to NCHW layout
|
||||
return image, None # annotation is set to None
|
||||
#! [image_loader]
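
# A minimal usage sketch for the loader above (the dataset path is a placeholder;
# annotations are None, which suits annotation-free algorithms such as DefaultQuantization):
#
#   loader = ImageLoader("<path to image folder>")
#   image, _ = loader[0]   # numpy array in NCHW layout, e.g. (1, 3, 224, 224)
#   print(len(loader))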
|
||||
|
||||
#! [text_loader]
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from datasets import load_dataset  # pip install datasets
|
||||
from transformers import AutoTokenizer  # pip install transformers
|
||||
|
||||
from openvino.tools.pot import DataLoader
|
||||
|
||||
class TextLoader(DataLoader):
|
||||
""" Loads content of .txt files from a folder """
|
||||
def __init__(self, dataset_path):
|
||||
# HuggingFace dataset API is used to process text files
|
||||
# Collect names of text files
|
||||
extension = ".txt"
|
||||
files = sorted(str(p.stem) for p in
|
||||
Path(dataset_path).glob("*" + extension))
|
||||
files = [os.path.join(dataset_path, file + extension) for file in files]
|
||||
self._dataset = load_dataset('text', data_files=files)
|
||||
# replace with your tokenizer
|
||||
self._tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
|
||||
self._dataset = self._dataset.map(self._encode, batched=False)
|
||||
# replace with names of model inputs
|
||||
self._dataset.set_format(type='numpy',
|
||||
columns=['input_ids', 'token_type_ids', 'attention_mask'])
|
||||
|
||||
def _encode(self, examples):
|
||||
""" Tokenization of the input text """
|
||||
return self._tokenizer(examples['text'], truncation=True, padding='max_length')
|
||||
|
||||
def __len__(self):
|
||||
""" Returns the length of the dataset """
|
||||
return len(self._dataset['train'])
|
||||
|
||||
def __getitem__(self, index):
|
||||
""" Returns data by index as a (dict[str, np.array], None) """
|
||||
if index >= len(self):
|
||||
raise IndexError("Index out of dataset size")
|
||||
|
||||
data = self._dataset['train'][index]
|
||||
return {'input_ids': data['input_ids'],
|
||||
'token_type_ids': data['token_type_ids'],
|
||||
'attention_mask': data['attention_mask']}, None # annotation is set to None
|
||||
#! [text_loader]
|
||||
|
||||
#! [audio_loader]
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import torchaudio # pip install torch torchaudio
|
||||
|
||||
from openvino.tools.pot import DataLoader
|
||||
|
||||
class AudioLoader(DataLoader):
|
||||
""" Loads content of .wav files from a folder """
|
||||
def __init__(self, dataset_path):
|
||||
# Collect names of wav files
|
||||
self._extension = ".wav"
|
||||
self._dataset_path = dataset_path
|
||||
self._files = sorted(str(p.stem) for p in
|
||||
Path(self._dataset_path).glob("*" + self._extension))
|
||||
|
||||
def __len__(self):
|
||||
""" Returns the length of the dataset """
|
||||
return len(self._files)
|
||||
|
||||
def __getitem__(self, index):
|
||||
""" Returns wav data by index
|
||||
Note: model-specific preprocessing is omitted, consider adding it here
|
||||
"""
|
||||
if index >= len(self):
|
||||
raise IndexError("Index out of dataset size")
|
||||
|
||||
file_name = self._files[index] + self._extension
|
||||
file_path = os.path.join(self._dataset_path, file_name)
|
||||
waveform, _ = torchaudio.load(file_path) # use a helper from torchaudio to load data
|
||||
return waveform.numpy(), None # annotation is set to None
|
||||
#! [audio_loader]
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4958239932616705f208607c20f63f92a6cdb219f8a5e9ff6046ff7835c451dc
|
||||
size 47028
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2a5bd3b61d61b7eecb51fa0e932bc8215659d8f5b92f96abba927d9d3f94f277
|
||||
size 38993
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0e564f28b20af9c92511a59389bb42934bc2e19dcaca593c435968d76f5ff7a6
|
||||
size 28899
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bcaa0c75dab08dc03343b2bce069148e27141da1abc92fc5fde2fce3a5d8f5e8
|
||||
size 19411
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2612fc6169dec150907d79b174c46c9b2f6428b5a20cf462c57a5ea2fc97f56a
|
||||
size 97895
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5183c57dc825af40051782818d9bf40236bd6be8fbee3ae4e7a982000e4d6af8
|
||||
size 89875
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e0bab657bf979494cb84459e29024e5b8b9cd320388c62c6a91b74b897b19718
|
||||
size 18108
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:71365e85be040eb01ed524e568b332d9bb6222c760686c54db4e754f587082c2
|
||||
size 31032
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3f68e826cfac63d8e6f8d77aa5b7fc61957a872dfb09b38695fb481044a6ddd5
|
||||
size 48327
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:79ef392200a6d9ecad6be9cab7b1ecd4af7b88b4fd55f8f8884a02b16b435f68
|
||||
size 36036
|
@ -1,3 +0,0 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6b9a68861a65526203b56a897f0d6cec0ef860619e9aaf275bc0d7483bc34329
|
||||
size 92994
|
@ -1,33 +0,0 @@
|
||||
<doxygenlayout xmlns:xi="http://www.w3.org/2001/XInclude" version="1.0">
|
||||
<!-- POT Developer Guide -->
|
||||
<navindex>
|
||||
<tab id="pot" type="usergroup" title="Post-Training Optimization Tool" url="@ref pot_README">
|
||||
<tab type="user" title="Installation Guide" url="@ref pot_InstallationGuide"/>
|
||||
<tab type="user" title="Low Precision Optimization Guide" url="@ref pot_docs_LowPrecisionOptimizationGuide"/>
|
||||
<tab type="usergroup" title="Quantization" url="@ref pot_compression_algorithms_quantization_README">
|
||||
<tab type="user" title="DefaultQuantization Algorithm" url="@ref pot_compression_algorithms_quantization_default_README"/>
|
||||
<tab type="user" title="AccuracyAwareQuantization Algorithm" url="@ref accuracy_aware_README"/>
|
||||
<tab type="user" title="Saturation issue workaround" url="@ref pot_saturation_issue"/>
|
||||
<tab type="user" title="Low-precision model representation" url="@ref pot_docs_model_representation"/>
|
||||
</tab>
|
||||
<tab type="user" title="Best Practices" url="@ref pot_docs_BestPractices"/>
|
||||
<tab type="user" title="Command-line Interface" url="@ref pot_compression_cli_README">
|
||||
<tab type="user" title="Simplified mode" url="@ref pot_docs_simplified_mode"/>
|
||||
<tab type="user" title="End-to-end CLI example" url="@ref pot_configs_examples_README"/>
|
||||
</tab>
|
||||
<tab type="user" title="API" url="@ref pot_compression_api_README">
|
||||
<tab type="user" title="API samples" url="@ref pot_sample_README">
|
||||
<tab type="user" title="Image Classification quantization sample" url="@ref pot_sample_classification_README"/>
|
||||
<tab type="user" title="Accuracy-Aware quantization sample" url="@ref pot_sample_object_detection_README"/>
|
||||
<tab type="user" title="Cascaded model quantization sample" url="@ref pot_sample_face_detection_README"/>
|
||||
<tab type="user" title="Semantic segmentation quantization sample" url="@ref pot_sample_segmentation_README"/>
|
||||
<tab type="user" title="3D Segmentation quantization sample" url="@ref pot_sample_3d_segmentation_README"/>
|
||||
<tab type="user" title="GNA speech sample" url="@ref pot_sample_speech_README"/>
|
||||
</tab>
|
||||
</tab>
|
||||
<tab type="user" title="Configuration File Description" url="@ref pot_configs_README"/>
|
||||
<tab type="user" title="Deep neural network protection through range supervision" url="@ref pot_ranger_README"/>
|
||||
<tab type="user" title="Frequently Asked Questions" url="@ref pot_docs_FrequentlyAskedQuestions"/>
|
||||
</tab>
|
||||
</navindex>
|
||||
</doxygenlayout>
|
@ -1,7 +0,0 @@
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from openvino.tools.pot.app.run import main
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@ -1,62 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2018-2023 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
__path__ = __import__("pkgutil").extend_path(__path__, __name__)
|
||||
|
||||
# Required for Windows OS platforms
|
||||
# Note: always top-level
|
||||
try:
|
||||
from openvino.utils import _add_openvino_libs_to_search_path
|
||||
_add_openvino_libs_to_search_path()
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# API 2.0
|
||||
try:
|
||||
# Import all public modules
|
||||
from openvino import runtime as runtime
|
||||
from openvino import frontend as frontend
|
||||
from openvino import helpers as helpers
|
||||
from openvino import preprocess as preprocess
|
||||
from openvino import utils as utils
|
||||
from openvino.runtime import properties as properties
|
||||
|
||||
# Import most important classes and functions from openvino.runtime
|
||||
from openvino.runtime import Model
|
||||
from openvino.runtime import Core
|
||||
from openvino.runtime import CompiledModel
|
||||
from openvino.runtime import InferRequest
|
||||
from openvino.runtime import AsyncInferQueue
|
||||
|
||||
from openvino.runtime import Dimension
|
||||
from openvino.runtime import Strides
|
||||
from openvino.runtime import PartialShape
|
||||
from openvino.runtime import Shape
|
||||
from openvino.runtime import Layout
|
||||
from openvino.runtime import Type
|
||||
from openvino.runtime import Tensor
|
||||
from openvino.runtime import OVAny
|
||||
|
||||
from openvino.runtime import compile_model
|
||||
from openvino.runtime import get_batch
|
||||
from openvino.runtime import set_batch
|
||||
from openvino.runtime import serialize
|
||||
from openvino.runtime import shutdown
|
||||
from openvino.runtime import tensor_from_file
|
||||
from openvino.runtime import save_model
|
||||
from openvino.runtime import layout_helpers
|
||||
|
||||
# Set version for openvino package
|
||||
from openvino.runtime import get_version
|
||||
__version__ = get_version()
|
||||
except ImportError:
|
||||
import warnings
|
||||
warnings.warn("openvino package has problems with imports!", ImportWarning, stacklevel=2)
|
||||
|
||||
# Tools
|
||||
try:
|
||||
# Model Conversion API - ovc should reside in the main namespace
|
||||
from openvino.tools.ovc import convert_model
|
||||
except ImportError:
|
||||
pass
|
@ -1,4 +0,0 @@
|
||||
# Copyright (C) 2018-2023 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
__path__ = __import__("pkgutil").extend_path(__path__, __name__)
|
@ -1,69 +0,0 @@
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from .algorithms.quantization.accuracy_aware.algorithm import AccuracyAwareQuantization
|
||||
from .algorithms.quantization.accuracy_aware_gna.algorithm import AccuracyAwareGNA
|
||||
from .algorithms.quantization.accuracy_aware_common.algorithm import AccuracyAwareCommon
|
||||
from .algorithms.quantization.accuracy_aware_common.mixed_precision import (
|
||||
INT4MixedQuantization,
|
||||
)
|
||||
from .algorithms.quantization.fast_bias_correction.algorithm import FastBiasCorrection
|
||||
from .algorithms.quantization.bias_correction.algorithm import BiasCorrection
|
||||
from .algorithms.quantization.channel_alignment.algorithm import (
|
||||
ActivationChannelAlignment,
|
||||
)
|
||||
from .algorithms.quantization.datafree.algorithm import DataFreeQuantization
|
||||
from .algorithms.quantization.default.algorithm import DefaultQuantization
|
||||
from .algorithms.quantization.minmax.algorithm import MinMaxQuantization
|
||||
from .algorithms.quantization.optimization.rangeopt import RangeOptimization
|
||||
from .algorithms.quantization.optimization.params_tuning import (
|
||||
ParamsGridSearchAlgorithm,
|
||||
)
|
||||
from .algorithms.quantization.qnoise_estimator.algorithm import QuantNoiseEstimator
|
||||
from .algorithms.quantization.tunable_quantization.algorithm import TunableQuantization
|
||||
from .algorithms.quantization.outlier_channel_splitting.algorithm import (
|
||||
OutlierChannelSplitting,
|
||||
)
|
||||
from .algorithms.quantization.weight_bias_correction.algorithm import (
|
||||
WeightBiasCorrection,
|
||||
)
|
||||
from .algorithms.sparsity.magnitude_sparsity.algorithm import MagnitudeSparsity
|
||||
from .algorithms.sparsity.default.algorithm import WeightSparsity
|
||||
from .algorithms.sparsity.default.base_algorithm import BaseWeightSparsity
|
||||
from .algorithms.quantization.overflow_correction.algorithm import OverflowCorrection
|
||||
from .algorithms.quantization.range_supervision.algorithm import RangeSupervision
|
||||
|
||||
from .api.data_loader import DataLoader
|
||||
from .api.metric import Metric
|
||||
from .api.engine import Engine
|
||||
from .engines.ie_engine import IEEngine
|
||||
from .graph import load_model, save_model
|
||||
from .graph.model_utils import compress_model_weights
|
||||
from .pipeline.initializer import create_pipeline
|
||||
|
||||
QUANTIZATION_ALGORITHMS = [
|
||||
'MinMaxQuantization',
|
||||
'RangeOptimization',
|
||||
'FastBiasCorrection',
|
||||
'BiasCorrection',
|
||||
'ActivationChannelAlignment',
|
||||
'DataFreeQuantization',
|
||||
'DefaultQuantization',
|
||||
'AccuracyAwareQuantization',
|
||||
'AccuracyAwareGNA',
|
||||
'AccuracyAwareCommon',
|
||||
'INT4MixedQuantization',
|
||||
'TunableQuantization',
|
||||
'QuantNoiseEstimator',
|
||||
'OutlierChannelSplitting',
|
||||
'WeightBiasCorrection',
|
||||
'ParamsGridSearchAlgorithm',
|
||||
'OverflowCorrection',
|
||||
'RangeSupervision',
|
||||
]
|
||||
|
||||
SPARSITY_ALGORITHMS = ['WeightSparsity',
|
||||
'MagnitudeSparsity',
|
||||
'BaseWeightSparsity']
|
||||
|
||||
__all__ = QUANTIZATION_ALGORITHMS + SPARSITY_ALGORITHMS
|
@ -1,7 +0,0 @@
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import sys
|
||||
from openvino.tools.pot.app.run import app
|
||||
|
||||
app(sys.argv[1:])
|
@ -1,2 +0,0 @@
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
@ -1,72 +0,0 @@
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from copy import deepcopy
|
||||
|
||||
from .utils import process_ignored_scope
|
||||
from ..api.engine import Engine
|
||||
|
||||
|
||||
class Algorithm(ABC):
|
||||
|
||||
algo_type = 'quantization'
|
||||
|
||||
def __init__(self, config, engine: Engine):
|
||||
""" Constructor
|
||||
:param config: algorithm specific config
|
||||
:param engine: model inference engine
|
||||
"""
|
||||
self._config, self._engine = deepcopy(config), engine
|
||||
self._stats_collector = None
|
||||
self.params = {}
|
||||
self.default_steps_size = 0.05
|
||||
self.total_exec_steps = 0
|
||||
|
||||
if isinstance(self._config.ignored, dict) and 'scope' in self._config.ignored:
|
||||
self._config.ignored.scope = process_ignored_scope(self._config.ignored.scope)
|
||||
|
||||
@property
|
||||
def config(self):
|
||||
return self._config
|
||||
|
||||
@property
|
||||
def algo_collector(self):
|
||||
return self._stats_collector
|
||||
|
||||
@algo_collector.setter
|
||||
def algo_collector(self, collector):
|
||||
self._stats_collector = collector
|
||||
|
||||
@abstractmethod
|
||||
def run(self, model):
|
||||
""" Run algorithm on model
|
||||
:param model: model to apply algorithm
|
||||
:return optimized model
|
||||
"""
|
||||
|
||||
def statistics(self):
|
||||
""" Returns a dictionary of printable statistics"""
|
||||
return {}
|
||||
|
||||
def register_statistics(self, model, stats_collector):
|
||||
"""
|
||||
:param model: FP32 original model
|
||||
:param stats_collector: object of StatisticsCollector class
|
||||
:return: None
|
||||
"""
|
||||
|
||||
def get_parameter_meta(self, _model):
|
||||
""" Get parameters metadata
|
||||
:param _model: model to get parameters for
|
||||
:return params_meta: metadata of optional parameters
|
||||
"""
|
||||
return []
|
||||
|
||||
def compute_total_exec_steps(self, model=None):
|
||||
""" Compute executions steps based on stat_subset_size, algorithm, model """
|
||||
|
||||
def update_config(self, config):
|
||||
""" Update Algorithm configuration based on input config """
|
||||
self._config = deepcopy(config)
|
@ -1,27 +0,0 @@
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from ..utils.registry import Registry, RegistryStorage
|
||||
|
||||
COMPRESSION_ALGORITHMS = Registry('QuantizationAlgos')
|
||||
REGISTRY_STORAGE = RegistryStorage(globals())
|
||||
|
||||
|
||||
def get_registry(name):
|
||||
return REGISTRY_STORAGE.get_registry(name)
|
||||
|
||||
|
||||
def get_algorithm(name):
|
||||
if name.startswith('.') or name.endswith('.'):
|
||||
raise Exception('The algorithm name cannot start or end with "."')
|
||||
|
||||
if '.' in name:
|
||||
ind = name.find('.')
|
||||
reg_name = name[:ind]
|
||||
algo_name = name[ind + 1:]
|
||||
else:
|
||||
reg_name = 'QuantizationAlgos'
|
||||
algo_name = name
|
||||
|
||||
reg = get_registry(reg_name)
|
||||
return reg.get(algo_name)
|
@ -1,2 +0,0 @@
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
@ -1,378 +0,0 @@
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import random
|
||||
from copy import deepcopy
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from .utils import get_optimization_params
|
||||
from ..quantization.accuracy_aware_common.utils import evaluate_model, create_metric_config
|
||||
from ...algorithms.algorithm import Algorithm
|
||||
from ...engines.simplified_engine import SimplifiedEngine
|
||||
from ...graph import model_utils as mu, node_utils as nu
|
||||
from ...graph.special_operations import OPERATIONS_WITH_WEIGHTS
|
||||
from ...samplers.batch_sampler import BatchSampler
|
||||
from ...statistics.collector import collect_statistics
|
||||
from ...statistics.statistics import TensorStatistic
|
||||
from ...utils.logger import get_logger
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
# pylint: disable=E1102,C0415,R0902,R0912
|
||||
class LayerwiseModelFinetuning(Algorithm):
|
||||
name = 'LayerwiseModelFinetuning'
|
||||
|
||||
@property
|
||||
def change_original_model(self):
|
||||
return True
|
||||
|
||||
def __init__(self, config, engine):
|
||||
super().__init__(config, engine)
|
||||
self._tconf = {
|
||||
'optimizer': 'Adam',
|
||||
'loss': 'l2',
|
||||
'seed': 0,
|
||||
'weight_decay': 0,
|
||||
'loss_logging_freq': 10,
|
||||
'calibration_indices_pool': 300,
|
||||
'use_only_fp_inputs': True,
|
||||
'calculate_grads_on_loss_increase_only': True,
|
||||
'update_every_batch': False,
|
||||
'use_ranking_subset': False,
|
||||
'tuning_ignored_scope': self._config.ignored.get('scope', []),
|
||||
'batch_size': 1
|
||||
}
|
||||
for key, value in self._tconf.items():
|
||||
self._tconf[key] = self._config.get(key, value)
|
||||
|
||||
self._device = 'cpu'
|
||||
self._current_best_loss = 0.0
|
||||
self._iteration = 0
|
||||
self._safety_eps = 1e-8
|
||||
self._dataset_size = len(self._engine.data_loader)
|
||||
self._samples_indices_pool = range(self._dataset_size)
|
||||
self._weighted_operations = [op['type'] for op in OPERATIONS_WITH_WEIGHTS]
|
||||
self._is_variable_resolution_model = False
|
||||
self._optimization_dataset_size = self._dataset_size
|
||||
self._metric_subset_ratio = (10 * self._tconf['calibration_indices_pool'] / self._optimization_dataset_size)
|
||||
self._ranking_subset_size = self._tconf['calibration_indices_pool']
|
||||
|
||||
self._original_model = None
|
||||
self._initial_losses = {}
|
||||
self._nodes_to_tune = {}
|
||||
self._nodes_to_tune_input = {}
|
||||
self._nodes_to_tune_output = {}
|
||||
self._layer_ops_wrapped = {}
|
||||
self._is_simplified_evaluation = isinstance(self._engine, SimplifiedEngine)
|
||||
self._base_algo_config = deepcopy(self._config)
|
||||
self._base_algo = None
|
||||
self._base_algo_args = None
|
||||
self._metrics_config = None
|
||||
|
||||
self.set_seed(self._tconf['seed'], self._device)
|
||||
self.set_default_parameters()
|
||||
|
||||
def set_default_parameters(self):
|
||||
if self._tconf['use_ranking_subset']:
|
||||
if self._is_simplified_evaluation:
|
||||
logger.info('Cannot use ranking subset in simplified mode')
|
||||
self._tconf['use_ranking_subset'] = False
|
||||
else:
|
||||
self._metrics_config = create_metric_config(
|
||||
self._engine,
|
||||
self._config,
|
||||
force_logit_comparison=True,
|
||||
logit_distance_type='mse',
|
||||
)
|
||||
|
||||
if (self._tconf['calibration_indices_pool'] is not None
|
||||
and self._tconf['calibration_indices_pool'] < self._optimization_dataset_size):
|
||||
self._samples_indices_pool = random.sample(
|
||||
range(self._optimization_dataset_size), self._tconf['calibration_indices_pool'])
|
||||
|
||||
def run(self, model):
|
||||
raise NotImplementedError
|
||||
|
||||
def _collect_nodes_to_tune(self, modified_model):
|
||||
raise NotImplementedError
|
||||
|
||||
def _wrap_nodes(self, modified_model, nodes_to_tune):
|
||||
raise NotImplementedError
|
||||
|
||||
def _calculate_gradients(self, losses):
|
||||
pass
|
||||
|
||||
def _get_optimizer_and_criterion(self, wrapped_ops_parameters):
|
||||
criterion, optimizer_algorithm = get_optimization_params(self._tconf['loss'], self._tconf['optimizer'])
|
||||
optimizers = {
|
||||
name: optimizer_algorithm(params=param, weight_decay=self._tconf['weight_decay'])
|
||||
for name, param in wrapped_ops_parameters.items()
|
||||
}
|
||||
return optimizers, criterion
|
||||
|
||||
def _wrap_node(self, op_node, wrapper, op_info):
|
||||
params = []
|
||||
wrapped_op = None
|
||||
if wrapper.is_able_to_wrap(op_node):
|
||||
wrapped_op = wrapper(op_node, device=self._device, **op_info)
|
||||
for name, param in wrapped_op.named_parameters():
|
||||
lr_name = name + '_lr'
|
||||
if lr_name in self._tconf.keys():
|
||||
params.append({'lr': self._tconf[lr_name], 'params': [param]})
|
||||
else:
|
||||
logger.warning('Undefined parameter found: {}'.format(name))
|
||||
continue
|
||||
else:
|
||||
logger.warning('Was not able to wrap layer {} with PyTorch'.format(op_node.fullname))
|
||||
return wrapped_op, params
|
||||
|
||||
def _fine_tuning_loop(
|
||||
self,
|
||||
modified_model,
|
||||
optimizers,
|
||||
criterion,
|
||||
n_batches,
|
||||
fp_model_callbacks,
|
||||
modified_model_callbacks=None
|
||||
):
|
||||
for layer in self._layer_ops_wrapped.values():
|
||||
layer.to(self._device)
|
||||
|
||||
for optimizer in optimizers.values():
|
||||
optimizer.zero_grad()
|
||||
|
||||
try:
|
||||
# Calculate feature maps for the original model beforehand on the used batch
|
||||
batch_indices_sample = self._random_samples()
|
||||
fp_activations = self._update_batch_from_model(self._original_model,
|
||||
batch_indices_sample,
|
||||
fp_model_callbacks)
|
||||
|
||||
for batch_idx in range(n_batches):
|
||||
if batch_idx != 0 and self._tconf['update_every_batch']:
|
||||
logger.debug('Batch update')
|
||||
batch_indices_sample = self._random_samples()
|
||||
fp_activations = self._update_batch_from_model(self._original_model,
|
||||
batch_indices_sample,
|
||||
fp_model_callbacks)
|
||||
|
||||
modified_activations = fp_activations
|
||||
if modified_model_callbacks:
|
||||
modified_activations = self._update_batch_from_model(modified_model,
|
||||
batch_indices_sample,
|
||||
modified_model_callbacks)
|
||||
|
||||
self._fine_tuning_step(
|
||||
optimizers,
|
||||
criterion,
|
||||
batch_idx,
|
||||
fp_activations,
|
||||
modified_activations,
|
||||
n_batches
|
||||
)
|
||||
return 0
|
||||
|
||||
except MemoryError:
|
||||
return -1
|
||||
|
||||
def _random_samples(self):
|
||||
batch_indices_sample = random.sample(self._samples_indices_pool, self._tconf['batch_size'])
|
||||
if self._is_simplified_evaluation:
|
||||
batch_indices_sample = BatchSampler(batch_indices_sample)
|
||||
return batch_indices_sample
|
||||
|
||||
def _update_batch_from_model(self, model, batch_indices_sample, model_callbacks):
|
||||
self._engine.set_model(model)
|
||||
|
||||
_, output_activations = self._engine.predict(model_callbacks, batch_indices_sample)
|
||||
return self._activation_maps_to_torch(output_activations)
|
||||
|
||||
def _fine_tuning_step(
|
||||
self,
|
||||
optimizers,
|
||||
criterion,
|
||||
batch_idx,
|
||||
fp_activations,
|
||||
modified_activations,
|
||||
n_batches
|
||||
):
|
||||
accumulated_losses = {op_name: 0.0 for op_name in self._layer_ops_wrapped}
|
||||
losses = {}
|
||||
for op_name in self._layer_ops_wrapped:
|
||||
torch_wrapped_op = self._layer_ops_wrapped[op_name]
|
||||
input_name = self._nodes_to_tune_input[op_name]
|
||||
output_name = self._nodes_to_tune_output[op_name]
|
||||
|
||||
in_blobs = modified_activations[input_name]['output']
|
||||
if self._tconf['use_only_fp_inputs']:
|
||||
in_blobs = fp_activations[input_name]['output']
|
||||
fp_out_blobs = fp_activations[output_name]['output']
|
||||
|
||||
if not self._is_variable_resolution_model:
|
||||
modified_out_blobs = torch_wrapped_op(in_blobs)
|
||||
losses[op_name] = criterion(modified_out_blobs, fp_out_blobs)
|
||||
else:
|
||||
for blob_idx, modified_in_blob in enumerate(in_blobs):
|
||||
modified_out_blob = torch_wrapped_op(torch.unsqueeze(modified_in_blob, 0))
|
||||
losses[op_name] += criterion(
|
||||
modified_out_blob, torch.unsqueeze(fp_out_blobs[blob_idx], 0)
|
||||
)
|
||||
|
||||
for name, loss in losses.items():
|
||||
accumulated_losses[name] = loss.data
|
||||
|
||||
if batch_idx == 0 and self._iteration == 0:
|
||||
self._initial_losses = deepcopy(accumulated_losses)
|
||||
self._initial_losses = {
|
||||
name: val + self._safety_eps
|
||||
for name, val in self._initial_losses.items()
|
||||
}
|
||||
|
||||
weighted_loss = 0
|
||||
for op_name in self._layer_ops_wrapped:
|
||||
init_loss = self._initial_losses[op_name]
|
||||
accumulated_loss = accumulated_losses[op_name]
|
||||
weighted_loss += accumulated_loss / init_loss / len(self._initial_losses)
|
||||
|
||||
if batch_idx % self._tconf['loss_logging_freq'] == 0:
|
||||
printable_loss = weighted_loss.to('cpu').numpy()
|
||||
logger.info(
|
||||
'Batch #%s/%s, weighted_loss: %s',
|
||||
batch_idx + 1,
|
||||
n_batches,
|
||||
printable_loss,
|
||||
)
|
||||
|
||||
if self._tconf['calculate_grads_on_loss_increase_only']:
|
||||
if weighted_loss >= self._current_best_loss:
|
||||
self._current_best_loss = weighted_loss
|
||||
self._calculate_gradients(losses)
|
||||
for op_name, optimizer in optimizers.items():
|
||||
optimizer.step()
|
||||
if self._current_best_loss == weighted_loss:
|
||||
optimizer.zero_grad()
|
||||
self._current_best_loss = weighted_loss
|
||||
else:
|
||||
self._calculate_gradients(losses)
|
||||
for op_name, optimizer in optimizers.items():
|
||||
optimizer.step()
|
||||
optimizer.zero_grad()
|
||||
if self._tconf['update_every_batch']:
|
||||
for layer in self._layer_ops_wrapped.values():
|
||||
layer.update_node_params()
|
||||
|
||||
def _activation_maps_to_torch(self, activations):
|
||||
for layer_name in activations:
|
||||
activations[layer_name]['output'] = [
|
||||
torch.tensor(activations[layer_name]['output'][index][0]).to(self._device) for index in
|
||||
range(len(activations[layer_name]['output']))]
|
||||
if len({feature_map.shape for feature_map in activations[layer_name]['output']}) > 1:
|
||||
self._is_variable_resolution_model = True
|
||||
if not self._is_variable_resolution_model:
|
||||
for layer_name in activations:
|
||||
activations[layer_name]['output'] = torch.stack(activations[layer_name]['output'])
|
||||
return activations
|
||||
|
||||
def _get_ranking_subset(self):
|
||||
"""
|
||||
Find a subset of samples with the highest distance between
|
||||
outputs of original and compressed model (a ranking subset)
|
||||
:return: ranking data subset indices
|
||||
"""
|
||||
base_algo = self._base_algo(**self._base_algo_args)
|
||||
base_algo.register_statistics(self._original_model, self.algo_collector)
|
||||
collect_statistics(self._engine, self._original_model, [base_algo])
|
||||
base_model = base_algo.run(deepcopy(self._original_model))
|
||||
output_node_name = nu.get_node_input(self._original_model.get_final_output_nodes()[0], 0).fullname
|
||||
|
||||
stats_layout = {output_node_name: {'output_logits': TensorStatistic(lambda logits: logits)}}
|
||||
metric_subset_size = int(self._dataset_size * self._metric_subset_ratio)
|
||||
diff_subset_indices = (
|
||||
sorted(random.sample(range(self._dataset_size), metric_subset_size))
|
||||
if metric_subset_size < self._dataset_size
|
||||
else list(range(self._dataset_size))
|
||||
)
|
||||
|
||||
_, original_per_sample_metrics = evaluate_model(
|
||||
self._original_model,
|
||||
self._engine,
|
||||
self._dataset_size,
|
||||
subset_indices=diff_subset_indices,
|
||||
metrics_config=self._metrics_config,
|
||||
output_node_name=output_node_name,
|
||||
stats_layout=stats_layout,
|
||||
)
|
||||
_, base_model_per_sample_metrics = evaluate_model(
|
||||
base_model,
|
||||
self._engine,
|
||||
self._dataset_size,
|
||||
subset_indices=diff_subset_indices,
|
||||
metrics_config=self._metrics_config,
|
||||
output_node_name=output_node_name,
|
||||
stats_layout=stats_layout,
|
||||
)
|
||||
|
||||
persample_metric = list(self._metrics_config.values())[0].persample
|
||||
sorted_sample_importance = persample_metric.sort_fn(
|
||||
original_per_sample_metrics[persample_metric.name],
|
||||
base_model_per_sample_metrics[persample_metric.name],
|
||||
reverse=True,
|
||||
)
|
||||
ranking_indices = sorted_sample_importance[: self._ranking_subset_size]
|
||||
ranking_subset = list(np.array(diff_subset_indices)[ranking_indices])
|
||||
return ranking_subset
|
||||
|
||||
def _create_layer_callbacks(self, modified_model):
|
||||
fp_model_callbacks = {}
|
||||
modified_model_callbacks = {}
|
||||
|
||||
for op_name in self._nodes_to_tune:
|
||||
modified_node = mu.get_node_by_name(modified_model, op_name)
|
||||
|
||||
input_node = self._get_input_node(modified_node)
|
||||
output_node = input_node
|
||||
if modified_node.type in self._weighted_operations:
|
||||
bias_node = nu.get_bias_for_node(modified_node)
|
||||
output_node = modified_node
|
||||
if bias_node is not None:
|
||||
output_node = nu.get_node_output(bias_node, 0)[0]
|
||||
input_node_name = self._get_input_node_name(modified_node)
|
||||
|
||||
if self._tconf['use_only_fp_inputs']:
|
||||
fp_model_callbacks[input_node_name] = {'output': lambda tensor: tensor}
|
||||
else:
|
||||
modified_model_callbacks[input_node_name] = {'output': lambda tensor: tensor}
|
||||
fp_model_callbacks[output_node.fullname] = {'output': lambda tensor: tensor}
|
||||
self._nodes_to_tune_input[op_name] = input_node_name
|
||||
self._nodes_to_tune_output[op_name] = output_node.fullname
|
||||
|
||||
return fp_model_callbacks, modified_model_callbacks
|
||||
|
||||
def register_statistics(self, model, stats_collector):
|
||||
self.algo_collector = stats_collector
|
||||
|
||||
def _check_batch_size(self):
|
||||
if self._tconf['batch_size'] > self._dataset_size:
|
||||
logger.debug('Batch size changed from - {} to dataset size - {}.'.format(
|
||||
self._tconf['batch_size'], self._dataset_size))
|
||||
self._tconf['batch_size'] = self._dataset_size
|
||||
|
||||
@staticmethod
|
||||
def set_seed(seed, device):
|
||||
np.random.seed(seed)
|
||||
random.seed(seed)
|
||||
torch.manual_seed(seed)
|
||||
if device != 'cpu':
|
||||
import torch.backends.cudnn as cudnn
|
||||
cudnn.deterministic = True
|
||||
cudnn.benchmark = False
|
||||
|
||||
@staticmethod
|
||||
def _get_input_node(node):
|
||||
return nu.get_node_input(node, 0)
|
||||
|
||||
@staticmethod
|
||||
def _get_input_node_name(node):
|
||||
return nu.get_quantized_input_key(node)
|
@ -1,235 +0,0 @@
|
||||
# Copyright (C) 2020-2022 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from openvino.tools.pot.graph import node_utils as nu
|
||||
from openvino.tools.pot.utils.logger import get_logger
|
||||
from .utils import get_weight_node
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
# pylint: disable=W0221
|
||||
class STERound(torch.autograd.Function):
|
||||
@staticmethod
|
||||
def forward(ctx, input_data, val_min, val_max):
|
||||
ctx.save_for_backward(input_data)
|
||||
ctx.val_min = val_min
|
||||
ctx.val_max = val_max
|
||||
return input_data.round()
|
||||
|
||||
@staticmethod
|
||||
def backward(ctx, grad_output):
|
||||
(input_data,) = ctx.saved_tensors
|
||||
alpha = 0.01
|
||||
mask = (input_data <= ctx.val_max) & (input_data >= ctx.val_min)
|
||||
mask = mask.type(input_data.dtype)
|
||||
grad_input = grad_output * (mask * (1 - alpha) + alpha)
|
||||
return grad_input, None, None
|
||||
|
||||
|
||||
# pylint: disable=E1102,W0223
|
||||
class FakeQuantize(torch.nn.Module):
|
||||
"""
|
||||
A pytorch wrapper for a single FakeQuantize node.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def is_able_to_wrap(node):
|
||||
if node.type != 'FakeQuantize':
|
||||
return False
|
||||
is_const = [
|
||||
node.in_port(i).get_source().node.type == 'Const' for i in range(1, 5)
|
||||
]
|
||||
if not all(is_const):
|
||||
return False
|
||||
data = [node.in_port(i).data.get_value() for i in range(1, 5)]
|
||||
diff = [np.max(np.abs(data[i] - data[i + 2])) for i in [0, 1]]
|
||||
diff = max(diff)
|
||||
if diff > 10 ** -8:
|
||||
logger.info('FakeQuantize {} has different input and output scales'.format(node.name))
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def __init__(self, node, device='cpu', asymmetric=False):
|
||||
super(FakeQuantize, self).__init__()
|
||||
self.node = node
|
||||
self.device = device
|
||||
input_0 = nu.get_node_input(self.node, 0)
|
||||
self.is_weight_fq = input_0.type == 'Const'
|
||||
self.asymmetric = asymmetric
|
||||
|
||||
min_val = nu.get_node_value(nu.get_node_input(self.node, 1))
|
||||
max_val = nu.get_node_value(nu.get_node_input(self.node, 2))
|
||||
min_val = np.array(min_val, dtype=np.float32)
|
||||
self.min = torch.tensor(min_val).to(self.device)
|
||||
self.min = torch.nn.Parameter(self.min) if self.asymmetric else self.min
|
||||
|
||||
ranges = np.array(max_val - min_val, dtype=np.float32)
|
||||
self.scale = torch.tensor(ranges).log()
|
||||
self.scale = self.scale.to(self.device)
|
||||
self.scale = torch.nn.Parameter(self.scale)
|
||||
|
||||
self.val_h = int(self.node.levels - 1)
|
||||
self.val_l = 0
|
||||
|
||||
def update_node_params(self):
|
||||
scale = self.scale.exp()
|
||||
max_level = scale.detach().cpu().numpy()
|
||||
max_level = np.reshape(max_level, nu.get_input_shape(self.node, 2))
|
||||
min_level = self.min.detach().cpu().numpy()
|
||||
min_level = np.reshape(min_level, nu.get_input_shape(self.node, 1))
|
||||
max_level = min_level + max_level
|
||||
|
||||
self.node.in_port(1).data.set_value(min_level)
|
||||
self.node.in_port(2).data.set_value(max_level)
|
||||
self.node.in_port(3).data.set_value(min_level)
|
||||
self.node.in_port(4).data.set_value(max_level)
|
||||
|
||||
def forward(self, x):
|
||||
scale = self.scale.exp()
|
||||
s = self.val_h * scale.reciprocal()
|
||||
x = x - self.min
|
||||
x = x * s
|
||||
x = x.clamp(max=self.val_h, min=self.val_l)
|
||||
x = STERound.apply(x, self.val_l, self.val_h)
|
||||
x = x * s.reciprocal() + self.min
|
||||
return x
|
||||
|
||||
|
||||
# pylint: disable=E1102,W0223
|
||||
class LinearModule(torch.nn.Module):
|
||||
"""
|
||||
A pytorch wrapper for a single Conv2d/Linear node.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def is_able_to_wrap(node):
|
||||
if node.type not in ['Convolution', 'MatMul', 'GroupConvolution']:
|
||||
return False
|
||||
|
||||
node_weight = nu.get_node_input(node, 1)
|
||||
if node_weight.type == 'FakeQuantize':
|
||||
node_weight = nu.get_node_input(node_weight, 0)
|
||||
if node_weight.type != 'Const':
|
||||
return False
|
||||
|
||||
if node.type != 'MatMul':
|
||||
|
||||
weights = nu.get_node_value(node_weight)
|
||||
if len(weights.shape) != 4:
|
||||
return False
|
||||
|
||||
s = node.stride
|
||||
stride_check = (s[2] == s[3])
|
||||
|
||||
d = node.dilation
|
||||
dilation_check = (d[2] == d[3])
|
||||
|
||||
if not dilation_check or not stride_check:
|
||||
return False
|
||||
|
||||
bias_node = nu.get_bias_for_node(node)
|
||||
if bias_node is not None:
|
||||
bias_value = nu.get_node_value(bias_node)
|
||||
if bias_value.shape[0] != 1:
|
||||
return False
|
||||
return True
|
||||
|
||||
def __init__(self,
|
||||
node,
|
||||
input_fq=None,
|
||||
wrap_weight_fq=False,
|
||||
device='cpu',
|
||||
set_quantized_values_to_weight_parameter=False,
|
||||
asymmetric=False):
|
||||
super().__init__()
|
||||
|
||||
self.node = node
|
||||
self.device = device
|
||||
|
||||
self.set_quantized_values_to_weight_parameter = set_quantized_values_to_weight_parameter
|
||||
self.weight_fq, self.input_fq = None, input_fq
|
||||
|
||||
if wrap_weight_fq:
|
||||
weight_fq = nu.get_node_input(self.node, 1)
|
||||
weight_fq_wrapper = FakeQuantize
|
||||
if not weight_fq_wrapper.is_able_to_wrap(weight_fq):
|
||||
logger.warning('Was not able to wrap layer %s with pytorch', weight_fq.name)
|
||||
self.weight_fq = None
|
||||
else:
|
||||
self.weight_fq = weight_fq_wrapper(weight_fq, device=device,
|
||||
asymmetric=asymmetric)
|
||||
|
||||
node_weight = get_weight_node(node)
|
||||
weights = nu.get_node_value(node_weight)
|
||||
self.weights_dtype = weights.dtype
|
||||
weights = torch.from_numpy(weights).to(torch.float32)
|
||||
weights = weights.to(device)
|
||||
self.weights = torch.nn.Parameter(weights)
|
||||
|
||||
self.bias = None
|
||||
bias_node = nu.get_bias_for_node(self.node)
|
||||
if bias_node is not None:
|
||||
bias = nu.get_node_value(bias_node)
|
||||
self.bias_dtype = bias.dtype
|
||||
bias = torch.from_numpy(bias).to(torch.float32).squeeze()
|
||||
bias = bias if bias.shape else bias.reshape(1)
|
||||
bias = bias.to(device)
|
||||
self.bias = torch.nn.Parameter(bias)
|
||||
|
||||
if self.node.type != 'MatMul':
|
||||
self.stride = (int(node.stride[2]), int(node.stride[3]))
|
||||
self.pads_begin, self.pads_end = node.pad[2], node.pad[3]
|
||||
self.dilation = (int(node.dilation[2]), int(node.dilation[3]))
|
||||
self.group = 1 if 'group' not in node else int(node.group)
|
||||
|
||||
def update_node_params(self):
|
||||
weights = self.weights.detach()
|
||||
weights = weights.cpu() if self.device != 'cpu' else weights
|
||||
weights = weights.numpy().astype(self.weights_dtype)
|
||||
weight_node = get_weight_node(self.node)
|
||||
nu.set_node_value(weight_node, weights)
|
||||
|
||||
if self.weight_fq is not None:
|
||||
self.weight_fq.update_node_params()
|
||||
if self.input_fq is not None:
|
||||
self.input_fq.update_node_params()
|
||||
|
||||
if self.bias is not None:
|
||||
bias_node = nu.get_bias_for_node(self.node)
|
||||
bias_shape = nu.get_node_value(bias_node).shape
|
||||
bias = self.bias.data.reshape(bias_shape)
|
||||
|
||||
bias = bias.detach()
|
||||
bias = bias.cpu() if self.device != 'cpu' else bias
|
||||
bias = bias.numpy().astype(self.bias_dtype)
|
||||
nu.set_node_value(bias_node, bias)
|
||||
|
||||
def forward(self, x):
|
||||
w = self.weight_fq(self.weights) if self.weight_fq is not None else self.weights
|
||||
x = self.input_fq(x) if self.input_fq is not None else x
|
||||
if self.set_quantized_values_to_weight_parameter and self.weight_fq is not None:
|
||||
self.weights.data = w
|
||||
|
||||
if self.node.type == 'MatMul':
|
||||
x = torch.nn.functional.linear(x,
|
||||
self.weights,
|
||||
bias=self.bias)
|
||||
else:
|
||||
pad_top, pad_bottom = int(self.pads_begin[0]), int(self.pads_begin[1])
|
||||
pad_left, pad_right = int(self.pads_end[0]), int(self.pads_end[1])
|
||||
x = torch.nn.functional.pad(x, (pad_left, pad_right, pad_top, pad_bottom))
|
||||
x = torch.nn.functional.conv2d(
|
||||
x,
|
||||
self.weights,
|
||||
bias=self.bias,
|
||||
stride=self.stride,
|
||||
dilation=self.dilation,
|
||||
groups=self.group
|
||||
)
|
||||
|
||||
return x
|