openvino/tools/pot/configs/accuracy_aware_quantization_spec.json

{
    /* Model parameters */

    "model": {
        "model_name": "model_name", // Model name
        "model": "<MODEL_PATH>", // Path to model (.xml format)
        "weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
    },

    /* Parameters of the engine used for model inference */

    // Post-Training Optimization Tool supports engine based on accuracy checker and custom engine.
    // For custom engine you should specify your own set of parameters.
    // The engine based on accuracy checker uses accuracy checker parameters.
    // You can specify the parameters via accuracy checker config file or directly in engine section.
    // More information about accuracy checker parameters can be found here:
    // https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker

    "engine": {
        "stat_requests_number": 8, // Number of requests during statistics collection
        "eval_requests_number": 8, // Number of requests during evaluation
        "config": "<CONFIG_PATH>",

        /* OR */

        "name": "model_name",
        "launchers": [
            {
                "framework": "dlsdk",
                "device": "CPU",
                "adapter": "classification"
            }
        ],
        "datasets": [
            {
                "name": "dataset_name",
                "data_source": "<DATASET_PATH>",
                "annotation": "<ANNOTATION_PATH>",
                "preprocessing": [
                    {
                        "type": "resize",
                        "interpolation": "BILINEAR",
                        "aspect_ratio_scale": "greater",
                        "size": 224
                    }
                ],
                "metrics": [
                    {
                        "name": "accuracy@top1",
                        "type": "accuracy",
                        "top_k": 1
                    }
                ]
            }
        ]
    },

    /* Optimization hyperparameters */

    "compression": {
        "target_device": "ANY", // Target device, the specificity of which will be taken
                                // into account during optimization
        "inplace_statistics": true,   // An optional parameter that changes how statistics are collected:
                                      // it reduces the amount of memory consumed
                                      // but increases the calibration time.
                                      // The default value is true
        "model_type": "None",   // An optional parameter, needed for additional patterns in the model;
                                // the default value is None (only transformer is supported now)
        "algorithms": [
            {
                "name": "AccuracyAwareQuantization", // Optimization algorithm name
                "params": {
                    "ranking_subset_size": 300, // A size of a subset which is used to rank layers by their
                                                // contribution to the accuracy drop

                    "max_iter_num": 20, // Maximum number of iterations of the algorithm (maximum number of
                                        // layers that may be reverted back to full-precision)

                    "maximal_drop": 0.005, // Maximum allowed accuracy drop after quantization

                    "drop_type": "absolute", // Drop type of the accuracy metric: relative or absolute (default)

                    "use_prev_if_drop_increase": false, // Whether to use the NN snapshot from the previous algorithm
                                                        // iteration if the drop increases

                    "base_algorithm": "DefaultQuantization", // Base algorithm that is used to quantize model
                                                             // at the beginning

                    "annotation_free": false, // Whether to compute accuracy drop on a dataset without annotation

                    "annotation_conf_threshold": 0.6, // Threshold for annotation creation in case of annotation free
                                                      // algorithm execution. Images on which original model predicts
                                                      // with confidence below this threshold will be skipped during
                                                      // evaluation

                    "convert_to_mixed_preset": false, // Whether to convert the model to mixed mode if
                                                      // the accuracy criteria of the symmetrically quantized
                                                      // model are not satisfied

                    // An optional list of metrics that are taken into account during optimization.
                    // If not specified, all metrics defined in engine config are used
                    "metrics": [
                        {
                            "name": "accuracy", // Metric name to optimize
                            "baseline_value": 0.72 // Baseline metric value of the original model
                        }
                    ],

                    "metric_subset_ratio": 0.5, // A part of the validation set that is used to compare element-wise
                                                // full-precision and quantized models in case of predefined metric
                                                // values of the original model

                    "tune_hyperparams": false, // Whether to search the best quantization parameters for model.
                                               // This algo uses grid search engine based on a special subset of samples from the dataset

                    "ignored": {
                        // List of nodes that are excluded from optimization
                        "scope": [
                            "<NODE_NAME>"
                        ],
                        // List of types that are excluded from optimization
                        "operations": [
                            {
                                "type": "<NODE_TYPE>",
                                // Optionally, exclude by attributes as well
                                "attributes": {
                                    "<NAME>": "<VALUE>" // Lists of values are not supported
                                }
                            },
                            {
                                "type": "<NODE_TYPE>" // Excluding only by type
                            }
                        ]
                    },

                    "preset": "mixed", // A preset is a collection of optimization algorithm parameters
                                       // that tells the algorithm which metric it needs to concentrate
                                       // on improving. Each optimization algorithm
                                       // supports [performance, accuracy, mixed] presets

                    "stat_subset_size": 100, // Size of subset to calculate activations statistics that can be used
                                             // for quantization parameters calculation

                    /* Manual specification of quantization parameters */

                    /* Quantization parameters for weights */

                    "weights": {
                        "bits": 8, // Number of quantization bits
                        "mode": "symmetric", // Quantization mode
                        "granularity": "perchannel", // Granularity: a scale for each output channel
                        "level_low": -127, // Low quantization level
                        "level_high": 127, // High quantization level

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "max": {
                                "type": "quantile",
                                "outlier_prob": 0.0001
                            }
                        }
                    },

                    /* Quantization parameters for activations */

                    "activations": {
                        "bits": 8, // Number of quantization bits
                        "mode": "asymmetric", // Quantization mode
                        "granularity": "pertensor", // Granularity: one scale for output tensor

                        /* Parameters that specify how to calculate the minimum and maximum of the quantization range */

                        "range_estimator": {
                            "preset": "quantile",

                            /* OR */

                            /* Minimum of quantization range */

                            "min": {
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability: the fraction of samples the estimator treats as outliers
                            },

                            /* Maximum of quantization range */

                            "max": {
                                "aggregator": "mean", // Batch aggregation type [mean, max, min, median,
                                                      // mean_no_outliers, median_no_outliers, hl_estimator]

                                "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
                                "outlier_prob": 0.0001 // Outlier probability: the fraction of samples the estimator treats as outliers
                            }
                        }
                    }
                }
            }
        ]
    }
}