Files
openvino/tools/pot/configs/accuracy_aware_quantization_spec.json

205 lines
9.7 KiB
JSON

{
/* Model parameters */
"model": {
"model_name": "model_name", // Model name
"model": "<MODEL_PATH>", // Path to model (.xml format)
"weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
},
/* Parameters of the engine used for model inference */
// Post-Training Optimization Tool supports an engine based on Accuracy Checker and a custom engine.
// For a custom engine, you should specify your own set of parameters.
// The engine based on Accuracy Checker uses Accuracy Checker parameters.
// You can specify the parameters via an Accuracy Checker config file or directly in the engine section.
// More information about accuracy checker parameters can be found here:
// https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker
"engine": {
"stat_requests_number": 8, // Number of requests during statistics collection
"eval_requests_number": 8, // Number of requests during evaluation
"config": "<CONFIG_PATH>",
/* OR */
"name": "model_name",
"launchers": [
{
"framework": "dlsdk",
"device": "CPU",
"adapter": "classification"
}
],
"datasets": [
{
"name": "dataset_name",
"data_source": "<DATASET_PATH>",
"annotation": "<ANNOTATION_PATH>",
"preprocessing": [
{
"type": "resize",
"interpolation": "BILINEAR",
"aspect_ratio_scale": "greater",
"size": 224
}
],
"metrics": [
{
"name": "accuracy@top1",
"type": "accuracy",
"top_k": 1
}
]
}
]
},
/* Optimization hyperparameters */
"compression": {
"target_device": "ANY", // Target device, the specificity of which will be taken
// into account during optimization
"inplace_statistics": true, // An optional parameter that changes how statistics are collected:
// it reduces the amount of memory consumed,
// but increases the calibration time;
// the default value is true
"model_type": "None", // An optional parameter, needed for additional patterns in the model,
// default value is None (only transformer is supported now)
"algorithms": [
{
"name": "AccuracyAwareQuantization", // Optimization algorithm name
"params": {
"ranking_subset_size": 300, // A size of a subset which is used to rank layers by their
// contribution to the accuracy drop
"max_iter_num": 20, // Maximum number of iterations of the algorithm (maximum number of layers
// that may be reverted back to full-precision)
"maximal_drop": 0.005, // Maximum accuracy drop which has to be achieved after the quantization
"drop_type": "absolute", // Drop type of the accuracy metric: relative or absolute (default)
"use_prev_if_drop_increase": false, // Whether to use NN snapshot from the previous algorithm
// iteration in case the drop increases
"base_algorithm": "DefaultQuantization", // Base algorithm that is used to quantize model
// at the beginning
"annotation_free": false, // Whether to compute accuracy drop on a dataset without annotation
"annotation_conf_threshold": 0.6, // Threshold for annotation creation in case of annotation free
// algorithm execution. Images on which original model predicts
// with confidence below this threshold will be skipped during
// evaluation
"convert_to_mixed_preset": false, // Whether to convert the model to mixed mode if
// the accuracy criteria of the symmetrically quantized
// model are not satisfied
// An optional list of metrics that are taken into account during optimization.
// If not specified, all metrics defined in engine config are used
"metrics": [
{
"name": "accuracy", // Metric name to optimize
"baseline_value": 0.72 // Baseline metric value of the original model
}
],
"metric_subset_ratio": 0.5, // A part of the validation set that is used to compare element-wise
// full-precision and quantized models in case of predefined metric
// values of the original model
"tune_hyperparams": false, // Whether to search for the best quantization parameters for the model.
// This algorithm uses a grid search engine based on a special subset of samples from the dataset
"ignored": {
// List of nodes that are excluded from optimization
"scope": [
"<NODE_NAME>"
],
// List of types that are excluded from optimization
"operations": [
{
"type": "<NODE_TYPE>",
// Exclude by type and attributes
"attributes": {
"<NAME>": "<VALUE>" // Lists of values are not supported
}
},
{
"type": "<NODE_TYPE>" // Excluding only by type
}
]
},
"preset": "mixed", // A preset is a collection of optimization algorithm parameters
// that tells the algorithm which metric it should
// concentrate on improving. Each optimization algorithm
// supports [performance, accuracy, mixed] presets
"stat_subset_size": 100, // Size of subset to calculate activations statistics that can be used
// for quantization parameters calculation
/* Manual specification of quantization parameters */
/* Quantization parameters for weights */
"weights": {
"bits": 8, // Number of quantization bits
"mode": "symmetric", // Quantization mode
"granularity": "perchannel", // Granularity: a scale for each output channel
"level_low": -127, // Low quantization level
"level_high": 127, // High quantization level
/* Parameters that specify how to calculate the minimum and maximum of the quantization range */
"range_estimator": {
"max": {
"type": "quantile",
"outlier_prob": 0.0001
}
}
},
/* Quantization parameters for activations */
"activations": {
"bits": 8, // Number of quantization bits
"mode": "asymmetric", // Quantization mode
"granularity": "pertensor", // Granularity: one scale for output tensor
/* Parameters that specify how to calculate the minimum and maximum of the quantization range */
"range_estimator": {
"preset": "quantile",
/* OR */
/* Minimum of quantization range */
"min": {
"aggregator": "mean", // Batch aggregation type [mean, max, min, median,
// mean_no_outliers, median_no_outliers, hl_estimator]
"type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
"outlier_prob": 0.0001 // Outlier probability used by the quantile estimator
},
/* Maximum of quantization range */
"max": {
"aggregator": "mean", // Batch aggregation type [mean, max, min, median,
// mean_no_outliers, median_no_outliers, hl_estimator]
"type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
"outlier_prob": 0.0001 // Outlier probability used by the quantile estimator
}
}
}
}
}
]
}
}