205 lines
9.7 KiB
JSON
205 lines
9.7 KiB
JSON
{
|
|
/* Model parameters */
|
|
|
|
"model": {
|
|
"model_name": "model_name", // Model name
|
|
"model": "<MODEL_PATH>", // Path to model (.xml format)
|
|
"weights": "<PATH_TO_WEIGHTS>" // Path to weights (.bin format)
|
|
},
|
|
|
|
/* Parameters of the engine used for model inference */
|
|
|
|
// Post-Training Optimization Tool supports engine based on accuracy checker and custom engine.
|
|
// For custom engine you should specify your own set of parameters.
|
|
// The engine based on accuracy checker uses accuracy checker parameters.
|
|
// You can specify the parameters via accuracy checker config file or directly in engine section.
|
|
// More information about accuracy checker parameters can be found here:
|
|
// https://github.com/opencv/open_model_zoo/tree/master/tools/accuracy_checker
|
|
|
|
"engine": {
|
|
"stat_requests_number": 8, // Number of requests during statistics collection
|
|
"eval_requests_number": 8, // Number of requests during evaluation
|
|
"config": "<CONFIG_PATH>",
|
|
|
|
/* OR */
|
|
|
|
"name": "model_name",
|
|
"launchers": [
|
|
{
|
|
"framework": "dlsdk",
|
|
"device": "CPU",
|
|
"adapter": "classification"
|
|
}
|
|
],
|
|
"datasets": [
|
|
{
|
|
"name": "dataset_name",
|
|
"data_source": "<DATASET_PATH>",
|
|
"annotation": "<ANNOTATION_PATH>",
|
|
"preprocessing": [
|
|
{
|
|
"type": "resize",
|
|
"interpolation": "BILINEAR",
|
|
"aspect_ratio_scale": "greater",
|
|
"size": 224
|
|
}
|
|
],
|
|
"metrics": [
|
|
{
|
|
"name": "accuracy@top1",
|
|
"type": "accuracy",
|
|
"top_k": 1
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
|
|
/* Optimization hyperparameters */
|
|
|
|
"compression": {
|
|
"target_device": "ANY", // Target device, the specificity of which will be taken
|
|
// into account during optimization
|
|
"inplace_statistics": true, // An optional parameter that changes how statistics are collected:
|
|
// it reduces the amount of memory consumed
|
|
// but increases the calibration time.
|
|
// The default value is true
|
|
"model_type": "None", // An optional parameter, needed for additional patterns in the model;
|
|
// the default value is None (only transformer is currently supported)
|
|
"algorithms": [
|
|
{
|
|
"name": "AccuracyAwareQuantization", // Optimization algorithm name
|
|
"params": {
|
|
"ranking_subset_size": 300, // A size of a subset which is used to rank layers by their
|
|
// contribution to the accuracy drop
|
|
|
|
"max_iter_num": 20, // Maximum number of iterations of the algorithm (maximum of layers
|
|
// that may be reverted back to full-precision)
|
|
|
|
"maximal_drop": 0.005, // Maximum accuracy drop which has to be achieved after the quantization
|
|
|
|
"drop_type": "absolute", // Drop type of the accuracy metric: relative or absolute (default)
|
|
|
|
"use_prev_if_drop_increase": false, // Whether to use the NN snapshot from the previous algorithm
|
|
// iteration in case the drop increases
|
|
|
|
"base_algorithm": "DefaultQuantization", // Base algorithm that is used to quantize model
|
|
// at the beginning
|
|
|
|
"annotation_free": false, // Whether to compute accuracy drop on a dataset without annotation
|
|
|
|
"annotation_conf_threshold": 0.6, // Threshold for annotation creation in case of annotation free
|
|
// algorithm execution. Images on which original model predicts
|
|
// with confidence below this threshold will be skipped during
|
|
// evaluation
|
|
|
|
"convert_to_mixed_preset": false, // Whether to convert the model to mixed mode if
|
|
// the accuracy criteria of the symmetrically quantized
|
|
// model are not satisfied
|
|
|
|
// An optional list of metrics that are taken into account during optimization.
|
|
// If not specified, all metrics defined in engine config are used
|
|
"metrics": [
|
|
{
|
|
"name": "accuracy", // Metric name to optimize
|
|
"baseline_value": 0.72 // Baseline metric value of the original model
|
|
}
|
|
],
|
|
|
|
"metric_subset_ratio": 0.5, // A part of the validation set that is used to compare element-wise
|
|
// full-precision and quantized models in case of predefined metric
|
|
// values of the original model
|
|
|
|
"tune_hyperparams": false, // Whether to search the best quantization parameters for model.
|
|
// This algo uses grid search engine based on a special subset of samples from the dataset
|
|
|
|
"ignored": {
|
|
// List of nodes that are excluded from optimization
|
|
"scope": [
|
|
"<NODE_NAME>"
|
|
],
|
|
// List of types that are excluded from optimization
|
|
"operations": [
|
|
{
|
|
"type": "<NODE_TYPE>",
|
|
// Includes excluding by attributes
|
|
"attributes": {
|
|
"<NAME>": "<VALUE>" // Lists of values are not supported
|
|
}
|
|
},
|
|
{
|
|
"type": "<NODE_TYPE>" // Excluding only by type
|
|
}
|
|
]
|
|
},
|
|
|
|
"preset": "mixed", // A preset is a collection of optimization algorithm parameters
|
|
// that tells the algorithm which metric it needs to
|
|
// concentrate on improving. Each optimization algorithm
|
|
// supports [performance, accuracy, mixed] presets
|
|
|
|
"stat_subset_size": 100, // Size of subset to calculate activations statistics that can be used
|
|
// for quantization parameters calculation
|
|
|
|
/* Manual specification of quantization parameters */
|
|
|
|
/* Quantization parameters for weights */
|
|
|
|
"weights": {
|
|
"bits": 8, // Number of quantization bits
|
|
"mode": "symmetric", // Quantization mode
|
|
"granularity": "perchannel", // Granularity: a scale for each output channel
|
|
"level_low": -127, // Low quantization level
|
|
"level_high": 127, // High quantization level
|
|
|
|
/* Parameters that specify how to calculate the minimum and maximum of the quantization range */
|
|
|
|
"range_estimator": {
|
|
"max": {
|
|
"type": "quantile",
|
|
"outlier_prob": 0.0001
|
|
}
|
|
}
|
|
},
|
|
|
|
/* Quantization parameters for activations */
|
|
|
|
"activations": {
|
|
"bits": 8, // Number of quantization bits
|
|
"mode": "asymmetric", // Quantization mode
|
|
"granularity": "pertensor", // Granularity: one scale for output tensor
|
|
|
|
/* Parameters that specify how to calculate the minimum and maximum of the quantization range */
|
|
|
|
"range_estimator": {
|
|
"preset": "quantile",
|
|
|
|
/* OR */
|
|
|
|
/* Minimum of quantization range */
|
|
|
|
"min": {
|
|
"aggregator": "mean", // Batch aggregation type [mean, max, min, median,
|
|
// mean_no_outliers, median_no_outliers, hl_estimator]
|
|
|
|
"type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
|
|
"outlier_prob": 0.0001 // Outlier probability used by the quantile estimator
|
|
},
|
|
|
|
/* Maximum of quantization range */
|
|
|
|
"max": {
|
|
"aggregator": "mean", // Batch aggregation type [mean, max, min, median,
|
|
// mean_no_outliers, median_no_outliers, hl_estimator]
|
|
|
|
"type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile]
|
|
"outlier_prob": 0.0001 // Outlier probability used by the quantile estimator
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
]
|
|
}
|
|
}
|