diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
index ecfff329eb3..81c25e7f28b 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp
@@ -80,28 +80,17 @@ public:
             return -8.f;
         case element::i8:
             switch (levels) {
+                case 16:
+                    return -8.f;
                 case 255:
                     return -127.f;
-                case 256:
+                default:
                     return -128.f;
             }
-            break;
         case element::i16:
-            switch (levels) {
-                case 65536:
-                    return -32768.f;
-                case 65535:
-                    return -32767.f;
-            }
-            break;
+            return levels == 65535 ? -32767.f : -32768.f;
         case element::i32:
-            switch (levels) {
-                case static_cast<size_t>(4294967296):
-                    return -2147483648.f;
-                case 4294967295:
-                    return -2147483647.f;
-            }
-            break;
+            return -2147483647.f; // -2147483647.f == -2147483648.f
         case element::f16:
             return -1.0e15f;
         case element::f32:
@@ -117,19 +106,29 @@ public:
         case element::u4:
             return 15.f;
         case element::u8:
-            return 255.f;
+            switch (levels) {
+                case 16:
+                    return 15.f;
+                default:
+                    return 255.f;
+            }
         case element::u16:
             return 65535.f;
         case element::u32:
-            return 4294967296.f;
+            return 4294967296.f; // 4294967296.f == 4294967295.f
         case element::i4:
             return 7.f;
         case element::i8:
-            return 127.f;
+            switch (levels) {
+                case 16:
+                    return 7.f;
+                default:
+                    return 127.f;
+            }
         case element::i16:
             return 32767.f;
         case element::i32:
-            return 2147483647.f;
+            return 2147483648.f; // 2147483648.f == 2147483647.f
         case element::f16:
             return 1.0e15f;
         case element::f32:
@@ -145,6 +144,10 @@ public:
             return 254.f;
         } else if (maxLevelsForPrecision == 256ul) {
             return 255.f;
+        } else if (maxLevelsForPrecision == 16ul) {
+            return 15.f;
+        } else if (maxLevelsForPrecision == 15ul) {
+            return 14.f;
         } else {
             THROW_TRANSFORMATION_EXCEPTION << "unexpected quantization level " << maxLevelsForPrecision;
         }
diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp
index a4b6c52e949..d13d3b54f2e 100644
--- a/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp
@@ -29,6 +29,7 @@ FakeQuantizeDecompositionTransformation::FakeQuantizeDecompositionTransformation
         if (transformation_callback(op)) {
             return false;
         }
+
         return transform(*context, m);
     };
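Note on the layer_transformation.hpp hunks above: 16-level (int4) quantization is now resolved inside the existing int8/uint8 branches rather than rejected. A minimal standalone sketch of that lookup, with illustrative function names (not the OpenVINO API):

```cpp
#include <cstddef>
#include <iostream>

// Sketch only: mirrors the i8/u8 branches touched by the patch above.
float getMinValueI8(const std::size_t levels) {
    switch (levels) {
        case 16:  return -8.f;    // int4 range carried in int8
        case 255: return -127.f;
        default:  return -128.f;
    }
}

float getMaxValueU8(const std::size_t levels) {
    return levels == 16 ? 15.f : 255.f;  // uint4 range carried in uint8
}

int main() {
    std::cout << getMinValueI8(16) << ' ' << getMaxValueU8(16) << '\n';   // -8 15
    std::cout << getMinValueI8(256) << ' ' << getMaxValueU8(256) << '\n'; // -128 255
}
```

The new inline comments for the i32/u32 cases document a float-precision detail: the exact integer bounds are not representable in float, so adjacent literals such as 2147483647.f and 2147483648.f collapse to the same value.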
diff --git a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
index 0d9508ca23d..028e5ee4cbc 100644
--- a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp
@@ -226,8 +226,9 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
                 hasNegative = true;

                 if (outputHighValues[i] != 0.f) {
-                    const float expectedRatio = (quantizationLevels == 256 || quantizationLevels == 65536 || quantizationLevels == 4294967296) ?
-                        asymmetricIntervalSideRatio : -1.f;
+                    const float expectedRatio =
+                        (quantizationLevels == 16 || quantizationLevels == 256 ||
+                        quantizationLevels == 65536 || quantizationLevels == 4294967296) ? asymmetricIntervalSideRatio : -1.f;
                     const float actualRatio = outputLowValues[i] / outputHighValues[i];
                     const float actual = std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio));
                     if (actual > quantizationIntervalAsymmetryThreshold) {
@@ -273,6 +274,7 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
         switch (quantizationLevels) {
             case 256:
             case 255:
+            case 16:
                 resultPrecision = element::i8;
                 break;
             case 65536:
@@ -290,6 +292,7 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(
         switch (quantizationLevels) {
             case 256:
             case 255:
+            case 16:
                 resultPrecision = element::u8;
                 break;
             case 65536:
diff --git a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
index 6ea8d159cd8..ee5ed04ad26 100644
--- a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
+++ b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp
@@ -162,7 +162,7 @@ bool QuantizationDetails::empty() const noexcept {
 }

 bool QuantizationDetails::isSupportedLevel(const size_t level) {
-    static const std::unordered_set<size_t> supported_levels = { 255, 256, 65536, 65535, static_cast<size_t>(4294967296), 4294967295 };
+    static const std::unordered_set<size_t> supported_levels = { 16, 255, 256, 65536, 65535, static_cast<size_t>(4294967296), 4294967295 };
     return supported_levels.find(level) != supported_levels.end();
 }

diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp
index cb8b650bac8..7beb4722459 100644
--- a/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp
+++ b/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp
@@ -166,25 +166,26 @@ void VariantWrapper<IntervalsAlignmentAttributePtr>::merge(
         const auto size = std::abs(sharedValue->minInterval.high - sharedValue->minInterval.low);
         if (resultSize > size) {
             resultSharedValue->minInterval = sharedValue->minInterval;

-            float dequantizationMul;
-            float dequantizationSub;
-            float updatedOutputLowValue;
-            float updatedOutputHighValue;
+            if (resultAttribute->levels != 0ul) {
+                float dequantizationMul;
+                float dequantizationSub;
+                float updatedOutputLowValue;
+                float updatedOutputHighValue;

-            const size_t minLevels = NetworkHelper::calculateLevels(
-                0.f,
-                DataPrecision::getMaxValue(resultAttribute->levels),
-                resultSharedValue->combinedInterval.low,
-                resultSharedValue->combinedInterval.high,
-                resultSharedValue->minInterval.low,
-                resultSharedValue->minInterval.high,
-                dequantizationMul,
-                dequantizationSub,
-                updatedOutputLowValue,
-                updatedOutputHighValue);
+                const size_t minLevels = NetworkHelper::calculateLevels(
+                    0.f,
+                    DataPrecision::getMaxValue(resultAttribute->levels),
+                    resultSharedValue->combinedInterval.low,
+                    resultSharedValue->combinedInterval.high,
+                    resultSharedValue->minInterval.low,
+                    resultSharedValue->minInterval.high,
+                    dequantizationMul,
+                    dequantizationSub,
+                    updatedOutputLowValue,
+                    updatedOutputHighValue);

-            resultSharedValue->minLevels = minLevels;
+                resultSharedValue->minLevels = minLevels;
+            }
 #ifdef LPT_DEBUG
             resultSharedValue->minLevelsOperation = sharedValue->minLevelsOperation;
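Two gates in the core sources decide whether a FakeQuantize participates in low precision: the supported-level allowlist in quantization_details.cpp and the interval-asymmetry check in layer_transformation.cpp, both extended above for 16 levels. A toy reimplementation of the two checks (function names and the threshold constant are placeholders, not the real parameters; expectedRatio is whatever the code above selects, the call here just exercises the formula with an int4-style interval):

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <unordered_set>

// Mirrors the extended allowlist in QuantizationDetails::isSupportedLevel.
bool isSupportedLevel(const std::size_t level) {
    static const std::unordered_set<std::size_t> supported = {
        16, 255, 256, 65535, 65536, static_cast<std::size_t>(4294967296), 4294967295 };
    return supported.count(level) != 0;
}

// The deviation formula from the getPrecisionDetails hunk, applied to one
// output interval.
bool withinAsymmetryThreshold(float low, float high, float expectedRatio, float threshold) {
    const float actualRatio = low / high;
    const float deviation = std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio));
    return deviation <= threshold;
}

int main() {
    std::cout << isSupportedLevel(16) << isSupportedLevel(14) << '\n';            // 10
    std::cout << withinAsymmetryThreshold(-8.f, 7.f, -8.f / 7.f, 0.002f) << '\n'; // 1
}
```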
diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp
index 64e71691075..3483d37fed1 100644
--- a/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp
+++ b/inference-engine/tests/functional/inference_engine/lp_transformations/concat_transformation.cpp
@@ -967,7 +967,7 @@ const std::vector<ConcatTransformationTestValues> testValues = {
             { {element::f32}, {}, { 0.01f } },
         }
     },
-    // unexpected quantization levels, concat
+    // INT4+INT8 quantization levels, concat
     {
         LayerTransformation::createParamsU8I8(),
         false,
@@ -990,16 +990,16 @@ const std::vector<ConcatTransformationTestValues> testValues = {
             ngraph::element::f32,
             {},
         },
-        false,
+        true,
         false,
     },
-    // unexpected quantization levels, concat multi channels
+    // INT4+INT8 quantization levels, concat multi channels
     {
         LayerTransformation::createParamsU8I8(),
         true,
         1,
         {
-            { 16ul, {}, {0.f}, {1.5f}, {0.f}, {15.f} },
+            { 16ul, {}, {0.f}, {1.5f}, {0.f}, {1.5f} },
             {},
             {},
             { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} },
@@ -1007,16 +1007,16 @@ const std::vector<ConcatTransformationTestValues> testValues = {
             {}
         },
         {
-            { 16ul, {}, {0.f}, {1.5f}, {0.f}, {15.f} },
+            {16ul, {}, {0.f}, {1.5f}, {0.f}, {15.f}, ngraph::element::u8},
             {},
             {},
-            { 256ul, {}, {0.f}, {2.55f}, {0.f}, {2.55f} },
+            {256ul, {}, {0.f}, {2.55f}, {0.f}, {255.f}, ngraph::element::u8},
             {},
             {},
-            ngraph::element::f32,
-            {},
+            ngraph::element::u8,
+            { ngraph::element::f32, {}, {{ 0.1f, 0.1f, 0.1f, 0.01f, 0.01f, 0.01f }} }
         },
-        false,
+        true,
         false
     }
 };
diff --git a/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_transformation.cpp b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_transformation.cpp
index 3ef8abe07f5..f4e808bfd53 100644
--- a/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_transformation.cpp
+++ b/inference-engine/tests/functional/inference_engine/lp_transformations/fake_quantize_transformation.cpp
@@ -322,6 +322,28 @@ const std::vector<FakeQuantizeTransformationTestValues> fakeQuantizeTransformati
 //            { ngraph::element::f16, { {ngraph::element::f16}, {}, { {0.01f, 0.1f, 1.f} }} }
 //        }
 //    },
+    // u4 through u8
+    {
+        LayerTransformation::createParamsU8I8(),
+        { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } },
+        { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 15.f } },
+        ngraph::element::u8,
+        {
+            { ngraph::element::f32, { {ngraph::element::f32}, {}, { 0.1f }} },
+            { ngraph::element::f16, { {ngraph::element::f16}, {}, { 0.1f }} }
+        }
+    },
+    // i4 through i8
+    {
+        LayerTransformation::createParamsI8I8(),
+        { 16ul, {}, { -0.8f }, { 0.7f }, { -0.8f }, { 0.7f } },
+        { 16ul, {}, { -0.8f }, { 0.7f }, { -8.f }, { 7.f } },
+        ngraph::element::i8,
+        {
+            { ngraph::element::f32, {{ngraph::element::f32}, { }, { 0.1f }} },
+            { ngraph::element::f16, {{ngraph::element::f16}, { }, { 0.1f }} }
+        }
+    },
 };

 INSTANTIATE_TEST_SUITE_P(
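The new "u4 through u8" case above is plain interval arithmetic: FakeQuantize(levels=16, [0, 1.5] -> [0, 1.5]) decomposes into a quantized FQ onto [0, 15] carried on element::u8 plus a dequantizing Multiply by 0.1f, since 1.5f / 15 == 0.1f. A self-contained sketch of that arithmetic (toy code, not the transformation itself):

```cpp
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iostream>

int main() {
    const float inLow = 0.f, inHigh = 1.5f;
    const std::size_t levels = 16;
    const float scale = (inHigh - inLow) / static_cast<float>(levels - 1);  // 0.1f

    const float x = 0.74f;
    // Quantize to one of the 16 integer levels, stored in uint8.
    const std::uint8_t q = static_cast<std::uint8_t>(std::round((x - inLow) / scale));  // 7
    // Dequantize with the Multiply constant produced by the decomposition.
    const float y = static_cast<float>(q) * scale;                                      // ~0.7f
    std::cout << static_cast<int>(q) << ' ' << y << '\n';
}
```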
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp
index ffd5c2b1d43..818ebe8ec32 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/concat_transformation.cpp
@@ -38,10 +38,20 @@ const std::vector<ConcatTransformationTestValues> testValues = {
         { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }
     },
     // FQ with unexpected quantizationLevels
+    {
+        { 14ul, ngraph::Shape({}), {0.f}, {15.f}, {0.f}, {1.5f} },
+        { 14ul, ngraph::Shape({}), {0.f}, {15.f}, {0.f}, {1.5f} }
+    },
+    // FQ with INT4 quantizationLevels
     {
         { 16ul, ngraph::Shape({}), {0.f}, {15.f}, {0.f}, {1.5f} },
         { 16ul, ngraph::Shape({}), {0.f}, {15.f}, {0.f}, {1.5f} }
     },
+    // FQ with INT4+INT8 quantizationLevels
+    {
+        { 16ul, ngraph::Shape({}), {0.f}, {15.f}, {0.f}, {1.5f} },
+        { 256ul, ngraph::Shape({}), {0.f}, {2.55f}, {0.f}, {2.55f} }
+    },
 };

 const std::vector<ngraph::PartialShape> shapes = {
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp
index 5bb88d768e4..b2d223c8f31 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp
@@ -54,15 +54,15 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
         "U8"
     },
     {
-        { 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
+        { 14ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
         false,
-        { 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
+        { 14ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
         false,
         "Convolution",
         "FP32"
     },
     {
-        { 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
+        { 14ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
         false,
         { 255ul, ngraph::Shape { 1, 1, 1, 1 }, { -12.7f }, { 12.7f }, { -12.7f }, { 12.7f } },
         false,
@@ -72,7 +72,7 @@ const std::vector<LayerTestsDefinitions::ConvolutionTransformationParam> params
     {
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 25.5f } },
         false,
-        { 16ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
+        { 14ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 254.f }, { -12.7f }, { 12.7f } },
         false,
         "Convolution",
         "FP32"
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp
index 4ed701efeca..5b1166b1577 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp
@@ -46,14 +46,13 @@ const std::vector<FakeQuantizeTransformationParam> fakeQuantizeOnDataValues = {
         { 256ul, {}, { -127.5f }, { 0.f }, { -127.5f }, { 0.f } },
         "Pooling", "U8"
     },
-    // INT4 FQ's are not transformed and inferred via FP32
     {
         { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } },
-        "Pooling", "FP32"
+        "Pooling", "U8"
     },
     {
-        { 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } },
-        "Pooling", "FP32"
+        { 16ul, {}, { -0.8f }, { 0.7f }, { -0.8f }, { 0.7f } },
+        "Pooling", "I8"
     },
     // INT16, INT32 FQ's are transformed, but updatePrecision = false for inference on CPU Plugin and inferred via FP32
     {
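The flipped CPU expectations above follow the same mapping for the signed case: a 16-level FQ over [-0.8f, 0.7f] now lands on the int4 value range [-8, 7] carried in int8, so the following Pooling is expected to execute as "I8", while the 14-level variants stay "FP32" because 14 is still not in the supported-level set. A hedged sketch of the signed affine mapping (illustrative only):

```cpp
#include <cmath>
#include <cstdint>
#include <iostream>

int main() {
    const float outLow = -0.8f, outHigh = 0.7f;
    const float intLow = -8.f, intHigh = 7.f;  // int4 value range inside int8
    const float scale = (outHigh - outLow) / (intHigh - intLow);  // 0.1f

    for (const float x : { -0.8f, 0.f, 0.7f }) {
        const std::int8_t q = static_cast<std::int8_t>(std::round((x - outLow) / scale + intLow));
        std::cout << x << " -> " << static_cast<int>(q) << '\n';  // -8, 0, 7
    }
}
```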
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp
index 5d07fdf8d34..f3d90f0ea72 100644
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/fake_quantize_transformation.cpp
@@ -48,11 +48,11 @@ const std::vector<FakeQuantizeTransformationParam> fakeQuantizeOnDataValues = {
     },
     {
         { 16ul, {}, { 0.f }, { 1.5f }, { 0.f }, { 1.5f } },
-        "Pooling", "FP32"
+        "Pooling", "U8"
     },
     {
         { 16ul, {}, { -8.f }, { 7.f }, { -0.8f }, { 0.7f } },
-        "Pooling", "FP32"
+        "Pooling", "I8"
     },
     // nGraph: I8->FP32 Convert is not supported
     // { 256ul, {}, { -1.28f} , { 1.27f }, { -1.28f} , { 1.27f } },
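The GPU expectations change for the same reason as the CPU ones. One way to see why the Pooling itself may keep the quantized precision: per-tensor dequantization is a positive scale, and max-pooling commutes with it, so the transformation can leave the dequantizing Multiply after the pooling. A toy demonstration (assumed scale value, not plugin code):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
    const std::uint8_t q[4] = { 3, 15, 7, 0 };  // quantized activations (uint4 stored in u8)
    const float scale = 0.1f;

    // FP32 reference: dequantize first, then max-pool.
    float ref = 0.f;
    for (const std::uint8_t v : q) ref = std::max(ref, static_cast<float>(v) * scale);

    // Quantized execution: pool in integers, dequantize once afterwards.
    std::uint8_t m = 0;
    for (const std::uint8_t v : q) m = std::max(m, v);

    std::cout << ref << " == " << static_cast<float>(m) * scale << '\n';  // 1.5 == 1.5
}
```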