From e170785c1fc9bc957d30a5dbd3a874d4e0d77b57 Mon Sep 17 00:00:00 2001 From: Nadezhda Ageeva Date: Fri, 16 Apr 2021 11:36:23 +0300 Subject: [PATCH] Change FW levels type from int to size_t (#5218) --- .../src/gna_plugin/backend/dnn_types.h | 2 +- .../layers/gna_fake_quantize_layer.hpp | 4 +- .../src/legacy_api/include/legacy/ie_layers.h | 19 +++++- .../legacy_api/src/ie_layer_validators.cpp | 2 +- .../src/legacy_api/src/ie_layers.cpp | 50 +++++++++++--- .../nodes/mkldnn_quantize_node.h | 2 +- .../subgraph_tests/quantized_mat_mul.cpp | 65 ++++++++++++++++-- .../subgraph/quantized_mat_mul.hpp | 5 ++ .../src/subgraph/quantized_mat_mul.cpp | 66 +++++++++++++------ ngraph/test/type_prop/fake_quantize.cpp | 8 +-- 10 files changed, 176 insertions(+), 47 deletions(-) diff --git a/inference-engine/src/gna_plugin/backend/dnn_types.h b/inference-engine/src/gna_plugin/backend/dnn_types.h index 403799f4b1b..02e009d7841 100644 --- a/inference-engine/src/gna_plugin/backend/dnn_types.h +++ b/inference-engine/src/gna_plugin/backend/dnn_types.h @@ -36,7 +36,7 @@ enum DnnActivationType : uint8_t { struct FakeQuantizeParams { int8_t set; - int32_t levels; + size_t levels; // if input is per-channel quantization - input pointers contains per-channel ranges int8_t inputPerChannel; float* input_low; diff --git a/inference-engine/src/gna_plugin/layers/gna_fake_quantize_layer.hpp b/inference-engine/src/gna_plugin/layers/gna_fake_quantize_layer.hpp index 722fbe863e6..cd44e8756b9 100644 --- a/inference-engine/src/gna_plugin/layers/gna_fake_quantize_layer.hpp +++ b/inference-engine/src/gna_plugin/layers/gna_fake_quantize_layer.hpp @@ -28,7 +28,7 @@ class GNAFakeQuantizeLayer { DnnActivation parseAsActivation() const { DnnActivation fqActivation; - fqActivation.fqParams.levels = fqLayer->GetParamAsInt("levels"); + fqActivation.fqParams.levels = fqLayer->GetParamAsSizeT("levels"); auto inputShape = getShapeForRange(fqLayer, 1); auto outputShape = getShapeForRange(fqLayer, 3); @@ -64,7 +64,7 @@ class GNAFakeQuantizeLayer { } int32_t getLevels() { - return fqLayer->GetParamAsInt("levels"); + return fqLayer->GetParamAsSizeT("levels"); } std::pair, std::vector> getInputRange() { diff --git a/inference-engine/src/legacy_api/include/legacy/ie_layers.h b/inference-engine/src/legacy_api/include/legacy/ie_layers.h index b1c036278e4..f1ce4598e85 100644 --- a/inference-engine/src/legacy_api/include/legacy/ie_layers.h +++ b/inference-engine/src/legacy_api/include/legacy/ie_layers.h @@ -300,6 +300,23 @@ public: */ unsigned int GetParamAsUInt(const char* param) const; + /** + * @brief Returns an size_t value for the given parameter or returns the default value + * + * @param param Name of the layer parameter + * @param def Default value of the parameter if not found + * @return An size_t value for the specified parameter + */ + size_t GetParamAsSizeT(const char* param, size_t def) const; + + /** + * @brief Returns an size_t value for the given parameter + * + * @param param Name of the layer parameter + * @return An size_t value for the specified parameter + */ + size_t GetParamAsSizeT(const char* param) const; + /** * @brief Returns a vector of unsigned int values for the given parameter or returns the default value * @@ -1953,7 +1970,7 @@ public: /** * @brief The number of quantization levels */ - int levels = 1; + size_t levels = 1; /** * @brief Creates a new QuantizeLayer instance. diff --git a/inference-engine/src/legacy_api/src/ie_layer_validators.cpp b/inference-engine/src/legacy_api/src/ie_layer_validators.cpp index c4c7d55b566..a1fcada985b 100644 --- a/inference-engine/src/legacy_api/src/ie_layer_validators.cpp +++ b/inference-engine/src/legacy_api/src/ie_layer_validators.cpp @@ -1033,7 +1033,7 @@ void QuantizeValidator::parseParams(CNNLayer* layer) { IE_THROW() << "Layer is not instance of QuantizeLayer class"; } - casted->levels = casted->GetParamAsInt("levels", 1); + casted->levels = casted->GetParamAsSizeT("levels", 1); if (casted->levels <= 1) { IE_THROW() << layer->name << ": Incorrect value for parameter levels = " << casted->levels diff --git a/inference-engine/src/legacy_api/src/ie_layers.cpp b/inference-engine/src/legacy_api/src/ie_layers.cpp index 35dc24d2b1c..1649aaea12b 100644 --- a/inference-engine/src/legacy_api/src/ie_layers.cpp +++ b/inference-engine/src/legacy_api/src/ie_layers.cpp @@ -178,10 +178,10 @@ std::vector CNNLayer::GetParamAsInts(const char* param) const { unsigned int CNNLayer::GetParamAsUInt(const char* param, unsigned int def) const { std::string val = GetParamAsString(param, std::to_string(def).c_str()); std::string message = "Cannot parse parameter " + std::string(param) + " from IR for layer " + name + - ". Value " + val + " cannot be casted to int."; + ". Value " + val + " cannot be casted to unsigned int."; try { - int value = std::stoi(val); - if (value < 0) { + long value = std::stol(val); + if ((value < 0) || (value > std::numeric_limits::max())) { IE_THROW() << message; } return static_cast(value); @@ -195,8 +195,8 @@ unsigned int CNNLayer::GetParamAsUInt(const char* param) const { std::string message = "Cannot parse parameter " + std::string(param) + " from IR for layer " + name + ". Value " + val + " cannot be casted to unsigned int."; try { - int value = std::stoi(val); - if (value < 0) { + long value = std::stol(val); + if ((value < 0) || (value > std::numeric_limits::max())) { IE_THROW() << message; } return static_cast(value); @@ -215,8 +215,8 @@ std::vector CNNLayer::GetParamAsUInts(const char* param, std::vect if (vals.empty()) return def; while (getline(stream, str, ',')) { try { - int value = std::stoi(str); - if (value < 0) { + long value = std::stol(str); + if ((value < 0) || (value > std::numeric_limits::max())) { IE_THROW() << message; } result.push_back(static_cast(value)); @@ -233,11 +233,11 @@ std::vector CNNLayer::GetParamAsUInts(const char* param) const { std::istringstream stream(vals); std::string str; std::string message = "Cannot parse parameter " + std::string(param) + " " + str + " from IR for layer " + - name + ". Value " + vals + " cannot be casted to int."; + name + ". Value " + vals + " cannot be casted to unsigned int."; while (getline(stream, str, ',')) { try { - int value = std::stoi(str); - if (value < 0) { + long value = std::stol(str); + if ((value < 0) || (value > std::numeric_limits::max())) { IE_THROW() << message; } result.push_back(static_cast(value)); @@ -248,6 +248,36 @@ std::vector CNNLayer::GetParamAsUInts(const char* param) const { return result; } +size_t CNNLayer::GetParamAsSizeT(const char* param, size_t def) const { + std::string val = GetParamAsString(param, std::to_string(def).c_str()); + std::string message = "Cannot parse parameter " + std::string(param) + " from IR for layer " + name + + ". Value " + val + " cannot be casted to size_t."; + try { + long long value = std::stoll(val); + if ((value < 0) || (static_cast(value) > std::numeric_limits::max())) { + IE_THROW() << message; + } + return static_cast(value); + } catch (...) { + IE_THROW() << message; + } +} + +size_t CNNLayer::GetParamAsSizeT(const char* param) const { + std::string val = GetParamAsString(param); + std::string message = "Cannot parse parameter " + std::string(param) + " from IR for layer " + name + + ". Value " + val + " cannot be casted to size_t."; + try { + long long value = std::stoll(val); + if ((value < 0) || (static_cast(value) > std::numeric_limits::max())) { + IE_THROW() << message; + } + return static_cast(value); + } catch (...) { + IE_THROW() << message; + } +} + bool CNNLayer::GetParamAsBool(const char* param, bool def) const { std::string val = GetParamAsString(param, std::to_string(def).c_str()); std::string loweredCaseValue; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h index 234fd103d8a..84c9884a82c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h @@ -122,7 +122,7 @@ private: void executeBinarization(); void executeQuantization(); - int levels = -1; + size_t levels = 0; std::vector binarizationThresholds; std::vector binarizationOutputMask; diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/quantized_mat_mul.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/quantized_mat_mul.cpp index 9e74d2a0384..3af9da8f49c 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/quantized_mat_mul.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/quantized_mat_mul.cpp @@ -3,6 +3,7 @@ // #include +#include #include "subgraph_tests/quantized_mat_mul.hpp" @@ -23,24 +24,63 @@ const std::vector> shapesB = { {1, 4, 6, 4} }; -const std::vector levels = {256}; +const std::vector ranges_i8 = { + { -127, 128 } +}; + +const std::vector ranges_u8 = { + { 0, 255 } +}; + +const std::vector ranges_i16 = { + { -32768, 32767 } +}; + +const std::vector ranges_i32 = { + { INT32_MIN, INT32_MAX } +}; + +const std::vector levels_8 = {256}; +const std::vector levels_16 = {65536}; +const std::vector levels_32 = {4294967296}; const std::vector granularity = {Pertensor}; -const auto quantParams_i8i8 = ::testing::Combine( - ::testing::ValuesIn(levels), +const auto quantParams_i8 = ::testing::Combine( + ::testing::ValuesIn(levels_8), + ::testing::ValuesIn(ranges_u8), + ::testing::ValuesIn(ranges_i8), ::testing::ValuesIn(granularity), ::testing::Values(InferenceEngine::Precision::I8) ); -const auto quantParams_u8i8 = ::testing::Combine( - ::testing::ValuesIn(levels), +const auto quantParams_u8 = ::testing::Combine( + ::testing::ValuesIn(levels_8), + ::testing::ValuesIn(ranges_u8), + ::testing::ValuesIn(ranges_u8), ::testing::ValuesIn(granularity), ::testing::Values(InferenceEngine::Precision::U8) ); +const auto quantParams_i16 = ::testing::Combine( + ::testing::ValuesIn(levels_16), + ::testing::ValuesIn(ranges_i32), + ::testing::ValuesIn(ranges_i16), + ::testing::ValuesIn(granularity), + ::testing::Values(InferenceEngine::Precision::I16) +); + +const auto quantParams_i32 = ::testing::Combine( + ::testing::ValuesIn(levels_32), + ::testing::ValuesIn(ranges_i32), + ::testing::ValuesIn(ranges_i32), + ::testing::ValuesIn(granularity), + ::testing::Values(InferenceEngine::Precision::I32) +); + INSTANTIATE_TEST_CASE_P(smoke_QuantMatMul_i8i8, QuantMatMulTest, ::testing::Combine( - quantParams_i8i8, + quantParams_i8, + quantParams_i8, ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(shapesA), ::testing::ValuesIn(shapesB), @@ -49,7 +89,18 @@ INSTANTIATE_TEST_CASE_P(smoke_QuantMatMul_i8i8, QuantMatMulTest, INSTANTIATE_TEST_CASE_P(smoke_QuantMatMul_u8i8, QuantMatMulTest, ::testing::Combine( - quantParams_u8i8, + quantParams_u8, + quantParams_i8, + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(shapesA), + ::testing::ValuesIn(shapesB), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + QuantMatMulTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_QuantMatMul_i16i32, QuantMatMulTest, + ::testing::Combine( + quantParams_i16, + quantParams_i32, ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(shapesA), ::testing::ValuesIn(shapesB), diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/quantized_mat_mul.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/quantized_mat_mul.hpp index d97d911df03..2155d987afe 100644 --- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/quantized_mat_mul.hpp +++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/quantized_mat_mul.hpp @@ -13,12 +13,17 @@ namespace SubgraphTestsDefinitions { +typedef std::pair QuantRange; + typedef std::tuple< size_t, + QuantRange, + QuantRange, ngraph::helpers::QuantizationGranularity, InferenceEngine::Precision> QuantParams; typedef std::tuple< + QuantParams, QuantParams, InferenceEngine::Precision, InferenceEngine::SizeVector, diff --git a/inference-engine/tests/functional/shared_test_classes/src/subgraph/quantized_mat_mul.cpp b/inference-engine/tests/functional/shared_test_classes/src/subgraph/quantized_mat_mul.cpp index 61868dfe0d5..bae902c0298 100644 --- a/inference-engine/tests/functional/shared_test_classes/src/subgraph/quantized_mat_mul.cpp +++ b/inference-engine/tests/functional/shared_test_classes/src/subgraph/quantized_mat_mul.cpp @@ -10,64 +10,90 @@ namespace SubgraphTestsDefinitions { using ngraph::helpers::QuantizationGranularity; std::string QuantMatMulTest::getTestCaseName(const testing::TestParamInfo &obj) { - QuantParams quantParams; + QuantParams quantParams0; + QuantParams quantParams1; InferenceEngine::Precision netPrecision; InferenceEngine::SizeVector inputShape0; InferenceEngine::SizeVector inputShape1; + QuantRange inputRange0; + QuantRange inputRange1; + QuantRange outputRange0; + QuantRange outputRange1; std::string targetDevice; - std::tie(quantParams, netPrecision, inputShape0, inputShape1, targetDevice) = obj.param; + std::tie(quantParams0, quantParams1, netPrecision, inputShape0, inputShape1, targetDevice) = obj.param; - size_t quantLevels; - QuantizationGranularity quantGranularity; + size_t quantLevels0; + size_t quantLevels1; + QuantizationGranularity quantGranularity0; + QuantizationGranularity quantGranularity1; InferenceEngine::Precision fqPrec0; - std::tie(quantLevels, quantGranularity, fqPrec0) = quantParams; + InferenceEngine::Precision fqPrec1; + std::tie(quantLevels0, inputRange0, outputRange0, quantGranularity0, fqPrec0) = quantParams0; + std::tie(quantLevels1, inputRange1, outputRange1, quantGranularity1, fqPrec1) = quantParams1; std::ostringstream result; result << "IS0=" << CommonTestUtils::vec2str(inputShape0) << "_"; result << "IS1=" << CommonTestUtils::vec2str(inputShape1) << "_"; - result << "Levels=" << quantLevels << "_"; - result << "QuantGranularity=" << quantGranularity << "_"; + result << "Levels0=" << quantLevels0 << "_"; + result << "Levels1=" << quantLevels1 << "_"; + result << "inputRange0=" << inputRange0.first << "_" << inputRange0.second << "_"; + result << "outputRange0=" << outputRange0.first << "_" << outputRange0.second << "_"; + result << "inputRange1=" << inputRange1.first << "_" << inputRange1.second << "_"; + result << "outputRange1=" << outputRange1.first << "_" << outputRange1.second << "_"; + result << "QuantGranularity0=" << quantGranularity0 << "_"; + result << "QuantGranularity1=" << quantGranularity1 << "_"; result << "fq0PRC=" << fqPrec0.name() << "_"; + result << "fq1PRC=" << fqPrec1.name() << "_"; result << "netPRC=" << netPrecision.name() << "_"; result << "targetDevice=" << targetDevice; return result.str(); } void QuantMatMulTest::SetUp() { - QuantParams quantParams; + QuantParams quantParams0; + QuantParams quantParams1; InferenceEngine::SizeVector inputShape0; InferenceEngine::SizeVector inputShape1; auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; - std::tie(quantParams, netPrecision, inputShape0, inputShape1, targetDevice) = this->GetParam(); + std::tie(quantParams0, quantParams1, netPrecision, inputShape0, inputShape1, targetDevice) = this->GetParam(); - size_t quantLevels; - QuantizationGranularity quantGranularity; + size_t quantLevels0; + size_t quantLevels1; + QuantRange inputRange0; + QuantRange inputRange1; + QuantRange outputRange0; + QuantRange outputRange1; + QuantizationGranularity quantGranularity0; + QuantizationGranularity quantGranularity1; InferenceEngine::Precision fqPrec0; - std::tie(quantLevels, quantGranularity, fqPrec0) = quantParams; + InferenceEngine::Precision fqPrec1; + std::tie(quantLevels0, inputRange0, outputRange0, quantGranularity0, fqPrec0) = quantParams0; + std::tie(quantLevels1, inputRange1, outputRange1, quantGranularity1, fqPrec1) = quantParams1; auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); auto params = ngraph::builder::makeParams(ngPrc, {inputShape0, inputShape1}); auto paramOuts = ngraph::helpers::convert2OutputVector( ngraph::helpers::castOps2Nodes(params)); - auto makeFakeQuantizeNode = [ngPrc, quantLevels, quantGranularity](const ngraph::Output &in, - std::vector inputShape, InferenceEngine::Precision prec) -> std::shared_ptr { + auto makeFakeQuantizeNode = [ngPrc](size_t quantLevels, QuantRange inputRange, QuantRange outputRange, + QuantizationGranularity quantGranularity, const ngraph::Output &in, std::vector inputShape, + InferenceEngine::Precision prec) -> std::shared_ptr { std::vector dataFqConstShapes(inputShape.size(), 1); if (quantGranularity == ngraph::helpers::Perchannel) dataFqConstShapes[1] = inputShape[1]; size_t constDataSize = ngraph::shape_size(dataFqConstShapes); std::vector inputLowData(constDataSize), inputHighData(constDataSize), outputLowData(constDataSize), outputHighData(constDataSize); for (int i = 0; i < constDataSize; i++) { - inputLowData[i] = 0; - inputHighData[i] = 255; - outputLowData[i] = prec == InferenceEngine::Precision::I8 ? -128 : 0; - outputHighData[i] = prec == InferenceEngine::Precision::I8 ? 127 : 255; + inputLowData[i] = inputRange.first; + inputHighData[i] = inputRange.second; + outputLowData[i] = outputRange.first; + outputHighData[i] = outputRange.second; } return ngraph::builder::makeFakeQuantize(in, ngPrc, quantLevels, dataFqConstShapes, inputLowData, inputHighData, outputLowData, outputHighData); }; - auto dataFq0 = makeFakeQuantizeNode(paramOuts[0], inputShape0, fqPrec0); - auto dataFq1 = makeFakeQuantizeNode(paramOuts[1], inputShape1, InferenceEngine::Precision::I8); + auto dataFq0 = makeFakeQuantizeNode(quantLevels0, inputRange0, outputRange0, quantGranularity0, paramOuts[0], inputShape0, fqPrec0); + auto dataFq1 = makeFakeQuantizeNode(quantLevels1, inputRange1, outputRange1, quantGranularity1, paramOuts[1], inputShape1, fqPrec1); auto MatMul = std::dynamic_pointer_cast( ngraph::builder::makeMatMul(dataFq0, dataFq1)); diff --git a/ngraph/test/type_prop/fake_quantize.cpp b/ngraph/test/type_prop/fake_quantize.cpp index 0488d9a755a..039c5587a55 100644 --- a/ngraph/test/type_prop/fake_quantize.cpp +++ b/ngraph/test/type_prop/fake_quantize.cpp @@ -16,7 +16,7 @@ TEST(type_prop, fake_quantize) const auto input_high = make_shared(element::f32, Shape{}); const auto output_low = make_shared(element::f32, Shape{}); const auto output_high = make_shared(element::f32, Shape{}); - const int levels = 5; + const size_t levels = 5; const auto fake_quantize = make_shared(data, input_low, input_high, output_low, output_high, levels); @@ -31,7 +31,7 @@ TEST(type_prop, fake_quantize_autob) const auto input_high = make_shared(element::f32, Shape{1, 2, 3, 4}); const auto output_low = make_shared(element::f32, Shape{4}); const auto output_high = make_shared(element::f32, Shape{}); - const int levels = 5; + const size_t levels = 5; const auto fake_quantize = make_shared(data, input_low, input_high, output_low, output_high, levels); @@ -46,7 +46,7 @@ TEST(type_prop, fake_quantize_invalid_autob) auto input_high = make_shared(element::f32, Shape{}); auto output_low = make_shared(element::f32, Shape{}); auto output_high = make_shared(element::f32, Shape{}); - const int levels = 5; + const size_t levels = 5; try { @@ -59,4 +59,4 @@ TEST(type_prop, fake_quantize_invalid_autob) { EXPECT_HAS_SUBSTRING(error.what(), std::string("Argument shapes are inconsistent")); } -} +} \ No newline at end of file