From bc6938509387d9529663fdc2068993a2021225e8 Mon Sep 17 00:00:00 2001
From: Marcin Kusmierski
Date: Tue, 20 Dec 2022 10:20:09 +0100
Subject: [PATCH] =?UTF-8?q?[GNA]=20Create=20new=20tests=20for=20PWL=20appr?=
 =?UTF-8?q?oximation=20and=20refactor=20numerical=5Fu=E2=80=A6=20(#14604)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* [GNA] Small improvement in PWLApproximation and update of unit tests for it.
* added template method for floating point comparison
* added new tests for PWLApproximation
* [GNA] Refactor numerical_utils and usage of its methods in the plugin
* refactor numerical utils: use template methods for conversion and comparison
---
 .../intel_gna/src/backend/make_pwl.cpp | 44 +--
 .../pwl_border_values_counter_identity.cpp | 8 +-
 .../intel_gna/src/backend/pwl_tools.cpp | 2 +-
 .../intel_gna/src/common/numerical_utils.hpp | 40 ++-
 .../src/frontend/scale_factor_calc.cpp | 60 ++--
 .../intel_gna/src/gna_graph_compiler.cpp | 17 +-
 .../intel_gna/src/gna_plugin_config.cpp | 3 +-
 .../src/optimizer/gna_pass_manager.cpp | 15 +-
 src/plugins/intel_gna/src/runtime/pwl.cpp | 21 +-
 .../intel_gna/src/scale_factor_helper.cpp | 4 +-
 .../src/transformations/pwl_approximation.cpp | 10 +-
 .../src/transformations/pwl_approximation.hpp | 4 +-
 .../unit/backend/gna_make_pwl_identity.cpp | 3 +-
 .../tests/unit/transformations/gna_pwl.cpp | 285 ++++++++++++++----
 14 files changed, 353 insertions(+), 163 deletions(-)

diff --git a/src/plugins/intel_gna/src/backend/make_pwl.cpp b/src/plugins/intel_gna/src/backend/make_pwl.cpp index 18a17d8a566..556be6f413a 100644 --- a/src/plugins/intel_gna/src/backend/make_pwl.cpp +++ b/src/plugins/intel_gna/src/backend/make_pwl.cpp @@ -19,6 +19,7 @@ #include "log/log.hpp" using namespace ov::intel_gna; +using namespace ov::intel_gna::common; // This function performs emulation of HW saturation of PWL segments in SW // by inserting additional segments when overflow would happen @@ -50,7 +51,7 @@ static void insert_extra_pwl_segments(std::vector& gna_pwl, if (y_value > static_cast(INT16_MAX) || y_value < static_cast(INT16_MIN)) { float x_value = ((static_cast(y_max) - yBase) * scale) / slope + xBase; - extra_segment.xBase = FLOAT_TO_INT32(x_value) & XBASEMASK; + extra_segment.xBase = FloatToInt32(x_value) & XBASEMASK; extra_segment.yBase = slope > 0 ?
y_max : y_min; extra_segment.slope = 0; extra_segments[gna_pwl_size] = extra_segment; @@ -125,20 +126,21 @@ void make_gna_pwl(const DnnActivation& fun, int32_t y_lower = y_min; int16_t y_upper = y_max; if (fun.fqParams.set) { - x_lower = static_cast(std::max(FLOAT_TO_INT64(*fun.fqParams.input_low * 1.25 * in_scale), static_cast(x_lower))); - x_upper = static_cast(std::min(FLOAT_TO_INT64(*fun.fqParams.input_high * 1.25 * in_scale), static_cast(x_upper))); + x_lower = static_cast(std::max(FloatToInt64(*fun.fqParams.input_low * 1.25 * in_scale), static_cast(x_lower))); + x_upper = static_cast(std::min( + FloatToInt64(*fun.fqParams.input_high * 1.25 * in_scale), static_cast(x_upper))); // y_lower can be reduced with negative slope y_lower = static_cast(*fun.fqParams.input_low * 1.25 * out_scale); - y_upper = static_cast(std::min(FLOAT_TO_INT32(*fun.fqParams.input_high * 1.25 * out_scale), static_cast(y_upper))); + y_upper = static_cast(std::min(FloatToInt32(*fun.fqParams.input_high * 1.25 * out_scale), static_cast(y_upper))); } else { - if (x_lower < y_lower * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale); - if (y_lower < x_lower * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale); + if (x_lower < y_lower * in_scale / out_scale) x_lower = FloatToInt32(y_lower * in_scale / out_scale); + if (y_lower < x_lower * out_scale / in_scale) y_lower = FloatToInt16(x_lower * out_scale / in_scale); } - gna_pwl[0].yBase = std::max(FLOAT_TO_INT32(y_lower * fun.args.lrelu.negative_slope), static_cast(y_min)); + gna_pwl[0].yBase = std::max(FloatToInt32(y_lower * fun.args.lrelu.negative_slope), static_cast(y_min)); s = gna_slope(fun.args.lrelu.negative_slope, in_scale, out_scale); gna_pwl[0].xBase = (x_lower & XBASEMASK) | s.slope_scale_index; // zero out the 2 lsb - gna_pwl[0].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); + gna_pwl[0].slope = FloatToInt16(s.slope * s.slope_scale); print_segment((int32_t)(gna_pwl[0].xBase & XBASEMASK) / in_scale, gna_pwl[0].yBase / out_scale, @@ -147,7 +149,7 @@ void make_gna_pwl(const DnnActivation& fun, gna_pwl[1].xBase = 0; gna_pwl[1].yBase = 0; s = gna_slope(1.0, in_scale, out_scale); - gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); + gna_pwl[1].slope = FloatToInt16(s.slope * s.slope_scale); gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index; print_segment(0.0, 0.0, (gna_pwl[1].slope * in_scale) / (out_scale*s.slope_scale)); @@ -205,18 +207,18 @@ void make_gna_pwl(const DnnActivation& fun, if (fun == kActKaldiLstmClipping) { if (x_lower < l_bound * in_scale) { if (y_lower < l_bound * out_scale) { - x_lower = FLOAT_TO_INT32(l_bound * in_scale); - y_lower = FLOAT_TO_INT16(l_bound * out_scale); + x_lower = FloatToInt32(l_bound * in_scale); + y_lower = FloatToInt16(l_bound * out_scale); } else { - x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale); + x_lower = FloatToInt32(y_lower * in_scale / out_scale); } } if (x_upper > u_bound * in_scale) { if (y_upper > u_bound * out_scale) { - x_upper = FLOAT_TO_INT32(u_bound * in_scale); - y_upper = FLOAT_TO_INT16(u_bound * out_scale); + x_upper = FloatToInt32(u_bound * in_scale); + y_upper = FloatToInt16(u_bound * out_scale); } else { - x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale); + x_upper = FloatToInt32(y_upper * in_scale / out_scale); } } } @@ -230,7 +232,7 @@ void make_gna_pwl(const DnnActivation& fun, gna_pwl[1].xBase = x_lower & XBASEMASK; // zero out the 2 lsb gna_pwl[1].yBase = y_lower; s = gna_slope(1.0, in_scale, out_scale); 
- gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); + gna_pwl[1].slope = FloatToInt16(s.slope * s.slope_scale); gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index; print_segment((int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale, gna_pwl[1].yBase / out_scale, 1.0); @@ -251,8 +253,8 @@ void make_gna_pwl(const DnnActivation& fun, auto n_segments = 2; - if (y_upper > x_upper * out_scale / in_scale) y_upper = FLOAT_TO_INT16(x_upper * out_scale / in_scale); - if (x_upper > y_upper * in_scale / out_scale) x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale); + if (y_upper > x_upper * out_scale / in_scale) y_upper = FloatToInt16(x_upper * out_scale / in_scale); + if (x_upper > y_upper * in_scale / out_scale) x_upper = FloatToInt32(y_upper * in_scale / out_scale); if (y_upper == y_max) { // saturation at ends - need one more segment n_segments += 1; @@ -268,14 +270,14 @@ void make_gna_pwl(const DnnActivation& fun, gna_pwl[i].xBase = (-x_upper) & XBASEMASK; // zero out the 2 lsb gna_pwl[i].yBase = y_upper; s = gna_slope(-1.0, in_scale, out_scale); - gna_pwl[i].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); + gna_pwl[i].slope = FloatToInt16(s.slope * s.slope_scale); gna_pwl[i].xBase = gna_pwl[i].xBase | s.slope_scale_index; print_segment((int32_t)(gna_pwl[i].xBase & XBASEMASK) / in_scale, gna_pwl[i].yBase / out_scale, -1.0); gna_pwl[i + 1].xBase = 0; gna_pwl[i + 1].yBase = 0; s = gna_slope(1.0, in_scale, out_scale); - gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); + gna_pwl[i + 1].slope = FloatToInt16(s.slope * s.slope_scale); gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index; print_segment((int32_t)(gna_pwl[i + 1].xBase & XBASEMASK) / in_scale, gna_pwl[i + 1].yBase / out_scale, 1.0); break; @@ -300,7 +302,7 @@ static T cast_check_overflow(double v, bool round = true) { return std::numeric_limits::min(); } - return round ? FLOAT_TO_INT32(v) : static_cast(v); + return round ? 
FloatToInt32(v) : static_cast(v); } /** diff --git a/src/plugins/intel_gna/src/backend/pwl_border_values_counter_identity.cpp b/src/plugins/intel_gna/src/backend/pwl_border_values_counter_identity.cpp index bc087094771..fc7d764dceb 100644 --- a/src/plugins/intel_gna/src/backend/pwl_border_values_counter_identity.cpp +++ b/src/plugins/intel_gna/src/backend/pwl_border_values_counter_identity.cpp @@ -40,13 +40,13 @@ BorderValues BorderValuesCounterIdentity::CreateBorderValues(const BorderValues& int16_t y_lower = default_values.y_lower; int16_t y_upper = default_values.y_upper; if (x_lower < y_lower * in_scale / out_scale) - x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale); + x_lower = common::FloatToInt32(y_lower * in_scale / out_scale); if (x_upper > y_upper * in_scale / out_scale) - x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale); + x_upper = common::FloatToInt32(y_upper * in_scale / out_scale); if (y_lower < x_lower * out_scale / in_scale) - y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale); + y_lower = common::FloatToInt16(x_lower * out_scale / in_scale); if (y_upper > x_upper * out_scale / in_scale) - y_upper = FLOAT_TO_INT16(x_upper * out_scale / in_scale); + y_upper = common::FloatToInt16(x_upper * out_scale / in_scale); return {x_lower, x_upper, y_lower, y_upper, {default_values.y_lower, default_values.y_upper}}; } diff --git a/src/plugins/intel_gna/src/backend/pwl_tools.cpp b/src/plugins/intel_gna/src/backend/pwl_tools.cpp index c87d4d8f377..50094411ffb 100644 --- a/src/plugins/intel_gna/src/backend/pwl_tools.cpp +++ b/src/plugins/intel_gna/src/backend/pwl_tools.cpp @@ -28,7 +28,7 @@ int64_t ComputeSlopeScale(const int32_t x_base) { PWLSegmentSlope ComputeSlopeForSegment(double slope, double in_scale, double out_scale) { const auto gna_slope_value = gna_slope(slope, in_scale, out_scale); - auto segment_slope = FLOAT_TO_INT64(gna_slope_value.slope * gna_slope_value.slope_scale); + auto segment_slope = common::FloatToInt64(gna_slope_value.slope * gna_slope_value.slope_scale); if (segment_slope > std::numeric_limits::max()) { segment_slope = std::numeric_limits::max(); diff --git a/src/plugins/intel_gna/src/common/numerical_utils.hpp b/src/plugins/intel_gna/src/common/numerical_utils.hpp index ffb86e85315..19a4fe9596f 100644 --- a/src/plugins/intel_gna/src/common/numerical_utils.hpp +++ b/src/plugins/intel_gna/src/common/numerical_utils.hpp @@ -4,28 +4,42 @@ #pragma once -#include #include +#include namespace ov { namespace intel_gna { namespace common { -#define FLOAT_TO_INT8(a) static_cast(((a) < 0)?((a) - 0.5f):((a) + 0.5f)) -#define FLOAT_TO_INT16(a) static_cast(((a) < 0)?((a) - 0.5f):((a) + 0.5f)) -#define FLOAT_TO_INT32(a) static_cast(((a) < 0)?((a)-0.5f):((a)+0.5f)) -#define FLOAT_TO_INT64(a) static_cast(((a) < 0)?((a)-0.5f):((a)+0.5f)) +template +inline T FloatToInteger(float a) { + return static_cast((a < 0.0f) ? 
(a - 0.5f) : (a + 0.5f)); +} +inline int8_t FloatToInt8(float a) { + return FloatToInteger(a); +} +inline int16_t FloatToInt16(float a) { + return FloatToInteger(a); +} +inline int32_t FloatToInt32(float a) { + return FloatToInteger(a); +} +inline int64_t FloatToInt64(float a) { + return FloatToInteger(a); +} /** - * @brief Compares two float values and returns if they are equal - * @param p1 First float value - * @param p2 Second float value - * @return Returns true if two float values are equal + * @brief Compare two floating point values and return true if they are equal with given accuracy + * @param p1 First floating point value + * @param p2 Second floating point value + * @param accuracy accuracy of comparision + * @return Returns true if two floating point values are equal */ -inline bool fp32eq(float p1, float p2, float accuracy = 0.00001f) { +template ::value, int>::type = 0> +bool AreFpEq(T p1, T p2, T accuracy = std::numeric_limits::epsilon()) { return (std::abs(p1 - p2) <= accuracy * std::min(std::abs(p1), std::abs(p2))); } -} // namespace common -} // namespace intel_gna -} // namespace ov +} // namespace common +} // namespace intel_gna +} // namespace ov diff --git a/src/plugins/intel_gna/src/frontend/scale_factor_calc.cpp b/src/plugins/intel_gna/src/frontend/scale_factor_calc.cpp index e7d461c5d68..9633cb70c1f 100644 --- a/src/plugins/intel_gna/src/frontend/scale_factor_calc.cpp +++ b/src/plugins/intel_gna/src/frontend/scale_factor_calc.cpp @@ -13,6 +13,7 @@ namespace ov { namespace intel_gna { +using namespace common; namespace frontend { constexpr float activation_scale_factor = 2048.f; @@ -93,14 +94,14 @@ float ScaleFactorCalculator::selectBestOutputScaleFactors(float inScale, auto sd = 0.0f; for (size_t j = 0; j < slopes.size(); ++j) { auto s = gna_slope(slopes[j], inScale, outScale); - auto slope = FLOAT_TO_INT16(s.slope * s.slope_scale); + auto slope = FloatToInt16(s.slope * s.slope_scale); if (slope < std::numeric_limits::min() || slope > std::numeric_limits::max()) { sd += std::numeric_limits::max(); continue; } auto testSlope = static_cast(slope) / s.slope_scale * inScale / outScale; - if (common::fp32eq(static_cast(testSlope), static_cast(slopes[j]), 1.0E-6f)) { + if (AreFpEq(static_cast(testSlope), static_cast(slopes[j]), 1.0E-6f)) { return outScale; } @@ -150,7 +151,7 @@ float ScaleFactorCalculator::selectBestWeightsScaleFactors(float inScale, } auto testSlope = static_cast(slope) / s.slope_scale * (inScale * weightScale) / outScale; - if (common::fp32eq(static_cast(testSlope), static_cast(slopes[j]))) { + if (AreFpEq(static_cast(testSlope), static_cast(slopes[j]))) { return outScale; } sd += pow(testSlope - slopes[j], 2.0); @@ -258,7 +259,7 @@ bool ScaleFactorCalculator::requantizeInput(InferenceEngine::CNNLayerPtr input, return true; } - if (info.isWeightableIdentity() && !common::fp32eq(quantDataForInputLayer->_weights_quant.GetScale(), 1.0f)) { + if (info.isWeightableIdentity() && !AreFpEq(quantDataForInputLayer->_weights_quant.GetScale(), 1.0f)) { auto reducer = std::max(1.0f, quantDataForInputLayer->_dst_quant.GetScale() / newOutputScale); auto newWeightsScale = std::max(1.0f, quantDataForInputLayer->_weights_quant.GetScale() / reducer); quantDataForInputLayer->_weights_quant.SetScale(static_cast(newWeightsScale)); @@ -354,7 +355,7 @@ float ScaleFactorCalculator::adjustScaleFactor(float sf, auto scaleFactors = generateScaleFactors(startRange, endRange, steps); auto newScaleFactor = selectBestOutputScaleFactors(quantizedParams->_src_quant.GetScale(), 
scaleFactors, slopes); - if (!common::fp32eq(sf, newScaleFactor) && !common::fp32eq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) { + if (!AreFpEq(sf, newScaleFactor) && !AreFpEq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) { log::debug() << "[INFO] Adjusting scale factor for " << cnnLayer->name << " from: " << sf << " to: " << newScaleFactor << "\n"; sf = newScaleFactor; @@ -443,7 +444,7 @@ float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const* auto input_max_value = static_cast(std::numeric_limits::max()); auto output_max_value = static_cast((inputsSize == 2) ? std::numeric_limits::max() : std::numeric_limits::max()); - auto x_min = common::fp32eq(fmod(exponent, 1.0), 0) ? input_min_value / quantizedParams->_src_quant.GetScale() : 0.0; + auto x_min = AreFpEq(fmod(exponent, 1.0), 0.0) ? input_min_value / quantizedParams->_src_quant.GetScale() : 0.0; x_min = std::max(x_min, -pow_domain); auto x_max = input_max_value / quantizedParams->_src_quant.GetScale(); @@ -534,7 +535,7 @@ float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const* auto levels = std::min(quantizedParams->_dst_quant.GetLevels(), static_cast(std::numeric_limits::max()) + 1); result = CalculateScaleFactorFromStats(levels, minOutValue, maxOutValue); - if (std::isinf(result) || common::fp32eq(absMax, 0.0f)) { + if (std::isinf(result) || AreFpEq(absMax, 0.0f)) { result = max_activation_scale_factor; } @@ -556,7 +557,7 @@ float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const* (layer.isIdentity() || layer.isFakeQuantize()) && LayerInfo(prevLayer).isWeightableIdentity()) { auto prevLayerQuant = InferenceEngine::getInjectedData(*prevLayer); auto prevLayer2 = CNNNetHasPrevLayer(prevLayer.get(), 0) ? CNNNetPrevLayerSkipCertain(prevLayer, 0, skipNonFunctional) : nullptr; - if (!common::fp32eq(prevLayerQuant->_src_quant.GetScale(), 1.0f) && + if (!AreFpEq(prevLayerQuant->_src_quant.GetScale(), 1.0f) && prevLayerQuant->_src_quant.IsStatsSet() && (prevLayer2 == nullptr || LayerInfo(prevLayer2).has8BOr16BOutput())) { result = prevLayerQuant->_src_quant.GetScale(); @@ -620,14 +621,14 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cn auto quantSibling = InferenceEngine::getInjectedData(input); // after restarting from memory input - quant is fine - if (common::fp32eq(quantSibling->_dst_quant.GetScale(), inputQuant->_dst_quant.GetScale())) { + if (AreFpEq(quantSibling->_dst_quant.GetScale(), inputQuant->_dst_quant.GetScale())) { quant->_src_quant.SetScale(inputQuant->_dst_quant.GetScale()); quant->_dst_quant.SetScale(inputQuant->_dst_quant.GetScale()); return true; } if ((!fake_quantized && quantSibling->_dst_quant.IsScaleSet()) || - (fake_quantized && quantSibling->_dst_quant.IsScaleSet() && !common::fp32eq(quantSibling->_dst_quant.GetScale(), 1.0) && + (fake_quantized && quantSibling->_dst_quant.IsScaleSet() && !AreFpEq(quantSibling->_dst_quant.GetScale(), 1.0f) && quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale()) || quantSibling->_dst_quant.IsScaleSet() && infiniteLoopCount > 0) { // means we already restarted propagation input memory layer @@ -733,7 +734,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cn auto scale_val = static_cast(levels) / abs_val; //TODO: use FQ formula for scale factor calculation - if (std::isinf(scale_val) || common::fp32eq(abs_val, 0.0f)) { + if (std::isinf(scale_val) || AreFpEq(abs_val, 0.0f)) { 
quant->_dst_quant.SetScale(fake_quantized ? levels : 1.0f); } else { quant->_dst_quant.SetScale(scale_val); @@ -762,7 +763,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cn } auto powerScale = std::abs(powerLayer->scale); - if (common::fp32eq(powerScale, 0.0f)) { + if (AreFpEq(powerScale, 0.0f)) { powerScale = 1.0f; } auto weightsScaleFactor = MAX_VAL_2B_WEIGHT / powerScale; @@ -773,8 +774,8 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cn } else if (layerInfo.isActivation()) { // todo: calculate proper scale factor where we need to expand it a bit to be safe to stay in int16 weights // set the initial value - if (!quant->_dst_quant.IsScaleSet() || common::fp32eq(quant->_dst_quant.GetScale(), 1.0f) || - !common::fp32eq(quant->_src_quant.GetScale(), inputQuant->_dst_quant.GetScale())) { + if (!quant->_dst_quant.IsScaleSet() || AreFpEq(quant->_dst_quant.GetScale(), 1.0f) || + !AreFpEq(quant->_src_quant.GetScale(), inputQuant->_dst_quant.GetScale())) { quant->_src_quant.SetScale(inputQuant->_dst_quant.GetScale()); auto scale = getActivationScale(cnnLayer, layerInfo, inputsSize, fake_quantized); quant->_dst_quant.SetScale(scale); @@ -865,13 +866,13 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL bestWeightsScale = i; } - if (common::fp32eq(error, 0.0f)) { + if (AreFpEq(error, 0.0f)) { break; } } if (bestWeightsScale > 0.0f && - !common::fp32eq(bestWeightsScale, quantParams1->_weights_quant.GetScale())) { + !AreFpEq(bestWeightsScale, quantParams1->_weights_quant.GetScale())) { quantParams1->_weights_quant.SetScale(bestWeightsScale); quantParams1->_dst_quant.SetScale(quantParams1->_weights_quant.GetScale() * quantParams1->_src_quant.GetScale()); @@ -888,7 +889,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL auto maxValue = (GetInputPrecision() == InferenceEngine::Precision::I8) ? 
std::numeric_limits::max() : std::numeric_limits::max(); if (quantData->_weights_quant.GetScale() > maxValue && - !common::fp32eq(quantData->_weights_quant.GetScale(), maxValue)) { + !AreFpEq(quantData->_weights_quant.GetScale(), static_cast(maxValue))) { float newOutputScale = quantParams0->_dst_quant.GetScale() * maxValue; log::debug() << "[INFO] weights saturated for " << eltwiseLayer->name << ", try to requiantize input " << in1->name << std::endl; if (requantizeInput(in1, newOutputScale, result, infiniteLoopCount)) { @@ -901,6 +902,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL if (!quantData->_dst_quant.IsStatsSet()) { return true; } + auto weightsReducer = calculateWeightsReducerFromDstStats(quantData->_dst_quant); if (weightsReducer > initial_weights_reducer_val) { float newOutputScale = quantParams1->_dst_quant.GetScale() / weightsReducer; @@ -911,10 +913,8 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL } } break; - default : THROW_GNA_EXCEPTION << "Unsupported Eltwise layer for quantisation: " << eltwiseLayer->_operation; } - return true; } @@ -946,7 +946,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay auto scaleFactor = quantParams0->_dst_quant.GetScale(); auto scaleFactorCheck = [scaleFactor](InferenceEngine::CNNLayerPtr& inputLayer) { auto quantParams = InferenceEngine::getInjectedData(inputLayer); - return common::fp32eq(quantParams->_dst_quant.GetScale(), scaleFactor); + return AreFpEq(quantParams->_dst_quant.GetScale(), scaleFactor); }; if (std::find_if_not(inputLayers.begin() + 1, inputLayers.end(), scaleFactorCheck) == inputLayers.end()) { @@ -968,7 +968,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay auto nextInputIt = sourceLayerIt + 1; while ((nextInputIt = std::find_if(nextInputIt, inputLayers.end(), inputLayerCheck)) != inputLayers.end()) { auto quantParamsSecond = InferenceEngine::getInjectedData(*nextInputIt); - if (!common::fp32eq(quantParamsSecond->_dst_quant.GetScale(), quantParamsFirst->_dst_quant.GetScale())) { + if (!common::AreFpEq(quantParamsSecond->_dst_quant.GetScale(), quantParamsFirst->_dst_quant.GetScale())) { THROW_GNA_EXCEPTION << "Two Input layers " << (*sourceLayerIt)->name << " and " << (*nextInputIt)->name << " have different scales in concat!!! 
\n"; } @@ -991,8 +991,8 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay for (auto it = inputLayers.begin(); it != inputLayers.end(); ++it) { auto quantParams = InferenceEngine::getInjectedData(*it); if ((quantParams->_dst_quant.GetScale() < minScaleFactor && - !common::fp32eq(quantParams->_dst_quant.GetScale(), 1.0f)) || - common::fp32eq(minScaleFactor, 1.0f)) { + !AreFpEq(quantParams->_dst_quant.GetScale(), 1.0f)) || + AreFpEq(minScaleFactor, 1.0f)) { minScaleFactor = quantParams->_dst_quant.GetScale(); sourceLayerIt = it; } @@ -1002,7 +1002,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay auto sourceLayerCheck = [](InferenceEngine::CNNLayerPtr& inputLayer) { auto quantParams = InferenceEngine::getInjectedData(inputLayer); LayerInfo info(inputLayer); - return !info.isActivation() && !common::fp32eq(quantParams->_dst_quant.GetScale(), 1.0f); + return !info.isActivation() && !AreFpEq(quantParams->_dst_quant.GetScale(), 1.0f); }; sourceLayerIt = std::find_if(inputLayers.begin(), inputLayers.end(), sourceLayerCheck); } @@ -1010,7 +1010,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay if (sourceLayerIt == inputLayers.end()) { auto nonDefaultScaleFactor = [](InferenceEngine::CNNLayerPtr& inputLayer) { auto quantParams = InferenceEngine::getInjectedData(inputLayer); - return !common::fp32eq(quantParams->_dst_quant.GetScale(), 1.0f); + return !AreFpEq(quantParams->_dst_quant.GetScale(), 1.0f); }; sourceLayerIt = std::find_if(inputLayers.begin(), inputLayers.end(), nonDefaultScaleFactor); @@ -1026,7 +1026,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay for (auto it = inputLayers.begin(); it != inputLayers.end(); ++it) { auto quantParamsIn = InferenceEngine::getInjectedData(*it); - if (common::fp32eq(quantParamsIn->_dst_quant.GetScale(), scaleFactor)) { + if (AreFpEq(quantParamsIn->_dst_quant.GetScale(), scaleFactor)) { continue; } @@ -1035,7 +1035,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay quantParamsIn->_dst_quant.SetScale(quantParams->_dst_quant.GetScale()); } else { // possible case when some of the concat inputs are free to select scale ex: const->concat<-affine - if (!common::fp32eq(quantParamsIn->_dst_quant.GetScale(), 1.0f) && !LayerInfo(*it).isActivation()) { + if (!AreFpEq(quantParamsIn->_dst_quant.GetScale(), 1.0f) && !LayerInfo(*it).isActivation()) { concatIdxToUpdate.insert(std::distance(inputLayers.begin(), it)); } @@ -1047,7 +1047,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay auto updatedScaleFactor = InferenceEngine::getInjectedData(in0)->_dst_quant.GetScale(); auto equalScaleFactor = [updatedScaleFactor](InferenceEngine::CNNLayerPtr& inputLayer) { auto quantParams = InferenceEngine::getInjectedData(inputLayer); - return common::fp32eq(quantParams->_dst_quant.GetScale(), updatedScaleFactor); + return AreFpEq(quantParams->_dst_quant.GetScale(), updatedScaleFactor); }; auto layerIt = std::find_if_not(inputLayers.begin() + 1, inputLayers.end(), equalScaleFactor); @@ -1117,11 +1117,11 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay auto prevLayerQuant = InferenceEngine::getInjectedData(*prevLayer); auto bestWeightsScale = 1.0f; auto slopes = getPWLSlopes(restarLayerInfo); - if (!slopes.empty() && !common::fp32eq(prevLayerQuant->_src_quant.GetScale(), newScaleFactor)) { + if (!slopes.empty() && 
!AreFpEq(prevLayerQuant->_src_quant.GetScale(), newScaleFactor)) { bestWeightsScale = selectBestWeightsScaleFactors(prevLayerQuant->_src_quant.GetScale(), newScaleFactor, weightsScales, { 1.0f }); } - if (!slopes.empty() && !common::fp32eq(bestWeightsScale, prevLayerQuant->_weights_quant.GetScale())) { + if (!slopes.empty() && !AreFpEq(bestWeightsScale, prevLayerQuant->_weights_quant.GetScale())) { log::debug() << "[INFO][Concat] Optimizing weights scale factor for '" << prevLayer->name << "' layer. Change from " << prevLayerQuant->_weights_quant.GetScale() << " to " << bestWeightsScale << "\n"; diff --git a/src/plugins/intel_gna/src/gna_graph_compiler.cpp b/src/plugins/intel_gna/src/gna_graph_compiler.cpp index 7ead9b8ed75..e8605f69716 100644 --- a/src/plugins/intel_gna/src/gna_graph_compiler.cpp +++ b/src/plugins/intel_gna/src/gna_graph_compiler.cpp @@ -45,6 +45,7 @@ using namespace std; using namespace ov::intel_gna; using namespace GNAPluginNS; using namespace ov::intel_gna::frontend; +using namespace ov::intel_gna::common; using namespace memory; static bool CheckIFLastComponentIsPrecededByConv2D(const GNAPluginNS::backend::DnnComponents::storage_type& components, @@ -829,16 +830,16 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { } else { IE_ASSERT(quantized != nullptr); if (!gna_config.gnaFlags.input_low_precision) { - auto quantizedScale = FLOAT_TO_INT16(std::min(quantized->_weights_quant.GetScale() * power.scale, + auto quantizedScale = FloatToInt16(std::min(quantized->_weights_quant.GetScale() * power.scale, static_cast(INT16_MAX))); - auto quantizedOffset = FLOAT_TO_INT32(std::min(quantized->_dst_quant.GetScale() * power.offset, + auto quantizedOffset = FloatToInt32(std::min(quantized->_dst_quant.GetScale() * power.offset, static_cast(INT32_MAX))); gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedScale, num_rows_out, 64); gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, quantizedOffset, num_rows_out, 64); } else { - auto quantizedScale = FLOAT_TO_INT8(std::min(quantized->_weights_quant.GetScale() * power.scale, + auto quantizedScale = FloatToInt8(std::min(quantized->_weights_quant.GetScale() * power.scale, static_cast(INT8_MAX))); - auto quantizedOffset = FLOAT_TO_INT8(std::min(quantized->_dst_quant.GetScale() * power.offset, + auto quantizedOffset = FloatToInt8(std::min(quantized->_dst_quant.GetScale() * power.offset, static_cast(INT8_MAX))); gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedScale, num_rows_out, 64); gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, quantizedOffset, num_rows_out, 64); @@ -1370,10 +1371,10 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { auto scaledIdentity = -quantized->_weights_quant.GetScale(); if (gna_config.gnaFlags.input_low_precision == false) { - auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast(INT16_MAX))); + auto quantizedIdentity = FloatToInt16(std::min(scaledIdentity, static_cast(INT16_MAX))); gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } else { - auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast(INT8_MAX))); + auto quantizedIdentity = FloatToInt8(std::min(scaledIdentity, static_cast(INT8_MAX))); gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } @@ -1387,11 +1388,11 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { auto 
scaledIdentity = quantized->_weights_quant.GetScale(); if (gna_config.gnaFlags.input_low_precision == false) { - auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast(INT16_MAX))); + auto quantizedIdentity = FloatToInt16(std::min(scaledIdentity, static_cast(INT16_MAX))); gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } else { - auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast(INT8_MAX))); + auto quantizedIdentity = FloatToInt8(std::min(scaledIdentity, static_cast(INT8_MAX))); gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } diff --git a/src/plugins/intel_gna/src/gna_plugin_config.cpp b/src/plugins/intel_gna/src/gna_plugin_config.cpp index 4bc1e0c6557..ef8443e55e6 100644 --- a/src/plugins/intel_gna/src/gna_plugin_config.cpp +++ b/src/plugins/intel_gna/src/gna_plugin_config.cpp @@ -20,6 +20,7 @@ using namespace InferenceEngine; using namespace InferenceEngine::details; using namespace ov::intel_gna; +using namespace ov::intel_gna::common; namespace GNAPluginNS { const uint8_t Config::max_num_requests; @@ -55,7 +56,7 @@ void Config::UpdateFromMap(const std::map& config) { auto value = item.second; auto check_scale_factor = [&] (float scale_factor) { - if (ov::intel_gna::common::fp32eq(scale_factor, 0.0f) || std::isinf(scale_factor)) { + if (AreFpEq(scale_factor, 0.0f) || std::isinf(scale_factor)) { THROW_GNA_EXCEPTION << "input scale factor of 0.0f or +-inf not supported"; } }; diff --git a/src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp b/src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp index 4fdb7ec3fdc..ba95e65fd1d 100644 --- a/src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp +++ b/src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp @@ -48,6 +48,7 @@ using namespace InferenceEngine; using namespace InferenceEngine::details; using namespace GNAPluginNS; using namespace ov::intel_gna::frontend; +using namespace ov::intel_gna::common; #define pass_trace() log::debug() << "[" << getName() << "] " @@ -467,18 +468,18 @@ void SubstituteSoftSignPass::run() { auto powerLayer = LayerInfo(addition).as(); // first layer after abs must have scale of 1, offset of 1 and power of either 1 or -1 - if (!common::fp32eq(powerLayer->scale, 1.0f) || !common::fp32eq(powerLayer->offset, 1.0f) || - !common::fp32eq(std::abs(powerLayer->power), 1.0f)) continue; + if (!AreFpEq(powerLayer->scale, 1.0f) || !AreFpEq(powerLayer->offset, 1.0f) || + !AreFpEq(std::abs(powerLayer->power), 1.0f)) continue; // power == -1, offset = 1, scale = 1 - if (common::fp32eq(powerLayer->power, -1.0f)) { + if (AreFpEq(powerLayer->power, -1.0f)) { std::swap(addition, power); } else { // power = 1, offset = 1, scale - 1 power = getNthChild(addition, 0); if (!LayerInfo(power).isPower()) continue; auto powerLayer_1 = LayerInfo(power).as(); // layer after addition must have power of -1, offset of 0 and scale of 1 - if (!common::fp32eq(powerLayer_1->power, -1.0f) || !common::fp32eq(powerLayer_1->offset, 0.0f) || - !common::fp32eq(powerLayer_1->scale, 1.0f)) + if (!AreFpEq(powerLayer_1->power, -1.0f) || !AreFpEq(powerLayer_1->offset, 0.0f) || + !AreFpEq(powerLayer_1->scale, 1.0f)) continue; } @@ -2163,8 +2164,8 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { THROW_GNA_LAYER_EXCEPTION(fqLayer) << " unsupported per-channel quantisation"; } - if (!LayerInfo(prevLayer).isConst() && !common::fp32eq(inputRange.first.front(), outputRange.first.front()) && - 
!common::fp32eq(inputRange.second.front(), outputRange.second.front())) { + if (!LayerInfo(prevLayer).isConst() && !AreFpEq(inputRange.first.front(), outputRange.first.front()) && + !AreFpEq(inputRange.second.front(), outputRange.second.front())) { THROW_GNA_LAYER_EXCEPTION(fqLayer) << " unsupported data range conversion. Input: (" << inputRange.first.front() << "," << inputRange.second.front() << "), output: (" << outputRange.first.front() << "," << outputRange.second.front() << ")"; diff --git a/src/plugins/intel_gna/src/runtime/pwl.cpp b/src/plugins/intel_gna/src/runtime/pwl.cpp index dee041ac026..4fcae51c20f 100644 --- a/src/plugins/intel_gna/src/runtime/pwl.cpp +++ b/src/plugins/intel_gna/src/runtime/pwl.cpp @@ -33,6 +33,7 @@ #include "ops/reference/pwl.hpp" using namespace ov::intel_gna; +using namespace ov::intel_gna::common; double relu(const double x) { if (x < 0) { return(0.0); } else { return(x); } } double leaky_relu(const double x) { if (x < 0.0) { return(LEAKYRELU_SLOPE*x); } else { return(x); } } @@ -164,11 +165,11 @@ void PwlDesign(const DnnActivation& activation_type, break; } slope_scale = static_cast(1) << (8 * (1 + slope_scale_index)); - ptr_segment[i].slope = FLOAT_TO_INT16(slope * slope_scale); + ptr_segment[i].slope = FloatToInt16(slope * slope_scale); ptr_segment[i].xBase = ptr_segment[i].xBase | slope_scale_index; } - ptr_segment[i].yBase = FLOAT_TO_INT16(floatval * scale_out); + ptr_segment[i].yBase = FloatToInt16(floatval * scale_out); log::debug() << (static_cast((ptr_segment[i].xBase & XBASEMASK))/scale_out) << " " << (static_cast((ptr_segment[i].yBase))/scale_out) @@ -212,10 +213,10 @@ void PwlDesign(const DnnActivation& activation_type, break; } slope_scale = static_cast(1) << (8 * (1 + slope_scale_index)); - ptr_segment[i].slope = FLOAT_TO_INT16(slope * slope_scale); + ptr_segment[i].slope = FloatToInt16(slope * slope_scale); ptr_segment[i].xBase = ptr_segment[i].xBase | slope_scale_index; } - ptr_segment[i].yBase = FLOAT_TO_INT16(floatval * scale_out); + ptr_segment[i].yBase = FloatToInt16(floatval * scale_out); log::debug() << (static_cast((ptr_segment[i].xBase & XBASEMASK))/scale_out) << " " << (static_cast((ptr_segment[i].yBase))/scale_out) @@ -259,10 +260,10 @@ void PwlDesign(const DnnActivation& activation_type, break; } slope_scale = static_cast(1) << (8 * (1 + slope_scale_index)); - ptr_segment[i].slope = FLOAT_TO_INT16(slope * slope_scale); + ptr_segment[i].slope = FloatToInt16(slope * slope_scale); ptr_segment[i].xBase = ptr_segment[i].xBase | slope_scale_index; } - ptr_segment[i].yBase = FLOAT_TO_INT16(floatval * scale_out); + ptr_segment[i].yBase = FloatToInt16(floatval * scale_out); log::debug() << (static_cast((ptr_segment[i].xBase & XBASEMASK)) / scale_out) << " " << (static_cast((ptr_segment[i].yBase)) / scale_out) @@ -321,7 +322,7 @@ void PwlDesign(const DnnActivation& activation_type, break; } slope_scale = static_cast(1) << (8 * (1 + slope_scale_index)); - ptr_segment[1].slope = FLOAT_TO_INT16(slope * slope_scale); + ptr_segment[1].slope = FloatToInt16(slope * slope_scale); ptr_segment[1].xBase = ptr_segment[1].xBase | slope_scale_index; } ptr_segment[2].xBase = static_cast(x_upper_limit & XBASEMASK); @@ -340,7 +341,7 @@ void PwlDesign(const DnnActivation& activation_type, auto input_min_value = static_cast(std::numeric_limits::min()); auto input_max_value = static_cast(std::numeric_limits::max()); - double x_min = ov::intel_gna::common::fp32eq(fmod(activation_type.args.pow.exponent, 1.0), 0.0f)? 
input_min_value / scale_in: 0.0; + double x_min = AreFpEq(fmod(activation_type.args.pow.exponent, 1.0), 0.0)? input_min_value / scale_in: 0.0; x_min = std::max(x_min, -POW_DOMAIN); double x_max = input_max_value / scale_in; @@ -371,10 +372,10 @@ void PwlDesign(const DnnActivation& activation_type, double slope = (valnext - val) / (static_cast(xbasenext - xbase) / scale_in); auto s = gna_slope(slope, scale_in, scale_out); - ptr_segment[i].slope = FLOAT_TO_INT16(s.slope * s.slope_scale); + ptr_segment[i].slope = FloatToInt16(s.slope * s.slope_scale); ptr_segment[i].xBase = ptr_segment[i].xBase | s.slope_scale_index; - ptr_segment[i].yBase = FLOAT_TO_INT16(val * scale_out); + ptr_segment[i].yBase = FloatToInt16(val * scale_out); log::debug() << (static_cast((ptr_segment[i].xBase & XBASEMASK)) / scale_out) << " " << (static_cast((ptr_segment[i].yBase)) / scale_out) diff --git a/src/plugins/intel_gna/src/scale_factor_helper.cpp b/src/plugins/intel_gna/src/scale_factor_helper.cpp index a2e1968e5f0..e63aa183e76 100644 --- a/src/plugins/intel_gna/src/scale_factor_helper.cpp +++ b/src/plugins/intel_gna/src/scale_factor_helper.cpp @@ -20,7 +20,7 @@ static bool IsCustomInputScaleFactorAvailableLegacy(const std::vector& in bool is_scale_factor_custom = false; for (const auto& scale_factor : input_scale_factors) { - if (!fp32eq(scale_factor, GNAPluginNS::kScaleFactorDefault)) { + if (!AreFpEq(scale_factor, GNAPluginNS::kScaleFactorDefault)) { is_scale_factor_custom = true; break; } @@ -48,7 +48,7 @@ static bool IsCustomInputScaleFactorPerInputAvailable(const std::map& activation_function, if (max_epsilon > max_epsilon_prev) { j = j - 1; Delta = Delta / 2; - } else if (max_epsilon == max_epsilon_prev) { + same_epsilon = false; + } else if (AreFpEq(max_epsilon, max_epsilon_prev)) { if (!same_epsilon) { same_epsilon = true; } else { @@ -362,10 +364,10 @@ static bool pwl_search_power(const std::shared_ptr& node, } } - if (common::fp32eq(exponent, 1.0)) { + if (AreFpEq(exponent, 1.0)) { // An affine primitive will be used in this case. return false; - } else if (common::fp32eq(exponent, 0.0)) { + } else if (AreFpEq(exponent, 0.0)) { segments.emplace_back(0, 1, -std::numeric_limits::infinity()); segments.emplace_back(0, 1, std::numeric_limits::infinity()); segments.emplace_back(0, 0, std::numeric_limits::infinity()); @@ -384,7 +386,7 @@ static bool pwl_search_power(const std::shared_ptr& node, segments.insert(segments.begin(), { 0, segments.front().beta, - common::fp32eq(fmod(exponent, 1.0), 0.0f) ? -std::numeric_limits::infinity() : 0}); + AreFpEq(fmod(exponent, 1.0), 0.0) ? -std::numeric_limits::infinity() : 0}); segments.back().b = segments.back().beta; segments.push_back({0, 0, std::numeric_limits::infinity()}); return true; diff --git a/src/plugins/intel_gna/src/transformations/pwl_approximation.hpp b/src/plugins/intel_gna/src/transformations/pwl_approximation.hpp index 48c62809236..2e4dce92788 100644 --- a/src/plugins/intel_gna/src/transformations/pwl_approximation.hpp +++ b/src/plugins/intel_gna/src/transformations/pwl_approximation.hpp @@ -232,11 +232,11 @@ struct Function { } static double lower_bound(double exponent) { - return common::fp32eq(fmod(exponent, 1.0), 0.0f) ? -16 : 0; + return common::AreFpEq(fmod(exponent, 1.0), 0.0) ? 
-16.0 : 0.0; } static double upper_bound() { - return 16; + return 16.0; } const double m_exponent; diff --git a/src/plugins/intel_gna/tests/unit/backend/gna_make_pwl_identity.cpp b/src/plugins/intel_gna/tests/unit/backend/gna_make_pwl_identity.cpp index 01b05f14012..6006439f9dd 100644 --- a/src/plugins/intel_gna/tests/unit/backend/gna_make_pwl_identity.cpp +++ b/src/plugins/intel_gna/tests/unit/backend/gna_make_pwl_identity.cpp @@ -18,6 +18,7 @@ #include "runtime/pwl.h" using namespace ov::intel_gna::backend; +using namespace ov::intel_gna::common; using namespace ov::intel_gna::backend::pwl_tools; namespace { @@ -236,7 +237,7 @@ MakePWLIdentityTestParam createIdentityParamsForScales(double in, double out) { // check if exception is thrown if division by zero is possible // check if exception is thrown if scale factor with too big difference are used - const auto x_lower = FLOAT_TO_INT32(static_cast(std::numeric_limits::min()) * in / out); + const auto x_lower = FloatToInt32(static_cast(std::numeric_limits::min()) * in / out); if (slope.value == 0 || x_lower == 0) { should_throw = true; diff --git a/src/plugins/intel_gna/tests/unit/transformations/gna_pwl.cpp b/src/plugins/intel_gna/tests/unit/transformations/gna_pwl.cpp index 713f98e127f..fe11f113be7 100644 --- a/src/plugins/intel_gna/tests/unit/transformations/gna_pwl.cpp +++ b/src/plugins/intel_gna/tests/unit/transformations/gna_pwl.cpp @@ -4,109 +4,276 @@ #include -#include "transformations/pwl_approximation.hpp" +#include +#include +#include +#include #include "common_test_utils/data_utils.hpp" #include "common_test_utils/ngraph_test_utils.hpp" -#include +#include "transformations/pwl_approximation.hpp" -namespace pwl_test { -template -std::shared_ptr CreateActivationFunction(const ngraph::Shape& input_shape) { +using namespace ov::intel_gna::common; + +namespace { + +template +struct Function {}; + +template <> +struct Function { + static std::function get_function() { + return [](const double x) { + return 0.5 * (1.0 + std::tanh(x / 2.0)); + }; + } +}; + +template <> +struct Function { + static std::function get_function() { + return [](const double x) { + return std::tanh(x); + }; + } +}; + +template <> +struct Function { + static std::function get_function() { + return [](const double x) { + return x / (1.0 + std::abs(x)); + }; + } +}; + +template <> +struct Function { + static std::function get_function() { + return [](const double x) { + return std::log(x); + }; + } +}; + +template <> +struct Function { + static std::function get_function() { + return [](const double x) { + return std::exp(x); + }; + } +}; + +template <> +struct Function { + static std::function get_function(double exp) { + return [exp](const double x) { + return std::pow(x, exp); + }; + } +}; + +template +using Enable = + std::enable_if::value || std::is_same::value || + std::is_same::value || + std::is_same::value || std::is_same::value, + int>; +template +using EnableWithExtraArg = std::enable_if::value, int>; + +template +class GnaPWlTestsFixture { +public: + template ::type = 0> + GnaPWlTestsFixture(const ngraph::Shape& input_shape, + double lower_bound, + double upper_bound, + double max_error_percent); + + template ::type = 0> + GnaPWlTestsFixture(const ngraph::Shape& input_shape, + double lower_bound, + double upper_bound, + double exp, + double max_error_percent); + + void run(); + +private: + void validate_results(const std::vector& input_data, + const std::vector& results, + double max_error_percent); + + double count_abs_peak_to_peak(int samples = 
1000); + + template + static std::shared_ptr create_activation_function(const ngraph::Shape& input_shape); + + template + static std::shared_ptr create_activation_function(const ngraph::Shape& input_shape, double exp); + + double _lower_bound; + double _upper_bound; + double _max_error_percent; + std::shared_ptr _function_under_test; + std::function _reference_function; +}; + +template +template ::type> +inline GnaPWlTestsFixture::GnaPWlTestsFixture(const ngraph::Shape& input_shape, + double lower_bound, + double upper_bound, + double max_error_percent) + : _lower_bound(lower_bound), + _upper_bound(upper_bound), + _max_error_percent(max_error_percent) { + _function_under_test = create_activation_function(input_shape); + _reference_function = Function::get_function(); +} + +template +template ::type> +inline GnaPWlTestsFixture::GnaPWlTestsFixture(const ngraph::Shape& input_shape, + double lower_bound, + double upper_bound, + double exp, + double max_error_percent) + : _lower_bound(lower_bound), + _upper_bound(upper_bound), + _max_error_percent(max_error_percent) { + _function_under_test = create_activation_function(input_shape, exp); + _reference_function = Function::get_function(exp); +} + +template +template +inline std::shared_ptr GnaPWlTestsFixture::create_activation_function( + const ngraph::Shape& input_shape) { auto input_params = std::make_shared(ngraph::element::f32, input_shape); auto f = std::make_shared(input_params); auto result = std::make_shared(f); return std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); } -template -std::shared_ptr CreateActivationFunction(const ngraph::Shape& input_shape, double exp) { +template +template +inline std::shared_ptr GnaPWlTestsFixture::create_activation_function( + const ngraph::Shape& input_shape, + double exp) { auto input_params = std::make_shared(ngraph::element::f32, input_shape); auto exponents = ngraph::opset8::Constant::create(ngraph::element::f32, ngraph::Shape{}, {exp}); auto f = std::make_shared(input_params, exponents); auto result = std::make_shared(f); return std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); } -} // namespace pwl_test -namespace { -void RunTest(const std::shared_ptr& func, - const std::shared_ptr& reference_func, - float lower_bound, - float upper_bound) { - double max_error_percent = 1; +template +inline double GnaPWlTestsFixture::count_abs_peak_to_peak(int samples) { + double delta = (_upper_bound - _lower_bound) / (samples + 1); + + if (delta <= 0) { + std::stringstream str_stream; + str_stream << "Cannot count test parameters for given data!! 
Lower bound=" << _lower_bound + << ", upper bound=" << _upper_bound << std::endl; + throw std::runtime_error(str_stream.str()); + } + + double min_val = _reference_function(_lower_bound); + double max_val = min_val; + + for (int i = 0; i < samples; i++) { + double arg = _lower_bound + i * delta; + double val = _reference_function(arg); + if (val > max_val) + max_val = val; + if (val < min_val) + min_val = val; + } + + return std::abs(max_val - min_val); +} + +template +inline void GnaPWlTestsFixture::run() { { ngraph::pass::Manager m; m.register_pass(); - m.register_pass(max_error_percent); - m.run_passes(func); - ASSERT_NO_THROW(check_rt_info(func)); + m.register_pass(_max_error_percent); + m.run_passes(_function_under_test); + ASSERT_NO_THROW(check_rt_info(_function_under_test)); } - auto shape = func->input().get_node_shared_ptr()->get_output_shape(0); + auto shape = _function_under_test->input().get_node_shared_ptr()->get_output_shape(0); ov::runtime::TensorVector result(1); - std::vector data = CommonTestUtils::generate_float_numbers(ov::shape_size(shape), lower_bound, upper_bound); + std::vector data = + CommonTestUtils::generate_float_numbers(ov::shape_size(shape), _lower_bound, _upper_bound); ov::runtime::Tensor input{ov::element::f32, shape, data.data()}; - ASSERT_TRUE(func->evaluate(result, ov::runtime::TensorVector{input})); - - ov::runtime::TensorVector result_ref(1); - ASSERT_TRUE(reference_func->evaluate(result_ref, ov::runtime::TensorVector{input})); + ASSERT_TRUE(_function_under_test->evaluate(result, ov::runtime::TensorVector{input})); const float* result_data = result[0].data(); - const float* result_ref_data = result_ref[0].data(); - for (size_t i = 0; i < result[0].get_size(); i++) { - double delta = std::abs(result_data[i] - result_ref_data[i]); - ASSERT_TRUE(delta <= max_error_percent); + std::vector results(result_data, result_data + result[0].get_size()); + + validate_results(data, results, _max_error_percent); +} + +template +inline void GnaPWlTestsFixture::validate_results(const std::vector& input_data, + const std::vector& results, + double max_error_percent) { + ASSERT_FALSE(results.empty()); + + std::vector reference_values; + std::for_each(input_data.begin(), input_data.end(), [&reference_values, this](const double& x) { + reference_values.push_back(_reference_function(x)); + }); + + ASSERT_EQ(results.size(), reference_values.size()); + + auto abs_peak_to_peak = count_abs_peak_to_peak(); + + for (int i = 0; i < results.size(); ++i) { + double delta = std::abs(static_cast(results[i]) - static_cast(reference_values[i])); + double error_percentage = delta / abs_peak_to_peak * 100.0; + EXPECT_TRUE(error_percentage < max_error_percent); } } -} // namespace TEST(GnaPwlTest, Sigmoid) { - RunTest( - pwl_test::CreateActivationFunction({1, 100}), - pwl_test::CreateActivationFunction({1, 100}), - -10, - 10); + GnaPWlTestsFixture test_instance({1, 100}, -10.0, 10.0, 1.0); + test_instance.run(); } TEST(GnaPwlTest, Tanh) { - RunTest( - pwl_test::CreateActivationFunction({1, 32}), - pwl_test::CreateActivationFunction({1, 32}), - -5, - 5); + GnaPWlTestsFixture test_instance({1, 32}, -5.0, 5.0, 1.0); + test_instance.run(); } TEST(GnaPwlTest, Exp) { - RunTest( - pwl_test::CreateActivationFunction({1, 32}), - pwl_test::CreateActivationFunction({1, 32}), - -std::log2(INT16_MAX), - std::log10(INT16_MAX)); + GnaPWlTestsFixture test_instance({1, 32}, -std::log2(INT16_MAX), std::log10(INT16_MAX), 1.0); + test_instance.run(); } TEST(GnaPwlTest, SoftSign) { - RunTest( - 
pwl_test::CreateActivationFunction({1, 32}), - pwl_test::CreateActivationFunction({1, 32}), - -10, - 10); + GnaPWlTestsFixture test_instance({1, 32}, -10, 10, 1.0); + test_instance.run(); } TEST(GnaPwlTest, Log) { - RunTest( - pwl_test::CreateActivationFunction({1, 32}), - pwl_test::CreateActivationFunction({1, 32}), - 0.001, - 2981); + GnaPWlTestsFixture test_instance({1, 32}, 0.001, 2981, 1.0); + test_instance.run(); } TEST(GnaPwlTest, Power) { - for (float exp = 1; exp <= 2.2; exp+=0.1) { - RunTest( - pwl_test::CreateActivationFunction({1, 32}, exp), - pwl_test::CreateActivationFunction({1, 32}, exp), - ov::intel_gna::common::fp32eq(std::fmod(exp, 1.0), 0.0) ? -16 : 0, - 16); + for (float exp = 1; exp <= 2.2; exp += 0.1) { + GnaPWlTestsFixture test_instance({1, 32}, + AreFpEq(std::fmod(exp, 1.0), 0.0) ? -16 : 0, + 16, + exp, + 1.0); + test_instance.run(); } } +} // namespace \ No newline at end of file
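
Note (illustrative only, not part of the patch): the standalone sketch below shows how the new numerical_utils helpers introduced in this change are intended to replace the old FLOAT_TO_INT* macros and fp32eq(). The helper bodies follow the definitions added to numerical_utils.hpp; the template parameter lists and the main() driver are assumptions added here to make the example self-contained and compilable.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>
#include <type_traits>

// Rounded float-to-integer conversion (replacement for the FLOAT_TO_INT8/16/32/64 macros):
// add or subtract 0.5 before truncation so the value is rounded away from zero.
template <typename T>
inline T FloatToInteger(float a) {
    return static_cast<T>((a < 0.0f) ? (a - 0.5f) : (a + 0.5f));
}
inline int16_t FloatToInt16(float a) { return FloatToInteger<int16_t>(a); }
inline int32_t FloatToInt32(float a) { return FloatToInteger<int32_t>(a); }

// Relative floating-point comparison (replacement for fp32eq): the tolerance is scaled by the
// smaller magnitude of the two operands, with machine epsilon as the default accuracy.
template <typename T, typename std::enable_if<std::is_floating_point<T>::value, int>::type = 0>
bool AreFpEq(T p1, T p2, T accuracy = std::numeric_limits<T>::epsilon()) {
    return std::abs(p1 - p2) <= accuracy * std::min(std::abs(p1), std::abs(p2));
}

int main() {
    std::cout << FloatToInt16(2.5f) << '\n';   // 3  (rounded away from zero)
    std::cout << FloatToInt32(-2.5f) << '\n';  // -3
    std::cout << std::boolalpha;
    std::cout << AreFpEq(1.0f, 1.0f + 1e-8f) << '\n';  // true: difference is within the scaled tolerance
    std::cout << AreFpEq(1.0f, 1.1f) << '\n';          // false
    return 0;
}

Making the comparison a template also lets double arguments, such as AreFpEq(fmod(exponent, 1.0), 0.0) in pwl_approximation.cpp, use the double epsilon instead of being narrowed to float as with the old fp32eq().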