[GNA] Create new tests for PWL approximation and refactor numerical_u… (#14604)

* [GNA] Small improvement in PWLApproximation and an update of its unit tests.

* added template method for floating point comparison
* added new tests for PWLApproximation

* [GNA] Refactor numerical_utils and usage of its method in plugin

* refactor numerical utils: use template methods for conversion and comparison
Marcin Kusmierski 2022-12-20 10:20:09 +01:00 committed by GitHub
parent 8ef6ad3665
commit bc69385093
14 changed files with 353 additions and 163 deletions
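For context, the call-site pattern applied throughout the plugin swaps the rounding macros and `fp32eq` for the new templated helpers. A minimal illustrative sketch of the before/after shape (standalone, not taken from the diff; the include path and the surrounding functions are assumptions for illustration):

#include <cstdint>
#include "common/numerical_utils.hpp"  // assumed location of the new helpers

using namespace ov::intel_gna::common;

int16_t quantize_slope(float slope, float slope_scale) {
    // before: return FLOAT_TO_INT16(slope * slope_scale);
    return FloatToInt16(slope * slope_scale);  // round half away from zero, then cast
}

bool is_default_scale(float scale_factor) {
    // before: return fp32eq(scale_factor, 1.0f);
    return AreFpEq(scale_factor, 1.0f);  // relative comparison with an epsilon tolerance
}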

View File

@ -19,6 +19,7 @@
#include "log/log.hpp"
using namespace ov::intel_gna;
using namespace ov::intel_gna::common;
// This function performs emulation of HW saturation of PWL segments in SW
// by inserting additional segments when overflow would happen
@ -50,7 +51,7 @@ static void insert_extra_pwl_segments(std::vector<gna_pwl_segment_t>& gna_pwl,
if (y_value > static_cast<float>(INT16_MAX) || y_value < static_cast<float>(INT16_MIN)) {
float x_value = ((static_cast<float>(y_max) - yBase) * scale) / slope + xBase;
extra_segment.xBase = FLOAT_TO_INT32(x_value) & XBASEMASK;
extra_segment.xBase = FloatToInt32(x_value) & XBASEMASK;
extra_segment.yBase = slope > 0 ? y_max : y_min;
extra_segment.slope = 0;
extra_segments[gna_pwl_size] = extra_segment;
@ -125,20 +126,21 @@ void make_gna_pwl(const DnnActivation& fun,
int32_t y_lower = y_min;
int16_t y_upper = y_max;
if (fun.fqParams.set) {
x_lower = static_cast<int32_t>(std::max(FLOAT_TO_INT64(*fun.fqParams.input_low * 1.25 * in_scale), static_cast<int64_t>(x_lower)));
x_upper = static_cast<int32_t>(std::min(FLOAT_TO_INT64(*fun.fqParams.input_high * 1.25 * in_scale), static_cast<int64_t>(x_upper)));
x_lower = static_cast<int32_t>(std::max(FloatToInt64(*fun.fqParams.input_low * 1.25 * in_scale), static_cast<int64_t>(x_lower)));
x_upper = static_cast<int32_t>(std::min(
FloatToInt64(*fun.fqParams.input_high * 1.25 * in_scale), static_cast<int64_t>(x_upper)));
// y_lower can be reduced with negative slope
y_lower = static_cast<int32_t>(*fun.fqParams.input_low * 1.25 * out_scale);
y_upper = static_cast<int16_t>(std::min(FLOAT_TO_INT32(*fun.fqParams.input_high * 1.25 * out_scale), static_cast<int32_t>(y_upper)));
y_upper = static_cast<int16_t>(std::min(FloatToInt32(*fun.fqParams.input_high * 1.25 * out_scale), static_cast<int32_t>(y_upper)));
} else {
if (x_lower < y_lower * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale);
if (y_lower < x_lower * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale);
if (x_lower < y_lower * in_scale / out_scale) x_lower = FloatToInt32(y_lower * in_scale / out_scale);
if (y_lower < x_lower * out_scale / in_scale) y_lower = FloatToInt16(x_lower * out_scale / in_scale);
}
gna_pwl[0].yBase = std::max(FLOAT_TO_INT32(y_lower * fun.args.lrelu.negative_slope), static_cast<int32_t>(y_min));
gna_pwl[0].yBase = std::max(FloatToInt32(y_lower * fun.args.lrelu.negative_slope), static_cast<int32_t>(y_min));
s = gna_slope(fun.args.lrelu.negative_slope, in_scale, out_scale);
gna_pwl[0].xBase = (x_lower & XBASEMASK) | s.slope_scale_index; // zero out the 2 lsb
gna_pwl[0].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[0].slope = FloatToInt16(s.slope * s.slope_scale);
print_segment((int32_t)(gna_pwl[0].xBase & XBASEMASK) / in_scale,
gna_pwl[0].yBase / out_scale,
@ -147,7 +149,7 @@ void make_gna_pwl(const DnnActivation& fun,
gna_pwl[1].xBase = 0;
gna_pwl[1].yBase = 0;
s = gna_slope(1.0, in_scale, out_scale);
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[1].slope = FloatToInt16(s.slope * s.slope_scale);
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
print_segment(0.0, 0.0, (gna_pwl[1].slope * in_scale) / (out_scale*s.slope_scale));
@ -205,18 +207,18 @@ void make_gna_pwl(const DnnActivation& fun,
if (fun == kActKaldiLstmClipping) {
if (x_lower < l_bound * in_scale) {
if (y_lower < l_bound * out_scale) {
x_lower = FLOAT_TO_INT32(l_bound * in_scale);
y_lower = FLOAT_TO_INT16(l_bound * out_scale);
x_lower = FloatToInt32(l_bound * in_scale);
y_lower = FloatToInt16(l_bound * out_scale);
} else {
x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale);
x_lower = FloatToInt32(y_lower * in_scale / out_scale);
}
}
if (x_upper > u_bound * in_scale) {
if (y_upper > u_bound * out_scale) {
x_upper = FLOAT_TO_INT32(u_bound * in_scale);
y_upper = FLOAT_TO_INT16(u_bound * out_scale);
x_upper = FloatToInt32(u_bound * in_scale);
y_upper = FloatToInt16(u_bound * out_scale);
} else {
x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale);
x_upper = FloatToInt32(y_upper * in_scale / out_scale);
}
}
}
@ -230,7 +232,7 @@ void make_gna_pwl(const DnnActivation& fun,
gna_pwl[1].xBase = x_lower & XBASEMASK; // zero out the 2 lsb
gna_pwl[1].yBase = y_lower;
s = gna_slope(1.0, in_scale, out_scale);
gna_pwl[1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[1].slope = FloatToInt16(s.slope * s.slope_scale);
gna_pwl[1].xBase = gna_pwl[1].xBase | s.slope_scale_index;
print_segment((int32_t)(gna_pwl[1].xBase & XBASEMASK) / in_scale, gna_pwl[1].yBase / out_scale, 1.0);
@ -251,8 +253,8 @@ void make_gna_pwl(const DnnActivation& fun,
auto n_segments = 2;
if (y_upper > x_upper * out_scale / in_scale) y_upper = FLOAT_TO_INT16(x_upper * out_scale / in_scale);
if (x_upper > y_upper * in_scale / out_scale) x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale);
if (y_upper > x_upper * out_scale / in_scale) y_upper = FloatToInt16(x_upper * out_scale / in_scale);
if (x_upper > y_upper * in_scale / out_scale) x_upper = FloatToInt32(y_upper * in_scale / out_scale);
if (y_upper == y_max) { // saturation at ends - need one more segment
n_segments += 1;
@ -268,14 +270,14 @@ void make_gna_pwl(const DnnActivation& fun,
gna_pwl[i].xBase = (-x_upper) & XBASEMASK; // zero out the 2 lsb
gna_pwl[i].yBase = y_upper;
s = gna_slope(-1.0, in_scale, out_scale);
gna_pwl[i].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[i].slope = FloatToInt16(s.slope * s.slope_scale);
gna_pwl[i].xBase = gna_pwl[i].xBase | s.slope_scale_index;
print_segment((int32_t)(gna_pwl[i].xBase & XBASEMASK) / in_scale, gna_pwl[i].yBase / out_scale, -1.0);
gna_pwl[i + 1].xBase = 0;
gna_pwl[i + 1].yBase = 0;
s = gna_slope(1.0, in_scale, out_scale);
gna_pwl[i + 1].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
gna_pwl[i + 1].slope = FloatToInt16(s.slope * s.slope_scale);
gna_pwl[i + 1].xBase = gna_pwl[i + 1].xBase | s.slope_scale_index;
print_segment((int32_t)(gna_pwl[i + 1].xBase & XBASEMASK) / in_scale, gna_pwl[i + 1].yBase / out_scale, 1.0);
break;
@ -300,7 +302,7 @@ static T cast_check_overflow(double v, bool round = true) {
return std::numeric_limits<T>::min();
}
return round ? FLOAT_TO_INT32(v) : static_cast<T>(v);
return round ? FloatToInt32(v) : static_cast<T>(v);
}
/**

View File

@ -40,13 +40,13 @@ BorderValues BorderValuesCounterIdentity::CreateBorderValues(const BorderValues&
int16_t y_lower = default_values.y_lower;
int16_t y_upper = default_values.y_upper;
if (x_lower < y_lower * in_scale / out_scale)
x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale);
x_lower = common::FloatToInt32(y_lower * in_scale / out_scale);
if (x_upper > y_upper * in_scale / out_scale)
x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale);
x_upper = common::FloatToInt32(y_upper * in_scale / out_scale);
if (y_lower < x_lower * out_scale / in_scale)
y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale);
y_lower = common::FloatToInt16(x_lower * out_scale / in_scale);
if (y_upper > x_upper * out_scale / in_scale)
y_upper = FLOAT_TO_INT16(x_upper * out_scale / in_scale);
y_upper = common::FloatToInt16(x_upper * out_scale / in_scale);
return {x_lower, x_upper, y_lower, y_upper, {default_values.y_lower, default_values.y_upper}};
}

View File

@ -28,7 +28,7 @@ int64_t ComputeSlopeScale(const int32_t x_base) {
PWLSegmentSlope ComputeSlopeForSegment(double slope, double in_scale, double out_scale) {
const auto gna_slope_value = gna_slope(slope, in_scale, out_scale);
auto segment_slope = FLOAT_TO_INT64(gna_slope_value.slope * gna_slope_value.slope_scale);
auto segment_slope = common::FloatToInt64(gna_slope_value.slope * gna_slope_value.slope_scale);
if (segment_slope > std::numeric_limits<int16_t>::max()) {
segment_slope = std::numeric_limits<int16_t>::max();

View File

@ -4,28 +4,42 @@
#pragma once
#include <cstdlib>
#include <algorithm>
#include <cstdlib>
namespace ov {
namespace intel_gna {
namespace common {
#define FLOAT_TO_INT8(a) static_cast<int8_t>(((a) < 0)?((a) - 0.5f):((a) + 0.5f))
#define FLOAT_TO_INT16(a) static_cast<int16_t>(((a) < 0)?((a) - 0.5f):((a) + 0.5f))
#define FLOAT_TO_INT32(a) static_cast<int32_t>(((a) < 0)?((a)-0.5f):((a)+0.5f))
#define FLOAT_TO_INT64(a) static_cast<int64_t>(((a) < 0)?((a)-0.5f):((a)+0.5f))
template <typename T>
inline T FloatToInteger(float a) {
return static_cast<T>((a < 0.0f) ? (a - 0.5f) : (a + 0.5f));
}
inline int8_t FloatToInt8(float a) {
return FloatToInteger<int8_t>(a);
}
inline int16_t FloatToInt16(float a) {
return FloatToInteger<int16_t>(a);
}
inline int32_t FloatToInt32(float a) {
return FloatToInteger<int32_t>(a);
}
inline int64_t FloatToInt64(float a) {
return FloatToInteger<int64_t>(a);
}
/**
* @brief Compares two float values and returns if they are equal
* @param p1 First float value
* @param p2 Second float value
* @return Returns true if two float values are equal
* @brief Compare two floating point values and return true if they are equal with given accuracy
* @param p1 First floating point value
* @param p2 Second floating point value
* @param accuracy Accuracy of the comparison
* @return Returns true if two floating point values are equal
*/
inline bool fp32eq(float p1, float p2, float accuracy = 0.00001f) {
template <typename T, typename std::enable_if<std::is_floating_point<T>::value, int>::type = 0>
bool AreFpEq(T p1, T p2, T accuracy = std::numeric_limits<T>::epsilon()) {
return (std::abs(p1 - p2) <= accuracy * std::min(std::abs(p1), std::abs(p2)));
}
} // namespace common
} // namespace intel_gna
} // namespace ov
} // namespace common
} // namespace intel_gna
} // namespace ov
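To make the semantics of the new helpers concrete, here is a small self-contained re-implementation for illustration only (the definitions above are authoritative). It shows the round-half-away-from-zero behaviour of FloatToInteger and why AreFpEq against 0.0f only matches an exact zero:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>

int main() {
    // Same rounding rule as FloatToInteger<int16_t>: shift by 0.5 away from zero, then truncate.
    auto round_away = [](float a) { return static_cast<int16_t>((a < 0.0f) ? (a - 0.5f) : (a + 0.5f)); };
    std::cout << round_away(2.5f) << " " << round_away(-2.5f) << "\n";  // prints: 3 -3

    // Same rule as AreFpEq: the tolerance is relative to the smaller magnitude,
    // so a comparison against 0.0f succeeds only for an exact zero.
    auto are_fp_eq = [](float p1, float p2, float acc = std::numeric_limits<float>::epsilon()) {
        return std::abs(p1 - p2) <= acc * std::min(std::abs(p1), std::abs(p2));
    };
    std::cout << std::boolalpha << are_fp_eq(1.0f, 1.0f + 1e-8f)  // true: within relative epsilon
              << " " << are_fp_eq(1e-9f, 0.0f) << "\n";           // false: nonzero vs. exact zero
    return 0;
}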

View File

@ -13,6 +13,7 @@
namespace ov {
namespace intel_gna {
using namespace common;
namespace frontend {
constexpr float activation_scale_factor = 2048.f;
@ -93,14 +94,14 @@ float ScaleFactorCalculator::selectBestOutputScaleFactors(float inScale,
auto sd = 0.0f;
for (size_t j = 0; j < slopes.size(); ++j) {
auto s = gna_slope(slopes[j], inScale, outScale);
auto slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
auto slope = FloatToInt16(s.slope * s.slope_scale);
if (slope < std::numeric_limits<int16_t>::min() || slope > std::numeric_limits<int16_t>::max()) {
sd += std::numeric_limits<int8_t>::max();
continue;
}
auto testSlope = static_cast<double>(slope) / s.slope_scale * inScale / outScale;
if (common::fp32eq(static_cast<float>(testSlope), static_cast<float>(slopes[j]), 1.0E-6f)) {
if (AreFpEq(static_cast<float>(testSlope), static_cast<float>(slopes[j]), 1.0E-6f)) {
return outScale;
}
@ -150,7 +151,7 @@ float ScaleFactorCalculator::selectBestWeightsScaleFactors(float inScale,
}
auto testSlope = static_cast<double>(slope) / s.slope_scale * (inScale * weightScale) / outScale;
if (common::fp32eq(static_cast<float>(testSlope), static_cast<float>(slopes[j]))) {
if (AreFpEq(static_cast<float>(testSlope), static_cast<float>(slopes[j]))) {
return outScale;
}
sd += pow(testSlope - slopes[j], 2.0);
@ -258,7 +259,7 @@ bool ScaleFactorCalculator::requantizeInput(InferenceEngine::CNNLayerPtr input,
return true;
}
if (info.isWeightableIdentity() && !common::fp32eq(quantDataForInputLayer->_weights_quant.GetScale(), 1.0f)) {
if (info.isWeightableIdentity() && !AreFpEq(quantDataForInputLayer->_weights_quant.GetScale(), 1.0f)) {
auto reducer = std::max(1.0f, quantDataForInputLayer->_dst_quant.GetScale() / newOutputScale);
auto newWeightsScale = std::max(1.0f, quantDataForInputLayer->_weights_quant.GetScale() / reducer);
quantDataForInputLayer->_weights_quant.SetScale(static_cast<int32_t>(newWeightsScale));
@ -354,7 +355,7 @@ float ScaleFactorCalculator::adjustScaleFactor(float sf,
auto scaleFactors = generateScaleFactors(startRange, endRange, steps);
auto newScaleFactor = selectBestOutputScaleFactors(quantizedParams->_src_quant.GetScale(), scaleFactors, slopes);
if (!common::fp32eq(sf, newScaleFactor) && !common::fp32eq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) {
if (!AreFpEq(sf, newScaleFactor) && !AreFpEq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) {
log::debug() << "[INFO] Adjusting scale factor for " << cnnLayer->name
<< " from: " << sf << " to: " << newScaleFactor << "\n";
sf = newScaleFactor;
@ -443,7 +444,7 @@ float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const*
auto input_max_value = static_cast<double>(std::numeric_limits<int32_t>::max());
auto output_max_value = static_cast<double>((inputsSize == 2) ? std::numeric_limits<int16_t>::max() : std::numeric_limits<int8_t>::max());
auto x_min = common::fp32eq(fmod(exponent, 1.0), 0) ? input_min_value / quantizedParams->_src_quant.GetScale() : 0.0;
auto x_min = AreFpEq(fmod(exponent, 1.0), 0.0) ? input_min_value / quantizedParams->_src_quant.GetScale() : 0.0;
x_min = std::max(x_min, -pow_domain);
auto x_max = input_max_value / quantizedParams->_src_quant.GetScale();
@ -534,7 +535,7 @@ float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const*
auto levels = std::min(quantizedParams->_dst_quant.GetLevels(), static_cast<size_t>(std::numeric_limits<uint16_t>::max()) + 1);
result = CalculateScaleFactorFromStats(levels, minOutValue, maxOutValue);
if (std::isinf(result) || common::fp32eq(absMax, 0.0f)) {
if (std::isinf(result) || AreFpEq(absMax, 0.0f)) {
result = max_activation_scale_factor;
}
@ -556,7 +557,7 @@ float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const*
(layer.isIdentity() || layer.isFakeQuantize()) && LayerInfo(prevLayer).isWeightableIdentity()) {
auto prevLayerQuant = InferenceEngine::getInjectedData<QuantizedLayerParams>(*prevLayer);
auto prevLayer2 = CNNNetHasPrevLayer(prevLayer.get(), 0) ? CNNNetPrevLayerSkipCertain(prevLayer, 0, skipNonFunctional) : nullptr;
if (!common::fp32eq(prevLayerQuant->_src_quant.GetScale(), 1.0f) &&
if (!AreFpEq(prevLayerQuant->_src_quant.GetScale(), 1.0f) &&
prevLayerQuant->_src_quant.IsStatsSet() &&
(prevLayer2 == nullptr || LayerInfo(prevLayer2).has8BOr16BOutput())) {
result = prevLayerQuant->_src_quant.GetScale();
@ -620,14 +621,14 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cn
auto quantSibling = InferenceEngine::getInjectedData<QuantizedLayerParams>(input);
// after restarting from memory input - quant is fine
if (common::fp32eq(quantSibling->_dst_quant.GetScale(), inputQuant->_dst_quant.GetScale())) {
if (AreFpEq(quantSibling->_dst_quant.GetScale(), inputQuant->_dst_quant.GetScale())) {
quant->_src_quant.SetScale(inputQuant->_dst_quant.GetScale());
quant->_dst_quant.SetScale(inputQuant->_dst_quant.GetScale());
return true;
}
if ((!fake_quantized && quantSibling->_dst_quant.IsScaleSet()) ||
(fake_quantized && quantSibling->_dst_quant.IsScaleSet() && !common::fp32eq(quantSibling->_dst_quant.GetScale(), 1.0) &&
(fake_quantized && quantSibling->_dst_quant.IsScaleSet() && !AreFpEq(quantSibling->_dst_quant.GetScale(), 1.0f) &&
quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale()) ||
quantSibling->_dst_quant.IsScaleSet() && infiniteLoopCount > 0) {
// means we already restarted propagation input memory layer
@ -733,7 +734,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cn
auto scale_val = static_cast<float>(levels) / abs_val;
//TODO: use FQ formula for scale factor calculation
if (std::isinf(scale_val) || common::fp32eq(abs_val, 0.0f)) {
if (std::isinf(scale_val) || AreFpEq(abs_val, 0.0f)) {
quant->_dst_quant.SetScale(fake_quantized ? levels : 1.0f);
} else {
quant->_dst_quant.SetScale(scale_val);
@ -762,7 +763,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cn
}
auto powerScale = std::abs(powerLayer->scale);
if (common::fp32eq(powerScale, 0.0f)) {
if (AreFpEq(powerScale, 0.0f)) {
powerScale = 1.0f;
}
auto weightsScaleFactor = MAX_VAL_2B_WEIGHT / powerScale;
@ -773,8 +774,8 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cn
} else if (layerInfo.isActivation()) {
// todo: calculate proper scale factor where we need to expand it a bit to be safe to stay in int16 weights
// set the initial value
if (!quant->_dst_quant.IsScaleSet() || common::fp32eq(quant->_dst_quant.GetScale(), 1.0f) ||
!common::fp32eq(quant->_src_quant.GetScale(), inputQuant->_dst_quant.GetScale())) {
if (!quant->_dst_quant.IsScaleSet() || AreFpEq(quant->_dst_quant.GetScale(), 1.0f) ||
!AreFpEq(quant->_src_quant.GetScale(), inputQuant->_dst_quant.GetScale())) {
quant->_src_quant.SetScale(inputQuant->_dst_quant.GetScale());
auto scale = getActivationScale(cnnLayer, layerInfo, inputsSize, fake_quantized);
quant->_dst_quant.SetScale(scale);
@ -865,13 +866,13 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL
bestWeightsScale = i;
}
if (common::fp32eq(error, 0.0f)) {
if (AreFpEq(error, 0.0f)) {
break;
}
}
if (bestWeightsScale > 0.0f &&
!common::fp32eq(bestWeightsScale, quantParams1->_weights_quant.GetScale())) {
!AreFpEq(bestWeightsScale, quantParams1->_weights_quant.GetScale())) {
quantParams1->_weights_quant.SetScale(bestWeightsScale);
quantParams1->_dst_quant.SetScale(quantParams1->_weights_quant.GetScale() *
quantParams1->_src_quant.GetScale());
@ -888,7 +889,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL
auto maxValue = (GetInputPrecision() == InferenceEngine::Precision::I8)
? std::numeric_limits<int8_t>::max() : std::numeric_limits<int16_t>::max();
if (quantData->_weights_quant.GetScale() > maxValue &&
!common::fp32eq(quantData->_weights_quant.GetScale(), maxValue)) {
!AreFpEq(quantData->_weights_quant.GetScale(), static_cast<float>(maxValue))) {
float newOutputScale = quantParams0->_dst_quant.GetScale() * maxValue;
log::debug() << "[INFO] weights saturated for " << eltwiseLayer->name << ", try to requiantize input " << in1->name << std::endl;
if (requantizeInput(in1, newOutputScale, result, infiniteLoopCount)) {
@ -901,6 +902,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL
if (!quantData->_dst_quant.IsStatsSet()) {
return true;
}
auto weightsReducer = calculateWeightsReducerFromDstStats(quantData->_dst_quant);
if (weightsReducer > initial_weights_reducer_val) {
float newOutputScale = quantParams1->_dst_quant.GetScale() / weightsReducer;
@ -911,10 +913,8 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL
}
}
break;
default : THROW_GNA_EXCEPTION << "Unsupported Eltwise layer for quantisation: " << eltwiseLayer->_operation;
}
return true;
}
@ -946,7 +946,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay
auto scaleFactor = quantParams0->_dst_quant.GetScale();
auto scaleFactorCheck = [scaleFactor](InferenceEngine::CNNLayerPtr& inputLayer) {
auto quantParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(inputLayer);
return common::fp32eq(quantParams->_dst_quant.GetScale(), scaleFactor);
return AreFpEq(quantParams->_dst_quant.GetScale(), scaleFactor);
};
if (std::find_if_not(inputLayers.begin() + 1, inputLayers.end(), scaleFactorCheck) == inputLayers.end()) {
@ -968,7 +968,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay
auto nextInputIt = sourceLayerIt + 1;
while ((nextInputIt = std::find_if(nextInputIt, inputLayers.end(), inputLayerCheck)) != inputLayers.end()) {
auto quantParamsSecond = InferenceEngine::getInjectedData<QuantizedLayerParams>(*nextInputIt);
if (!common::fp32eq(quantParamsSecond->_dst_quant.GetScale(), quantParamsFirst->_dst_quant.GetScale())) {
if (!common::AreFpEq(quantParamsSecond->_dst_quant.GetScale(), quantParamsFirst->_dst_quant.GetScale())) {
THROW_GNA_EXCEPTION << "Two Input layers " << (*sourceLayerIt)->name
<< " and " << (*nextInputIt)->name << " have different scales in concat!!! \n";
}
@ -991,8 +991,8 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay
for (auto it = inputLayers.begin(); it != inputLayers.end(); ++it) {
auto quantParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(*it);
if ((quantParams->_dst_quant.GetScale() < minScaleFactor &&
!common::fp32eq(quantParams->_dst_quant.GetScale(), 1.0f)) ||
common::fp32eq(minScaleFactor, 1.0f)) {
!AreFpEq(quantParams->_dst_quant.GetScale(), 1.0f)) ||
AreFpEq(minScaleFactor, 1.0f)) {
minScaleFactor = quantParams->_dst_quant.GetScale();
sourceLayerIt = it;
}
@ -1002,7 +1002,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay
auto sourceLayerCheck = [](InferenceEngine::CNNLayerPtr& inputLayer) {
auto quantParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(inputLayer);
LayerInfo info(inputLayer);
return !info.isActivation() && !common::fp32eq(quantParams->_dst_quant.GetScale(), 1.0f);
return !info.isActivation() && !AreFpEq(quantParams->_dst_quant.GetScale(), 1.0f);
};
sourceLayerIt = std::find_if(inputLayers.begin(), inputLayers.end(), sourceLayerCheck);
}
@ -1010,7 +1010,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay
if (sourceLayerIt == inputLayers.end()) {
auto nonDefaultScaleFactor = [](InferenceEngine::CNNLayerPtr& inputLayer) {
auto quantParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(inputLayer);
return !common::fp32eq(quantParams->_dst_quant.GetScale(), 1.0f);
return !AreFpEq(quantParams->_dst_quant.GetScale(), 1.0f);
};
sourceLayerIt = std::find_if(inputLayers.begin(), inputLayers.end(), nonDefaultScaleFactor);
@ -1026,7 +1026,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay
for (auto it = inputLayers.begin(); it != inputLayers.end(); ++it) {
auto quantParamsIn = InferenceEngine::getInjectedData<QuantizedLayerParams>(*it);
if (common::fp32eq(quantParamsIn->_dst_quant.GetScale(), scaleFactor)) {
if (AreFpEq(quantParamsIn->_dst_quant.GetScale(), scaleFactor)) {
continue;
}
@ -1035,7 +1035,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay
quantParamsIn->_dst_quant.SetScale(quantParams->_dst_quant.GetScale());
} else {
// possible case when some of the concat inputs are free to select scale ex: const->concat<-affine
if (!common::fp32eq(quantParamsIn->_dst_quant.GetScale(), 1.0f) && !LayerInfo(*it).isActivation()) {
if (!AreFpEq(quantParamsIn->_dst_quant.GetScale(), 1.0f) && !LayerInfo(*it).isActivation()) {
concatIdxToUpdate.insert(std::distance(inputLayers.begin(), it));
}
@ -1047,7 +1047,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay
auto updatedScaleFactor = InferenceEngine::getInjectedData<QuantizedLayerParams>(in0)->_dst_quant.GetScale();
auto equalScaleFactor = [updatedScaleFactor](InferenceEngine::CNNLayerPtr& inputLayer) {
auto quantParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(inputLayer);
return common::fp32eq(quantParams->_dst_quant.GetScale(), updatedScaleFactor);
return AreFpEq(quantParams->_dst_quant.GetScale(), updatedScaleFactor);
};
auto layerIt = std::find_if_not(inputLayers.begin() + 1, inputLayers.end(), equalScaleFactor);
@ -1117,11 +1117,11 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerConcat(InferenceEngine::ConcatLay
auto prevLayerQuant = InferenceEngine::getInjectedData<QuantizedLayerParams>(*prevLayer);
auto bestWeightsScale = 1.0f;
auto slopes = getPWLSlopes(restarLayerInfo);
if (!slopes.empty() && !common::fp32eq(prevLayerQuant->_src_quant.GetScale(), newScaleFactor)) {
if (!slopes.empty() && !AreFpEq(prevLayerQuant->_src_quant.GetScale(), newScaleFactor)) {
bestWeightsScale = selectBestWeightsScaleFactors(prevLayerQuant->_src_quant.GetScale(),
newScaleFactor, weightsScales, { 1.0f });
}
if (!slopes.empty() && !common::fp32eq(bestWeightsScale, prevLayerQuant->_weights_quant.GetScale())) {
if (!slopes.empty() && !AreFpEq(bestWeightsScale, prevLayerQuant->_weights_quant.GetScale())) {
log::debug() << "[INFO][Concat] Optimizing weights scale factor for '" << prevLayer->name << "' layer. Change from "
<< prevLayerQuant->_weights_quant.GetScale() << " to " << bestWeightsScale << "\n";

View File

@ -45,6 +45,7 @@ using namespace std;
using namespace ov::intel_gna;
using namespace GNAPluginNS;
using namespace ov::intel_gna::frontend;
using namespace ov::intel_gna::common;
using namespace memory;
static bool CheckIFLastComponentIsPrecededByConv2D(const GNAPluginNS::backend::DnnComponents::storage_type& components,
@ -829,16 +830,16 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
} else {
IE_ASSERT(quantized != nullptr);
if (!gna_config.gnaFlags.input_low_precision) {
auto quantizedScale = FLOAT_TO_INT16(std::min(quantized->_weights_quant.GetScale() * power.scale,
auto quantizedScale = FloatToInt16(std::min(quantized->_weights_quant.GetScale() * power.scale,
static_cast<float>(INT16_MAX)));
auto quantizedOffset = FLOAT_TO_INT32(std::min(quantized->_dst_quant.GetScale() * power.offset,
auto quantizedOffset = FloatToInt32(std::min(quantized->_dst_quant.GetScale() * power.offset,
static_cast<float>(INT32_MAX)));
gnamem->getQueue(REGION_RO)->push_value<int16_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value<int32_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
} else {
auto quantizedScale = FLOAT_TO_INT8(std::min(quantized->_weights_quant.GetScale() * power.scale,
auto quantizedScale = FloatToInt8(std::min(quantized->_weights_quant.GetScale() * power.scale,
static_cast<float>(INT8_MAX)));
auto quantizedOffset = FLOAT_TO_INT8(std::min(quantized->_dst_quant.GetScale() * power.offset,
auto quantizedOffset = FloatToInt8(std::min(quantized->_dst_quant.GetScale() * power.offset,
static_cast<float>(INT8_MAX)));
gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer, ptr_weights, quantizedScale, num_rows_out, 64);
gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer, ptr_biases, quantizedOffset, num_rows_out, 64);
@ -1370,10 +1371,10 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
auto scaledIdentity = -quantized->_weights_quant.GetScale();
if (gna_config.gnaFlags.input_low_precision == false) {
auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
auto quantizedIdentity = FloatToInt16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
gnamem->getQueue(REGION_RO)->push_value<int16_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
} else {
auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
auto quantizedIdentity = FloatToInt8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
}
@ -1387,11 +1388,11 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
auto scaledIdentity = quantized->_weights_quant.GetScale();
if (gna_config.gnaFlags.input_low_precision == false) {
auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
auto quantizedIdentity = FloatToInt16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
gnamem->getQueue(REGION_RO)->push_value<int16_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
} else {
auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
auto quantizedIdentity = FloatToInt8(std::min(scaledIdentity, static_cast<float>(INT8_MAX)));
gnamem->getQueue(REGION_RO)->push_value<int8_t>(layer, ptr_weights, quantizedIdentity, num_rows_out, 64);
}

View File

@ -20,6 +20,7 @@
using namespace InferenceEngine;
using namespace InferenceEngine::details;
using namespace ov::intel_gna;
using namespace ov::intel_gna::common;
namespace GNAPluginNS {
const uint8_t Config::max_num_requests;
@ -55,7 +56,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
auto value = item.second;
auto check_scale_factor = [&] (float scale_factor) {
if (ov::intel_gna::common::fp32eq(scale_factor, 0.0f) || std::isinf(scale_factor)) {
if (AreFpEq(scale_factor, 0.0f) || std::isinf(scale_factor)) {
THROW_GNA_EXCEPTION << "input scale factor of 0.0f or +-inf not supported";
}
};

View File

@ -48,6 +48,7 @@ using namespace InferenceEngine;
using namespace InferenceEngine::details;
using namespace GNAPluginNS;
using namespace ov::intel_gna::frontend;
using namespace ov::intel_gna::common;
#define pass_trace() log::debug() << "[" << getName() << "] "
@ -467,18 +468,18 @@ void SubstituteSoftSignPass::run() {
auto powerLayer = LayerInfo(addition).as<PowerLayer*>();
// first layer after abs must have scale of 1, offset of 1 and power of either 1 or -1
if (!common::fp32eq(powerLayer->scale, 1.0f) || !common::fp32eq(powerLayer->offset, 1.0f) ||
!common::fp32eq(std::abs(powerLayer->power), 1.0f)) continue;
if (!AreFpEq(powerLayer->scale, 1.0f) || !AreFpEq(powerLayer->offset, 1.0f) ||
!AreFpEq(std::abs(powerLayer->power), 1.0f)) continue;
// power == -1, offset = 1, scale = 1
if (common::fp32eq(powerLayer->power, -1.0f)) {
if (AreFpEq(powerLayer->power, -1.0f)) {
std::swap(addition, power);
} else { // power = 1, offset = 1, scale - 1
power = getNthChild(addition, 0);
if (!LayerInfo(power).isPower()) continue;
auto powerLayer_1 = LayerInfo(power).as<PowerLayer*>();
// layer after addition must have power of -1, offset of 0 and scale of 1
if (!common::fp32eq(powerLayer_1->power, -1.0f) || !common::fp32eq(powerLayer_1->offset, 0.0f) ||
!common::fp32eq(powerLayer_1->scale, 1.0f))
if (!AreFpEq(powerLayer_1->power, -1.0f) || !AreFpEq(powerLayer_1->offset, 0.0f) ||
!AreFpEq(powerLayer_1->scale, 1.0f))
continue;
}
@ -2163,8 +2164,8 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() {
THROW_GNA_LAYER_EXCEPTION(fqLayer) << " unsupported per-channel quantisation";
}
if (!LayerInfo(prevLayer).isConst() && !common::fp32eq(inputRange.first.front(), outputRange.first.front()) &&
!common::fp32eq(inputRange.second.front(), outputRange.second.front())) {
if (!LayerInfo(prevLayer).isConst() && !AreFpEq(inputRange.first.front(), outputRange.first.front()) &&
!AreFpEq(inputRange.second.front(), outputRange.second.front())) {
THROW_GNA_LAYER_EXCEPTION(fqLayer) << " unsupported data range conversion. Input: (" <<
inputRange.first.front() << "," << inputRange.second.front() << "), output: (" <<
outputRange.first.front() << "," << outputRange.second.front() << ")";

View File

@ -33,6 +33,7 @@
#include "ops/reference/pwl.hpp"
using namespace ov::intel_gna;
using namespace ov::intel_gna::common;
double relu(const double x) { if (x < 0) { return(0.0); } else { return(x); } }
double leaky_relu(const double x) { if (x < 0.0) { return(LEAKYRELU_SLOPE*x); } else { return(x); } }
@ -164,11 +165,11 @@ void PwlDesign(const DnnActivation& activation_type,
break;
}
slope_scale = static_cast<uint64_t>(1) << (8 * (1 + slope_scale_index));
ptr_segment[i].slope = FLOAT_TO_INT16(slope * slope_scale);
ptr_segment[i].slope = FloatToInt16(slope * slope_scale);
ptr_segment[i].xBase = ptr_segment[i].xBase | slope_scale_index;
}
ptr_segment[i].yBase = FLOAT_TO_INT16(floatval * scale_out);
ptr_segment[i].yBase = FloatToInt16(floatval * scale_out);
log::debug() << (static_cast<int32_t>((ptr_segment[i].xBase & XBASEMASK))/scale_out)
<< " "
<< (static_cast<float>((ptr_segment[i].yBase))/scale_out)
@ -212,10 +213,10 @@ void PwlDesign(const DnnActivation& activation_type,
break;
}
slope_scale = static_cast<uint64_t>(1) << (8 * (1 + slope_scale_index));
ptr_segment[i].slope = FLOAT_TO_INT16(slope * slope_scale);
ptr_segment[i].slope = FloatToInt16(slope * slope_scale);
ptr_segment[i].xBase = ptr_segment[i].xBase | slope_scale_index;
}
ptr_segment[i].yBase = FLOAT_TO_INT16(floatval * scale_out);
ptr_segment[i].yBase = FloatToInt16(floatval * scale_out);
log::debug() << (static_cast<int32_t>((ptr_segment[i].xBase & XBASEMASK))/scale_out)
<< " "
<< (static_cast<float>((ptr_segment[i].yBase))/scale_out)
@ -259,10 +260,10 @@ void PwlDesign(const DnnActivation& activation_type,
break;
}
slope_scale = static_cast<uint64_t>(1) << (8 * (1 + slope_scale_index));
ptr_segment[i].slope = FLOAT_TO_INT16(slope * slope_scale);
ptr_segment[i].slope = FloatToInt16(slope * slope_scale);
ptr_segment[i].xBase = ptr_segment[i].xBase | slope_scale_index;
}
ptr_segment[i].yBase = FLOAT_TO_INT16(floatval * scale_out);
ptr_segment[i].yBase = FloatToInt16(floatval * scale_out);
log::debug() << (static_cast<int32_t>((ptr_segment[i].xBase & XBASEMASK)) / scale_out)
<< " "
<< (static_cast<float>((ptr_segment[i].yBase)) / scale_out)
@ -321,7 +322,7 @@ void PwlDesign(const DnnActivation& activation_type,
break;
}
slope_scale = static_cast<uint64_t>(1) << (8 * (1 + slope_scale_index));
ptr_segment[1].slope = FLOAT_TO_INT16(slope * slope_scale);
ptr_segment[1].slope = FloatToInt16(slope * slope_scale);
ptr_segment[1].xBase = ptr_segment[1].xBase | slope_scale_index;
}
ptr_segment[2].xBase = static_cast<int32_t>(x_upper_limit & XBASEMASK);
@ -340,7 +341,7 @@ void PwlDesign(const DnnActivation& activation_type,
auto input_min_value = static_cast<double>(std::numeric_limits<int32_t>::min());
auto input_max_value = static_cast<double>(std::numeric_limits<int32_t>::max());
double x_min = ov::intel_gna::common::fp32eq(fmod(activation_type.args.pow.exponent, 1.0), 0.0f)? input_min_value / scale_in: 0.0;
double x_min = AreFpEq(fmod(activation_type.args.pow.exponent, 1.0), 0.0)? input_min_value / scale_in: 0.0;
x_min = std::max(x_min, -POW_DOMAIN);
double x_max = input_max_value / scale_in;
@ -371,10 +372,10 @@ void PwlDesign(const DnnActivation& activation_type,
double slope = (valnext - val) / (static_cast<double>(xbasenext - xbase) / scale_in);
auto s = gna_slope(slope, scale_in, scale_out);
ptr_segment[i].slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
ptr_segment[i].slope = FloatToInt16(s.slope * s.slope_scale);
ptr_segment[i].xBase = ptr_segment[i].xBase | s.slope_scale_index;
ptr_segment[i].yBase = FLOAT_TO_INT16(val * scale_out);
ptr_segment[i].yBase = FloatToInt16(val * scale_out);
log::debug() << (static_cast<int32_t>((ptr_segment[i].xBase & XBASEMASK)) / scale_out)
<< " "
<< (static_cast<float>((ptr_segment[i].yBase)) / scale_out)

View File

@ -20,7 +20,7 @@ static bool IsCustomInputScaleFactorAvailableLegacy(const std::vector<float>& in
bool is_scale_factor_custom = false;
for (const auto& scale_factor : input_scale_factors) {
if (!fp32eq(scale_factor, GNAPluginNS::kScaleFactorDefault)) {
if (!AreFpEq(scale_factor, GNAPluginNS::kScaleFactorDefault)) {
is_scale_factor_custom = true;
break;
}
@ -48,7 +48,7 @@ static bool IsCustomInputScaleFactorPerInputAvailable(const std::map<std::string
bool is_scale_factor_custom = false;
for (const auto& scale_factor : per_input_scale_factors) {
if (!fp32eq(scale_factor.second, GNAPluginNS::kScaleFactorDefault)) {
if (!AreFpEq(scale_factor.second, GNAPluginNS::kScaleFactorDefault)) {
is_scale_factor_custom = true;
break;
}

View File

@ -23,6 +23,7 @@ static constexpr double EXP_BREAK = 0.045;
using namespace ov::intel_gna;
using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::common;
NGRAPH_RTTI_DEFINITION(PWLApproximation, "PWLApproximation", 0);
NGRAPH_RTTI_DEFINITION(PWLApproximationWithFq, "PWLApproximationWithFq", 0);
@ -149,7 +150,8 @@ double pivot_search(const details::Function<T>& activation_function,
if (max_epsilon > max_epsilon_prev) {
j = j - 1;
Delta = Delta / 2;
} else if (max_epsilon == max_epsilon_prev) {
same_epsilon = false;
} else if (AreFpEq(max_epsilon, max_epsilon_prev)) {
if (!same_epsilon) {
same_epsilon = true;
} else {
@ -362,10 +364,10 @@ static bool pwl_search_power(const std::shared_ptr<ngraph::Node>& node,
}
}
if (common::fp32eq(exponent, 1.0)) {
if (AreFpEq(exponent, 1.0)) {
// An affine primitive will be used in this case.
return false;
} else if (common::fp32eq(exponent, 0.0)) {
} else if (AreFpEq(exponent, 0.0)) {
segments.emplace_back(0, 1, -std::numeric_limits<double>::infinity());
segments.emplace_back(0, 1, std::numeric_limits<double>::infinity());
segments.emplace_back(0, 0, std::numeric_limits<double>::infinity());
@ -384,7 +386,7 @@ static bool pwl_search_power(const std::shared_ptr<ngraph::Node>& node,
segments.insert(segments.begin(), {
0,
segments.front().beta,
common::fp32eq(fmod(exponent, 1.0), 0.0f) ? -std::numeric_limits<double>::infinity() : 0});
AreFpEq(fmod(exponent, 1.0), 0.0) ? -std::numeric_limits<double>::infinity() : 0});
segments.back().b = segments.back().beta;
segments.push_back({0, 0, std::numeric_limits<double>::infinity()});
return true;

View File

@ -232,11 +232,11 @@ struct Function<ngraph::opset8::Power> {
}
static double lower_bound(double exponent) {
return common::fp32eq(fmod(exponent, 1.0), 0.0f) ? -16 : 0;
return common::AreFpEq(fmod(exponent, 1.0), 0.0) ? -16.0 : 0.0;
}
static double upper_bound() {
return 16;
return 16.0;
}
const double m_exponent;

View File

@ -18,6 +18,7 @@
#include "runtime/pwl.h"
using namespace ov::intel_gna::backend;
using namespace ov::intel_gna::common;
using namespace ov::intel_gna::backend::pwl_tools;
namespace {
@ -236,7 +237,7 @@ MakePWLIdentityTestParam createIdentityParamsForScales(double in, double out) {
// check if exception is thrown if division by zero is possible
// check if exception is thrown if scale factor with too big difference are used
const auto x_lower = FLOAT_TO_INT32(static_cast<double>(std::numeric_limits<int16_t>::min()) * in / out);
const auto x_lower = FloatToInt32(static_cast<double>(std::numeric_limits<int16_t>::min()) * in / out);
if (slope.value == 0 || x_lower == 0) {
should_throw = true;

View File

@ -4,109 +4,276 @@
#include <gtest/gtest.h>
#include "transformations/pwl_approximation.hpp"
#include <sstream>
#include <stdexcept>
#include <transformations/init_node_info.hpp>
#include <type_traits>
#include "common_test_utils/data_utils.hpp"
#include "common_test_utils/ngraph_test_utils.hpp"
#include <transformations/init_node_info.hpp>
#include "transformations/pwl_approximation.hpp"
namespace pwl_test {
template<typename T>
std::shared_ptr<ngraph::Function> CreateActivationFunction(const ngraph::Shape& input_shape) {
using namespace ov::intel_gna::common;
namespace {
template <typename T>
struct Function {};
template <>
struct Function<ngraph::opset9::Sigmoid> {
static std::function<double(double)> get_function() {
return [](const double x) {
return 0.5 * (1.0 + std::tanh(x / 2.0));
};
}
};
template <>
struct Function<ngraph::opset9::Tanh> {
static std::function<double(double)> get_function() {
return [](const double x) {
return std::tanh(x);
};
}
};
template <>
struct Function<ngraph::opset9::SoftSign> {
static std::function<double(double)> get_function() {
return [](const double x) {
return x / (1.0 + std::abs(x));
};
}
};
template <>
struct Function<ngraph::opset9::Log> {
static std::function<double(double)> get_function() {
return [](const double x) {
return std::log(x);
};
}
};
template <>
struct Function<ngraph::opset9::Exp> {
static std::function<double(double)> get_function() {
return [](const double x) {
return std::exp(x);
};
}
};
template <>
struct Function<ngraph::opset9::Power> {
static std::function<double(double)> get_function(double exp) {
return [exp](const double x) {
return std::pow(x, exp);
};
}
};
template <typename T>
using Enable =
std::enable_if<std::is_same<T, ngraph::opset9::Sigmoid>::value || std::is_same<T, ngraph::opset9::Tanh>::value ||
std::is_same<T, ngraph::opset9::SoftSign>::value ||
std::is_same<T, ngraph::opset9::Log>::value || std::is_same<T, ngraph::opset9::Exp>::value,
int>;
template <typename T>
using EnableWithExtraArg = std::enable_if<std::is_same<T, ngraph::opset9::Power>::value, int>;
template <typename T>
class GnaPWlTestsFixture {
public:
template <typename U = T, typename Enable<U>::type = 0>
GnaPWlTestsFixture(const ngraph::Shape& input_shape,
double lower_bound,
double upper_bound,
double max_error_percent);
template <typename U = T, typename EnableWithExtraArg<U>::type = 0>
GnaPWlTestsFixture(const ngraph::Shape& input_shape,
double lower_bound,
double upper_bound,
double exp,
double max_error_percent);
void run();
private:
void validate_results(const std::vector<float>& input_data,
const std::vector<float>& results,
double max_error_percent);
double count_abs_peak_to_peak(int samples = 1000);
template <typename U = T>
static std::shared_ptr<ngraph::Function> create_activation_function(const ngraph::Shape& input_shape);
template <typename U = T>
static std::shared_ptr<ngraph::Function> create_activation_function(const ngraph::Shape& input_shape, double exp);
double _lower_bound;
double _upper_bound;
double _max_error_percent;
std::shared_ptr<ngraph::Function> _function_under_test;
std::function<double(double)> _reference_function;
};
template <typename T>
template <typename U, typename Enable<U>::type>
inline GnaPWlTestsFixture<T>::GnaPWlTestsFixture(const ngraph::Shape& input_shape,
double lower_bound,
double upper_bound,
double max_error_percent)
: _lower_bound(lower_bound),
_upper_bound(upper_bound),
_max_error_percent(max_error_percent) {
_function_under_test = create_activation_function<U>(input_shape);
_reference_function = Function<U>::get_function();
}
template <typename T>
template <typename U, typename EnableWithExtraArg<U>::type>
inline GnaPWlTestsFixture<T>::GnaPWlTestsFixture(const ngraph::Shape& input_shape,
double lower_bound,
double upper_bound,
double exp,
double max_error_percent)
: _lower_bound(lower_bound),
_upper_bound(upper_bound),
_max_error_percent(max_error_percent) {
_function_under_test = create_activation_function<U>(input_shape, exp);
_reference_function = Function<U>::get_function(exp);
}
template <typename T>
template <typename U>
inline std::shared_ptr<ngraph::Function> GnaPWlTestsFixture<T>::create_activation_function(
const ngraph::Shape& input_shape) {
auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, input_shape);
auto f = std::make_shared<T>(input_params);
auto result = std::make_shared<ngraph::opset8::Result>(f);
return std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
}
template<typename T>
std::shared_ptr<ngraph::Function> CreateActivationFunction(const ngraph::Shape& input_shape, double exp) {
template <typename T>
template <typename U>
inline std::shared_ptr<ngraph::Function> GnaPWlTestsFixture<T>::create_activation_function(
const ngraph::Shape& input_shape,
double exp) {
auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, input_shape);
auto exponents = ngraph::opset8::Constant::create(ngraph::element::f32, ngraph::Shape{}, {exp});
auto f = std::make_shared<T>(input_params, exponents);
auto result = std::make_shared<ngraph::opset8::Result>(f);
return std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
}
} // namespace pwl_test
namespace {
void RunTest(const std::shared_ptr<ngraph::Function>& func,
const std::shared_ptr<ngraph::Function>& reference_func,
float lower_bound,
float upper_bound) {
double max_error_percent = 1;
template <typename T>
inline double GnaPWlTestsFixture<T>::count_abs_peak_to_peak(int samples) {
double delta = (_upper_bound - _lower_bound) / (samples + 1);
if (delta <= 0) {
std::stringstream str_stream;
str_stream << "Cannot count test parameters for given data!! Lower bound=" << _lower_bound
<< ", upper bound=" << _upper_bound << std::endl;
throw std::runtime_error(str_stream.str());
}
double min_val = _reference_function(_lower_bound);
double max_val = min_val;
for (int i = 0; i < samples; i++) {
double arg = _lower_bound + i * delta;
double val = _reference_function(arg);
if (val > max_val)
max_val = val;
if (val < min_val)
min_val = val;
}
return std::abs(max_val - min_val);
}
template <typename T>
inline void GnaPWlTestsFixture<T>::run() {
{
ngraph::pass::Manager m;
m.register_pass<ngraph::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::PWLApproximation>(max_error_percent);
m.run_passes(func);
ASSERT_NO_THROW(check_rt_info(func));
m.register_pass<ov::intel_gna::pass::PWLApproximation>(_max_error_percent);
m.run_passes(_function_under_test);
ASSERT_NO_THROW(check_rt_info(_function_under_test));
}
auto shape = func->input().get_node_shared_ptr()->get_output_shape(0);
auto shape = _function_under_test->input().get_node_shared_ptr()->get_output_shape(0);
ov::runtime::TensorVector result(1);
std::vector<float> data = CommonTestUtils::generate_float_numbers(ov::shape_size(shape), lower_bound, upper_bound);
std::vector<float> data =
CommonTestUtils::generate_float_numbers(ov::shape_size(shape), _lower_bound, _upper_bound);
ov::runtime::Tensor input{ov::element::f32, shape, data.data()};
ASSERT_TRUE(func->evaluate(result, ov::runtime::TensorVector{input}));
ov::runtime::TensorVector result_ref(1);
ASSERT_TRUE(reference_func->evaluate(result_ref, ov::runtime::TensorVector{input}));
ASSERT_TRUE(_function_under_test->evaluate(result, ov::runtime::TensorVector{input}));
const float* result_data = result[0].data<float>();
const float* result_ref_data = result_ref[0].data<float>();
for (size_t i = 0; i < result[0].get_size(); i++) {
double delta = std::abs(result_data[i] - result_ref_data[i]);
ASSERT_TRUE(delta <= max_error_percent);
std::vector<float> results(result_data, result_data + result[0].get_size());
validate_results(data, results, _max_error_percent);
}
template <typename T>
inline void GnaPWlTestsFixture<T>::validate_results(const std::vector<float>& input_data,
const std::vector<float>& results,
double max_error_percent) {
ASSERT_FALSE(results.empty());
std::vector<float> reference_values;
std::for_each(input_data.begin(), input_data.end(), [&reference_values, this](const double& x) {
reference_values.push_back(_reference_function(x));
});
ASSERT_EQ(results.size(), reference_values.size());
auto abs_peak_to_peak = count_abs_peak_to_peak();
for (int i = 0; i < results.size(); ++i) {
double delta = std::abs(static_cast<double>(results[i]) - static_cast<double>(reference_values[i]));
double error_percentage = delta / abs_peak_to_peak * 100.0;
EXPECT_TRUE(error_percentage < max_error_percent);
}
}
} // namespace
TEST(GnaPwlTest, Sigmoid) {
RunTest(
pwl_test::CreateActivationFunction<ngraph::opset8::Sigmoid>({1, 100}),
pwl_test::CreateActivationFunction<ngraph::opset8::Sigmoid>({1, 100}),
-10,
10);
GnaPWlTestsFixture<ngraph::opset9::Sigmoid> test_instance({1, 100}, -10.0, 10.0, 1.0);
test_instance.run();
}
TEST(GnaPwlTest, Tanh) {
RunTest(
pwl_test::CreateActivationFunction<ngraph::opset8::Tanh>({1, 32}),
pwl_test::CreateActivationFunction<ngraph::opset8::Tanh>({1, 32}),
-5,
5);
GnaPWlTestsFixture<ngraph::opset9::Tanh> test_instance({1, 32}, -5.0, 5.0, 1.0);
test_instance.run();
}
TEST(GnaPwlTest, Exp) {
RunTest(
pwl_test::CreateActivationFunction<ngraph::opset8::Exp>({1, 32}),
pwl_test::CreateActivationFunction<ngraph::opset8::Exp>({1, 32}),
-std::log2(INT16_MAX),
std::log10(INT16_MAX));
GnaPWlTestsFixture<ngraph::opset9::Exp> test_instance({1, 32}, -std::log2(INT16_MAX), std::log10(INT16_MAX), 1.0);
test_instance.run();
}
TEST(GnaPwlTest, SoftSign) {
RunTest(
pwl_test::CreateActivationFunction<ngraph::opset9::SoftSign>({1, 32}),
pwl_test::CreateActivationFunction<ngraph::opset9::SoftSign>({1, 32}),
-10,
10);
GnaPWlTestsFixture<ngraph::opset9::SoftSign> test_instance({1, 32}, -10, 10, 1.0);
test_instance.run();
}
TEST(GnaPwlTest, Log) {
RunTest(
pwl_test::CreateActivationFunction<ngraph::opset8::Log>({1, 32}),
pwl_test::CreateActivationFunction<ngraph::opset8::Log>({1, 32}),
0.001,
2981);
GnaPWlTestsFixture<ngraph::opset9::Log> test_instance({1, 32}, 0.001, 2981, 1.0);
test_instance.run();
}
TEST(GnaPwlTest, Power) {
for (float exp = 1; exp <= 2.2; exp+=0.1) {
RunTest(
pwl_test::CreateActivationFunction<ngraph::opset8::Power>({1, 32}, exp),
pwl_test::CreateActivationFunction<ngraph::opset8::Power>({1, 32}, exp),
ov::intel_gna::common::fp32eq(std::fmod(exp, 1.0), 0.0) ? -16 : 0,
16);
for (float exp = 1; exp <= 2.2; exp += 0.1) {
GnaPWlTestsFixture<ngraph::opset9::Power> test_instance({1, 32},
AreFpEq(std::fmod(exp, 1.0), 0.0) ? -16 : 0,
16,
exp,
1.0);
test_instance.run();
}
}
} // namespace
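The new fixture checks accuracy as a percentage of the reference function's peak-to-peak amplitude on the tested interval rather than as an absolute delta. A standalone sketch of that criterion for the Sigmoid case, sampling the same way count_abs_peak_to_peak does (the 0.005 error value below is a made-up example, not a measured result):

#include <algorithm>
#include <cmath>
#include <iostream>

int main() {
    // Sigmoid on [-10, 10]: peak-to-peak is ~1.0, so the 1% budget allows an absolute delta of ~0.01.
    auto sigmoid = [](double x) { return 0.5 * (1.0 + std::tanh(x / 2.0)); };
    const double lower = -10.0, upper = 10.0;
    const int samples = 1000;
    const double step = (upper - lower) / (samples + 1);
    double min_val = sigmoid(lower), max_val = min_val;
    for (int i = 0; i < samples; ++i) {
        const double v = sigmoid(lower + i * step);
        min_val = std::min(min_val, v);
        max_val = std::max(max_val, v);
    }
    const double peak_to_peak = std::abs(max_val - min_val);  // ~0.9999
    const double abs_error = 0.005;                           // hypothetical PWL-vs-reference delta
    const double error_percentage = abs_error / peak_to_peak * 100.0;
    std::cout << error_percentage << "% of peak-to-peak, within the 1% budget: "
              << std::boolalpha << (error_percentage < 1.0) << "\n";
    return 0;
}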