[GNA] Fixed scale factor propagation for Eltwise with very different input ranges (#7305)

* [GNA] Fix scale factor propagation for Eltwise with very different input ranges

* [GNA] Added test

* [GNA] Added exception for scale factor <= 0

* [GNA] Disable tests with integer weights

* [GNA] Added assert for CNNLayer in getScaleFactor()

* [GNA] Added check if scale factor is inf

* [GNA] Fixed legacy tests
Elizaveta Lobanova 2021-09-08 10:48:05 +03:00 committed by GitHub
parent 5096fe19f1
commit 66a14f1ac3
6 changed files with 337 additions and 125 deletions


@ -699,5 +699,53 @@ using QuantI8_I8 = frontend::QuantPair<frontend::QuantI8_I8, frontend::QuantI8_I
using FakeQuantI16 = frontend::QuantPair<frontend::FakeQuantI16, frontend::FakeQuantI16>;
using FakeQuantI8 = frontend::QuantPair<frontend::FakeQuantI8, frontend::FakeQuantI16>;
enum class QuantizedDataType {
input,
output,
weights,
bias
};
/**
* @brief Returns the scale factor for the requested kind of data of a layer
* @param layer Layer to be quantized
* @param data_type Type of data to be quantized
* @return scale factor (1.0 for non-quantized layers); throws if the stored value is zero, negative or infinite
*/
inline float getScaleFactor(InferenceEngine::CNNLayerPtr layer, QuantizedDataType data_type) {
IE_ASSERT(layer != nullptr);
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
float scale_factor;
if (!quantized) {
scale_factor = 1.0f;
} else {
switch (data_type) {
case QuantizedDataType::input:
scale_factor = quantized->_src_quant.GetScale();
break;
case QuantizedDataType::output:
scale_factor = quantized->_dst_quant.GetScale();
break;
case QuantizedDataType::weights:
scale_factor = quantized->_weights_quant.GetScale();
break;
case QuantizedDataType::bias:
scale_factor = quantized->_bias_quant.GetScale();
break;
default:
THROW_GNA_LAYER_EXCEPTION(layer) << "Unsupported data type for quantization: " << static_cast<int>(data_type);
}
}
auto isZero = [](float p1) {
return std::abs(p1) <= 0.00001f;
};
if (scale_factor < 0.0 || isZero(scale_factor) || std::isinf(scale_factor)) {
THROW_GNA_LAYER_EXCEPTION(layer) << "Invalid scale factor: " << scale_factor;
}
return scale_factor;
}
} // namespace GNAPluginNS
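For context, here is a minimal sketch (not part of the diff) of how a call site changes once this helper is available; the wrapper function names OutputScaleBefore/OutputScaleAfter are hypothetical, while getScaleFactor and QuantizedDataType come from the hunk above:

// Hypothetical before/after comparison for reading an output scale factor.
float OutputScaleBefore(InferenceEngine::CNNLayerPtr layer) {
    // Previously every call site probed the injected quantization data itself
    // and silently fell back to 1.0f for non-quantized layers.
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
    return quantized == nullptr ? 1.0f : quantized->_dst_quant.GetScale();
}

float OutputScaleAfter(InferenceEngine::CNNLayerPtr layer) {
    // The helper keeps the 1.0f fallback but additionally throws a layer
    // exception on zero, negative or infinite scale factors.
    return getScaleFactor(layer, QuantizedDataType::output);
}

The graph_compiler hunks below apply exactly this substitution to the convolution, power, pooling, copy, crop, eltwise, GEMM, affine, filter, PWL and permute primitives.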


@ -490,7 +490,8 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
if ((!fakeQuantize && quantSibling->_dst_quant.IsScaleSet()) ||
(fakeQuantize && quantSibling->_dst_quant.IsScaleSet() && !fp32eq(quantSibling->_dst_quant.GetScale(), 1.0) &&
quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale()) || infiniteLoopCount > 0) {
quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale()) ||
quantSibling->_dst_quant.IsScaleSet() && infiniteLoopCount > 0) {
// means we already restarted propagation input memory layer
// need to search for requantiseable layer prior memory output layer
InferenceEngine::CNNLayerPtr restartedLayer;
@ -657,6 +658,73 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
template<>
class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
private:
bool requantizeEltwiseInput(InferenceEngine::EltwiseLayer* eltwiseLayer, uint8_t inputIx, int16_t maxValue,
bool fakeQuantize, ScaleFactorUpdateResult &result) {
auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*eltwiseLayer);
auto in = InferenceEngine::CNNNetPrevLayer(eltwiseLayer, inputIx);
bool has8BOr16BOut = LayerInfo(in).has8BOr16BOutput();
auto quantParams =
InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, inputIx));
// trick to get the opposite index (0 -> 1, 1 -> 0) by negating inputIx.
auto quantParamsOpposite =
InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, !inputIx));
while (in && !LayerInfo(in).isInput() && !LayerInfo(in).isMemory() && !LayerInfo(in).isCopy()) {
auto info = LayerInfo(in);
if (info.isActivation() || info.isConst()) {
auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
float newOutputScale;
if (has8BOr16BOut) {
newOutputScale = quantParamsOpposite->_dst_quant.GetScale() / maxValue;
} else {
newOutputScale = quantDataForInputLayer->_dst_quant.GetScale() *
quantParamsOpposite->_dst_quant.GetScale() * maxValue /
quantParams->_dst_quant.GetScale();
}
if (info.isActivation() && newOutputScale > static_cast<float>(std::numeric_limits<int16_t>::max()) / 2) {
return false;
}
gnawarn() << "[WARNING] saturated weights for " << eltwiseLayer->name
<< ". Layer new output scale: " << in->name << ", output_scale=" << newOutputScale
<< ", was " << quantDataForInputLayer->_dst_quant.GetScale() <<"\n" << std::flush;
quantDataForInputLayer->_dst_quant.SetScale(newOutputScale);
result = ScaleFactorUpdateResult(in.get());
return true;
}
if (fakeQuantize && info.isWeightableIdentity()) {
auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
if (!fp32eq(quantDataForInputLayer->_weights_quant.GetScale(), 1.0f)) {
auto reducer = quantData->_weights_quant.GetScale() / maxValue;
reducer = std::max(1.0f, reducer);
auto newWeightsScale = quantDataForInputLayer->_weights_quant.GetScale() / reducer;
newWeightsScale = std::max(1.0f, newWeightsScale);
quantDataForInputLayer->_weights_quant.SetScale(static_cast<int32_t>(newWeightsScale));
quantDataForInputLayer->_dst_quant.SetScale(quantDataForInputLayer->_weights_quant.GetScale() *
quantDataForInputLayer->_src_quant.GetScale());
result = ScaleFactorUpdateResult(in.get());
return true;
}
}
// if we are here, it means we are on input port 1
if (info.isFullyConnected() || info.isConvolution()) {
auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
auto newOutputScale = quantParamsOpposite->_dst_quant.GetScale() * maxValue;
auto newWeightScale = newOutputScale / quantDataForInputLayer->_src_quant.GetScale();
quantDataForInputLayer->_dst_quant.SetScale(newOutputScale);
quantDataForInputLayer->_weights_quant.SetScale(newWeightScale);
result = ScaleFactorUpdateResult(in.get());
return true;
}
in = InferenceEngine::CNNNetHasPrevLayer(in.get()) ? InferenceEngine::CNNNetPrevLayer(in) : nullptr;
}
return false;
}
public:
bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result,
bool fakeQuantize, int infiniteLoopCount) {
@ -734,7 +802,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
}
}
if (!fp32eq(bestWeightsScale, quantParams1->_weights_quant.GetScale())) {
if (bestWeightsScale > 0.0f && !fp32eq(bestWeightsScale, quantParams1->_weights_quant.GetScale())) {
quantParams1->_weights_quant.SetScale(bestWeightsScale);
quantParams1->_dst_quant.SetScale(quantParams1->_weights_quant.GetScale() * quantParams1->_src_quant.GetScale());
result = ScaleFactorUpdateResult(in1.get());
@ -746,79 +814,22 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
quantData->_dst_quant.SetScale(quantParams1->_dst_quant.GetScale());
// eltwise will work in int16 or int8 if low precision inputs are used
auto maxValue = lowPrecision ? (std::numeric_limits<int8_t>::max() - 1) : (std::numeric_limits<int16_t>::max() - 1);
if (quantData->_weights_quant.GetScale() > maxValue + 1) {
// rescaling its activation input
// iterating through the previous layers of the eltwise
for (uint8_t i = 0; i < 2; ++i) {
InferenceEngine::CNNLayerPtr in = InferenceEngine::CNNNetPrevLayer(eltwiseLayer, i);
bool has8BOr16BOut = LayerInfo(in).has8BOr16BOutput();
auto quantParams =
InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, i));
// trick to get the opposite index (0 -> 1, 1 -> 0) by negating i.
auto quantParamsOpposite =
InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, !i));
for (; InferenceEngine::CNNNetHasPrevLayer(in.get()); in = CNNNetPrevLayer(in)) {
auto info = LayerInfo(in);
if (info.isSplit() || info.isSlice() || info.isConcat() || info.isNonFunctional()) {
continue;
} else if (info.has8BOr16BOutput() && info.isActivation()) {
auto quantDataForActivation = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
float newOutputScale;
if (has8BOr16BOut) {
newOutputScale = quantParamsOpposite->_dst_quant.GetScale() / maxValue;
} else {
newOutputScale = quantDataForActivation->_dst_quant.GetScale() *
quantParamsOpposite->_dst_quant.GetScale() * maxValue /
quantParams->_dst_quant.GetScale();
}
if (newOutputScale > static_cast<float>(std::numeric_limits<int16_t>::max()) / 2) {
break;
}
gnawarn() << "[WARNING] saturated weights for " << eltwiseLayer->name
<< ". Layer new output scale: " << in->name << ", output_scale=" << newOutputScale
<< ", was " << quantDataForActivation->_dst_quant.GetScale() <<"\n" << std::flush;
quantDataForActivation->_dst_quant.SetScale(newOutputScale);
result = ScaleFactorUpdateResult(in.get());
return true;
} else if (info.has8BOr16BOutput()) {
break;
}
if (fakeQuantize && info.isWeightableIdentity()) {
auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
if (!fp32eq(quantDataForInputLayer->_weights_quant.GetScale(), 1.0f)) {
auto reducer = quantData->_weights_quant.GetScale() / std::numeric_limits<int16_t>::max();
reducer = std::max(1.0f, reducer);
auto newWeightsScale = quantDataForInputLayer->_weights_quant.GetScale() / reducer;
newWeightsScale = std::max(1.0f, newWeightsScale);
quantDataForInputLayer->_weights_quant.SetScale(static_cast<int32_t>(newWeightsScale));
quantDataForInputLayer->_dst_quant.SetScale(quantDataForInputLayer->_weights_quant.GetScale() *
quantDataForInputLayer->_src_quant.GetScale());
result = ScaleFactorUpdateResult(in.get());
return true;
}
}
// if we are here, it means we are on input port 1
if (info.isFullyConnected() || info.isConvolution()) {
auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
auto newOutputScale = quantParamsOpposite->_dst_quant.GetScale() * maxValue;
auto newWeightScale = newOutputScale / quantDataForInputLayer->_src_quant.GetScale();
quantDataForInputLayer->_dst_quant.SetScale(newOutputScale);
quantDataForInputLayer->_weights_quant.SetScale(newWeightScale);
result = ScaleFactorUpdateResult(in.get());
return true;
}
}
}
// we are unable to rescale the input - results might be bad
gnawarn() << "[INFO] weights saturated for " << eltwiseLayer->name << "\n";
auto maxValue = lowPrecision ? std::numeric_limits<int8_t>::max() : std::numeric_limits<int16_t>::max();
if (quantData->_weights_quant.GetScale() <= maxValue) {
return true;
}
break;
// rescaling its activation input
// iterating through the previous layers of the eltwise
for (uint8_t i = 0; i < 2; ++i) {
if (requantizeEltwiseInput(eltwiseLayer, i, maxValue - 1, fakeQuantize, result)) {
return true;
}
}
// we are unable to rescale the input - results might be bad
gnawarn() << "[INFO] weights saturated for " << eltwiseLayer->name << "\n";
}
break;
default : THROW_GNA_EXCEPTION << "Unsupported Eltwise layer for quantisation: " << eltwiseLayer->_operation;
}
return true;
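To make the saturation condition above concrete, here is a hedged sketch (not part of the diff) of the check that triggers requantizeEltwiseInput(); it assumes the eltwise weights scale behaves roughly like the ratio of the two input scale factors, which is what explodes when an input with a normal range meets a near-zero constant, and it uses the plain int8/int16 maxima rather than the exact maxValue arithmetic in the hunk:

#include <algorithm>
#include <cstdint>
#include <limits>

// Illustrative only: returns true when the integer multiplier needed to bring
// the two eltwise inputs onto a common scale no longer fits the target precision.
// Both scales are assumed positive; getScaleFactor() above already rejects
// zero, negative and infinite values.
bool EltwiseWeightsWouldSaturate(float inScale0, float inScale1, bool lowPrecision) {
    const float weightsScale = std::max(inScale0, inScale1) / std::min(inScale0, inScale1);
    const float maxValue = lowPrecision ? std::numeric_limits<int8_t>::max()
                                        : std::numeric_limits<int16_t>::max();
    return weightsScale > maxValue;
}

When this happens, requantizeEltwiseInput() walks back from the saturating input through activation, const, weightable-identity and FC/convolution producers and lowers one of the output scales; the new ConstInputAddTest below exercises this path with constant ranges down to ±1e-5 against inputs in ±10 and ±100.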
@ -1153,7 +1164,6 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
}
quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer);
}
double tmp_dst_quant_scale = quant->_weights_quant.GetScale() * quant->_src_quant.GetScale();
if (weightsSize == 1) {
auto itt = thresholds.begin();


@ -409,13 +409,9 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
uint32_t num_bytes_per_weight = convolution._weights->getTensorDesc().getPrecision().size();
uint32_t num_bytes_per_bias = biasPrecision.size();
float weight_scale_factor = 1.0f;
float output_scale_factor = 1.0f;
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(convolution);
if (quantized != nullptr) {
weight_scale_factor = quantized->_weights_quant.GetScale();
output_scale_factor = quantized->_dst_quant.GetScale();
}
float weight_scale_factor = getScaleFactor(layer, QuantizedDataType::weights);
float output_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
auto& currentComponent = dnnComponents.addComponent(convolution.name, "convolution");
dnn->InitConvolutional1DComponent(currentComponent,
num_columns_in,
@ -586,13 +582,8 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
in_height, in_width, in_channels,
convolution._kernel_y, convolution._kernel_x, filter_n, convolution._stride_y, convolution._stride_x, inputPrec);
float weight_scale_factor = 1.0f;
float output_scale_factor = 1.0f;
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(convolution);
if (quantized != nullptr) {
weight_scale_factor = quantized->_weights_quant.GetScale();
output_scale_factor = quantized->_dst_quant.GetScale();
}
float weight_scale_factor = getScaleFactor(layer, QuantizedDataType::weights);
float output_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
auto& currentComponent = dnnComponents.addComponent(convolution.name, "convolution");
dnn->InitConvolutional2DComponent(currentComponent,
@ -673,9 +664,6 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto& power = dynamic_cast<PowerLayer&>(*layer.get());
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
IE_ASSERT(gnaFlags->sw_fp32 ? (quantized == nullptr) : (quantized != nullptr));
if (power.power < 0.0f || power.power > 2.8f) {
IE_THROW() << "[GNA plugin] unsupported power factor, expected be in <0, 2.8> range but was " << power.power;
}
@ -705,6 +693,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto& currentComponent = dnnComponents.addComponent(layer->name, "power");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
IE_ASSERT(gnaFlags->sw_fp32 ? (quantized == nullptr) : (quantized != nullptr));
dnn->InitAffineComponent(currentComponent,
num_rows_in + num_padding,
num_columns_in,
@ -764,8 +754,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
gna_pwl_segment_t* ptr_pwl_segments_target = nullptr;
float output_pwl_scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
float input_pwl_scale_factor = quantized != nullptr ? quantized->_src_quant.GetScale() : 1.0f;
float output_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
float input_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::input);
if (!gnaFlags->sw_fp32) {
if (gnaFlags->uniformPwlDesign) {
@ -823,7 +813,6 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto& pooling = dynamic_cast<PoolingLayer&>(*layer.get());
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
IE_ASSERT(!layer->insData.empty());
IE_ASSERT(!layer->outData.empty());
@ -883,7 +872,7 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
outputs->getPrecision().size(),
{ pooling._kernel[X_AXIS], pooling._kernel[Y_AXIS] },
{ pooling._stride[X_AXIS], pooling._stride[Y_AXIS] },
quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
getScaleFactor(layer, QuantizedDataType::output),
ptr_inputs,
ptr_outputs);
@ -901,8 +890,6 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
}
void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
IE_ASSERT(!layer->insData.empty());
IE_ASSERT(!layer->outData.empty());
auto inputs = layer->insData.begin()->lock();
@ -928,7 +915,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
num_columns_out,
inputs->getPrecision().size(),
outputs->getPrecision().size(),
quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
getScaleFactor(layer, QuantizedDataType::output),
num_rows_out + num_padding_out,
num_columns_out,
ptr_inputs,
@ -1053,7 +1040,6 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
<< axis.size() << ".";
}
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
size_t cropOffset = offset.front() * cropLayer->precision.size();
size_t cropOutputSize = dim.front() * cropLayer->precision.size();
const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ?
@ -1111,6 +1097,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto& currentComponent = dnnComponents.addComponent(layer->name, "crop");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
dnn->InitAffineComponent(currentComponent,
num_rows_in + num_padding,
num_columns_in,
@ -1119,8 +1106,8 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
outputs->getPrecision().size(),
quantized == nullptr ? inputs->getPrecision().size() : (gnaFlags->input_low_precision ? 1 : 2),
gnaFlags->input_low_precision ? 1 : 4,
quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
getScaleFactor(layer, QuantizedDataType::weights),
getScaleFactor(layer, QuantizedDataType::output),
ptr_inputs,
ptr_outputs,
ptr_weights,
@ -1254,8 +1241,8 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
// TODO: only fp32 and Int16 tested
quantized == nullptr ? inputs2Bytes->getPrecision().size() : (gnaFlags->input_low_precision ? 1 : 2),
quantized == nullptr ? inputs4Bytes->getPrecision().size() : (gnaFlags->input_low_precision ? 1 : 4),
quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
getScaleFactor(layer, QuantizedDataType::weights),
getScaleFactor(layer, QuantizedDataType::output),
ptr_inputs,
ptr_outputs,
ptr_weights,
@ -1363,8 +1350,8 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
outputs->getPrecision().size(),
quantized == nullptr ? input_2->getPrecision().size() : 2,
quantized == nullptr ? input_2->getPrecision().size() : 4,
quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
getScaleFactor(layer, QuantizedDataType::weights),
getScaleFactor(layer, QuantizedDataType::output),
ptr_input_1,
ptr_outputs,
ptr_input_2,
@ -1452,8 +1439,8 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
outputs->getPrecision().size(),
weightable._weights->getTensorDesc().getPrecision().size(),
biasPrecisionSize,
quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
getScaleFactor(layer, QuantizedDataType::weights),
getScaleFactor(layer, QuantizedDataType::output),
ptr_inputs,
ptr_outputs,
ptr_weights,
@ -1592,8 +1579,6 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
return;
}
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
void* ptr_inputs = nullptr;
void* ptr_outputs = nullptr;
void* ptr_weights = nullptr;
@ -1632,7 +1617,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
num_columns_in,
inputs->getPrecision().size(),
inputs->getPrecision().size(),
quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
getScaleFactor(layer, QuantizedDataType::output),
num_rows_copied,
num_columns_in,
ptr_inputs,
@ -1669,8 +1654,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
outputs->getPrecision().size(),
filterLayer->_weights->getTensorDesc().getPrecision().size(),
biasPrecisionSize,
quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
getScaleFactor(layer, QuantizedDataType::weights),
getScaleFactor(layer, QuantizedDataType::output),
ptr_inputs,
ptr_outputs,
ptr_weights,
@ -1726,8 +1711,6 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
return;
}
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
auto prevLayer = CNNNetPrevLayer(layer.get(), 0);
if (!LayerInfo(prevLayer).isSplit() && !LayerInfo(prevLayer).isSlice()) {
THROW_GNA_EXCEPTION << "Case with Affine Aligning Filter for not Split/Slice layers is not implemented yet!";
@ -1774,8 +1757,8 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
numberOfFilters,
filterWidth,
convolutionStride,
quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
getScaleFactor(layer, QuantizedDataType::weights),
getScaleFactor(layer, QuantizedDataType::output),
ptr_inputs,
ptr_outputs,
ptr_weights,
@ -1834,9 +1817,8 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto inputs = layer->insData.begin()->lock();
auto outputs = *layer->outData.begin();
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
float output_pwl_scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
float input_pwl_scale_factor = quantized != nullptr ? quantized->_src_quant.GetScale() : 1.0f;
float output_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
float input_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::input);
auto orientation = kDnnInterleavedOrientation;
@ -1903,6 +1885,7 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
}
auto activation_type = DnnActivation::fromType(it->second);
activation_type.fqParams.set = false;
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
if (quantized != nullptr && quantized->_dst_quant.IsStatsSet()) {
activation_type.fqParams.set = true;
activation_type.fqParams.levels = quantized->_dst_quant.GetLevels();
@ -2044,7 +2027,6 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
return;
}
auto layerOrder = layer->GetParamAsInts("order");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
if (layer->insData.empty()) {
THROW_GNA_LAYER_EXCEPTION(layer) << "Input layer pointer is unexpectedly absent";
}
@ -2088,7 +2070,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
squeezedInputOrder[1],
inputs->getPrecision().size(),
outputs->getPrecision().size(),
(quantized == nullptr) ? 1.0f : quantized->_dst_quant.GetScale(),
getScaleFactor(layer, QuantizedDataType::output),
ptr_inputs,
ptr_outputs);
}
@ -2103,7 +2085,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
squeezedInputOrder[1],
inputs->getPrecision().size(),
outputs->getPrecision().size(),
quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
getScaleFactor(layer, QuantizedDataType::output),
ptr_inputs,
ptr_outputs);
}
@ -2595,4 +2577,4 @@ GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint
}
}
return temp_buffer;
}
}


@ -0,0 +1,117 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <memory>
#include <tuple>
#include <string>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"
typedef std::tuple<
InferenceEngine::Precision, // Network Precision
std::string, // Target Device
std::map<std::string, std::string>, // Configuration
std::pair<float, float>, // Input min/max values
std::pair<float, float> // Constant min/max values
> constInputAddParams;
namespace LayerTestsDefinitions {
class ConstInputAddTest : public testing::WithParamInterface<constInputAddParams>,
public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<constInputAddParams> obj) {
InferenceEngine::Precision netPrecision;
std::string targetDevice;
std::map<std::string, std::string> configuration;
std::pair<float, float> inputRange;
std::pair<float, float> constRange;
std::tie(netPrecision, targetDevice, configuration, inputRange, constRange) = obj.param;
std::ostringstream result;
result << "netPRC=" << netPrecision.name() << "_";
result << "targetDevice=" << targetDevice << "_";
for (auto const& configItem : configuration) {
result << "_configItem=" << configItem.first << "_" << configItem.second;
}
result << "_IR=" << inputRange.first << "," << inputRange.second << "_";
result << "IR=" << constRange.first << "," << constRange.second;
return result.str();
}
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override {
return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputMax - inputMin, inputMin, (inputMax - inputMin) / 10);
}
protected:
void SetUp() override {
InferenceEngine::Precision netPrecision;
std::pair<float, float> inputRange;
std::pair<float, float> constRange;
std::tie(netPrecision, targetDevice, configuration, inputRange, constRange) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
std::tie(inputMin, inputMax) = inputRange;
ngraph::Shape shape = {1, 72};
auto params = ngraph::builder::makeParams(ngPrc, { shape });
auto constant = ngraph::builder::makeConstant<float>(ngPrc, shape, {}, true, constRange.second, constRange.first);
auto eltwise = ngraph::builder::makeEltwise(constant, params[0], ngraph::helpers::EltwiseTypes::ADD);
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(eltwise) };
function = std::make_shared<ngraph::Function>(results, params, "InputConstAdd");
}
private:
float inputMin = 0.0;
float inputMax = 0.0;
};
TEST_P(ConstInputAddTest, CompareWithRefImpl) {
Run();
};
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
const std::vector<std::map<std::string, std::string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
}
};
const std::vector<std::pair<float, float>> inputRange = {
{-10, 10},
{-100, 100}
};
const std::vector<std::pair<float, float>> constRange = {
{-10, 10},
{-0.1, 0.1},
{-1.0e-5, 1.0e-5}
};
INSTANTIATE_TEST_SUITE_P(smoke_const_input_add, ConstInputAddTest,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs),
::testing::ValuesIn(inputRange),
::testing::ValuesIn(constRange)),
ConstInputAddTest::getTestCaseName);
} // namespace LayerTestsDefinitions


@ -9,8 +9,9 @@ using namespace LayerTestsDefinitions;
namespace {
static const std::vector<ngraph::element::Type> precisionsGNA = {
ngraph::element::f32,
ngraph::element::u8,
ngraph::element::i16,
// integer weights are not supported by GNA so far
// ngraph::element::u8,
// ngraph::element::i16,
};
static const std::vector<std::size_t> batchSizesGNA = {


@ -0,0 +1,54 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <limits>
#include <gtest/gtest.h>
// to suppress deprecated definition errors
#define IMPLEMENT_INFERENCE_ENGINE_PLUGIN
#include "legacy/layer_transform.hpp"
#include "frontend/layer_quantizer.hpp"
namespace {
class GnaGetScaleFactorTest : public ::testing::Test {
protected:
void GetScaleFactorAndCheck(float src_scale, float dst_scale, float weights_scale, float bias_scale) const {
InferenceEngine::LayerParams params("fc", "FullyConnected", InferenceEngine::Precision::FP32);
InferenceEngine::CNNLayerPtr layer = std::make_shared<InferenceEngine::CNNLayer>(params);
layer = InferenceEngine::injectData<GNAPluginNS::QuantizedLayerParams>(*layer);
auto quant = InferenceEngine::getInjectedData<GNAPluginNS::QuantizedLayerParams>(*layer);
quant->_src_quant.SetScale(src_scale);
quant->_dst_quant.SetScale(dst_scale);
quant->_weights_quant.SetScale(weights_scale);
quant->_bias_quant.SetScale(bias_scale);
ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::input), src_scale);
ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::output), dst_scale);
ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::weights), weights_scale);
ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::bias), bias_scale);
}
};
TEST_F(GnaGetScaleFactorTest, validSF) {
EXPECT_NO_THROW(GetScaleFactorAndCheck(100, 200, 300, 400));
}
TEST_F(GnaGetScaleFactorTest, invalidSF) {
EXPECT_ANY_THROW(GetScaleFactorAndCheck(0, 200, 300, 400));
EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 0, 300, 400));
EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 0, 400));
EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 300, 0));
EXPECT_ANY_THROW(GetScaleFactorAndCheck(-100, 200, 300, 400));
EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, -200, 300, 400));
EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, -300, 400));
EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 300, -400));
float inf = std::numeric_limits<float>::infinity();
EXPECT_ANY_THROW(GetScaleFactorAndCheck(inf, 200, 300, 400));
EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, inf, 300, 400));
EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, inf, 400));
EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 300, inf));
}
} // namespace