[GNA] Fixed scale factors propagation for Eltwise with very different input ranges (#7305)
* [GNA] Fix scale factors propagation for Eltwise with very different input ranges
* [GNA] Added test
* [GNA] Added exception for scale factor <= 0
* [GNA] Disable tests with integer weights
* [GNA] Added assert for CNNLayer in getScaleFactor()
* [GNA] Added check if scale factor is inf
* [GNA] Fixed legacy tests
parent 5096fe19f1
commit 66a14f1ac3
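The commit message above summarizes the change: per-tensor scale factors are now read through a single validated helper instead of ad-hoc `quantized == nullptr ? 1 : ...` expressions scattered across the graph compiler. A minimal usage sketch (illustration only; the helper and enum names are taken from the diff below, while the surrounding call site and the `layer` variable are assumed):

// Sketch: how a GNA graph-compiler primitive obtains scale factors after this change.
// getScaleFactor() returns 1.0f for a non-quantized (sw_fp32) layer and throws for
// zero, negative, or infinite scale factors.
float weight_scale_factor = GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::weights);
float output_scale_factor = GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::output);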
@@ -699,5 +699,53 @@ using QuantI8_I8 = frontend::QuantPair<frontend::QuantI8_I8, frontend::QuantI8_I
using FakeQuantI16 = frontend::QuantPair<frontend::FakeQuantI16, frontend::FakeQuantI16>;
using FakeQuantI8 = frontend::QuantPair<frontend::FakeQuantI8, frontend::FakeQuantI16>;

enum class QuantizedDataType {
    input,
    output,
    weights,
    bias
};

/**
 * @brief Returns a scale factor for specific layer data
 * @param layer Layer to be quantized
 * @param data_type Type of data to be quantized
 * @return scale factor
 */
inline float getScaleFactor(InferenceEngine::CNNLayerPtr layer, QuantizedDataType data_type) {
    IE_ASSERT(layer != nullptr);
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
    float scale_factor;
    if (!quantized) {
        scale_factor = 1.0f;
    } else {
        switch (data_type) {
        case QuantizedDataType::input:
            scale_factor = quantized->_src_quant.GetScale();
            break;
        case QuantizedDataType::output:
            scale_factor = quantized->_dst_quant.GetScale();
            break;
        case QuantizedDataType::weights:
            scale_factor = quantized->_weights_quant.GetScale();
            break;
        case QuantizedDataType::bias:
            scale_factor = quantized->_bias_quant.GetScale();
            break;
        default:
            THROW_GNA_LAYER_EXCEPTION(layer) << "Unsupported data type for quantization: " << static_cast<int>(data_type);
        }
    }

    auto isZero = [](float p1) {
        return std::abs(p1) <= 0.00001f;
    };

    if (scale_factor < 0.0 || isZero(scale_factor) || std::isinf(scale_factor)) {
        THROW_GNA_LAYER_EXCEPTION(layer) << "Invalid scale factor: " << scale_factor;
    }

    return scale_factor;
}

}  // namespace GNAPluginNS
@@ -490,7 +490,8 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {

            if ((!fakeQuantize && quantSibling->_dst_quant.IsScaleSet()) ||
                (fakeQuantize && quantSibling->_dst_quant.IsScaleSet() && !fp32eq(quantSibling->_dst_quant.GetScale(), 1.0) &&
                quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale()) || infiniteLoopCount > 0) {
                quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale()) ||
                quantSibling->_dst_quant.IsScaleSet() && infiniteLoopCount > 0) {
                // means we already restarted propagation input memory layer
                // need to search for requantiseable layer prior memory output layer
                InferenceEngine::CNNLayerPtr restartedLayer;
@@ -657,6 +658,73 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {

template<>
class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
 private:
    bool requantizeEltwiseInput(InferenceEngine::EltwiseLayer* eltwiseLayer, uint8_t inputIx, int16_t maxValue,
                                bool fakeQuantize, ScaleFactorUpdateResult &result) {
        auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*eltwiseLayer);
        auto in = InferenceEngine::CNNNetPrevLayer(eltwiseLayer, inputIx);
        bool has8BOr16BOut = LayerInfo(in).has8BOr16BOutput();
        auto quantParams =
            InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, inputIx));
        // trick to get opposite index (for 0 -> 1 for 1 -> 0) by inversing i.
        auto quantParamsOpposite =
            InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, !inputIx));

        while (in && !LayerInfo(in).isInput() && !LayerInfo(in).isMemory() && !LayerInfo(in).isCopy()) {
            auto info = LayerInfo(in);
            if (info.isActivation() || info.isConst()) {
                auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
                float newOutputScale;
                if (has8BOr16BOut) {
                    newOutputScale = quantParamsOpposite->_dst_quant.GetScale() / maxValue;
                } else {
                    newOutputScale = quantDataForInputLayer->_dst_quant.GetScale() *
                        quantParamsOpposite->_dst_quant.GetScale() * maxValue /
                        quantParams->_dst_quant.GetScale();
                }
                if (info.isActivation() && newOutputScale > static_cast<float>(std::numeric_limits<int16_t>::max()) / 2) {
                    return false;
                }
                gnawarn() << "[WARNING] saturated weights for " << eltwiseLayer->name
                    << ". Layer new output scale: " << in->name << ", output_scale=" << newOutputScale
                    << ", was " << quantDataForInputLayer->_dst_quant.GetScale() <<"\n" << std::flush;
                quantDataForInputLayer->_dst_quant.SetScale(newOutputScale);
                result = ScaleFactorUpdateResult(in.get());
                return true;
            }

            if (fakeQuantize && info.isWeightableIdentity()) {
                auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
                if (!fp32eq(quantDataForInputLayer->_weights_quant.GetScale(), 1.0f)) {
                    auto reducer = quantData->_weights_quant.GetScale() / maxValue;
                    reducer = std::max(1.0f, reducer);
                    auto newWeightsScale = quantDataForInputLayer->_weights_quant.GetScale() / reducer;
                    newWeightsScale = std::max(1.0f, newWeightsScale);
                    quantDataForInputLayer->_weights_quant.SetScale(static_cast<int32_t>(newWeightsScale));
                    quantDataForInputLayer->_dst_quant.SetScale(quantDataForInputLayer->_weights_quant.GetScale() *
                        quantDataForInputLayer->_src_quant.GetScale());

                    result = ScaleFactorUpdateResult(in.get());
                    return true;
                }
            }

            // if we are here it means that we are in the port 1
            if (info.isFullyConnected() || info.isConvolution()) {
                auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
                auto newOutputScale = quantParamsOpposite->_dst_quant.GetScale() * maxValue;
                auto newWeightScale = newOutputScale / quantDataForInputLayer->_src_quant.GetScale();
                quantDataForInputLayer->_dst_quant.SetScale(newOutputScale);
                quantDataForInputLayer->_weights_quant.SetScale(newWeightScale);
                result = ScaleFactorUpdateResult(in.get());
                return true;
            }

            in = InferenceEngine::CNNNetHasPrevLayer(in.get()) ? InferenceEngine::CNNNetPrevLayer(in) : nullptr;
        }
        return false;
    }

 public:
    bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, int weightsSize, int inputsSize, ScaleFactorUpdateResult &result,
                    bool fakeQuantize, int infiniteLoopCount) {
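The requantizeEltwiseInput() helper added above factors out the rescaling walk used by the Eltwise scale-factor hunks that follow: when the derived Eltwise weights scale no longer fits the integer range, the pass walks back through the producers of one input and rescales an activation, constant, or FC/convolution output. A compact sketch of the relation it applies (names mirror the code above; this is an illustration, not the full pass):

// Sketch only: mirrors the newOutputScale computation in requantizeEltwiseInput().
// maxValue is int8_max - 1 or int16_max - 1 depending on low-precision mode;
// inputHas8BOr16BOutput refers to the Eltwise input on the branch being rescaled.
float rescaledOutputScale(float producerDst, float oppositeDst, float thisInputDst,
                          float maxValue, bool inputHas8BOr16BOutput) {
    if (inputHas8BOr16BOutput) {
        return oppositeDst / maxValue;  // tie the new scale directly to the opposite input
    }
    // otherwise scale so that the implied Eltwise weight stays within maxValue
    return producerDst * oppositeDst * maxValue / thisInputDst;
}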
@@ -734,7 +802,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
                }
            }

            if (!fp32eq(bestWeightsScale, quantParams1->_weights_quant.GetScale())) {
            if (bestWeightsScale > 0.0f && !fp32eq(bestWeightsScale, quantParams1->_weights_quant.GetScale())) {
                quantParams1->_weights_quant.SetScale(bestWeightsScale);
                quantParams1->_dst_quant.SetScale(quantParams1->_weights_quant.GetScale() * quantParams1->_src_quant.GetScale());
                result = ScaleFactorUpdateResult(in1.get());
@@ -746,79 +814,22 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
            quantData->_dst_quant.SetScale(quantParams1->_dst_quant.GetScale());

            // eltwise will work in int16 or int8 if low precision inputs are used
            auto maxValue = lowPrecision ? (std::numeric_limits<int8_t>::max() - 1) : (std::numeric_limits<int16_t>::max() - 1);
            if (quantData->_weights_quant.GetScale() > maxValue + 1) {
                // rescaling it's activation input
                // iterating thru previous layers of eltwise
                for (uint8_t i = 0; i < 2; ++i) {
                    InferenceEngine::CNNLayerPtr in = InferenceEngine::CNNNetPrevLayer(eltwiseLayer, i);
                    bool has8BOr16BOut = LayerInfo(in).has8BOr16BOutput();
                    auto quantParams =
                        InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, i));
                    // trick to get opposite index (for 0 -> 1 for 1 -> 0) by inversing i.
                    auto quantParamsOpposite =
                        InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(eltwiseLayer, !i));

                    for (; InferenceEngine::CNNNetHasPrevLayer(in.get()); in = CNNNetPrevLayer(in)) {
                        auto info = LayerInfo(in);
                        if (info.isSplit() || info.isSlice() || info.isConcat() || info.isNonFunctional()) {
                            continue;
                        } else if (info.has8BOr16BOutput() && info.isActivation()) {
                            auto quantDataForActivation = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
                            float newOutputScale;
                            if (has8BOr16BOut) {
                                newOutputScale = quantParamsOpposite->_dst_quant.GetScale() / maxValue;
                            } else {
                                newOutputScale = quantDataForActivation->_dst_quant.GetScale() *
                                    quantParamsOpposite->_dst_quant.GetScale() * maxValue /
                                    quantParams->_dst_quant.GetScale();
                            }
                            if (newOutputScale > static_cast<float>(std::numeric_limits<int16_t>::max()) / 2) {
                                break;
                            }
                            gnawarn() << "[WARNING] saturated weights for " << eltwiseLayer->name
                                << ". Layer new output scale: " << in->name << ", output_scale=" << newOutputScale
                                << ", was " << quantDataForActivation->_dst_quant.GetScale() <<"\n" << std::flush;
                            quantDataForActivation->_dst_quant.SetScale(newOutputScale);
                            result = ScaleFactorUpdateResult(in.get());
                            return true;
                        } else if (info.has8BOr16BOutput()) {
                            break;
                        }

                        if (fakeQuantize && info.isWeightableIdentity()) {
                            auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
                            if (!fp32eq(quantDataForInputLayer->_weights_quant.GetScale(), 1.0f)) {
                                auto reducer = quantData->_weights_quant.GetScale() / std::numeric_limits<int16_t>::max();
                                reducer = std::max(1.0f, reducer);
                                auto newWeightsScale = quantDataForInputLayer->_weights_quant.GetScale() / reducer;
                                newWeightsScale = std::max(1.0f, newWeightsScale);
                                quantDataForInputLayer->_weights_quant.SetScale(static_cast<int32_t>(newWeightsScale));
                                quantDataForInputLayer->_dst_quant.SetScale(quantDataForInputLayer->_weights_quant.GetScale() *
                                    quantDataForInputLayer->_src_quant.GetScale());

                                result = ScaleFactorUpdateResult(in.get());
                                return true;
                            }
                        }

                        // if we are here it means that we are in the port 1
                        if (info.isFullyConnected() || info.isConvolution()) {
                            auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
                            auto newOutputScale = quantParamsOpposite->_dst_quant.GetScale() * maxValue;
                            auto newWeightScale = newOutputScale / quantDataForInputLayer->_src_quant.GetScale();
                            quantDataForInputLayer->_dst_quant.SetScale(newOutputScale);
                            quantDataForInputLayer->_weights_quant.SetScale(newWeightScale);
                            result = ScaleFactorUpdateResult(in.get());
                            return true;
                        }
                    }
                }
            // we unable to rescale the input - results might be bad
            gnawarn() << "[INFO] weights saturated for " << eltwiseLayer->name << "\n";
            auto maxValue = lowPrecision ? std::numeric_limits<int8_t>::max() : std::numeric_limits<int16_t>::max();
            if (quantData->_weights_quant.GetScale() <= maxValue) {
                return true;
            }
            break;

            // rescaling it's activation input
            // iterating thru previous layers of eltwise
            for (uint8_t i = 0; i < 2; ++i) {
                if (requantizeEltwiseInput(eltwiseLayer, i, maxValue - 1, fakeQuantize, result)) {
                    return true;
                }
            }
            // we unable to rescale the input - results might be bad
            gnawarn() << "[INFO] weights saturated for " << eltwiseLayer->name << "\n";
            }
            break;
        default : THROW_GNA_EXCEPTION << "Unsupported Eltwise layer for quantisation: " << eltwiseLayer->_operation;
        }
        return true;
@@ -1153,7 +1164,6 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
            }
            quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer);
        }

        double tmp_dst_quant_scale = quant->_weights_quant.GetScale() * quant->_src_quant.GetScale();
        if (weightsSize == 1) {
            auto itt = thresholds.begin();
@@ -409,13 +409,9 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
    uint32_t num_bytes_per_weight = convolution._weights->getTensorDesc().getPrecision().size();
    uint32_t num_bytes_per_bias = biasPrecision.size();

    float weight_scale_factor = 1.0f;
    float output_scale_factor = 1.0f;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(convolution);
    if (quantized != nullptr) {
        weight_scale_factor = quantized->_weights_quant.GetScale();
        output_scale_factor = quantized->_dst_quant.GetScale();
    }
    float weight_scale_factor = getScaleFactor(layer, QuantizedDataType::weights);
    float output_scale_factor = getScaleFactor(layer, QuantizedDataType::output);

    auto& currentComponent = dnnComponents.addComponent(convolution.name, "convolution");
    dnn->InitConvolutional1DComponent(currentComponent,
        num_columns_in,
@@ -586,13 +582,8 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
        in_height, in_width, in_channels,
        convolution._kernel_y, convolution._kernel_x, filter_n, convolution._stride_y, convolution._stride_x, inputPrec);

    float weight_scale_factor = 1.0f;
    float output_scale_factor = 1.0f;
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(convolution);
    if (quantized != nullptr) {
        weight_scale_factor = quantized->_weights_quant.GetScale();
        output_scale_factor = quantized->_dst_quant.GetScale();
    }
    float weight_scale_factor = getScaleFactor(layer, QuantizedDataType::weights);
    float output_scale_factor = getScaleFactor(layer, QuantizedDataType::output);

    auto& currentComponent = dnnComponents.addComponent(convolution.name, "convolution");
    dnn->InitConvolutional2DComponent(currentComponent,
@@ -673,9 +664,6 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP

void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
    auto& power = dynamic_cast<PowerLayer&>(*layer.get());
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
    IE_ASSERT(gnaFlags->sw_fp32 ? (quantized == nullptr) : (quantized != nullptr));

    if (power.power < 0.0f || power.power > 2.8f) {
        IE_THROW() << "[GNA plugin] unsupported power factor, expected be in <0, 2.8> range but was " << power.power;
    }
@@ -705,6 +693,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {

    auto& currentComponent = dnnComponents.addComponent(layer->name, "power");

    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
    IE_ASSERT(gnaFlags->sw_fp32 ? (quantized == nullptr) : (quantized != nullptr));
    dnn->InitAffineComponent(currentComponent,
        num_rows_in + num_padding,
        num_columns_in,
@@ -764,8 +754,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {

        gna_pwl_segment_t* ptr_pwl_segments_target = nullptr;

        float output_pwl_scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
        float input_pwl_scale_factor = quantized != nullptr ? quantized->_src_quant.GetScale() : 1.0f;
        float output_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
        float input_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::input);

        if (!gnaFlags->sw_fp32) {
            if (gnaFlags->uniformPwlDesign) {
@@ -823,7 +813,6 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {

void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
    auto& pooling = dynamic_cast<PoolingLayer&>(*layer.get());
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);

    IE_ASSERT(!layer->insData.empty());
    IE_ASSERT(!layer->outData.empty());
@@ -883,7 +872,7 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
        outputs->getPrecision().size(),
        { pooling._kernel[X_AXIS], pooling._kernel[Y_AXIS] },
        { pooling._stride[X_AXIS], pooling._stride[Y_AXIS] },
        quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
        getScaleFactor(layer, QuantizedDataType::output),
        ptr_inputs,
        ptr_outputs);

@@ -901,8 +890,6 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
}

void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);

    IE_ASSERT(!layer->insData.empty());
    IE_ASSERT(!layer->outData.empty());
    auto inputs = layer->insData.begin()->lock();
@@ -928,7 +915,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
        num_columns_out,
        inputs->getPrecision().size(),
        outputs->getPrecision().size(),
        quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
        getScaleFactor(layer, QuantizedDataType::output),
        num_rows_out + num_padding_out,
        num_columns_out,
        ptr_inputs,
@@ -1053,7 +1040,6 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
            << axis.size() << ".";
    }

    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
    size_t cropOffset = offset.front() * cropLayer->precision.size();
    size_t cropOutputSize = dim.front() * cropLayer->precision.size();
    const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ?
@@ -1111,6 +1097,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {

        auto& currentComponent = dnnComponents.addComponent(layer->name, "crop");

        auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
        dnn->InitAffineComponent(currentComponent,
            num_rows_in + num_padding,
            num_columns_in,
@@ -1119,8 +1106,8 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
            outputs->getPrecision().size(),
            quantized == nullptr ? inputs->getPrecision().size() : (gnaFlags->input_low_precision ? 1 : 2),
            gnaFlags->input_low_precision ? 1 : 4,
            quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
            quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
            getScaleFactor(layer, QuantizedDataType::weights),
            getScaleFactor(layer, QuantizedDataType::output),
            ptr_inputs,
            ptr_outputs,
            ptr_weights,
@@ -1254,8 +1241,8 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
        // TODO: only fp32 and Int16 tested
        quantized == nullptr ? inputs2Bytes->getPrecision().size() : (gnaFlags->input_low_precision ? 1 : 2),
        quantized == nullptr ? inputs4Bytes->getPrecision().size() : (gnaFlags->input_low_precision ? 1 : 4),
        quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
        quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
        getScaleFactor(layer, QuantizedDataType::weights),
        getScaleFactor(layer, QuantizedDataType::output),
        ptr_inputs,
        ptr_outputs,
        ptr_weights,
@@ -1363,8 +1350,8 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
        outputs->getPrecision().size(),
        quantized == nullptr ? input_2->getPrecision().size() : 2,
        quantized == nullptr ? input_2->getPrecision().size() : 4,
        quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
        quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
        getScaleFactor(layer, QuantizedDataType::weights),
        getScaleFactor(layer, QuantizedDataType::output),
        ptr_input_1,
        ptr_outputs,
        ptr_input_2,
@@ -1452,8 +1439,8 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
        outputs->getPrecision().size(),
        weightable._weights->getTensorDesc().getPrecision().size(),
        biasPrecisionSize,
        quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
        quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
        getScaleFactor(layer, QuantizedDataType::weights),
        getScaleFactor(layer, QuantizedDataType::output),
        ptr_inputs,
        ptr_outputs,
        ptr_weights,
@@ -1592,8 +1579,6 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
        return;
    }

    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);

    void* ptr_inputs = nullptr;
    void* ptr_outputs = nullptr;
    void* ptr_weights = nullptr;
@@ -1632,7 +1617,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
            num_columns_in,
            inputs->getPrecision().size(),
            inputs->getPrecision().size(),
            quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
            getScaleFactor(layer, QuantizedDataType::output),
            num_rows_copied,
            num_columns_in,
            ptr_inputs,
@@ -1669,8 +1654,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
        outputs->getPrecision().size(),
        filterLayer->_weights->getTensorDesc().getPrecision().size(),
        biasPrecisionSize,
        quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
        quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
        getScaleFactor(layer, QuantizedDataType::weights),
        getScaleFactor(layer, QuantizedDataType::output),
        ptr_inputs,
        ptr_outputs,
        ptr_weights,
@@ -1726,8 +1711,6 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
        return;
    }

    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);

    auto prevLayer = CNNNetPrevLayer(layer.get(), 0);
    if (!LayerInfo(prevLayer).isSplit() && !LayerInfo(prevLayer).isSlice()) {
        THROW_GNA_EXCEPTION << "Case with Affine Aligning Filter for not Split/Slice layers is not implemented yet!";
@@ -1774,8 +1757,8 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
        numberOfFilters,
        filterWidth,
        convolutionStride,
        quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
        quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
        getScaleFactor(layer, QuantizedDataType::weights),
        getScaleFactor(layer, QuantizedDataType::output),
        ptr_inputs,
        ptr_outputs,
        ptr_weights,
@@ -1834,9 +1817,8 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {

    auto inputs = layer->insData.begin()->lock();
    auto outputs = *layer->outData.begin();
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
    float output_pwl_scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : 1.0f;
    float input_pwl_scale_factor = quantized != nullptr ? quantized->_src_quant.GetScale() : 1.0f;
    float output_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
    float input_pwl_scale_factor = getScaleFactor(layer, QuantizedDataType::input);

    auto orientation = kDnnInterleavedOrientation;

@@ -1903,6 +1885,7 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
        }
        auto activation_type = DnnActivation::fromType(it->second);
        activation_type.fqParams.set = false;
        auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
        if (quantized != nullptr && quantized->_dst_quant.IsStatsSet()) {
            activation_type.fqParams.set = true;
            activation_type.fqParams.levels = quantized->_dst_quant.GetLevels();
@@ -2044,7 +2027,6 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
        return;
    }
    auto layerOrder = layer->GetParamAsInts("order");
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
    if (layer->insData.empty()) {
        THROW_GNA_LAYER_EXCEPTION(layer) << "Input layer pointer is unexpectedly absent";
    }
@@ -2088,7 +2070,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
            squeezedInputOrder[1],
            inputs->getPrecision().size(),
            outputs->getPrecision().size(),
            (quantized == nullptr) ? 1.0f : quantized->_dst_quant.GetScale(),
            getScaleFactor(layer, QuantizedDataType::output),
            ptr_inputs,
            ptr_outputs);
    }
@@ -2103,7 +2085,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
            squeezedInputOrder[1],
            inputs->getPrecision().size(),
            outputs->getPrecision().size(),
            quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
            getScaleFactor(layer, QuantizedDataType::output),
            ptr_inputs,
            ptr_outputs);
    }
@@ -2595,4 +2577,4 @@ GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint
        }
    }
    return temp_buffer;
}
}
@@ -0,0 +1,117 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>
#include <memory>
#include <tuple>
#include <vector>
#include <string>

#include <ie_core.hpp>

#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"

#include "ngraph_functions/pass/convert_prc.hpp"

typedef std::tuple<
    InferenceEngine::Precision,          // Network Precision
    std::string,                         // Target Device
    std::map<std::string, std::string>,  // Configuration
    std::pair<float, float>,             // Input min/max values
    std::pair<float, float>              // Constant min/max values
> constInputAddParams;

namespace LayerTestsDefinitions {

class ConstInputAddTest : public testing::WithParamInterface<constInputAddParams>,
                          public LayerTestsUtils::LayerTestsCommon {
public:
    static std::string getTestCaseName(testing::TestParamInfo<constInputAddParams> obj) {
        InferenceEngine::Precision netPrecision;
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        std::pair<float, float> inputRange;
        std::pair<float, float> constRange;
        std::tie(netPrecision, targetDevice, configuration, inputRange, constRange) = obj.param;

        std::ostringstream result;
        result << "netPRC=" << netPrecision.name() << "_";
        result << "targetDevice=" << targetDevice << "_";
        for (auto const& configItem : configuration) {
            result << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        result << "_IR=" << inputRange.first << "," << inputRange.second << "_";
        result << "IR=" << constRange.first << "," << constRange.second;
        return result.str();
    }

    InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override {
        return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputMax - inputMin, inputMin, (inputMax - inputMin) / 10);
    }

protected:
    void SetUp() override {
        InferenceEngine::Precision netPrecision;
        std::pair<float, float> inputRange;
        std::pair<float, float> constRange;
        std::tie(netPrecision, targetDevice, configuration, inputRange, constRange) = this->GetParam();
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
        std::tie(inputMin, inputMax) = inputRange;

        ngraph::Shape shape = {1, 72};
        auto params = ngraph::builder::makeParams(ngPrc, { shape });

        auto constant = ngraph::builder::makeConstant<float>(ngPrc, shape, {}, true, constRange.second, constRange.first);
        auto eltwise = ngraph::builder::makeEltwise(constant, params[0], ngraph::helpers::EltwiseTypes::ADD);

        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(eltwise) };
        function = std::make_shared<ngraph::Function>(results, params, "InputConstAdd");
    }

private:
    float inputMin = 0.0;
    float inputMax = 0.0;
};

TEST_P(ConstInputAddTest, CompareWithRefImpl) {
    Run();
};

const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16
};

const std::vector<std::map<std::string, std::string>> configs = {
    {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
    }
};

const std::vector<std::pair<float, float>> inputRange = {
    {-10, 10},
    {-100, 100}
};

const std::vector<std::pair<float, float>> constRange = {
    {-10, 10},
    {-0.1, 0.1},
    {-1.0e-5, 1.0e-5}
};

INSTANTIATE_TEST_SUITE_P(smoke_const_input_add, ConstInputAddTest,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configs),
        ::testing::ValuesIn(inputRange),
        ::testing::ValuesIn(constRange)),
    ConstInputAddTest::getTestCaseName);

}  // namespace LayerTestsDefinitions
@@ -9,8 +9,9 @@ using namespace LayerTestsDefinitions;
namespace {
static const std::vector<ngraph::element::Type> precisionsGNA = {
        ngraph::element::f32,
        ngraph::element::u8,
        ngraph::element::i16,
        // integer weights are not supported by GNA so far
        // ngraph::element::u8,
        // ngraph::element::i16,
};

static const std::vector<std::size_t> batchSizesGNA = {
inference-engine/tests/unit/gna/gna_get_scale_factor.cpp (new file, 54 lines)
@@ -0,0 +1,54 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>
#include <limits>

#include <gtest/gtest.h>
// to suppress deprecated definition errors
#define IMPLEMENT_INFERENCE_ENGINE_PLUGIN
#include "legacy/layer_transform.hpp"
#include "frontend/layer_quantizer.hpp"

namespace {

class GnaGetScaleFactorTest : public ::testing::Test {
protected:
    void GetScaleFactorAndCheck(float src_scale, float dst_scale, float weights_scale, float bias_scale) const {
        InferenceEngine::LayerParams params("fc", "FullyConnected", InferenceEngine::Precision::FP32);
        InferenceEngine::CNNLayerPtr layer = std::make_shared<InferenceEngine::CNNLayer>(params);
        layer = InferenceEngine::injectData<GNAPluginNS::QuantizedLayerParams>(*layer);
        auto quant = InferenceEngine::getInjectedData<GNAPluginNS::QuantizedLayerParams>(*layer);
        quant->_src_quant.SetScale(src_scale);
        quant->_dst_quant.SetScale(dst_scale);
        quant->_weights_quant.SetScale(weights_scale);
        quant->_bias_quant.SetScale(bias_scale);
        ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::input), src_scale);
        ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::output), dst_scale);
        ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::weights), weights_scale);
        ASSERT_EQ(GNAPluginNS::getScaleFactor(layer, GNAPluginNS::QuantizedDataType::bias), bias_scale);
    }
};

TEST_F(GnaGetScaleFactorTest, validSF) {
    EXPECT_NO_THROW(GetScaleFactorAndCheck(100, 200, 300, 400));
}

TEST_F(GnaGetScaleFactorTest, invalidSF) {
    EXPECT_ANY_THROW(GetScaleFactorAndCheck(0, 200, 300, 400));
    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 0, 300, 400));
    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 0, 400));
    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 300, 0));
    EXPECT_ANY_THROW(GetScaleFactorAndCheck(-100, 200, 300, 400));
    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, -200, 300, 400));
    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, -300, 400));
    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 300, -400));
    double inf = std::numeric_limits<float>::infinity();
    EXPECT_ANY_THROW(GetScaleFactorAndCheck(inf, 200, 300, 400));
    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, inf, 300, 400));
    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, inf, 400));
    EXPECT_ANY_THROW(GetScaleFactorAndCheck(100, 200, 300, inf));
}

}  // namespace