[GNA] fake quantize single layer tests for GNA plugin (#2060)

* fake quantize single layer test for GNA plugin

* implemented fakequantize for fp32 case as an activation function

* added proper seed randomisation within single test run

* [GNA] [FAKEQUANTIZE] fixed ref-fp32 implementation on GNA to use nearbyint instead of roundf

* [GNA] [FAKEQUANTIZE] restored random seed

* [GNA][FAKEQUANTIZE] disabled 4d and integer tests for FakeQuantize

* [GNA][FAKEQUANTIZE]updated ngraph FakeQuantize builder to accept seed

* [GNA][FAKEQUANTIZE]aligned FP calculations order on GNA with reference ngraph - this however gives more error

* [CPU] build of FakeQuantize tests restored

* [TESTS][FAKEQUANTIZE] ignore extra inferRequests for disabled tests

* [GNA] Fixed legacy unit test failures that appeared due to an extra check for possible segfault in import frames

* [GNA] adapted the fuse-multiple-identities pass for the FakeQuantize layer

* [GNA]fp32 runtime code review
This commit is contained in:
Eugene Smirnov
2020-09-21 14:22:14 +03:00
committed by GitHub
parent dda6d9136b
commit f0b10bf071
31 changed files with 753 additions and 332 deletions

View File

@@ -380,65 +380,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dn
}
}
// Runs the whole compiled network on the CPU by executing each component in
// order and dispatching it to the matching Apply*Transform reference kernel.
// An "active list" (subset of output rows) is only honoured for the last
// component, or for the last two when they form an Affine + PWL pair.
void GNAPluginNS::backend::AMIntelDNN::Propagate() {
for (uint32_t i = 0; i < component.size(); i++) {
intel_dnn_component_t *comp = &component[i];
uint32_t *ptr_active_outputs = nullptr;
// Output count depends on buffer orientation: rows when interleaved,
// columns otherwise.
uint32_t num_active_outputs = (comp->orientation_out == kDnnInterleavedOrientation)
? comp->num_rows_out : comp->num_columns_out;
if (i == component.size() - 1) { // active list applies to last component
ptr_active_outputs = ptr_active_outputs_;
num_active_outputs = num_active_outputs_;
} else if (i == component.size() - 2) { // also applies to last two components when last is PWL
if ((component[i].operation == kDnnAffineOp) && (component[i + 1].operation == kDnnPiecewiselinearOp)) {
ptr_active_outputs = ptr_active_outputs_;
num_active_outputs = num_active_outputs_;
}
}
switch (comp->operation) {
case kDnnAffineOp :ApplyAffineTransform(comp, ptr_active_outputs, num_active_outputs);
break;
case kDnnDiagonalOp:ApplyDiagonalTransform(comp);
break;
case kDnnRecurrentOp:
// A recurrent component must be immediately followed by a PWL one:
// each input row is processed and then activated row-by-row, with the
// feedback pointer advanced by one PWL output row per iteration.
if ((i < component.size() - 1) && (component[i + 1].operation == kDnnPiecewiselinearOp)) {
intel_dnn_component_t *comp_pwl = &component[i + 1];
for (uint32_t j = 0; j < comp->num_rows_in; j++) {
void *ptr_feedbacks =
reinterpret_cast<void *>(reinterpret_cast<int32_t *>(comp->op.recurrent.ptr_feedbacks) + j * comp_pwl->num_columns_out);
ApplyRecurrentTransform(comp, j, ptr_feedbacks);
// PrintOutputs(i);
ApplyPiecewiseLinearTransform(comp_pwl, compute_precision_, num_active_outputs, j);
}
i++; // skip next component
} else {
fprintf(stderr, "Missing PiecewiseLinear component after Recurrent component in Propagate!\n");
throw -1;
}
break;
case kDnnConvolutional1dOp:ApplyConvolutional1DTransform(comp);
break;
case kDnnPiecewiselinearOp:ApplyPiecewiseLinearTransform(comp, compute_precision_, num_active_outputs);
break;
case kDnnMaxPoolOp:ApplyMaxPoolTransform(comp, compute_precision_);
break;
case kDnnInterleaveOp:ApplyTranspose(comp);
break;
case kDnnDeinterleaveOp:ApplyTranspose(comp);
break;
case kDnnCopyOp:ApplyCopy(comp);
break;
default:fprintf(stderr, "Bad operation in Propagate!\n");
throw -1;
break;
}
// PrintOutputs(i); fflush(stdout);
}
}
// Accessor: returns the quantization scale factor stored for the component's
// output buffer.
float GNAPluginNS::backend::AMIntelDNN::OutputScaleFactor(intel_dnn_component_t &comp) {
return comp.output_scale_factor;
}
@@ -529,11 +470,9 @@ void GNAPluginNS::backend::AMIntelDNN::WriteGraphWizModel(const char *filename)
graph << ", label=<<TABLE BORDER=\"0\" CELLBORDER=\"1\" CELLSPACING=\"0\">\n"
" <TR><TD colspan=\"2\">" << l << "</TD></TR>\n";
#ifdef PLOT
if (components[k].original_layer_name != nullptr) {
graph << " <TR><TD> IR </TD><TD>" << components[k].original_layer_name << "</TD></TR>\n";
}
#endif
graph << " <TR><TD> dims</TD><TD>" << components[k].num_rows_in << "x" << components[k].num_rows_out<< "</TD></TR>\n";
if (IS_AFFINE(k)) {
graph << " <TR><TD> wscale</TD><TD>" << components[k].op.affine.weight_scale_factor<< "</TD></TR>\n";
@@ -1191,6 +1130,35 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
out_file << "<num_bytes_per_slope> " << std::dec << sizeof(int16_t) << "\n";
out_file << "<num_bytes_per_intercept> " << std::dec << sizeof(int16_t) << "\n";
out_file << "<num_bytes_per_offset> " << std::dec << sizeof(int32_t) << "\n";
switch (func_id) {
case kActRelu:
case kActLeakyRelu:
out_file << "<lrelu.negative_slope> " <<
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.lrelu.negative_slope << "\n";
break;
case kActPow :
out_file << "<pow.exponent> " <<
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.pow.exponent << "\n";
out_file << "<pow.scale> " <<
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.pow.scale << "\n";
out_file << "<pow.offset> " <<
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.pow.offset << "\n";
break;
case kActFakeQuantize :
out_file << "<fakeQuantize.levels> " <<
std::dec << component[i].op.pwl.func_id.args.fakeQuantize.levels << "\n";
out_file << "<fakeQuantize.input_low> " <<
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.input_low << "\n";
out_file << "<fakeQuantize.input_high> " <<
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.input_high << "\n";
out_file << "<fakeQuantize.output_low> " <<
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.output_low << "\n";
out_file << "<fakeQuantize.output_high> " <<
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.output_high << "\n";
break;
default:
break;
}
if (logging_precision == kDnnFloat) {
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> " << 1.0 << "\n";
out_file << "<num_segments> " << std::dec << 0 << "\n";

View File

@@ -266,8 +266,6 @@ public:
}
void Propagate();
float OutputScaleFactor(uint32_t component_index) {
return OutputScaleFactor(component[component_index]);
}

View File

@@ -27,179 +27,6 @@
#include "runtime/cnn.h"
// Affine (fully-connected) FP32 reference kernel: C = A * B + bias, computed
// through the CBLAS-style helpers. When `list` is non-null only the listed
// output rows (the "active list") are computed; `listsize` is then the number
// of entries in `list`.
// Throws when the component does not carry 4-byte (FP32) inputs.
void GNAPluginNS::backend::ApplyAffineTransform(intel_dnn_component_t *component, uint32_t *list, uint32_t listsize) {
    if (4 != component->num_bytes_per_input) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }
    auto transform = &component->op.affine;
    // GEMM dimensions; unsigned to match the loop counters (the previous `int`
    // declarations caused signed/unsigned comparisons in every loop below).
    uint32_t m = component->num_rows_out;
    uint32_t n = component->num_columns_in;
    uint32_t k = component->num_rows_in;
    uint32_t lda = component->num_rows_in;
    uint32_t ldb = component->num_columns_in;
    uint32_t ldc = component->num_columns_out;
    auto A = reinterpret_cast<float *>(transform->ptr_weights);
    auto B = reinterpret_cast<float *>(component->ptr_inputs);
    auto C = reinterpret_cast<float *>(component->ptr_outputs);
    auto bias = reinterpret_cast<float *>(transform->ptr_biases);
    if (list == nullptr) {
        // Seed C with the bias, then accumulate the product on top (beta = 1.0).
        for (uint32_t i = 0; i < m; i++) {
            for (uint32_t j = 0; j < n; j++) {
                C[i * ldc + j] = bias[i];
            }
        }
        cblas_sgemm1(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, A, lda, B, ldb, 1.0, C, ldc);
    } else {
        // Active-list mode: output row l of C corresponds to original row list[l].
        for (uint32_t l = 0; l < listsize; l++) {
            uint32_t i = list[l];
            for (uint32_t j = 0; j < n; j++) {
                C[l * ldc + j] = bias[i];
            }
        }
        cblas_sgemm_subset(CblasRowMajor,
                           CblasNoTrans,
                           CblasNoTrans,
                           m,
                           n,
                           k,
                           1.0,
                           A,
                           lda,
                           B,
                           ldb,
                           1.0,
                           C,
                           ldc,
                           list,
                           listsize);
    }
}
// Diagonal affine FP32 reference kernel: outputs are pre-filled with the bias,
// then accumulated column-by-column via cblas_ssbmv1 with bandwidth 0 —
// presumably a pure per-row diagonal scale by the weights A; confirm against
// the helper's implementation.
void GNAPluginNS::backend::ApplyDiagonalTransform(intel_dnn_component_t *component) {
if (4 != component->num_bytes_per_input) {
THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
}
auto transform = &component->op.affine;
int m = component->num_rows_out;
int n = component->num_columns_in;
int ldb = component->num_columns_in;
int ldc = component->num_columns_out;
auto A = reinterpret_cast<float *>(transform->ptr_weights);
auto B = reinterpret_cast<float *>(component->ptr_inputs);
auto C = reinterpret_cast<float *>(component->ptr_outputs);
auto bias = reinterpret_cast<float *>(transform->ptr_biases);
// Pre-fill outputs with the bias so the band multiply accumulates (beta = 1.0).
for (uint32_t i = 0; i < m; i++) {
for (uint32_t j = 0; j < n; j++) {
C[i * ldc + j] = bias[i];
}
}
for (uint32_t j = 0; j < n; j++) {
float *Bcol = B + j * ldb;
float *Ccol = C + j * ldc;
cblas_ssbmv1(CblasRowMajor, CblasLower, m, 0, 1.0, A, 1, Bcol, 1, 1.0, Ccol, 1);
}
}
// Recurrent FP32 reference kernel for a single input row: combines the current
// input row (k1 columns) with the feedback buffer (k2 columns) through the
// split matrix-vector product sgemv_split, writing n = k2 outputs plus bias
// into the output row at index `row`.
void GNAPluginNS::backend::ApplyRecurrentTransform(intel_dnn_component_t *component, uint32_t row, void *ptr_feedbacks) {
if (4 != component->num_bytes_per_input) {
THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
}
intel_recurrent_t *transform = &component->op.recurrent;
int k1 = component->num_columns_in;
int k2 = component->num_columns_out;
int n = k2;
if (component->op.recurrent.ptr_feedbacks == nullptr) {
THROW_GNA_EXCEPTION << "nullptr feedback pointer";
}
// A1: current input row; A2: feedback (previous PWL output) for this row.
auto A1 = reinterpret_cast<float *>(component->ptr_inputs) + row * component->num_columns_in;
auto A2 = reinterpret_cast<float *>(ptr_feedbacks);
auto X = reinterpret_cast<float *>(transform->ptr_weights);
auto B = reinterpret_cast<float *>(transform->ptr_biases);
auto C = reinterpret_cast<float *>(component->ptr_outputs) + row * component->num_columns_out;
sgemv_split(n, k1, k2, A1, A2, X, B, C);
}
// 1D convolution FP32 reference kernel: validates the input element width and
// delegates the actual filtering to CNNFilter32.
void GNAPluginNS::backend::ApplyConvolutional1DTransform(intel_dnn_component_t *component) {
    if (component->num_bytes_per_input != 4) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }
    CNNFilter32(component);
}
// Piecewise-linear activation over the whole component; only the FP32 number
// type is supported by this reference path.
void GNAPluginNS::backend::ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
                                                         intel_dnn_number_type_t number_type,
                                                         uint32_t listsize) {
    if (number_type != kDnnFloat) {
        THROW_GNA_EXCEPTION << "Bad number type: " << number_type;
    }
    PwlApply32(component, listsize);
}
// Piecewise-linear activation restricted to a single row `num_row` of the
// component, over element range [0, listsize - 1]; FP32 only.
// NOTE(review): listsize == 0 would wrap `listsize - 1` to UINT32_MAX —
// callers appear to always pass a non-zero size; confirm.
void GNAPluginNS::backend::ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
intel_dnn_number_type_t number_type,
uint32_t listsize,
uint32_t num_row) {
if (kDnnFloat != number_type) {
THROW_GNA_EXCEPTION << "Bad number type: " << number_type;
}
PwlApply32(component, num_row, num_row, 0, listsize - 1);
}
// Max-pooling reference kernel: checks the FP32 input width, then forwards to
// the shared CNNMaxPool implementation with the requested number type.
void GNAPluginNS::backend::ApplyMaxPoolTransform(intel_dnn_component_t *component, intel_dnn_number_type_t number_type) {
    if (component->num_bytes_per_input != 4) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }
    CNNMaxPool(component, number_type);
}
// Transposes the component's input matrix into its output buffer: B = A^T,
// where A is (m x n) with leading dimension lda and B is (n x m) with leading
// dimension ldb. Used by the interleave/deinterleave components.
void GNAPluginNS::backend::ApplyTranspose(intel_dnn_component_t *component) {
    if (4 != component->num_bytes_per_input) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }
    // Unsigned dimensions to match the uint32_t loop counters (the previous
    // `int` declarations caused signed/unsigned comparisons in both loops).
    uint32_t m = component->num_rows_in;
    uint32_t n = component->num_columns_in;
    uint32_t lda = component->num_columns_in;
    uint32_t ldb = component->num_columns_out;
    // B = Transpose(A) where A is mxn and B is nxm
    auto A = reinterpret_cast<float *>(component->ptr_inputs);
    auto B = reinterpret_cast<float *>(component->ptr_outputs);
    for (uint32_t row = 0; row < m; row++) {
        for (uint32_t col = 0; col < n; col++) {
            B[col * ldb + row] = A[row * lda + col];
        }
    }
}
// Copies an (m x n) sub-matrix of FP32 values from the component's input
// buffer to its output buffer, honouring the differing leading dimensions of
// the two buffers.
void GNAPluginNS::backend::ApplyCopy(intel_dnn_component_t *component) {
    if (4 != component->num_bytes_per_input) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }
    auto src = reinterpret_cast<uint8_t *>(component->ptr_inputs);
    auto dst = reinterpret_cast<uint8_t *>(component->ptr_outputs);
    // Unsigned to match num_rows_in and the loop counters below.
    uint32_t m = component->op.copy.num_copy_rows;
    uint32_t n = component->op.copy.num_copy_columns;
    uint32_t lda = component->num_columns_in;
    uint32_t ldb = component->num_columns_out;
    if (m > component->num_rows_in) {
        // Message fixed: m counts rows (the old text incorrectly said columns).
        THROW_GNA_EXCEPTION << "Error: attempt to copy more rows than matrix has";
    }
    // NOTE(review): n is not validated against num_columns_in — callers appear
    // to guarantee n <= num_columns_in; confirm.
    auto A = reinterpret_cast<float *>(src);
    auto B = reinterpret_cast<float *>(dst);
    for (uint32_t row = 0; row < m; row++) {
        for (uint32_t col = 0; col < n; col++) {
            B[row * ldb + col] = A[row * lda + col];
        }
    }
}
bool GNAPluginNS::backend::isCompatibleDnn(GNAPluginNS::backend::AMIntelDNN dnn1, GNAPluginNS::backend::AMIntelDNN dnn2) {
bool isCompatible = true;

View File

@@ -49,21 +49,6 @@
namespace GNAPluginNS {
namespace backend {
void ApplyAffineTransform(intel_dnn_component_t *component, uint32_t *list, uint32_t listsize);
void ApplyDiagonalTransform(intel_dnn_component_t *component);
void ApplyRecurrentTransform(intel_dnn_component_t *component, uint32_t row, void *ptr_feedbacks);
void ApplyConvolutional1DTransform(intel_dnn_component_t *component);
void ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
intel_dnn_number_type_t number_type,
uint32_t listsize);
void ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
intel_dnn_number_type_t number_type,
uint32_t listsize,
uint32_t num_row);
void ApplyMaxPoolTransform(intel_dnn_component_t *component, intel_dnn_number_type_t number_type);
void ApplyTranspose(intel_dnn_component_t *component);
void ApplyCopy(intel_dnn_component_t *component);
void PlotFloatIntDnn(GNAPluginNS::backend::AMIntelDNN *dnn, GNAPluginNS::backend::AMIntelDNN *dnn_int);
bool isCompatibleDnn(GNAPluginNS::backend::AMIntelDNN dnn1, GNAPluginNS::backend::AMIntelDNN dnn2);
void ClearScoreError(intel_score_error_t *error);

View File

@@ -18,9 +18,9 @@ intel_dnn_component_t & backend::DnnComponents::addComponent(const std::string l
components.emplace_back(layerName, intel_dnn_component_t());
auto &currentComponent = components.back().second;
#ifdef PLOT
currentComponent.original_layer_name = components.back().first.c_str();
std::cout << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << components.size() - 1 << std::endl;
#endif
currentComponent.original_layer_name = components.back().first.c_str();
return currentComponent;
}

View File

@@ -27,6 +27,7 @@ enum DnnActivationType : uint8_t {
kActNegHalfLog,
kActSoftSign,
kActPow,
kActFakeQuantize,
kActNumType
};
@@ -43,7 +44,14 @@ struct DnnActivation {
float offset;
} pow;
struct {
float reserved[3];
int32_t levels;
float input_low;
float input_high;
float output_low;
float output_high;
} fakeQuantize;
struct {
float reserved[5];
};
} args;
operator DnnActivationType () const noexcept {
@@ -75,7 +83,8 @@ static const char *intel_dnn_activation_name[kActNumType] = {
"kActNegHalfLog",
"kActCustom",
"kActSoftSign",
"kActPow"
"kActPow",
"kActFakeQuantize"
};
typedef enum DnnSoftmaxType {
@@ -232,9 +241,7 @@ typedef struct {
void *ptr_outputs;
float output_scale_factor;
float input_scale_factor;
#ifdef PLOT
const char * original_layer_name = nullptr;
#endif
} intel_dnn_component_t;
typedef struct {

View File

@@ -4,11 +4,35 @@
#include <vector>
#include <string>
#include <frontend/quantized_layer_params.hpp>
#include <legacy/layer_transform.hpp>
#include <layers/gna_layer_info.hpp>
#include "gna_input_desc.hpp"
#include "gna_plugin_log.hpp"
std::vector<void *>& GNAPluginNS::InputDesc::getPtrInputsGlobal(const std::string& name) {
using namespace InferenceEngine;
using namespace GNAPluginNS;
// Returns the minimal number of bytes needed to store the full tensor of an
// "Input" layer: element count (product of output dims) times the per-element
// width — 2 bytes when quantization params are attached to the layer,
// otherwise 4 bytes (FP32).
// Throws when `layer` is not an Input layer or has an unexpected outData count.
size_t InputDesc::minBytesRequiredForStoreInput(CNNLayerPtr layer) {
    // Validate preconditions first so the error paths run before any
    // layer-content inspection.
    if (!LayerInfo(layer).isInput()) {
        // Message fixed: the old text read "expect to worn on".
        THROW_GNA_LAYER_EXCEPTION(layer) << "minBytesRequiredForStoreInput expected to work on \"Input\" layer";
    }
    if (layer->outData.size() != 1) {
        THROW_GNA_LAYER_EXCEPTION(layer) << "minBytesRequiredForStoreInput invalid outData for the layer";
    }
    auto quantized = getInjectedData<QuantizedLayerParams>(layer);
    const size_t precision_bytes = quantized ? 2 : 4;
    auto dims = layer->outData.front()->getTensorDesc().getDims();
    return details::product(dims.begin(), dims.end()) * precision_bytes;
}
std::vector<void *>& InputDesc::getPtrInputsGlobal(const std::string& name) {
if (ptr_inputs_global_id.find(name) == ptr_inputs_global_id.end()) {
ptr_inputs_global_storage.push_front({});
ptr_inputs_global_id[name] = ptr_inputs_global_storage.begin();
@@ -16,14 +40,14 @@ std::vector<void *>& GNAPluginNS::InputDesc::getPtrInputsGlobal(const std::strin
return *ptr_inputs_global_id[name];
}
intel_dnn_orientation_t GNAPluginNS::InputDesc::getOrientation(const std::string& name) {
// Looks up the stored data orientation for the input blob with the given name;
// throws when the input was never registered.
intel_dnn_orientation_t InputDesc::getOrientation(const std::string& name) {
    // Single map lookup instead of find() followed by operator[].
    auto found = orientation_in.find(name);
    if (found == orientation_in.end()) {
        THROW_GNA_EXCEPTION << "Can't find orientation for input name '" << name << "'";
    }
    return found->second;
}
float GNAPluginNS::InputDesc::getScaleFactor(const std::size_t index) {
float InputDesc::getScaleFactor(const std::size_t index) {
if (index >= inputScaleFactors.size()) {
THROW_GNA_EXCEPTION << "Can't find scale factor for index = " << index;
}

View File

@@ -9,6 +9,8 @@
#include <list>
#include <unordered_map>
#include <string>
#include <legacy/ie_layers.h>
#include "backend/dnn_types.h"
namespace GNAPluginNS {
@@ -17,6 +19,8 @@ struct InputDesc {
/// order of scale factors matches inputs order in original topology
std::vector<float> inputScaleFactors;
std::map<std::string, int> bytes_allocated_for_input;
size_t minBytesRequiredForStoreInput(InferenceEngine::CNNLayerPtr);
std::unordered_map<std::string, std::list<std::vector<void *>>::iterator> ptr_inputs_global_id;
std::list<std::vector<void *>> ptr_inputs_global_storage;

View File

@@ -1480,6 +1480,14 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer)
}
}
// FakeQuantize layer entry point for the graph compiler.
// In FP32 (software) mode the layer is lowered to a PWL activation whose
// parameters satisfy the fakeQuantize formula (see the kActFakeQuantize
// handling in PWLPrimitive).
// NOTE(review): in the quantized/integer path this function intentionally
// creates no primitive — the layer is silently dropped; proper conversion to
// GNA scale factors for the integer case is a known TODO.
void GNAGraphCompiler::FakeQuantizePrimitive(InferenceEngine::CNNLayerPtr layer) {
// in FP32 mode lets use special form of activation that satisfies fakeQuantize formula
if (gnaFlags->sw_fp32) {
PWLPrimitive(layer);
return;
}
}
void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto* generic = dynamic_cast<GenericLayer*>(layer.get());
std::string type;
@@ -1558,7 +1566,8 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
{"neglog", kActNegLog},
{"neghalflog", kActNegHalfLog},
{"identity", kActIdentity},
{"softsign", kActSoftSign}
{"softsign", kActSoftSign},
{"fakequantize", kActFakeQuantize}
};
auto it = supportedActivations.find(type);
@@ -1573,6 +1582,42 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
activation_type.args.lrelu.negative_slope = 0.0f;
}
if (it->second == kActFakeQuantize) {
// get params from const input
auto GetParamFromInputAsFloat = [](CNNLayerPtr input, size_t idx) {
if (input->insData.size() <= idx) {
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << "input";
}
auto iLayerData = input->insData[idx].lock();
if (!iLayerData) {
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << ", input: cannot dereference data weak-pointer";
}
auto iLayer = getCreatorLayer(iLayerData).lock();
if (!iLayer) {
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << ", input: cannot dereference creator layer weak-pointer";
}
if (!LayerInfo(iLayer).isConst()) {
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << ", input: expected to be of type const, but was: " << iLayer->type;
}
if (!iLayer->blobs.count("custom")) {
THROW_GNA_LAYER_EXCEPTION(iLayer) << "cannot get custom blob";
}
auto data = iLayer->blobs["custom"];
if (data->getTensorDesc().getPrecision() != Precision::FP32) {
THROW_GNA_LAYER_EXCEPTION(iLayer) << "cannot cast custom blob to type FP32, since it is of type: " << data->getTensorDesc().getPrecision();
}
return data->cbuffer().as<float*>()[0];
};
activation_type.args.fakeQuantize.levels = layer->GetParamAsInt("levels");
activation_type.args.fakeQuantize.input_low = GetParamFromInputAsFloat(layer, 1);
activation_type.args.fakeQuantize.input_high = GetParamFromInputAsFloat(layer, 2);
activation_type.args.fakeQuantize.output_low = GetParamFromInputAsFloat(layer, 3);
activation_type.args.fakeQuantize.output_high = GetParamFromInputAsFloat(layer, 4);
}
string actName = "unknown";
#ifdef PLOT
@@ -1776,7 +1821,8 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
{{"Crop"}, CREATE(CropPrimitive)},
{{"Copy"}, CREATE(CopyPrimitive)},
{{"TensorIterator"}, SKIP},
{{"LSTMCell"}, SKIP}
{{"LSTMCell"}, SKIP},
{{"FakeQuantize"}, CREATE(FakeQuantizePrimitive)} // TODO: fakequantize layer should be properly converted to GNA scale factors for integer case
};
auto it = LayersBuilder::getStorage().find(layer->type);
if (it != LayersBuilder::getStorage().end()) {
@@ -1914,10 +1960,17 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
if (included == concat_connection.end()) {
gnamem->reserve_ptr(&concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64);
size_t concatInputIdx = 0;
for (auto &&inputLayer : concatLayerInfoItem.concatInputLayers) {
if (InferenceEngine::details::CaselessEq<std::string>()
(inputLayer.name, "input")) {
inputDesc->bytes_allocated_for_input[inputLayer.name] = inputLayer.tensorSize;
// skipping non functional and reshape layer, as in that case input might be not connected to anything
auto realConcatInputs = CNNNetGetPrevLayersSkip(concat, [](CNNLayerPtr l) {
return !LayerInfo(l).isNonFunctional() && !LayerInfo(l).isSplit();
}, concatInputIdx++);
for (auto rInput : realConcatInputs) {
if (LayerInfo(rInput.first).isInput()) {
inputDesc->bytes_allocated_for_input[rInput.first->name] += inputLayer.tensorSize;
}
}
}
concatLayerInfoItem.input_allocated = true;
@@ -1960,7 +2013,14 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// real input not a memory input
if (LayerInfo(prevLayer).isInput()) {
if (0 == inputDesc->bytes_allocated_for_input[prevLayer->name]) {
// real allocation pointer will be kept in ptr not in ptf_inputs_global
// if request for allocation less that realTensorInput - we need to extend request
auto minInput = inputDesc->minBytesRequiredForStoreInput(prevLayer);
if (num_data_bytes_in < minInput) {
gnalog() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" << ALIGN(minInput, 8);
num_data_bytes_in = ALIGN(minInput, 8);
}
// real allocation pointer will be kept in ptr not in ptr_inputs_global
if (offset < 0) {
gnamem->push_value(ptr,
static_cast<uint8_t>(0),
@@ -1972,7 +2032,6 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
num_data_bytes_in,
64);
}
inputDesc->bytes_allocated_for_input[prevLayer->name] = num_data_bytes_in;
}
if (ALIGN(num_data_bytes_in, 64) > ALIGN(inputDesc->bytes_allocated_for_input[prevLayer->name], 64)) {

View File

@@ -120,6 +120,7 @@ public:
void SplitPrimitive(InferenceEngine::CNNLayerPtr);
void SlicePrimitive(InferenceEngine::CNNLayerPtr);
void PWLPrimitive(InferenceEngine::CNNLayerPtr);
void FakeQuantizePrimitive(InferenceEngine::CNNLayerPtr);
void CopyPrimitive(InferenceEngine::CNNLayerPtr);
void Reset();

View File

@@ -185,9 +185,6 @@ inline std::pair<InferenceEngine::CNNLayerPtr, int> CNNNetCheckNextLayerSkipCer
*/
template <class Layer>
inline std::vector<CNNLayerPtr> CNNNetGetAllNextLayersSkipCertain(Layer layer, int oDataIdx, const std::function<bool(CNNLayerPtr)> &shouldSkip) {
// TODO: need to have generic function that creates slice of the graph : starting from given layer
// and skipped all non functional - ending up into functional one
std::list<CNNLayerPtr> currentSet;
std::vector<CNNLayerPtr> resultSet;

View File

@@ -696,6 +696,7 @@ void GNAModelSerial::ImportInputs(std::istream &is,
is.read(reinterpret_cast<char *>(&input), sizeof(input));
inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
inputsDesc->orientation_in[name] = input.orientation;
inputsDesc->bytes_allocated_for_input[name] = input.element_size * input.elements_count;
auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup});

View File

@@ -36,6 +36,7 @@
#include "memory/gna_allocator.hpp"
#include "memory/gna_memory_state.hpp"
#include "gna_model_serial.hpp"
#include "runtime/gna_float_runtime.hpp"
#if GNA_LIB_VER == 2
#include <gna2-model-api.h>
@@ -903,15 +904,28 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
auto dims = input.second->getTensorDesc().getDims();
auto importedElements = is2D ? dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 2] * dims[dims.size() - 3];
auto importedFrames = dims[0];
auto targetGroups = is2D ? dims[dims.size() - 2] : dims[0]; // TODO: no proper support for groups yet
auto importedElementSizeBytes = gnaFlags->sw_fp32 ? 4 : 2;
auto importedBytes = importedElements * importedFrames * importedElementSizeBytes;
if (inputsDesc->bytes_allocated_for_input[input.first] < importedBytes) {
THROW_GNA_EXCEPTION << "Cannot import input frames for :" << input.first
<< ", allocated size: " << inputsDesc->bytes_allocated_for_input[input.first]
<< ", but input blob size: " << importedBytes;
}
ImportFrames(inputsDesc->getPtrInputsGlobal(input.first)[idx],
input.second->cbuffer().as<float *>(),
input.second->getTensorDesc().getPrecision(),
gnaFlags->sw_fp32 ? 1.0f : inputsDesc->getScaleFactor(inputNum),
inputsDesc->getOrientation(input.first),
dims[0],
is2D ? dims[dims.size() - 2] : dims[0],
is2D ? dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 2] * dims[dims.size() - 3],
is2D ? dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 2] * dims[dims.size() - 3]);
importedFrames,
targetGroups,
importedElements,
importedElements);
bool isOneChannel = input.second->getTensorDesc().getDims()[1] == 1;
if (do_rotate_input && ((inputLayout == Layout::NC || inputLayout == Layout::NCHW)
@@ -929,7 +943,8 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
}
if (!gnadevice) {
dnn->Propagate();
auto runtime = runtime::FP(dnn);
runtime.infer();
if (freeNnet != nnets.end()) {
std::get<1>(*freeNnet) = 1;
}

View File

@@ -49,6 +49,10 @@ class LayerInfo {
explicit LayerInfo(InferenceEngine::CNNLayer * layer)
: layer(layer) {
}
// True when the wrapped layer has more than one input edge (e.g. Eltwise or
// Concat-style layers).
bool hasMultipleInputs() const noexcept {
IS_VALID();
return layer->insData.size() > 1;
}
bool has16BOutput() const noexcept {
IS_VALID();
static InferenceEngine::details::caseless_set<std::string> layersWith16BOutputs = {"memory", "input", "split", "slice", "concat", "copy", "const"};
@@ -200,6 +204,9 @@ class LayerInfo {
bool isConcat() const noexcept {
return isOfType("concat");
}
// True for FakeQuantize layers.
// The type string is fixed: the previous "FakeQnatize" was misspelled and
// could never match a real layer type. The misspelled method name itself is
// kept so existing callers keep compiling. TODO: rename to isFakeQuantize().
bool isFakeQnatize() const noexcept {
    return isOfType("fakequantize");
}
bool isNonFunctional() const noexcept {
return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze");
}

View File

@@ -48,6 +48,7 @@ enum LayerType {
LSTMCell,
TensorIterator,
SoftSign,
FakeQuantize,
NO_TYPE
};
@@ -84,7 +85,8 @@ static const InferenceEngine::details::caseless_map<std::string, GNAPluginNS::La
{ "LSTMCell", LSTMCell },
{ "TensorIterator", TensorIterator },
{ "Abs", Abs },
{ "SoftSign", SoftSign }
{ "SoftSign", SoftSign },
{ "FakeQuantize", FakeQuantize },
};
GNAPluginNS::LayerType LayerTypeFromStr(const std::string &str);

View File

@@ -1293,11 +1293,12 @@ void FuseMultipleIdentitiesPass::run() {
auto isNonFunctional = [](CNNLayerPtr ptr) {
return LayerInfo(ptr).isNonFunctional();
};
auto eltwise = dynamic_cast<InferenceEngine::EltwiseLayer *>(l.get());
auto concat = dynamic_cast<InferenceEngine::ConcatLayer *>(l.get());
if (LayerInfo(l).isNonFunctional() || LayerInfo(l).has32BInput())
if (LayerInfo(l).hasMultipleInputs()) {
continue;
}
if (LayerInfo(l).isNonFunctional() || LayerInfo(l).has32BInput()) {
continue;
}
gnalog() << "CNNNetPrevLayer skip non functional from :: " << l->name;
auto isFunctional = [](CNNLayerPtr ptr) {
return !LayerInfo(ptr).isNonFunctional();
@@ -1310,7 +1311,7 @@ void FuseMultipleIdentitiesPass::run() {
return LayerInfo(candidate.first).isLink();
}), prevLayersReached.end());
if (prevLayersReached.size() != 1 && eltwise == nullptr && concat == nullptr) {
if (prevLayersReached.size() != 1) {
std::stringstream layers;
for (auto && prevLayer : prevLayersReached) {
layers << prevLayer.first->name;
@@ -1361,7 +1362,6 @@ void FuseMultipleIdentitiesPass::run() {
}
int PassManager::run(int index) {
// #define PLOT
#ifdef PLOT
auto dumpNetworkAfterPass = [&index, this] (std::shared_ptr<Pass> pass) {
std::string name = std::string("gna_passes_") + (index < 10 ? "0" : "") + std::to_string(index) + "_" + pass->getName();

View File

@@ -22,9 +22,7 @@ void CNNFilter32(intel_dnn_component_t *component) {
uint32_t num_filter_coefficients = component->op.conv1D.num_filter_coefficients;
std::string layer_name;
#ifdef PLOT
layer_name = " In layer '" + std::string(component->original_layer_name) + "'";
#endif
if (component->num_rows_in != 1 || component->num_rows_out != 1) {
THROW_GNA_EXCEPTION << "Bad number of rows in CNNFilter32!" << layer_name;
}

View File

@@ -0,0 +1,88 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gna_plugin_log.hpp>
#include <cstdint>
#include <backend/dnn_types.h>
#include "gna_float_runtime.hpp"
using namespace GNAPluginNS;
using namespace GNAPluginNS::runtime;
// Executes the compiled model on the CPU in FP32: walks the component list in
// order and dispatches each component to the matching Apply* kernel.
// An "active list" (subset of output rows) is only honoured for the last
// component, or for the last two when they form an Affine + PWL pair.
void FP::infer() {
if (!dnn) {
THROW_GNA_EXCEPTION << "[GNA FP32 RUNTIME] not initialized";
}
for (uint32_t i = 0; i < dnn->component.size(); i++) {
intel_dnn_component_t *comp = &dnn->component[i];
uint32_t *ptr_active_outputs = nullptr;
// Output count depends on buffer orientation: rows when interleaved,
// columns otherwise.
uint32_t num_active_outputs = (comp->orientation_out == kDnnInterleavedOrientation)
? comp->num_rows_out : comp->num_columns_out;
if (i == dnn->component.size() - 1) { // active list applies to last component
ptr_active_outputs = dnn->ptr_active_outputs();
num_active_outputs = dnn->num_active_outputs();
} else if (i == dnn->component.size() - 2) { // also applies to last two components when last is PWL
if ((dnn->component[i].operation == kDnnAffineOp) && (dnn->component[i + 1].operation == kDnnPiecewiselinearOp)) {
ptr_active_outputs = dnn->ptr_active_outputs();
num_active_outputs = dnn->num_active_outputs(); }
}
switch (comp->operation) {
case kDnnAffineOp : {
ApplyAffineTransform(comp, ptr_active_outputs, num_active_outputs);
break;
}
case kDnnDiagonalOp: {
ApplyDiagonalTransform(comp);
break;
}
case kDnnRecurrentOp: {
// A recurrent component must be immediately followed by a PWL one:
// each input row is processed and then activated row-by-row, with the
// feedback pointer advanced by one PWL output row per iteration.
if ((i < dnn->component.size() - 1) && (dnn->component[i + 1].operation == kDnnPiecewiselinearOp)) {
intel_dnn_component_t *comp_pwl = &dnn->component[i + 1];
for (uint32_t j = 0; j < comp->num_rows_in; j++) {
void *ptr_feedbacks =
reinterpret_cast<void *>(reinterpret_cast<int32_t *>(comp->op.recurrent.ptr_feedbacks)
+ j * comp_pwl->num_columns_out);
ApplyRecurrentTransform(comp, j, ptr_feedbacks);
ApplyPiecewiseLinearTransform(comp_pwl, kDnnFloat, num_active_outputs, j);
}
i++; // skip next component
} else {
THROW_GNA_EXCEPTION << "Missing PiecewiseLinear component after Recurrent component in Propagate!";
}
break;
}
case kDnnConvolutional1dOp: {
ApplyConvolutional1DTransform(comp);
break;
}
case kDnnPiecewiselinearOp: {
ApplyPiecewiseLinearTransform(comp, kDnnFloat, num_active_outputs);
break;
}
case kDnnMaxPoolOp: {
ApplyMaxPoolTransform(comp, kDnnFloat);
break;
}
case kDnnInterleaveOp: {
ApplyTranspose(comp);
break;
}
case kDnnDeinterleaveOp: {
ApplyTranspose(comp);
break;
}
case kDnnCopyOp: {
ApplyCopy(comp);
break;
}
default:
THROW_GNA_EXCEPTION << "[GNA FP32 RUNTIME] Bad operation " << comp->operation;
}
}
}

View File

@@ -0,0 +1,41 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <backend/am_intel_dnn.hpp>
namespace GNAPluginNS {
namespace runtime {
/**
* @brief floating runtime for gna-plugin, in most case it uses same gna-primitives description as integer runtime, but execute them on CPU
*/
/**
 * @brief floating runtime for gna-plugin, in most case it uses same gna-primitives description as integer runtime, but execute them on CPU
 */
class FP {
    // Shared model description; must stay alive while the runtime is in use.
    std::shared_ptr<backend::AMIntelDNN> dnn;
 public:
    // explicit: an AMIntelDNN pointer should not silently convert to a runtime.
    explicit FP(std::shared_ptr<backend::AMIntelDNN> dnn) : dnn(dnn) {
    }
    // Polymorphic interface (virtual infer) requires a virtual destructor so
    // deleting a derived runtime through an FP* is well-defined.
    virtual ~FP() = default;
    virtual void infer();
    /**
     * atomic operations for floating inference
     */
    static void ApplyAffineTransform(intel_dnn_component_t *component, uint32_t *list, uint32_t listsize);
    static void ApplyDiagonalTransform(intel_dnn_component_t *component);
    static void ApplyRecurrentTransform(intel_dnn_component_t *component, uint32_t row, void *ptr_feedbacks);
    static void ApplyConvolutional1DTransform(intel_dnn_component_t *component);
    static void ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
                                              intel_dnn_number_type_t number_type,
                                              uint32_t listsize);
    static void ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
                                              intel_dnn_number_type_t number_type,
                                              uint32_t listsize,
                                              uint32_t num_row);
    static void ApplyMaxPoolTransform(intel_dnn_component_t *component, intel_dnn_number_type_t number_type);
    static void ApplyTranspose(intel_dnn_component_t *component);
    static void ApplyCopy(intel_dnn_component_t *component);
};
} // namespace runtime
} // namespace GNAPluginNS

View File

@@ -0,0 +1,184 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "gna_float_runtime.hpp"
#include "pwl.h"
#include "cnn.h"
#include "floatmath.h"
using namespace GNAPluginNS;
using namespace GNAPluginNS::runtime;
/**
 * @brief Dense affine layer in fp32: C = weights * inputs + bias.
 * When @p list is non-null, only the listed output rows are computed (active-list mode).
 * Loop indices are signed to match the signed dimension variables and avoid
 * signed/unsigned comparison issues present in the original loops.
 */
void FP::ApplyAffineTransform(intel_dnn_component_t *component, uint32_t *list, uint32_t listsize) {
    if (4 != component->num_bytes_per_input) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }

    auto transform = &component->op.affine;
    int m = component->num_rows_out;
    int n = component->num_columns_in;
    int k = component->num_rows_in;
    int lda = component->num_rows_in;
    int ldb = component->num_columns_in;
    int ldc = component->num_columns_out;

    auto A = reinterpret_cast<float *>(transform->ptr_weights);
    auto B = reinterpret_cast<float *>(component->ptr_inputs);
    auto C = reinterpret_cast<float *>(component->ptr_outputs);
    auto bias = reinterpret_cast<float *>(transform->ptr_biases);
    if (list == nullptr) {
        // seed every output row with its bias, then accumulate the matrix product
        for (int i = 0; i < m; i++) {
            for (int j = 0; j < n; j++) {
                C[i * ldc + j] = bias[i];
            }
        }
        cblas_sgemm1(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, A, lda, B, ldb, 1.0, C, ldc);
    } else {
        // active-list mode: output row l corresponds to weight/bias row list[l]
        for (uint32_t l = 0; l < listsize; l++) {
            int i = list[l];
            for (int j = 0; j < n; j++) {
                C[l * ldc + j] = bias[i];
            }
        }
        cblas_sgemm_subset(CblasRowMajor,
                           CblasNoTrans,
                           CblasNoTrans,
                           m,
                           n,
                           k,
                           1.0,
                           A,
                           lda,
                           B,
                           ldb,
                           1.0,
                           C,
                           ldc,
                           list,
                           listsize);
    }
}
/**
 * @brief Diagonal (element-wise) affine layer in fp32.
 * Outputs are seeded with biases, then each column is updated via a banded
 * matrix-vector product with band width 0 (i.e. a purely diagonal weight matrix).
 * Loop indices are signed to match the signed dimension variables and avoid
 * signed/unsigned comparison issues present in the original loops.
 */
void FP::ApplyDiagonalTransform(intel_dnn_component_t *component) {
    if (4 != component->num_bytes_per_input) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }

    auto transform = &component->op.affine;
    int m = component->num_rows_out;
    int n = component->num_columns_in;
    int ldb = component->num_columns_in;
    int ldc = component->num_columns_out;

    auto A = reinterpret_cast<float *>(transform->ptr_weights);
    auto B = reinterpret_cast<float *>(component->ptr_inputs);
    auto C = reinterpret_cast<float *>(component->ptr_outputs);
    auto bias = reinterpret_cast<float *>(transform->ptr_biases);
    // seed outputs with biases before accumulating the diagonal product
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
            C[i * ldc + j] = bias[i];
        }
    }
    for (int j = 0; j < n; j++) {
        float *Bcol = B + j * ldb;
        float *Ccol = C + j * ldc;
        cblas_ssbmv1(CblasRowMajor, CblasLower, m, 0, 1.0, A, 1, Bcol, 1, 1.0, Ccol, 1);
    }
}
/**
 * @brief Recurrent layer step for one input row in fp32.
 * Computes C = [A1 | A2] * X + B for the given @p row, where A2 is the
 * feedback buffer passed by the caller.
 * Fix: the original only null-checked component->op.recurrent.ptr_feedbacks,
 * while the pointer actually dereferenced is the @p ptr_feedbacks argument —
 * both are validated now.
 */
void FP::ApplyRecurrentTransform(intel_dnn_component_t *component, uint32_t row, void *ptr_feedbacks) {
    if (4 != component->num_bytes_per_input) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }

    intel_recurrent_t *transform = &component->op.recurrent;
    int k1 = component->num_columns_in;
    int k2 = component->num_columns_out;
    int n = k2;

    if (transform->ptr_feedbacks == nullptr || ptr_feedbacks == nullptr) {
        THROW_GNA_EXCEPTION << "nullptr feedback pointer";
    }
    auto A1 = reinterpret_cast<float *>(component->ptr_inputs) + row * component->num_columns_in;
    auto A2 = reinterpret_cast<float *>(ptr_feedbacks);
    auto X = reinterpret_cast<float *>(transform->ptr_weights);
    auto B = reinterpret_cast<float *>(transform->ptr_biases);
    auto C = reinterpret_cast<float *>(component->ptr_outputs) + row * component->num_columns_out;
    sgemv_split(n, k1, k2, A1, A2, X, B, C);
}
/**
 * @brief 1D convolution executed in fp32 on CPU; inputs must be 4-byte floats.
 */
void FP::ApplyConvolutional1DTransform(intel_dnn_component_t *component) {
    const auto bytesPerInput = component->num_bytes_per_input;
    if (bytesPerInput != 4) {
        THROW_GNA_EXCEPTION << "Bad data width: " << bytesPerInput;
    }
    CNNFilter32(component);
}
/**
 * @brief Piecewise-linear activation applied to the whole component in fp32.
 * Only the float number type is supported by this runtime.
 */
void FP::ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
                                       intel_dnn_number_type_t number_type,
                                       uint32_t listsize) {
    if (number_type != kDnnFloat) {
        THROW_GNA_EXCEPTION << "Bad number type: " << number_type;
    }
    PwlApply32(component, listsize);
}
// Applies the piecewise-linear activation to a single row (num_row) of the
// component, over the inclusive column range [0, listsize - 1].
// NOTE(review): listsize == 0 would wrap to 0xFFFFFFFF in the last argument —
// presumably callers always pass a non-zero size; confirm before relying on it.
void FP::ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
intel_dnn_number_type_t number_type,
uint32_t listsize,
uint32_t num_row) {
// this fp32 runtime supports only the float number type
if (kDnnFloat != number_type) {
THROW_GNA_EXCEPTION << "Bad number type: " << number_type;
}
PwlApply32(component, num_row, num_row, 0, listsize - 1);
}
/**
 * @brief Max-pooling executed on CPU; inputs must be 4-byte floats.
 */
void FP::ApplyMaxPoolTransform(intel_dnn_component_t *component, intel_dnn_number_type_t number_type) {
    const auto bytesPerInput = component->num_bytes_per_input;
    if (bytesPerInput != 4) {
        THROW_GNA_EXCEPTION << "Bad data width: " << bytesPerInput;
    }
    CNNMaxPool(component, number_type);
}
/**
 * @brief Transpose in fp32: B = Transpose(A), where A is m x n and B is n x m.
 * Used for both interleave and deinterleave components.
 * Loop indices are signed to match the signed dimension variables and avoid
 * the signed/unsigned comparisons present in the original loops.
 */
void FP::ApplyTranspose(intel_dnn_component_t *component) {
    if (4 != component->num_bytes_per_input) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }

    int m = component->num_rows_in;
    int n = component->num_columns_in;
    int lda = component->num_columns_in;
    int ldb = component->num_columns_out;

    auto A = reinterpret_cast<float *>(component->ptr_inputs);
    auto B = reinterpret_cast<float *>(component->ptr_outputs);
    for (int row = 0; row < m; row++) {
        for (int col = 0; col < n; col++) {
            B[col * ldb + row] = A[row * lda + col];
        }
    }
}
/**
 * @brief Copies an m x n sub-matrix from inputs to outputs in fp32, honoring
 * the source and destination leading dimensions.
 * Fixes: the bounds check is on rows but the original message said "columns";
 * the redundant uint8_t* intermediate casts are removed; the loop index type
 * now matches the signed dimensions.
 */
void FP::ApplyCopy(intel_dnn_component_t *component) {
    if (4 != component->num_bytes_per_input) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }

    int32_t m = component->op.copy.num_copy_rows;
    int32_t n = component->op.copy.num_copy_columns;
    int32_t lda = component->num_columns_in;
    int32_t ldb = component->num_columns_out;
    if (m > component->num_rows_in) {
        THROW_GNA_EXCEPTION << "Error: attempt to copy more rows than matrix has";
    }

    auto A = reinterpret_cast<float *>(component->ptr_inputs);
    auto B = reinterpret_cast<float *>(component->ptr_outputs);
    for (int32_t row = 0; row < m; row++) {
        for (int32_t col = 0; col < n; col++) {
            B[row * ldb + col] = A[row * lda + col];
        }
    }
}

View File

@@ -1046,9 +1046,33 @@ void PwlApply32(intel_dnn_component_t *component,
}
}
break;
case kActFakeQuantize: {
auto input_low = transform->func_id.args.fakeQuantize.input_low;
auto input_high = transform->func_id.args.fakeQuantize.input_high;
auto output_low = transform->func_id.args.fakeQuantize.output_low;
auto output_high = transform->func_id.args.fakeQuantize.output_high;
auto levels = transform->func_id.args.fakeQuantize.levels;
// TODO: this special modification for speeding up compute gives a different result than the straight FQ formula,
// but it is used in the reference ngraph FakeQuantize implementation so we need to honor it for a while
float scaleInput = (input_high - input_low) / (levels-1);
float scaleOutputs = (output_high - output_low) / (levels-1);
for (uint32_t i = num_row_start; i <= num_row_end; i++) {
for (uint32_t j = num_col_start; j <= num_col_end; j++) {
auto x = ptr_in[i * num_columns + j];
if (x < std::min(input_low, input_high)) {
ptr_out[i * num_columns + j] = output_low;
} else if (x > std::max(input_low, input_high)) {
ptr_out[i * num_columns + j] = output_high;
} else {
ptr_out[i * num_columns + j] = nearbyint((x - input_low) / scaleInput) * scaleOutputs + output_low;
}
}
}
break;
}
case kActCustom:
// break;
default:fprintf(stderr, "Unknown piecewise linear function type!\n");
throw -1;
default:
THROW_GNA_EXCEPTION << component->original_layer_name << ", Unknown piecewise linear function type: " << transform->func_id.type;
}
}

View File

@@ -20,9 +20,16 @@ const std::vector<std::vector<size_t>> inputShapes = {{1, 1, 1, 1}, {3, 10, 5, 6
const std::vector<std::vector<size_t>> constShapes = {{1}};
const std::vector<size_t> levels = {16, 255, 256};
const std::pair<std::string, std::map<std::string, std::string>> config = {};
const std::vector<float> fqArgs = {};
const std::vector<float> inputParams = {};
const auto fqParams = ::testing::Combine(
::testing::ValuesIn(levels),
::testing::ValuesIn(constShapes)
::testing::ValuesIn(constShapes),
::testing::Values(fqArgs),
::testing::Values(inputParams)
);
INSTANTIATE_TEST_CASE_P(FakeQuantize, FakeQuantizeLayerTest,
@@ -30,7 +37,8 @@ INSTANTIATE_TEST_CASE_P(FakeQuantize, FakeQuantizeLayerTest,
fqParams,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(inputShapes),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::Values(config)),
FakeQuantizeLayerTest::getTestCaseName);
} // namespace

View File

@@ -0,0 +1,68 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <gna/gna_config.hpp>
#include "single_layer_tests/fake_quantize.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
};
using ConfigType = std::map<std::string, std::string>;
const ConfigType configFP32 = {
{"GNA_DEVICE_MODE", "GNA_SW_FP32"},
};
const ConfigType configInt16 = {
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{InferenceEngine::GNAConfigParams::KEY_GNA_PRECISION, "I16"},
{"GNA_SCALE_FACTOR_0", "327.67"}
};
const ConfigType configInt8 = {
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{InferenceEngine::GNAConfigParams::KEY_GNA_PRECISION, "I8"},
{"GNA_SCALE_FACTOR_0", "327.67"}
};
/**
* @brief specific quantisation mode to be used internally
*/
const std::vector<std::pair<std::string, ConfigType>> gnaQuantModes = {
{"sw_fp32", configFP32},
// TODO: support FakeQuantize in integer mode
// {"sw_exact_i16", configInt16},
// {"sw_exact_i8", configInt8},
};
// TODO: uncomment once fixed proper 4d import for GNA-plugin issue: 38806
const std::vector<std::vector<size_t>> inputShapes = {{1, 1, 1, 1}, /*{3, 10, 5, 6}*/};
const std::vector<std::vector<size_t>> constShapes = {{1}};
const std::vector<size_t> levels = {16, 255, 256};
const std::vector<std::vector<float>> fqArgs = {{0, 10, 2, 5}, {}};
const std::vector<std::vector<float>> inputParams = {{-10, 10, 0.1}, {}};
const auto fqParams = ::testing::Combine(
::testing::ValuesIn(levels),
::testing::ValuesIn(constShapes),
::testing::ValuesIn(fqArgs),
::testing::ValuesIn(inputParams)
);
INSTANTIATE_TEST_CASE_P(FakeQuantize, FakeQuantizeLayerTest,
::testing::Combine(
fqParams,
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(inputShapes),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(gnaQuantModes)),
FakeQuantizeLayerTest::getTestCaseName);
} // namespace

View File

@@ -14,14 +14,18 @@
#include "ngraph_functions/utils/ngraph_helpers.hpp"
typedef std::tuple<
size_t, // levels
std::vector<size_t> // const inputs shape
size_t, // levels
std::vector<size_t>, // const inputs shape
std::vector<float>, // fake quantize inputLow, inputHigh, outputLow, outputHigh or empty for random
std::vector<float> // input generator data: low, high, resolution
> fqSpecificParams;
typedef std::tuple<
fqSpecificParams,
InferenceEngine::Precision, // Net precision
InferenceEngine::SizeVector, // Input shapes
LayerTestsUtils::TargetDevice // Device name
InferenceEngine::Precision, // Net precision
InferenceEngine::SizeVector, // Input shapes
LayerTestsUtils::TargetDevice, // Device name
std::pair<std::string, std::map<std::string, std::string>> // Additional backend configuration and an alias name for it
> fqLayerTestParamsSet;
namespace LayerTestsDefinitions {
@@ -30,9 +34,16 @@ class FakeQuantizeLayerTest : public testing::WithParamInterface<fqLayerTestPara
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<fqLayerTestParamsSet> obj);
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
protected:
void SetUp() override;
void UpdateSeed();
protected:
float inputDataMin = 0.0;
float inputDataMax = 10.0;
float inputDataResolution = 1.0;
int32_t seed = 1;
};
} // namespace LayerTestsDefinitions

View File

@@ -18,6 +18,17 @@
#include "single_layer_tests/fake_quantize.hpp"
// seed selected using current clock time
#define USE_CLOCK_TIME 1
// seed started from default value, and incremented every time using big number like 9999
#define USE_INCREMENTAL_SEED 2
/**
* redefine this seed to reproduce issue with given seed that can be read from gtest logs
*/
#define BASE_SEED USE_CLOCK_TIME
#define NGRAPH_SEED USE_CLOCK_TIME
namespace LayerTestsDefinitions {
std::string FakeQuantizeLayerTest::getTestCaseName(testing::TestParamInfo<fqLayerTestParamsSet> obj) {
@@ -25,10 +36,13 @@ std::string FakeQuantizeLayerTest::getTestCaseName(testing::TestParamInfo<fqLaye
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShapes;
std::string targetDevice;
std::tie(fqParams, netPrecision, inputShapes, targetDevice) = obj.param;
std::pair<std::string, std::map<std::string, std::string>> config;
std::tie(fqParams, netPrecision, inputShapes, targetDevice, config) = obj.param;
size_t levels;
std::vector<size_t> constShape;
std::tie(levels, constShape) = fqParams;
std::vector<float> fqDirectArgs;
std::vector<float> inputArg;
std::tie(levels, constShape, fqDirectArgs, inputArg) = fqParams;
std::ostringstream result;
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
@@ -36,29 +50,101 @@ std::string FakeQuantizeLayerTest::getTestCaseName(testing::TestParamInfo<fqLaye
result << "LEVELS=" << levels << "_";
result << "netPRC=" << netPrecision.name() << "_";
result << "targetDevice=" << targetDevice;
if (!config.first.empty()) {
result << "_targetConfig=" << config.first;
}
if (!fqDirectArgs.empty()) {
result << "_fqArgs=" << fqDirectArgs[0] << "_" << fqDirectArgs[1] << "_" << fqDirectArgs[2] << "_" << fqDirectArgs[3];
}
if (inputArg.size() == 3) {
result << "_inputArg=" << inputArg[0] << "_" << inputArg[1] << "_" << inputArg[2];
}
return result.str();
}
void FakeQuantizeLayerTest::SetUp() {
fqSpecificParams fqParams;
std::vector<size_t> inputShape;
std::pair<std::string, std::map<std::string, std::string>> config;
auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
std::tie(fqParams, netPrecision, inputShape, targetDevice) = this->GetParam();
std::tie(fqParams, netPrecision, inputShape, targetDevice, config) = this->GetParam();
InferenceEngine::SizeVector kernel, stride, dilation;
size_t levels;
std::vector<size_t> constShape;
std::tie(levels, constShape) = fqParams;
std::vector<float> fqDirectArg;
std::vector<float> inputArg;
std::tie(levels, constShape, fqDirectArg, inputArg) = fqParams;
if (inputArg.size() == 3) {
inputDataMin = inputArg[0];
inputDataMax = inputArg[1];
inputDataResolution = inputArg[2];
}
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
auto fq = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, levels, constShape));
UpdateSeed();
std::shared_ptr<ngraph::Node> fakeQNode;
if (fqDirectArg.empty()) {
int32_t ngraphSeed = seed;
if (NGRAPH_SEED != USE_CLOCK_TIME) {
ngraphSeed = NGRAPH_SEED;
}
std::cout << "\033[0;32m" << "[ ] " << "\033[0;0m"
<< "ngraphSeed = " << ngraphSeed << std::endl;
fakeQNode = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, levels, constShape, ngraphSeed);
} else {
fakeQNode = ngraph::builder::makeFakeQuantize(
paramOuts[0],
ngPrc,
levels,
constShape,
{fqDirectArg[0]},
{fqDirectArg[1]},
{fqDirectArg[2]},
{fqDirectArg[3]});
}
auto fq = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(fakeQNode);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(fq)};
function = std::make_shared<ngraph::Function>(results, params, "fakeQuantize");
configuration = config.second;
}
InferenceEngine::Blob::Ptr FakeQuantizeLayerTest::GenerateInput(const InferenceEngine::InputInfo &info) const {
return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, 1 / inputDataResolution, seed);
}
void FakeQuantizeLayerTest::UpdateSeed() {
if (BASE_SEED == USE_CLOCK_TIME) {
seed = std::chrono::system_clock::now().time_since_epoch().count();
} else if (BASE_SEED == USE_INCREMENTAL_SEED) {
seed += 9999;
} else {
seed = BASE_SEED;
}
std::cout << "\033[0;32m" << "[ ] " << "\033[0;0m"
<< "seed = " << seed << std::endl;
}
TEST_P(FakeQuantizeLayerTest, CompareWithRefs) {
Run();
SKIP_IF_CURRENT_TEST_IS_DISABLED();
if (BASE_SEED != USE_CLOCK_TIME &&
BASE_SEED != USE_INCREMENTAL_SEED) {
return;
}
size_t nIterations = (inputDataMax - inputDataMin) / inputDataResolution;
for (; nIterations != 0; nIterations--) {
UpdateSeed();
Infer();
Validate();
}
}
} // namespace LayerTestsDefinitions

View File

@@ -110,7 +110,7 @@ static void fill_data_bbox(float *data, size_t size, int height, int width, floa
* - With k = 4 numbers resolution will 1/4 so outputs only .0 .25 .50 0.75 and etc.
*/
template<InferenceEngine::Precision::ePrecision PRC>
void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, const uint32_t range = 10, int32_t start_from = 0, const int32_t k = 1) {
void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, const uint32_t range = 10, int32_t start_from = 0, const int32_t k = 1, const int seed = 1) {
using dataType = typename InferenceEngine::PrecisionTrait<PRC>::value_type;
testing::internal::Random random(1);
random.Generate(range);
@@ -144,8 +144,7 @@ void inline fill_data_consistently(InferenceEngine::Blob::Ptr &blob, const uint3
}
template<InferenceEngine::Precision::ePrecision PRC>
void inline fill_data_random_float(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k,
const int seed = 1) {
void inline fill_data_random_float(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k, const int seed = 1) {
using dataType = typename InferenceEngine::PrecisionTrait<PRC>::value_type;
std::default_random_engine random(seed);
// 1/k is the resolution of the floating point numbers
@@ -199,13 +198,20 @@ void inline fill_data_float_array(InferenceEngine::Blob::Ptr &blob, const float
}
template<>
void inline fill_data_random<InferenceEngine::Precision::FP32>(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k) {
fill_data_random_float<InferenceEngine::Precision::FP32>(blob, range, start_from, k);
void inline fill_data_random<InferenceEngine::Precision::FP32>(InferenceEngine::Blob::Ptr &blob,
const uint32_t range,
int32_t start_from,
const int32_t k,
const int seed) {
fill_data_random_float<InferenceEngine::Precision::FP32>(blob, range, start_from, k, seed);
}
template<>
void inline fill_data_random<InferenceEngine::Precision::FP16>(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k) {
fill_data_random_float<InferenceEngine::Precision::FP16>(blob, range, start_from, k);
void inline fill_data_random<InferenceEngine::Precision::FP16>(InferenceEngine::Blob::Ptr &blob,
const uint32_t range,
int32_t start_from,
const int32_t k, const int seed) {
fill_data_random_float<InferenceEngine::Precision::FP16>(blob, range, start_from, k, seed);
}
} // namespace CommonTestUtils

View File

@@ -455,11 +455,12 @@ InferenceEngine::Blob::Ptr inline createAndFillBlobWithFloatArray(const Inferenc
InferenceEngine::Blob::Ptr inline createAndFillBlob(const InferenceEngine::TensorDesc &td,
const uint32_t range = 10,
const int32_t start_from = 0,
const int32_t resolution = 1) {
const int32_t resolution = 1,
const int seed = 1) {
InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td);
blob->allocate();
switch (td.getPrecision()) {
#define CASE(X) case X: CommonTestUtils::fill_data_random<X>(blob, range, start_from, resolution); break;
#define CASE(X) case X: CommonTestUtils::fill_data_random<X>(blob, range, start_from, resolution, seed); break;
CASE(InferenceEngine::Precision::FP32)
CASE(InferenceEngine::Precision::FP16)
CASE(InferenceEngine::Precision::U8)

View File

@@ -26,14 +26,14 @@ makeParams(const element::Type &type, const std::vector<std::pair<std::string, s
template<typename T>
std::shared_ptr<Node> makeConstant(const element::Type &type, const std::vector<size_t> &shape,
const std::vector<T> &data, bool random = false,
uint32_t upTo = 10, uint32_t startFrom = 1) {
uint32_t upTo = 10, uint32_t startFrom = 1, const int seed = 1) {
std::shared_ptr<ngraph::Node> weightsNode;
#define makeNode(TYPE) \
case TYPE: \
weightsNode = std::make_shared<ngraph::opset1::Constant>( \
type, shape, \
random ? NGraphFunctions::Utils::generateVector<TYPE>(ngraph::shape_size(shape), upTo, startFrom) : \
random ? NGraphFunctions::Utils::generateVector<TYPE>(ngraph::shape_size(shape), upTo, startFrom, seed) : \
NGraphFunctions::Utils::castVector<T, ngraph::helpers::nGraphTypesTrait<TYPE>::value_type >(data)); \
break;
switch (type) {
@@ -274,7 +274,8 @@ std::shared_ptr<Node> makeFakeQuantize(const ngraph::Output<Node> &in,
std::shared_ptr<Node> makeFakeQuantize(const ngraph::Output<Node> &in,
const element::Type &type,
std::size_t levels,
std::vector<size_t> constShapes);
std::vector<size_t> constShapes,
const int32_t seed = 1);
std::shared_ptr<ngraph::Node> makeCumSum(const ngraph::Output<Node> &in,
const ngraph::Output<Node> &axis,

View File

@@ -17,11 +17,14 @@ namespace Utils {
template<ngraph::element::Type_t dType>
std::vector<typename ngraph::helpers::nGraphTypesTrait<dType>::value_type> inline
generateVector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1) {
generateVector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1, int32_t seed = 1) {
std::vector<typename ngraph::helpers::nGraphTypesTrait<dType>::value_type> res;
std::mt19937 gen(
static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
if (seed == 1) {
seed = static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count());
}
std::mt19937 gen(seed);
// chose values between this range to avoid type overrun (e.g. in case of I8 precision)
std::uniform_int_distribution<unsigned long> dist(startFrom, upTo);
@@ -32,11 +35,14 @@ generateVector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1) {
return res;
}
std::vector<ngraph::float16> inline generateF16Vector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1) {
std::vector<ngraph::float16> inline generateF16Vector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1, int32_t seed = 1) {
std::vector<ngraph::float16> res;
std::mt19937 gen(
static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
if (seed == 1) {
seed = static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count());
}
std::mt19937 gen(seed);
// chose values between this range to avoid type overrun (e.g. in case of I8 precision)
std::uniform_int_distribution<unsigned long> dist(startFrom, upTo);
@@ -46,11 +52,13 @@ std::vector<ngraph::float16> inline generateF16Vector(size_t vec_len, uint32_t u
return res;
}
std::vector<ngraph::bfloat16> inline generateBF16Vector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1) {
std::vector<ngraph::bfloat16> inline generateBF16Vector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1, int32_t seed = 1) {
std::vector<ngraph::bfloat16> res;
std::mt19937 gen(
static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
if (seed == 1) {
seed = static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count());
}
std::mt19937 gen(seed);
// chose values between this range to avoid type overrun (e.g. in case of I8 precision)
std::uniform_int_distribution<unsigned long> dist(startFrom, upTo);

View File

@@ -32,18 +32,19 @@ std::shared_ptr<Node> makeFakeQuantize(const ngraph::Output<Node> &in,
std::shared_ptr<ngraph::Node> makeFakeQuantize(const ngraph::Output<ngraph::Node> &in,
const ngraph::element::Type &type,
std::size_t levels,
std::vector<size_t> constShapes) {
std::vector<size_t> constShapes,
const int32_t seed) {
size_t constDataSize = ngraph::shape_size(constShapes);
std::vector<float> inputLowData, inputHighData, outputLowData, outputHighData;
inputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
inputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
if (levels != 2) {
inputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
inputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
} else {
inputHighData = inputLowData;
outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
for (int i = 0; i < constDataSize; i++) {
if (outputLowData[i] > outputHighData[i]) {
@@ -70,10 +71,10 @@ std::shared_ptr<ngraph::Node> makeFakeQuantize(const ngraph::Output<ngraph::Node
outputHighData[i] += 1;
}
auto inputLowNode = ngraph::builder::makeConstant(type, constShapes, inputLowData, inputLowData.empty());
auto inputHighNode = ngraph::builder::makeConstant(type, constShapes, inputHighData, inputHighData.empty());
auto outputLowNode = ngraph::builder::makeConstant(type, constShapes, outputLowData, outputLowData.empty());
auto outputHighNode = ngraph::builder::makeConstant(type, constShapes, outputHighData, outputHighData.empty());
auto inputLowNode = ngraph::builder::makeConstant(type, constShapes, inputLowData, inputLowData.empty(), seed);
auto inputHighNode = ngraph::builder::makeConstant(type, constShapes, inputHighData, inputHighData.empty(), seed);
auto outputLowNode = ngraph::builder::makeConstant(type, constShapes, outputLowData, outputLowData.empty(), seed);
auto outputHighNode = ngraph::builder::makeConstant(type, constShapes, outputHighData, outputHighData.empty(), seed);
auto fq = std::make_shared<ngraph::opset1::FakeQuantize>(in, inputLowNode, inputHighNode, outputLowNode, outputHighNode, levels);

View File

@@ -224,7 +224,8 @@ void GNAPropagateMatcher :: match() {
ASSERT_NO_THROW_IE_EXCEPTION(network = CNNNetwork(_env.ngraph_model));
ASSERT_NO_FATAL_FAILURE(loadCNNNetwork(network));
#ifdef GNA_DEBUG
network.serialize("CNNNetworkFromNgraphModel.xml", "CNNNetworkFromNgraphModel.bin");
// TODO: crash on activation tests so far on addOutput call
// network.serialize("CNNNetworkFromNgraphModel.xml", "CNNNetworkFromNgraphModel.bin");
#endif
}
else if (!_env.importedModelFileName.empty()) {