[GNA] fake quantize single layer tests for GNA plugin (#2060)
* fake quantize single layer test for GNA plugin * implemented FakeQuantize for fp32 case as an activation function * added proper seed randomisation within single test run * [GNA] [FAKEQUANTIZE] fixed ref-fp32 implementation on GNA to use nearbyint instead of roundf * [GNA] [FAKEQUANTIZE] restored random seed * [GNA] [FAKEQUANTIZE] disabled 4D and integer tests for FakeQuantize * [GNA] [FAKEQUANTIZE] updated ngraph FakeQuantize builder to accept seed * [GNA] [FAKEQUANTIZE] aligned FP calculations order on GNA with reference ngraph - this however gives more error * [CPU] build of FakeQuantize tests restored * [TESTS] [FAKEQUANTIZE] ignore extra inferRequests for disabled tests * [GNA] Fixed legacy unit test failures that appeared due to extra check for possible segfault in import frames * [GNA] adopted fuse multiple identities for FakeQuantize layer * [GNA] fp32 runtime code review
This commit is contained in:
@@ -380,65 +380,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dn
|
||||
}
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::Propagate() {
|
||||
for (uint32_t i = 0; i < component.size(); i++) {
|
||||
intel_dnn_component_t *comp = &component[i];
|
||||
uint32_t *ptr_active_outputs = nullptr;
|
||||
uint32_t num_active_outputs = (comp->orientation_out == kDnnInterleavedOrientation)
|
||||
? comp->num_rows_out : comp->num_columns_out;
|
||||
|
||||
if (i == component.size() - 1) { // active list applies to last component
|
||||
ptr_active_outputs = ptr_active_outputs_;
|
||||
num_active_outputs = num_active_outputs_;
|
||||
} else if (i == component.size() - 2) { // also applies to last two components when last is PWL
|
||||
if ((component[i].operation == kDnnAffineOp) && (component[i + 1].operation == kDnnPiecewiselinearOp)) {
|
||||
ptr_active_outputs = ptr_active_outputs_;
|
||||
num_active_outputs = num_active_outputs_;
|
||||
}
|
||||
}
|
||||
|
||||
switch (comp->operation) {
|
||||
case kDnnAffineOp :ApplyAffineTransform(comp, ptr_active_outputs, num_active_outputs);
|
||||
break;
|
||||
case kDnnDiagonalOp:ApplyDiagonalTransform(comp);
|
||||
break;
|
||||
case kDnnRecurrentOp:
|
||||
if ((i < component.size() - 1) && (component[i + 1].operation == kDnnPiecewiselinearOp)) {
|
||||
intel_dnn_component_t *comp_pwl = &component[i + 1];
|
||||
for (uint32_t j = 0; j < comp->num_rows_in; j++) {
|
||||
void *ptr_feedbacks =
|
||||
reinterpret_cast<void *>(reinterpret_cast<int32_t *>(comp->op.recurrent.ptr_feedbacks) + j * comp_pwl->num_columns_out);
|
||||
ApplyRecurrentTransform(comp, j, ptr_feedbacks);
|
||||
// PrintOutputs(i);
|
||||
ApplyPiecewiseLinearTransform(comp_pwl, compute_precision_, num_active_outputs, j);
|
||||
}
|
||||
i++; // skip next component
|
||||
} else {
|
||||
fprintf(stderr, "Missing PiecewiseLinear component after Recurrent component in Propagate!\n");
|
||||
throw -1;
|
||||
}
|
||||
break;
|
||||
case kDnnConvolutional1dOp:ApplyConvolutional1DTransform(comp);
|
||||
break;
|
||||
case kDnnPiecewiselinearOp:ApplyPiecewiseLinearTransform(comp, compute_precision_, num_active_outputs);
|
||||
break;
|
||||
case kDnnMaxPoolOp:ApplyMaxPoolTransform(comp, compute_precision_);
|
||||
break;
|
||||
case kDnnInterleaveOp:ApplyTranspose(comp);
|
||||
break;
|
||||
case kDnnDeinterleaveOp:ApplyTranspose(comp);
|
||||
break;
|
||||
case kDnnCopyOp:ApplyCopy(comp);
|
||||
break;
|
||||
default:fprintf(stderr, "Bad operation in Propagate!\n");
|
||||
throw -1;
|
||||
break;
|
||||
}
|
||||
// PrintOutputs(i); fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Returns the output_scale_factor field stored on the given component.
float GNAPluginNS::backend::AMIntelDNN::OutputScaleFactor(intel_dnn_component_t &comp) {
|
||||
return comp.output_scale_factor;
|
||||
}
|
||||
@@ -529,11 +470,9 @@ void GNAPluginNS::backend::AMIntelDNN::WriteGraphWizModel(const char *filename)
|
||||
graph << ", label=<<TABLE BORDER=\"0\" CELLBORDER=\"1\" CELLSPACING=\"0\">\n"
|
||||
" <TR><TD colspan=\"2\">" << l << "</TD></TR>\n";
|
||||
|
||||
#ifdef PLOT
|
||||
if (components[k].original_layer_name != nullptr) {
|
||||
graph << " <TR><TD> IR </TD><TD>" << components[k].original_layer_name << "</TD></TR>\n";
|
||||
}
|
||||
#endif
|
||||
graph << " <TR><TD> dims</TD><TD>" << components[k].num_rows_in << "x" << components[k].num_rows_out<< "</TD></TR>\n";
|
||||
if (IS_AFFINE(k)) {
|
||||
graph << " <TR><TD> wscale</TD><TD>" << components[k].op.affine.weight_scale_factor<< "</TD></TR>\n";
|
||||
@@ -1191,6 +1130,35 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
|
||||
out_file << "<num_bytes_per_slope> " << std::dec << sizeof(int16_t) << "\n";
|
||||
out_file << "<num_bytes_per_intercept> " << std::dec << sizeof(int16_t) << "\n";
|
||||
out_file << "<num_bytes_per_offset> " << std::dec << sizeof(int32_t) << "\n";
|
||||
switch (func_id) {
|
||||
case kActRelu:
|
||||
case kActLeakyRelu:
|
||||
out_file << "<lrelu.negative_slope> " <<
|
||||
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.lrelu.negative_slope << "\n";
|
||||
break;
|
||||
case kActPow :
|
||||
out_file << "<pow.exponent> " <<
|
||||
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.pow.exponent << "\n";
|
||||
out_file << "<pow.scale> " <<
|
||||
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.pow.scale << "\n";
|
||||
out_file << "<pow.offset> " <<
|
||||
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.pow.offset << "\n";
|
||||
break;
|
||||
case kActFakeQuantize :
|
||||
out_file << "<fakeQuantize.levels> " <<
|
||||
std::dec << component[i].op.pwl.func_id.args.fakeQuantize.levels << "\n";
|
||||
out_file << "<fakeQuantize.input_low> " <<
|
||||
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.input_low << "\n";
|
||||
out_file << "<fakeQuantize.input_high> " <<
|
||||
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.input_high << "\n";
|
||||
out_file << "<fakeQuantize.output_low> " <<
|
||||
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.output_low << "\n";
|
||||
out_file << "<fakeQuantize.output_high> " <<
|
||||
std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.output_high << "\n";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (logging_precision == kDnnFloat) {
|
||||
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> " << 1.0 << "\n";
|
||||
out_file << "<num_segments> " << std::dec << 0 << "\n";
|
||||
|
||||
@@ -266,8 +266,6 @@ public:
|
||||
}
|
||||
|
||||
|
||||
void Propagate();
|
||||
|
||||
// Index-based convenience overload: fetches component[component_index] and forwards it
// to the OutputScaleFactor overload that takes a component reference.
// The index is used as-is; no range check is performed in this function.
float OutputScaleFactor(uint32_t component_index) {
|
||||
return OutputScaleFactor(component[component_index]);
|
||||
}
|
||||
|
||||
@@ -27,179 +27,6 @@
|
||||
#include "runtime/cnn.h"
|
||||
|
||||
|
||||
/**
 * Evaluates an affine component on 4-byte (float) data.
 *
 * The output buffer is first pre-filled with the per-row bias, then the matrix
 * product is accumulated on top of it by cblas_sgemm1 (full output) or
 * cblas_sgemm_subset (only the rows named in `list`); both are called with the
 * two scalar factors set to 1.0.
 *
 * @param component component to evaluate; must have num_bytes_per_input == 4
 * @param list      optional list of output row indices; nullptr means all rows
 * @param listsize  number of entries in `list` (ignored when `list` is nullptr)
 * @throws GNA exception ("Bad data width") when the input width is not 4 bytes
 */
void GNAPluginNS::backend::ApplyAffineTransform(intel_dnn_component_t *component, uint32_t *list, uint32_t listsize) {
    if (component->num_bytes_per_input != 4) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }

    auto affine = &component->op.affine;
    float *weights = reinterpret_cast<float *>(affine->ptr_weights);
    float *inputs = reinterpret_cast<float *>(component->ptr_inputs);
    float *outputs = reinterpret_cast<float *>(component->ptr_outputs);
    float *biases = reinterpret_cast<float *>(affine->ptr_biases);

    const int outRows = component->num_rows_out;
    const int numCols = component->num_columns_in;
    const int inRows = component->num_rows_in;
    const int weightStride = component->num_rows_in;
    const int inputStride = component->num_columns_in;
    const int outputStride = component->num_columns_out;

    if (list != nullptr) {
        // Active-list path: output row `pos` receives the bias of source row list[pos].
        for (int pos = 0; pos < listsize; pos++) {
            int srcRow = list[pos];
            for (uint32_t col = 0; col < numCols; col++) {
                outputs[pos * outputStride + col] = biases[srcRow];
            }
        }
        cblas_sgemm_subset(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                           outRows, numCols, inRows,
                           1.0, weights, weightStride,
                           inputs, inputStride,
                           1.0, outputs, outputStride,
                           list, listsize);
        return;
    }

    // Full path: every output row starts from its own bias value.
    for (uint32_t row = 0; row < outRows; row++) {
        for (uint32_t col = 0; col < numCols; col++) {
            outputs[row * outputStride + col] = biases[row];
        }
    }
    cblas_sgemm1(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                 outRows, numCols, inRows,
                 1.0, weights, weightStride,
                 inputs, inputStride,
                 1.0, outputs, outputStride);
}
|
||||
|
||||
/**
 * Evaluates a diagonal component on 4-byte (float) data.
 *
 * The output buffer is pre-filled with the per-row bias; afterwards cblas_ssbmv1 is
 * invoked once per column index with zero super-diagonals and both scalar
 * factors set to 1.0. Weights and biases are read from the component's affine
 * operation parameters.
 *
 * @param component component to evaluate; must have num_bytes_per_input == 4
 * @throws GNA exception ("Bad data width") when the input width is not 4 bytes
 */
void GNAPluginNS::backend::ApplyDiagonalTransform(intel_dnn_component_t *component) {
    if (component->num_bytes_per_input != 4) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }

    auto diagonal = &component->op.affine;
    float *weights = reinterpret_cast<float *>(diagonal->ptr_weights);
    float *inputs = reinterpret_cast<float *>(component->ptr_inputs);
    float *outputs = reinterpret_cast<float *>(component->ptr_outputs);
    float *biases = reinterpret_cast<float *>(diagonal->ptr_biases);

    const int rowCount = component->num_rows_out;
    const int colCount = component->num_columns_in;
    const int inputStride = component->num_columns_in;
    const int outputStride = component->num_columns_out;

    // Seed the output with the bias so the BLAS call can accumulate onto it.
    for (uint32_t row = 0; row < rowCount; row++) {
        for (uint32_t col = 0; col < colCount; col++) {
            outputs[row * outputStride + col] = biases[row];
        }
    }

    for (uint32_t col = 0; col < colCount; col++) {
        float *inputSlice = inputs + col * inputStride;
        float *outputSlice = outputs + col * outputStride;
        cblas_ssbmv1(CblasRowMajor, CblasLower, rowCount, 0, 1.0, weights, 1, inputSlice, 1, 1.0, outputSlice, 1);
    }
}
|
||||
|
||||
/**
 * Evaluates one row of a recurrent component on 4-byte (float) data by handing the
 * row's inputs, the supplied feedback vector, the weights and the biases to
 * sgemv_split.
 *
 * @param component     component to evaluate; must have num_bytes_per_input == 4
 * @param row           row index used to offset into the input and output buffers
 * @param ptr_feedbacks feedback data for this row, interpreted as float
 * @throws GNA exception when the input width is not 4 bytes or when the component's
 *         own recurrent feedback pointer is null
 */
void GNAPluginNS::backend::ApplyRecurrentTransform(intel_dnn_component_t *component, uint32_t row, void *ptr_feedbacks) {
    if (component->num_bytes_per_input != 4) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }
    // The check is on the pointer stored in the component, not on the ptr_feedbacks argument.
    if (component->op.recurrent.ptr_feedbacks == nullptr) {
        THROW_GNA_EXCEPTION << "nullptr feedback pointer";
    }

    intel_recurrent_t *recurrent = &component->op.recurrent;
    const int inputLength = component->num_columns_in;
    const int feedbackLength = component->num_columns_out;
    const int outputLength = feedbackLength;

    float *rowInputs = reinterpret_cast<float *>(component->ptr_inputs) + row * component->num_columns_in;
    float *rowFeedbacks = reinterpret_cast<float *>(ptr_feedbacks);
    float *weights = reinterpret_cast<float *>(recurrent->ptr_weights);
    float *biases = reinterpret_cast<float *>(recurrent->ptr_biases);
    float *rowOutputs = reinterpret_cast<float *>(component->ptr_outputs) + row * component->num_columns_out;

    sgemv_split(outputLength, inputLength, feedbackLength, rowInputs, rowFeedbacks, weights, biases, rowOutputs);
}
|
||||
|
||||
/**
 * Evaluates a 1D convolutional component by delegating to CNNFilter32.
 *
 * @param component component to evaluate; must have num_bytes_per_input == 4
 * @throws GNA exception ("Bad data width") when the input width is not 4 bytes
 */
void GNAPluginNS::backend::ApplyConvolutional1DTransform(intel_dnn_component_t *component) {
    const bool isFourByteInput = (component->num_bytes_per_input == 4);
    if (!isFourByteInput) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }
    CNNFilter32(component);
}
|
||||
|
||||
/**
 * Evaluates a piecewise-linear (activation) component over the whole component by
 * delegating to PwlApply32.
 *
 * @param component   component to evaluate
 * @param number_type compute precision; only kDnnFloat is accepted
 * @param listsize    forwarded unchanged to PwlApply32
 * @throws GNA exception ("Bad number type") for any precision other than kDnnFloat
 */
void GNAPluginNS::backend::ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
                                                         intel_dnn_number_type_t number_type,
                                                         uint32_t listsize) {
    const bool isFloat = (number_type == kDnnFloat);
    if (!isFloat) {
        THROW_GNA_EXCEPTION << "Bad number type: " << number_type;
    }
    PwlApply32(component, listsize);
}
|
||||
|
||||
/**
 * Evaluates a piecewise-linear (activation) component for a single row by calling the
 * ranged PwlApply32 overload with the range [num_row, num_row] and [0, listsize - 1].
 *
 * @param component   component to evaluate
 * @param number_type compute precision; only kDnnFloat is accepted
 * @param listsize    count from which the last index (listsize - 1) is derived
 * @param num_row     row to evaluate; passed as both range start and range end
 * @throws GNA exception ("Bad number type") for any precision other than kDnnFloat
 */
void GNAPluginNS::backend::ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
                                                         intel_dnn_number_type_t number_type,
                                                         uint32_t listsize,
                                                         uint32_t num_row) {
    const bool isFloat = (number_type == kDnnFloat);
    if (!isFloat) {
        THROW_GNA_EXCEPTION << "Bad number type: " << number_type;
    }
    // Unsigned arithmetic, exactly as in the call it replaces: listsize == 0 wraps around.
    const uint32_t lastIndex = listsize - 1;
    PwlApply32(component, num_row, num_row, 0, lastIndex);
}
|
||||
|
||||
/**
 * Evaluates a max-pooling component by delegating to CNNMaxPool.
 *
 * @param component   component to evaluate; must have num_bytes_per_input == 4
 * @param number_type compute precision, forwarded unchanged to CNNMaxPool
 * @throws GNA exception ("Bad data width") when the input width is not 4 bytes
 */
void GNAPluginNS::backend::ApplyMaxPoolTransform(intel_dnn_component_t *component, intel_dnn_number_type_t number_type) {
    const bool isFourByteInput = (component->num_bytes_per_input == 4);
    if (!isFourByteInput) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }
    CNNMaxPool(component, number_type);
}
|
||||
|
||||
/**
 * Writes the transpose of the float input matrix into the output buffer:
 * output[col][row] = input[row][col], with the input being
 * num_rows_in x num_columns_in and row strides taken from num_columns_in (input)
 * and num_columns_out (output).
 *
 * @param component component to evaluate; must have num_bytes_per_input == 4
 * @throws GNA exception ("Bad data width") when the input width is not 4 bytes
 */
void GNAPluginNS::backend::ApplyTranspose(intel_dnn_component_t *component) {
    if (component->num_bytes_per_input != 4) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }

    float *source = reinterpret_cast<float *>(component->ptr_inputs);
    float *target = reinterpret_cast<float *>(component->ptr_outputs);

    const int rowCount = component->num_rows_in;
    const int colCount = component->num_columns_in;
    const int sourceStride = component->num_columns_in;
    const int targetStride = component->num_columns_out;

    for (uint32_t r = 0; r < rowCount; r++) {
        for (uint32_t c = 0; c < colCount; c++) {
            target[c * targetStride + r] = source[r * sourceStride + c];
        }
    }
}
|
||||
|
||||
/**
 * Copies a num_copy_rows x num_copy_columns block of floats from the top-left corner
 * of the input buffer to the top-left corner of the output buffer. Row strides are
 * num_columns_in for the input and num_columns_out for the output.
 *
 * @param component component to evaluate; must have num_bytes_per_input == 4
 * @throws GNA exception when the input width is not 4 bytes or when more rows are
 *         requested than the input has
 */
void GNAPluginNS::backend::ApplyCopy(intel_dnn_component_t *component) {
    if (component->num_bytes_per_input != 4) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }

    const int32_t rowsToCopy = component->op.copy.num_copy_rows;
    const int32_t colsToCopy = component->op.copy.num_copy_columns;
    const int32_t sourceStride = component->num_columns_in;
    const int32_t targetStride = component->num_columns_out;

    // NOTE(review): the condition compares the requested *row* count against num_rows_in
    // while the message talks about columns - confirm which of the two is intended.
    if (rowsToCopy > component->num_rows_in) {
        THROW_GNA_EXCEPTION << "Error: attempt to copy more columns than matrix has";
    }

    float *source = reinterpret_cast<float *>(component->ptr_inputs);
    float *target = reinterpret_cast<float *>(component->ptr_outputs);

    for (uint32_t r = 0; r < rowsToCopy; r++) {
        for (uint32_t c = 0; c < colsToCopy; c++) {
            target[r * targetStride + c] = source[r * sourceStride + c];
        }
    }
}
|
||||
|
||||
bool GNAPluginNS::backend::isCompatibleDnn(GNAPluginNS::backend::AMIntelDNN dnn1, GNAPluginNS::backend::AMIntelDNN dnn2) {
|
||||
bool isCompatible = true;
|
||||
|
||||
|
||||
@@ -49,21 +49,6 @@
|
||||
namespace GNAPluginNS {
|
||||
namespace backend {
|
||||
|
||||
void ApplyAffineTransform(intel_dnn_component_t *component, uint32_t *list, uint32_t listsize);
|
||||
void ApplyDiagonalTransform(intel_dnn_component_t *component);
|
||||
void ApplyRecurrentTransform(intel_dnn_component_t *component, uint32_t row, void *ptr_feedbacks);
|
||||
void ApplyConvolutional1DTransform(intel_dnn_component_t *component);
|
||||
void ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
|
||||
intel_dnn_number_type_t number_type,
|
||||
uint32_t listsize);
|
||||
void ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
|
||||
intel_dnn_number_type_t number_type,
|
||||
uint32_t listsize,
|
||||
uint32_t num_row);
|
||||
void ApplyMaxPoolTransform(intel_dnn_component_t *component, intel_dnn_number_type_t number_type);
|
||||
void ApplyTranspose(intel_dnn_component_t *component);
|
||||
void ApplyCopy(intel_dnn_component_t *component);
|
||||
|
||||
void PlotFloatIntDnn(GNAPluginNS::backend::AMIntelDNN *dnn, GNAPluginNS::backend::AMIntelDNN *dnn_int);
|
||||
bool isCompatibleDnn(GNAPluginNS::backend::AMIntelDNN dnn1, GNAPluginNS::backend::AMIntelDNN dnn2);
|
||||
void ClearScoreError(intel_score_error_t *error);
|
||||
|
||||
@@ -18,9 +18,9 @@ intel_dnn_component_t & backend::DnnComponents::addComponent(const std::string l
|
||||
components.emplace_back(layerName, intel_dnn_component_t());
|
||||
auto &currentComponent = components.back().second;
|
||||
#ifdef PLOT
|
||||
currentComponent.original_layer_name = components.back().first.c_str();
|
||||
std::cout << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << components.size() - 1 << std::endl;
|
||||
#endif
|
||||
currentComponent.original_layer_name = components.back().first.c_str();
|
||||
return currentComponent;
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ enum DnnActivationType : uint8_t {
|
||||
kActNegHalfLog,
|
||||
kActSoftSign,
|
||||
kActPow,
|
||||
kActFakeQuantize,
|
||||
kActNumType
|
||||
};
|
||||
|
||||
@@ -43,7 +44,14 @@ struct DnnActivation {
|
||||
float offset;
|
||||
} pow;
|
||||
struct {
|
||||
float reserved[3];
|
||||
int32_t levels;
|
||||
float input_low;
|
||||
float input_high;
|
||||
float output_low;
|
||||
float output_high;
|
||||
} fakeQuantize;
|
||||
struct {
|
||||
float reserved[5];
|
||||
};
|
||||
} args;
|
||||
operator DnnActivationType () const noexcept {
|
||||
@@ -75,7 +83,8 @@ static const char *intel_dnn_activation_name[kActNumType] = {
|
||||
"kActNegHalfLog",
|
||||
"kActCustom",
|
||||
"kActSoftSign",
|
||||
"kActPow"
|
||||
"kActPow",
|
||||
"kActFakeQuantize"
|
||||
};
|
||||
|
||||
typedef enum DnnSoftmaxType {
|
||||
@@ -232,9 +241,7 @@ typedef struct {
|
||||
void *ptr_outputs;
|
||||
float output_scale_factor;
|
||||
float input_scale_factor;
|
||||
#ifdef PLOT
|
||||
const char * original_layer_name = nullptr;
|
||||
#endif
|
||||
} intel_dnn_component_t;
|
||||
|
||||
typedef struct {
|
||||
|
||||
@@ -4,11 +4,35 @@
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <frontend/quantized_layer_params.hpp>
|
||||
#include <legacy/layer_transform.hpp>
|
||||
#include <layers/gna_layer_info.hpp>
|
||||
|
||||
#include "gna_input_desc.hpp"
|
||||
#include "gna_plugin_log.hpp"
|
||||
|
||||
std::vector<void *>& GNAPluginNS::InputDesc::getPtrInputsGlobal(const std::string& name) {
|
||||
using namespace InferenceEngine;
|
||||
using namespace GNAPluginNS;
|
||||
|
||||
/**
 * Computes the number of bytes needed to hold the output tensor of an input layer.
 *
 * The element size is 2 bytes when the layer carries injected QuantizedLayerParams
 * and 4 bytes otherwise; it is multiplied by the product of the dimensions of the
 * layer's single output.
 *
 * @param layer layer to inspect; must be an input layer with exactly one output
 * @return product of the output dimensions times the element size in bytes
 * @throws GNA layer exception when the layer is not an input layer or when it does
 *         not have exactly one output
 */
size_t InputDesc::minBytesRequiredForStoreInput(CNNLayerPtr layer) {
    auto quantized = getInjectedData<QuantizedLayerParams>(layer);
    const size_t precision_bytes = quantized ? 2 : 4;

    if (!LayerInfo(layer).isInput()) {
        THROW_GNA_LAYER_EXCEPTION(layer) << "minBytesRequiredForStoreInput expect to work on \"Input\" layer";
    }
    if (layer->outData.size() != 1) {
        THROW_GNA_LAYER_EXCEPTION(layer) << "minBytesRequiredForStoreInput invalid outData for the layer";
    }

    auto dims = layer->outData.front()->getTensorDesc().getDims();
    return details::product(dims.begin(), dims.end()) * precision_bytes;
}
|
||||
|
||||
std::vector<void *>& InputDesc::getPtrInputsGlobal(const std::string& name) {
|
||||
if (ptr_inputs_global_id.find(name) == ptr_inputs_global_id.end()) {
|
||||
ptr_inputs_global_storage.push_front({});
|
||||
ptr_inputs_global_id[name] = ptr_inputs_global_storage.begin();
|
||||
@@ -16,14 +40,14 @@ std::vector<void *>& GNAPluginNS::InputDesc::getPtrInputsGlobal(const std::strin
|
||||
return *ptr_inputs_global_id[name];
|
||||
}
|
||||
|
||||
intel_dnn_orientation_t GNAPluginNS::InputDesc::getOrientation(const std::string& name) {
|
||||
intel_dnn_orientation_t InputDesc::getOrientation(const std::string& name) {
|
||||
if (orientation_in.find(name) == orientation_in.end()) {
|
||||
THROW_GNA_EXCEPTION << "Can't find orientation for input name '" << name << "'";
|
||||
}
|
||||
return orientation_in[name];
|
||||
}
|
||||
|
||||
float GNAPluginNS::InputDesc::getScaleFactor(const std::size_t index) {
|
||||
float InputDesc::getScaleFactor(const std::size_t index) {
|
||||
if (index >= inputScaleFactors.size()) {
|
||||
THROW_GNA_EXCEPTION << "Can't find scale factor for index = " << index;
|
||||
}
|
||||
|
||||
@@ -9,6 +9,8 @@
|
||||
#include <list>
|
||||
#include <unordered_map>
|
||||
#include <string>
|
||||
#include <legacy/ie_layers.h>
|
||||
|
||||
#include "backend/dnn_types.h"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
@@ -17,6 +19,8 @@ struct InputDesc {
|
||||
/// order of scale factors matches inputs order in original topology
|
||||
std::vector<float> inputScaleFactors;
|
||||
std::map<std::string, int> bytes_allocated_for_input;
|
||||
size_t minBytesRequiredForStoreInput(InferenceEngine::CNNLayerPtr);
|
||||
|
||||
std::unordered_map<std::string, std::list<std::vector<void *>>::iterator> ptr_inputs_global_id;
|
||||
std::list<std::vector<void *>> ptr_inputs_global_storage;
|
||||
|
||||
|
||||
@@ -1480,6 +1480,14 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Creates the primitive for a FakeQuantize layer.
 *
 * Only the software FP32 mode (gnaFlags->sw_fp32) is handled: the layer is lowered
 * through PWLPrimitive, i.e. as a special activation function that satisfies the
 * fakeQuantize formula. In any other mode this function does nothing.
 *
 * @param layer the FakeQuantize layer to compile
 */
void GNAGraphCompiler::FakeQuantizePrimitive(InferenceEngine::CNNLayerPtr layer) {
    if (!gnaFlags->sw_fp32) {
        // nothing is emitted outside FP32 mode
        return;
    }
    PWLPrimitive(layer);
}
|
||||
|
||||
void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
auto* generic = dynamic_cast<GenericLayer*>(layer.get());
|
||||
std::string type;
|
||||
@@ -1558,7 +1566,8 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
{"neglog", kActNegLog},
|
||||
{"neghalflog", kActNegHalfLog},
|
||||
{"identity", kActIdentity},
|
||||
{"softsign", kActSoftSign}
|
||||
{"softsign", kActSoftSign},
|
||||
{"fakequantize", kActFakeQuantize}
|
||||
};
|
||||
|
||||
auto it = supportedActivations.find(type);
|
||||
@@ -1573,6 +1582,42 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
activation_type.args.lrelu.negative_slope = 0.0f;
|
||||
}
|
||||
|
||||
if (it->second == kActFakeQuantize) {
|
||||
// get params from const input
|
||||
auto GetParamFromInputAsFloat = [](CNNLayerPtr input, size_t idx) {
|
||||
if (input->insData.size() <= idx) {
|
||||
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << "input";
|
||||
}
|
||||
auto iLayerData = input->insData[idx].lock();
|
||||
if (!iLayerData) {
|
||||
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << ", input: cannot dereference data weak-pointer";
|
||||
}
|
||||
auto iLayer = getCreatorLayer(iLayerData).lock();
|
||||
if (!iLayer) {
|
||||
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << ", input: cannot dereference creator layer weak-pointer";
|
||||
}
|
||||
if (!LayerInfo(iLayer).isConst()) {
|
||||
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << ", input: expected to be of type const, but was: " << iLayer->type;
|
||||
}
|
||||
|
||||
if (!iLayer->blobs.count("custom")) {
|
||||
THROW_GNA_LAYER_EXCEPTION(iLayer) << "cannot get custom blob";
|
||||
}
|
||||
auto data = iLayer->blobs["custom"];
|
||||
if (data->getTensorDesc().getPrecision() != Precision::FP32) {
|
||||
THROW_GNA_LAYER_EXCEPTION(iLayer) << "cannot cast custom blob to type FP32, since it is of type: " << data->getTensorDesc().getPrecision();
|
||||
}
|
||||
|
||||
return data->cbuffer().as<float*>()[0];
|
||||
};
|
||||
|
||||
activation_type.args.fakeQuantize.levels = layer->GetParamAsInt("levels");
|
||||
activation_type.args.fakeQuantize.input_low = GetParamFromInputAsFloat(layer, 1);
|
||||
activation_type.args.fakeQuantize.input_high = GetParamFromInputAsFloat(layer, 2);
|
||||
activation_type.args.fakeQuantize.output_low = GetParamFromInputAsFloat(layer, 3);
|
||||
activation_type.args.fakeQuantize.output_high = GetParamFromInputAsFloat(layer, 4);
|
||||
}
|
||||
|
||||
string actName = "unknown";
|
||||
|
||||
#ifdef PLOT
|
||||
@@ -1776,7 +1821,8 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
|
||||
{{"Crop"}, CREATE(CropPrimitive)},
|
||||
{{"Copy"}, CREATE(CopyPrimitive)},
|
||||
{{"TensorIterator"}, SKIP},
|
||||
{{"LSTMCell"}, SKIP}
|
||||
{{"LSTMCell"}, SKIP},
|
||||
{{"FakeQuantize"}, CREATE(FakeQuantizePrimitive)} // TODO: fakequantize layer should be properly converted to GNA scale factors for integer case
|
||||
};
|
||||
auto it = LayersBuilder::getStorage().find(layer->type);
|
||||
if (it != LayersBuilder::getStorage().end()) {
|
||||
@@ -1914,10 +1960,17 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, void *p
|
||||
if (included == concat_connection.end()) {
|
||||
gnamem->reserve_ptr(&concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64);
|
||||
|
||||
size_t concatInputIdx = 0;
|
||||
for (auto &&inputLayer : concatLayerInfoItem.concatInputLayers) {
|
||||
if (InferenceEngine::details::CaselessEq<std::string>()
|
||||
(inputLayer.name, "input")) {
|
||||
inputDesc->bytes_allocated_for_input[inputLayer.name] = inputLayer.tensorSize;
|
||||
// skipping non functional and reshape layer, as in that case input might be not connected to anything
|
||||
auto realConcatInputs = CNNNetGetPrevLayersSkip(concat, [](CNNLayerPtr l) {
|
||||
return !LayerInfo(l).isNonFunctional() && !LayerInfo(l).isSplit();
|
||||
}, concatInputIdx++);
|
||||
|
||||
for (auto rInput : realConcatInputs) {
|
||||
if (LayerInfo(rInput.first).isInput()) {
|
||||
inputDesc->bytes_allocated_for_input[rInput.first->name] += inputLayer.tensorSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
concatLayerInfoItem.input_allocated = true;
|
||||
@@ -1960,7 +2013,14 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
||||
// real input not a memory input
|
||||
if (LayerInfo(prevLayer).isInput()) {
|
||||
if (0 == inputDesc->bytes_allocated_for_input[prevLayer->name]) {
|
||||
// real allocation pointer will be kept in ptr not in ptf_inputs_global
|
||||
// if the requested allocation is smaller than the real input tensor needs, the request must be extended
|
||||
auto minInput = inputDesc->minBytesRequiredForStoreInput(prevLayer);
|
||||
if (num_data_bytes_in < minInput) {
|
||||
gnalog() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" << ALIGN(minInput, 8);
|
||||
num_data_bytes_in = ALIGN(minInput, 8);
|
||||
}
|
||||
|
||||
// real allocation pointer will be kept in ptr not in ptr_inputs_global
|
||||
if (offset < 0) {
|
||||
gnamem->push_value(ptr,
|
||||
static_cast<uint8_t>(0),
|
||||
@@ -1972,7 +2032,6 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
||||
num_data_bytes_in,
|
||||
64);
|
||||
}
|
||||
|
||||
inputDesc->bytes_allocated_for_input[prevLayer->name] = num_data_bytes_in;
|
||||
}
|
||||
if (ALIGN(num_data_bytes_in, 64) > ALIGN(inputDesc->bytes_allocated_for_input[prevLayer->name], 64)) {
|
||||
|
||||
@@ -120,6 +120,7 @@ public:
|
||||
void SplitPrimitive(InferenceEngine::CNNLayerPtr);
|
||||
void SlicePrimitive(InferenceEngine::CNNLayerPtr);
|
||||
void PWLPrimitive(InferenceEngine::CNNLayerPtr);
|
||||
void FakeQuantizePrimitive(InferenceEngine::CNNLayerPtr);
|
||||
void CopyPrimitive(InferenceEngine::CNNLayerPtr);
|
||||
|
||||
void Reset();
|
||||
|
||||
@@ -185,9 +185,6 @@ inline std::pair<InferenceEngine::CNNLayerPtr, int> CNNNetCheckNextLayerSkipCer
|
||||
*/
|
||||
template <class Layer>
|
||||
inline std::vector<CNNLayerPtr> CNNNetGetAllNextLayersSkipCertain(Layer layer, int oDataIdx, const std::function<bool(CNNLayerPtr)> &shouldSkip) {
|
||||
// TODO: need to have generic function that creates slice of the graph : starting from given layer
|
||||
// and skipped all non functional - ending up into functional one
|
||||
|
||||
std::list<CNNLayerPtr> currentSet;
|
||||
std::vector<CNNLayerPtr> resultSet;
|
||||
|
||||
|
||||
@@ -696,6 +696,7 @@ void GNAModelSerial::ImportInputs(std::istream &is,
|
||||
is.read(reinterpret_cast<char *>(&input), sizeof(input));
|
||||
inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
|
||||
inputsDesc->orientation_in[name] = input.orientation;
|
||||
inputsDesc->bytes_allocated_for_input[name] = input.element_size * input.elements_count;
|
||||
|
||||
auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup});
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "memory/gna_allocator.hpp"
|
||||
#include "memory/gna_memory_state.hpp"
|
||||
#include "gna_model_serial.hpp"
|
||||
#include "runtime/gna_float_runtime.hpp"
|
||||
|
||||
#if GNA_LIB_VER == 2
|
||||
#include <gna2-model-api.h>
|
||||
@@ -903,15 +904,28 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
|
||||
|
||||
auto dims = input.second->getTensorDesc().getDims();
|
||||
|
||||
auto importedElements = is2D ? dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 2] * dims[dims.size() - 3];
|
||||
auto importedFrames = dims[0];
|
||||
auto targetGroups = is2D ? dims[dims.size() - 2] : dims[0]; // TODO: no proper support for groups yet
|
||||
|
||||
auto importedElementSizeBytes = gnaFlags->sw_fp32 ? 4 : 2;
|
||||
auto importedBytes = importedElements * importedFrames * importedElementSizeBytes;
|
||||
|
||||
if (inputsDesc->bytes_allocated_for_input[input.first] < importedBytes) {
|
||||
THROW_GNA_EXCEPTION << "Cannot import input frames for :" << input.first
|
||||
<< ", allocated size: " << inputsDesc->bytes_allocated_for_input[input.first]
|
||||
<< ", but input blob size: " << importedBytes;
|
||||
}
|
||||
|
||||
ImportFrames(inputsDesc->getPtrInputsGlobal(input.first)[idx],
|
||||
input.second->cbuffer().as<float *>(),
|
||||
input.second->getTensorDesc().getPrecision(),
|
||||
gnaFlags->sw_fp32 ? 1.0f : inputsDesc->getScaleFactor(inputNum),
|
||||
inputsDesc->getOrientation(input.first),
|
||||
dims[0],
|
||||
is2D ? dims[dims.size() - 2] : dims[0],
|
||||
is2D ? dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 2] * dims[dims.size() - 3],
|
||||
is2D ? dims[dims.size() - 1] : dims[dims.size() - 1] * dims[dims.size() - 2] * dims[dims.size() - 3]);
|
||||
importedFrames,
|
||||
targetGroups,
|
||||
importedElements,
|
||||
importedElements);
|
||||
|
||||
bool isOneChannel = input.second->getTensorDesc().getDims()[1] == 1;
|
||||
if (do_rotate_input && ((inputLayout == Layout::NC || inputLayout == Layout::NCHW)
|
||||
@@ -929,7 +943,8 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, Infer
|
||||
}
|
||||
|
||||
if (!gnadevice) {
|
||||
dnn->Propagate();
|
||||
auto runtime = runtime::FP(dnn);
|
||||
runtime.infer();
|
||||
if (freeNnet != nnets.end()) {
|
||||
std::get<1>(*freeNnet) = 1;
|
||||
}
|
||||
|
||||
@@ -49,6 +49,10 @@ class LayerInfo {
|
||||
// Constructs a LayerInfo from a raw layer pointer by storing it in the `layer` member.
// Declared explicit, so a CNNLayer* is never converted to LayerInfo implicitly.
explicit LayerInfo(InferenceEngine::CNNLayer * layer)
|
||||
: layer(layer) {
|
||||
}
|
||||
// Returns true when the wrapped layer has more than one entry in insData.
// IS_VALID() is a macro defined outside this view; presumably it guards against an
// unset layer pointer before the dereference below - TODO confirm.
bool hasMultipleInputs() const noexcept {
|
||||
IS_VALID();
|
||||
return layer->insData.size() > 1;
|
||||
}
|
||||
bool has16BOutput() const noexcept {
|
||||
IS_VALID();
|
||||
static InferenceEngine::details::caseless_set<std::string> layersWith16BOutputs = {"memory", "input", "split", "slice", "concat", "copy", "const"};
|
||||
@@ -200,6 +204,9 @@ class LayerInfo {
|
||||
// Returns the result of isOfType("concat") for the wrapped layer.
bool isConcat() const noexcept {
|
||||
return isOfType("concat");
|
||||
}
|
||||
bool isFakeQnatize() const noexcept {
|
||||
return isOfType("FakeQnatize");
|
||||
}
|
||||
// Returns true when the wrapped layer is of type "reshape", "squeeze" or "unsqueeze",
// as reported by isOfType.
bool isNonFunctional() const noexcept {
|
||||
return isOfType("reshape") || isOfType("squeeze") || isOfType("unsqueeze");
|
||||
}
|
||||
|
||||
@@ -48,6 +48,7 @@ enum LayerType {
|
||||
LSTMCell,
|
||||
TensorIterator,
|
||||
SoftSign,
|
||||
FakeQuantize,
|
||||
NO_TYPE
|
||||
};
|
||||
|
||||
@@ -84,7 +85,8 @@ static const InferenceEngine::details::caseless_map<std::string, GNAPluginNS::La
|
||||
{ "LSTMCell", LSTMCell },
|
||||
{ "TensorIterator", TensorIterator },
|
||||
{ "Abs", Abs },
|
||||
{ "SoftSign", SoftSign }
|
||||
{ "SoftSign", SoftSign },
|
||||
{ "FakeQuantize", FakeQuantize },
|
||||
};
|
||||
|
||||
GNAPluginNS::LayerType LayerTypeFromStr(const std::string &str);
|
||||
|
||||
@@ -1293,11 +1293,12 @@ void FuseMultipleIdentitiesPass::run() {
|
||||
auto isNonFunctional = [](CNNLayerPtr ptr) {
|
||||
return LayerInfo(ptr).isNonFunctional();
|
||||
};
|
||||
auto eltwise = dynamic_cast<InferenceEngine::EltwiseLayer *>(l.get());
|
||||
auto concat = dynamic_cast<InferenceEngine::ConcatLayer *>(l.get());
|
||||
|
||||
if (LayerInfo(l).isNonFunctional() || LayerInfo(l).has32BInput())
|
||||
if (LayerInfo(l).hasMultipleInputs()) {
|
||||
continue;
|
||||
}
|
||||
if (LayerInfo(l).isNonFunctional() || LayerInfo(l).has32BInput()) {
|
||||
continue;
|
||||
}
|
||||
gnalog() << "CNNNetPrevLayer skip non functional from :: " << l->name;
|
||||
auto isFunctional = [](CNNLayerPtr ptr) {
|
||||
return !LayerInfo(ptr).isNonFunctional();
|
||||
@@ -1310,7 +1311,7 @@ void FuseMultipleIdentitiesPass::run() {
|
||||
return LayerInfo(candidate.first).isLink();
|
||||
}), prevLayersReached.end());
|
||||
|
||||
if (prevLayersReached.size() != 1 && eltwise == nullptr && concat == nullptr) {
|
||||
if (prevLayersReached.size() != 1) {
|
||||
std::stringstream layers;
|
||||
for (auto && prevLayer : prevLayersReached) {
|
||||
layers << prevLayer.first->name;
|
||||
@@ -1361,7 +1362,6 @@ void FuseMultipleIdentitiesPass::run() {
|
||||
}
|
||||
|
||||
int PassManager::run(int index) {
|
||||
// #define PLOT
|
||||
#ifdef PLOT
|
||||
auto dumpNetworkAfterPass = [&index, this] (std::shared_ptr<Pass> pass) {
|
||||
std::string name = std::string("gna_passes_") + (index < 10 ? "0" : "") + std::to_string(index) + "_" + pass->getName();
|
||||
|
||||
@@ -22,9 +22,7 @@ void CNNFilter32(intel_dnn_component_t *component) {
|
||||
uint32_t num_filter_coefficients = component->op.conv1D.num_filter_coefficients;
|
||||
|
||||
std::string layer_name;
|
||||
#ifdef PLOT
|
||||
layer_name = " In layer '" + std::string(component->original_layer_name) + "'";
|
||||
#endif
|
||||
if (component->num_rows_in != 1 || component->num_rows_out != 1) {
|
||||
THROW_GNA_EXCEPTION << "Bad number of rows in CNNFilter32!" << layer_name;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
|
||||
#include <gna_plugin_log.hpp>
|
||||
#include <cstdint>
|
||||
#include <backend/dnn_types.h>
|
||||
#include "gna_float_runtime.hpp"
|
||||
|
||||
using namespace GNAPluginNS;
|
||||
using namespace GNAPluginNS::runtime;
|
||||
|
||||
|
||||
void FP::infer() {
|
||||
if (!dnn) {
|
||||
THROW_GNA_EXCEPTION << "[GNA FP32 RUNTIME] not initialized";
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < dnn->component.size(); i++) {
|
||||
intel_dnn_component_t *comp = &dnn->component[i];
|
||||
uint32_t *ptr_active_outputs = nullptr;
|
||||
uint32_t num_active_outputs = (comp->orientation_out == kDnnInterleavedOrientation)
|
||||
? comp->num_rows_out : comp->num_columns_out;
|
||||
|
||||
if (i == dnn->component.size() - 1) { // active list applies to last component
|
||||
ptr_active_outputs = dnn->ptr_active_outputs();
|
||||
num_active_outputs = dnn->num_active_outputs();
|
||||
} else if (i == dnn->component.size() - 2) { // also applies to last two components when last is PWL
|
||||
if ((dnn->component[i].operation == kDnnAffineOp) && (dnn->component[i + 1].operation == kDnnPiecewiselinearOp)) {
|
||||
ptr_active_outputs = dnn->ptr_active_outputs();
|
||||
num_active_outputs = dnn->num_active_outputs(); }
|
||||
}
|
||||
|
||||
switch (comp->operation) {
|
||||
case kDnnAffineOp : {
|
||||
ApplyAffineTransform(comp, ptr_active_outputs, num_active_outputs);
|
||||
break;
|
||||
}
|
||||
case kDnnDiagonalOp: {
|
||||
ApplyDiagonalTransform(comp);
|
||||
break;
|
||||
}
|
||||
case kDnnRecurrentOp: {
|
||||
if ((i < dnn->component.size() - 1) && (dnn->component[i + 1].operation == kDnnPiecewiselinearOp)) {
|
||||
intel_dnn_component_t *comp_pwl = &dnn->component[i + 1];
|
||||
for (uint32_t j = 0; j < comp->num_rows_in; j++) {
|
||||
void *ptr_feedbacks =
|
||||
reinterpret_cast<void *>(reinterpret_cast<int32_t *>(comp->op.recurrent.ptr_feedbacks)
|
||||
+ j * comp_pwl->num_columns_out);
|
||||
ApplyRecurrentTransform(comp, j, ptr_feedbacks);
|
||||
ApplyPiecewiseLinearTransform(comp_pwl, kDnnFloat, num_active_outputs, j);
|
||||
}
|
||||
i++; // skip next component
|
||||
} else {
|
||||
THROW_GNA_EXCEPTION << "Missing PiecewiseLinear component after Recurrent component in Propagate!";
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kDnnConvolutional1dOp: {
|
||||
ApplyConvolutional1DTransform(comp);
|
||||
break;
|
||||
}
|
||||
case kDnnPiecewiselinearOp: {
|
||||
ApplyPiecewiseLinearTransform(comp, kDnnFloat, num_active_outputs);
|
||||
break;
|
||||
}
|
||||
case kDnnMaxPoolOp: {
|
||||
ApplyMaxPoolTransform(comp, kDnnFloat);
|
||||
break;
|
||||
}
|
||||
case kDnnInterleaveOp: {
|
||||
ApplyTranspose(comp);
|
||||
break;
|
||||
}
|
||||
case kDnnDeinterleaveOp: {
|
||||
ApplyTranspose(comp);
|
||||
break;
|
||||
}
|
||||
case kDnnCopyOp: {
|
||||
ApplyCopy(comp);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
THROW_GNA_EXCEPTION << "[GNA FP32 RUNTIME] Bad operation " << comp->operation;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
#include <backend/am_intel_dnn.hpp>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace runtime {
|
||||
/**
|
||||
* @brief floating runtime for gna-plugin, in most case it uses same gna-primitives description as integer runtime, but execute them on CPU
|
||||
*/
|
||||
class FP {
|
||||
std::shared_ptr<backend::AMIntelDNN> dnn;
|
||||
public:
|
||||
FP(std::shared_ptr<backend::AMIntelDNN> dnn) : dnn(dnn) {
|
||||
}
|
||||
virtual void infer();
|
||||
|
||||
/**
|
||||
* atomic operations for floating inference
|
||||
*/
|
||||
static void ApplyAffineTransform(intel_dnn_component_t *component, uint32_t *list, uint32_t listsize);
|
||||
static void ApplyDiagonalTransform(intel_dnn_component_t *component);
|
||||
static void ApplyRecurrentTransform(intel_dnn_component_t *component, uint32_t row, void *ptr_feedbacks);
|
||||
static void ApplyConvolutional1DTransform(intel_dnn_component_t *component);
|
||||
static void ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
|
||||
intel_dnn_number_type_t number_type,
|
||||
uint32_t listsize);
|
||||
static void ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
|
||||
intel_dnn_number_type_t number_type,
|
||||
uint32_t listsize,
|
||||
uint32_t num_row);
|
||||
static void ApplyMaxPoolTransform(intel_dnn_component_t *component, intel_dnn_number_type_t number_type);
|
||||
static void ApplyTranspose(intel_dnn_component_t *component);
|
||||
static void ApplyCopy(intel_dnn_component_t *component);
|
||||
};
|
||||
|
||||
} // namespace runtime
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
184
inference-engine/src/gna_plugin/runtime/gna_float_runtime_op.cpp
Normal file
184
inference-engine/src/gna_plugin/runtime/gna_float_runtime_op.cpp
Normal file
@@ -0,0 +1,184 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "gna_float_runtime.hpp"
|
||||
#include "pwl.h"
|
||||
#include "cnn.h"
|
||||
#include "floatmath.h"
|
||||
|
||||
using namespace GNAPluginNS;
|
||||
using namespace GNAPluginNS::runtime;
|
||||
|
||||
/**
 * @brief Float affine primitive: pre-fills the output with the biases, then calls the
 *        matrix-multiply helper with alpha = beta = 1.0.
 *        NOTE(review): presumably the helpers compute C = A * B + C as in BLAS sgemm - confirm.
 * @param component component providing op.affine weights/biases and the input/output buffers
 * @param list optional list of active output row indices; nullptr means all rows are computed
 * @param listsize number of entries in list (only used when list is not nullptr)
 *
 * Raises a GNA exception when the input element width is not 4 bytes.
 */
void FP::ApplyAffineTransform(intel_dnn_component_t *component, uint32_t *list, uint32_t listsize) {
    if (component->num_bytes_per_input != 4) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }

    int m = component->num_rows_out;
    int n = component->num_columns_in;
    int k = component->num_rows_in;
    int lda = component->num_rows_in;
    int ldb = component->num_columns_in;
    int ldc = component->num_columns_out;

    auto weights = reinterpret_cast<float *>(component->op.affine.ptr_weights);
    auto inputs = reinterpret_cast<float *>(component->ptr_inputs);
    auto outputs = reinterpret_cast<float *>(component->ptr_outputs);
    auto biases = reinterpret_cast<float *>(component->op.affine.ptr_biases);

    if (list != nullptr) {
        // Only the listed rows are produced; they are packed into consecutive output rows.
        for (int pos = 0; pos < listsize; pos++) {
            int srcRow = list[pos];
            for (uint32_t col = 0; col < n; col++) {
                outputs[pos * ldc + col] = biases[srcRow];
            }
        }
        cblas_sgemm_subset(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                           m, n, k,
                           1.0, weights, lda,
                           inputs, ldb,
                           1.0, outputs, ldc,
                           list, listsize);
        return;
    }

    for (uint32_t row = 0; row < m; row++) {
        for (uint32_t col = 0; col < n; col++) {
            outputs[row * ldc + col] = biases[row];
        }
    }
    cblas_sgemm1(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, weights, lda, inputs, ldb, 1.0, outputs, ldc);
}
|
||||
|
||||
/**
 * @brief Float diagonal primitive: pre-fills the output with the biases, then calls
 *        cblas_ssbmv1 once per input column with alpha = beta = 1.0.
 *        NOTE(review): presumably this adds the element-wise weight * input product - confirm.
 * @param component component providing op.affine weights/biases and the input/output buffers
 *
 * Raises a GNA exception when the input element width is not 4 bytes.
 */
void FP::ApplyDiagonalTransform(intel_dnn_component_t *component) {
    if (component->num_bytes_per_input != 4) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }

    int m = component->num_rows_out;
    int n = component->num_columns_in;
    int ldb = component->num_columns_in;
    int ldc = component->num_columns_out;

    auto weights = reinterpret_cast<float *>(component->op.affine.ptr_weights);
    auto inputs = reinterpret_cast<float *>(component->ptr_inputs);
    auto outputs = reinterpret_cast<float *>(component->ptr_outputs);
    auto biases = reinterpret_cast<float *>(component->op.affine.ptr_biases);

    for (uint32_t row = 0; row < m; row++) {
        for (uint32_t col = 0; col < n; col++) {
            outputs[row * ldc + col] = biases[row];
        }
    }

    for (uint32_t col = 0; col < n; col++) {
        float *inputSlice = inputs + col * ldb;
        float *outputSlice = outputs + col * ldc;
        cblas_ssbmv1(CblasRowMajor, CblasLower, m, 0, 1.0, weights, 1, inputSlice, 1, 1.0, outputSlice, 1);
    }
}
|
||||
|
||||
/**
 * @brief Float recurrent primitive for one input row: forwards the row, the feedback buffer,
 *        the weights and the biases to sgemv_split, which writes the matching output row.
 * @param component component providing op.recurrent weights/biases and the input/output buffers
 * @param row index of the input/output row to process
 * @param ptr_feedbacks feedback buffer handed to sgemv_split for this row
 *
 * Raises a GNA exception when the input element width is not 4 bytes or when the
 * component's own op.recurrent.ptr_feedbacks is null.
 */
void FP::ApplyRecurrentTransform(intel_dnn_component_t *component, uint32_t row, void *ptr_feedbacks) {
    if (component->num_bytes_per_input != 4) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }
    // The check is made on the component's pointer, not on the ptr_feedbacks argument.
    if (component->op.recurrent.ptr_feedbacks == nullptr) {
        THROW_GNA_EXCEPTION << "nullptr feedback pointer";
    }

    int inputWidth = component->num_columns_in;
    int feedbackWidth = component->num_columns_out;
    int outputWidth = feedbackWidth;

    auto inputRow = reinterpret_cast<float *>(component->ptr_inputs) + row * component->num_columns_in;
    auto feedback = reinterpret_cast<float *>(ptr_feedbacks);
    auto weights = reinterpret_cast<float *>(component->op.recurrent.ptr_weights);
    auto biases = reinterpret_cast<float *>(component->op.recurrent.ptr_biases);
    auto outputRow = reinterpret_cast<float *>(component->ptr_outputs) + row * component->num_columns_out;

    sgemv_split(outputWidth, inputWidth, feedbackWidth, inputRow, feedback, weights, biases, outputRow);
}
|
||||
|
||||
/**
 * @brief Float 1D convolution primitive; delegates to CNNFilter32 after validating the
 *        input element width.
 * @param component convolution component to evaluate
 *
 * Raises a GNA exception when the input element width is not 4 bytes.
 */
void FP::ApplyConvolutional1DTransform(intel_dnn_component_t *component) {
    const bool isFloatWidth = (component->num_bytes_per_input == 4);
    if (!isFloatWidth) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }
    CNNFilter32(component);
}
|
||||
|
||||
/**
 * @brief Applies the component's activation function to the whole component via PwlApply32.
 * @param component piecewise-linear component to evaluate
 * @param number_type must be kDnnFloat; any other value raises a GNA exception
 * @param listsize forwarded unchanged to PwlApply32
 */
void FP::ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
                                       intel_dnn_number_type_t number_type,
                                       uint32_t listsize) {
    if (number_type != kDnnFloat) {
        THROW_GNA_EXCEPTION << "Bad number type: " << number_type;
    }
    PwlApply32(component, listsize);
}
|
||||
|
||||
/**
 * @brief Applies the component's activation function to a single row via PwlApply32.
 * @param component piecewise-linear component to evaluate
 * @param number_type must be kDnnFloat; any other value raises a GNA exception
 * @param listsize number of columns to process; columns 0 .. listsize - 1 are passed on
 * @param num_row the only row that is processed (used as both first and last row)
 */
void FP::ApplyPiecewiseLinearTransform(intel_dnn_component_t *component,
                                       intel_dnn_number_type_t number_type,
                                       uint32_t listsize,
                                       uint32_t num_row) {
    if (number_type != kDnnFloat) {
        THROW_GNA_EXCEPTION << "Bad number type: " << number_type;
    }
    PwlApply32(component, num_row, num_row, 0, listsize - 1);
}
|
||||
|
||||
/**
 * @brief Float max-pooling primitive; delegates to CNNMaxPool after validating the
 *        input element width.
 * @param component max-pool component to evaluate
 * @param number_type forwarded unchanged to CNNMaxPool
 *
 * Raises a GNA exception when the input element width is not 4 bytes.
 */
void FP::ApplyMaxPoolTransform(intel_dnn_component_t *component, intel_dnn_number_type_t number_type) {
    const bool isFloatWidth = (component->num_bytes_per_input == 4);
    if (!isFloatWidth) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }
    CNNMaxPool(component, number_type);
}
|
||||
|
||||
/**
 * @brief Writes the transpose of the component's input matrix into its output buffer.
 *        The input is read as num_rows_in x num_columns_in floats with a row stride of
 *        num_columns_in; the output uses a row stride of num_columns_out.
 * @param component interleave/deinterleave component to evaluate
 *
 * Raises a GNA exception when the input element width is not 4 bytes.
 */
void FP::ApplyTranspose(intel_dnn_component_t *component) {
    if (component->num_bytes_per_input != 4) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }

    int rowCount = component->num_rows_in;
    int colCount = component->num_columns_in;
    int srcStride = component->num_columns_in;
    int dstStride = component->num_columns_out;

    auto source = reinterpret_cast<float *>(component->ptr_inputs);
    auto target = reinterpret_cast<float *>(component->ptr_outputs);

    // target(c, r) = source(r, c)
    for (uint32_t r = 0; r < rowCount; r++) {
        for (uint32_t c = 0; c < colCount; c++) {
            target[c * dstStride + r] = source[r * srcStride + c];
        }
    }
}
|
||||
|
||||
/**
 * @brief Copies the top-left op.copy.num_copy_rows x op.copy.num_copy_columns block of floats
 *        from the component input to the component output. The input is read with a row stride
 *        of num_columns_in and the output is written with a row stride of num_columns_out.
 * @param component copy component to evaluate
 *
 * Raises a GNA exception when the input element width is not 4 bytes or when more rows
 * are requested than the input matrix has.
 */
void FP::ApplyCopy(intel_dnn_component_t *component) {
    if (4 != component->num_bytes_per_input) {
        THROW_GNA_EXCEPTION << "Bad data width: " << component->num_bytes_per_input;
    }

    int32_t m = component->op.copy.num_copy_rows;
    int32_t n = component->op.copy.num_copy_columns;
    int32_t lda = component->num_columns_in;
    int32_t ldb = component->num_columns_out;
    // This guard compares ROW counts, so the message must talk about rows
    // (it previously said "columns", which pointed at the wrong dimension).
    if (m > component->num_rows_in) {
        THROW_GNA_EXCEPTION << "Error: attempt to copy more rows than matrix has";
    }

    auto A = reinterpret_cast<float *>(component->ptr_inputs);
    auto B = reinterpret_cast<float *>(component->ptr_outputs);
    for (uint32_t row = 0; row < m; row++) {
        for (uint32_t col = 0; col < n; col++) {
            B[row * ldb + col] = A[row * lda + col];
        }
    }
}
|
||||
@@ -1046,9 +1046,33 @@ void PwlApply32(intel_dnn_component_t *component,
|
||||
}
|
||||
}
|
||||
break;
|
||||
case kActFakeQuantize: {
|
||||
auto input_low = transform->func_id.args.fakeQuantize.input_low;
|
||||
auto input_high = transform->func_id.args.fakeQuantize.input_high;
|
||||
auto output_low = transform->func_id.args.fakeQuantize.output_low;
|
||||
auto output_high = transform->func_id.args.fakeQuantize.output_high;
|
||||
auto levels = transform->func_id.args.fakeQuantize.levels;
|
||||
// TODO: this special modification for speeding up the computation gives a different result than the straightforward FQ formulae,
|
||||
// but it is what the reference ngraph FakeQuantize implementation uses, so we need to honor it for a while
|
||||
float scaleInput = (input_high - input_low) / (levels-1);
|
||||
float scaleOutputs = (output_high - output_low) / (levels-1);
|
||||
|
||||
for (uint32_t i = num_row_start; i <= num_row_end; i++) {
|
||||
for (uint32_t j = num_col_start; j <= num_col_end; j++) {
|
||||
auto x = ptr_in[i * num_columns + j];
|
||||
if (x < std::min(input_low, input_high)) {
|
||||
ptr_out[i * num_columns + j] = output_low;
|
||||
} else if (x > std::max(input_low, input_high)) {
|
||||
ptr_out[i * num_columns + j] = output_high;
|
||||
} else {
|
||||
ptr_out[i * num_columns + j] = nearbyint((x - input_low) / scaleInput) * scaleOutputs + output_low;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kActCustom:
|
||||
// break;
|
||||
default:fprintf(stderr, "Unknown piecewise linear function type!\n");
|
||||
throw -1;
|
||||
default:
|
||||
THROW_GNA_EXCEPTION << component->original_layer_name << ", Unknown piecewise linear function type: " << transform->func_id.type;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,9 +20,16 @@ const std::vector<std::vector<size_t>> inputShapes = {{1, 1, 1, 1}, {3, 10, 5, 6
|
||||
const std::vector<std::vector<size_t>> constShapes = {{1}};
|
||||
const std::vector<size_t> levels = {16, 255, 256};
|
||||
|
||||
const std::pair<std::string, std::map<std::string, std::string>> config = {};
|
||||
const std::vector<float> fqArgs = {};
|
||||
const std::vector<float> inputParams = {};
|
||||
|
||||
|
||||
const auto fqParams = ::testing::Combine(
|
||||
::testing::ValuesIn(levels),
|
||||
::testing::ValuesIn(constShapes)
|
||||
::testing::ValuesIn(constShapes),
|
||||
::testing::Values(fqArgs),
|
||||
::testing::Values(inputParams)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(FakeQuantize, FakeQuantizeLayerTest,
|
||||
@@ -30,7 +37,8 @@ INSTANTIATE_TEST_CASE_P(FakeQuantize, FakeQuantizeLayerTest,
|
||||
fqParams,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::ValuesIn(inputShapes),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU)),
|
||||
::testing::Values(CommonTestUtils::DEVICE_CPU),
|
||||
::testing::Values(config)),
|
||||
FakeQuantizeLayerTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
#include <gna/gna_config.hpp>
|
||||
|
||||
#include "single_layer_tests/fake_quantize.hpp"
|
||||
#include "common_test_utils/test_constants.hpp"
|
||||
|
||||
using namespace LayerTestsDefinitions;
|
||||
|
||||
namespace {
|
||||
|
||||
const std::vector<InferenceEngine::Precision> netPrecisions = {
|
||||
InferenceEngine::Precision::FP32,
|
||||
};
|
||||
|
||||
using ConfigType = std::map<std::string, std::string>;
|
||||
const ConfigType configFP32 = {
|
||||
{"GNA_DEVICE_MODE", "GNA_SW_FP32"},
|
||||
};
|
||||
const ConfigType configInt16 = {
|
||||
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
|
||||
{InferenceEngine::GNAConfigParams::KEY_GNA_PRECISION, "I16"},
|
||||
{"GNA_SCALE_FACTOR_0", "327.67"}
|
||||
};
|
||||
const ConfigType configInt8 = {
|
||||
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
|
||||
{InferenceEngine::GNAConfigParams::KEY_GNA_PRECISION, "I8"},
|
||||
{"GNA_SCALE_FACTOR_0", "327.67"}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief specific quantisation mode to be used internally
|
||||
*/
|
||||
const std::vector<std::pair<std::string, ConfigType>> gnaQuantModes = {
|
||||
{"sw_fp32", configFP32},
|
||||
// TODO: support FakeQuantize in integer mode
|
||||
// {"sw_exact_i16", configInt16},
|
||||
// {"sw_exact_i8", configInt8},
|
||||
};
|
||||
|
||||
// TODO: uncomment once fixed proper 4d import for GNA-plugin issue: 38806
|
||||
const std::vector<std::vector<size_t>> inputShapes = {{1, 1, 1, 1}, /*{3, 10, 5, 6}*/};
|
||||
const std::vector<std::vector<size_t>> constShapes = {{1}};
|
||||
const std::vector<size_t> levels = {16, 255, 256};
|
||||
|
||||
const std::vector<std::vector<float>> fqArgs = {{0, 10, 2, 5}, {}};
|
||||
const std::vector<std::vector<float>> inputParams = {{-10, 10, 0.1}, {}};
|
||||
|
||||
const auto fqParams = ::testing::Combine(
|
||||
::testing::ValuesIn(levels),
|
||||
::testing::ValuesIn(constShapes),
|
||||
::testing::ValuesIn(fqArgs),
|
||||
::testing::ValuesIn(inputParams)
|
||||
);
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(FakeQuantize, FakeQuantizeLayerTest,
|
||||
::testing::Combine(
|
||||
fqParams,
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::ValuesIn(inputShapes),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
::testing::ValuesIn(gnaQuantModes)),
|
||||
FakeQuantizeLayerTest::getTestCaseName);
|
||||
|
||||
} // namespace
|
||||
@@ -14,14 +14,18 @@
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
|
||||
typedef std::tuple<
|
||||
size_t, // levels
|
||||
std::vector<size_t> // const inputs shape
|
||||
size_t, // levels
|
||||
std::vector<size_t>, // const inputs shape
|
||||
std::vector<float>, // fake quantize inputLow, inputHigh, outputLow, outputHigh or empty for random
|
||||
std::vector<float> // input generator data: low, high, resolution
|
||||
> fqSpecificParams;
|
||||
typedef std::tuple<
|
||||
fqSpecificParams,
|
||||
InferenceEngine::Precision, // Net precision
|
||||
InferenceEngine::SizeVector, // Input shapes
|
||||
LayerTestsUtils::TargetDevice // Device name
|
||||
InferenceEngine::Precision, // Net precision
|
||||
InferenceEngine::SizeVector, // Input shapes
|
||||
LayerTestsUtils::TargetDevice, // Device name
|
||||
|
||||
std::pair<std::string, std::map<std::string, std::string>> // Additional backend configuration and alis name to it
|
||||
> fqLayerTestParamsSet;
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
@@ -30,9 +34,16 @@ class FakeQuantizeLayerTest : public testing::WithParamInterface<fqLayerTestPara
|
||||
virtual public LayerTestsUtils::LayerTestsCommon {
|
||||
public:
|
||||
static std::string getTestCaseName(testing::TestParamInfo<fqLayerTestParamsSet> obj);
|
||||
|
||||
InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo &info) const override;
|
||||
protected:
|
||||
void SetUp() override;
|
||||
void UpdateSeed();
|
||||
|
||||
protected:
|
||||
float inputDataMin = 0.0;
|
||||
float inputDataMax = 10.0;
|
||||
float inputDataResolution = 1.0;
|
||||
int32_t seed = 1;
|
||||
};
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
||||
|
||||
@@ -18,6 +18,17 @@
|
||||
|
||||
#include "single_layer_tests/fake_quantize.hpp"
|
||||
|
||||
// seed selected using current clock time
|
||||
#define USE_CLOCK_TIME 1
|
||||
// seed started from default value, and incremented every time using big number like 9999
|
||||
#define USE_INCREMENTAL_SEED 2
|
||||
|
||||
/**
|
||||
* redefine this seed to reproduce issue with given seed that can be read from gtest logs
|
||||
*/
|
||||
#define BASE_SEED USE_CLOCK_TIME
|
||||
#define NGRAPH_SEED USE_CLOCK_TIME
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
std::string FakeQuantizeLayerTest::getTestCaseName(testing::TestParamInfo<fqLayerTestParamsSet> obj) {
|
||||
@@ -25,10 +36,13 @@ std::string FakeQuantizeLayerTest::getTestCaseName(testing::TestParamInfo<fqLaye
|
||||
InferenceEngine::Precision netPrecision;
|
||||
InferenceEngine::SizeVector inputShapes;
|
||||
std::string targetDevice;
|
||||
std::tie(fqParams, netPrecision, inputShapes, targetDevice) = obj.param;
|
||||
std::pair<std::string, std::map<std::string, std::string>> config;
|
||||
std::tie(fqParams, netPrecision, inputShapes, targetDevice, config) = obj.param;
|
||||
size_t levels;
|
||||
std::vector<size_t> constShape;
|
||||
std::tie(levels, constShape) = fqParams;
|
||||
std::vector<float> fqDirectArgs;
|
||||
std::vector<float> inputArg;
|
||||
std::tie(levels, constShape, fqDirectArgs, inputArg) = fqParams;
|
||||
|
||||
std::ostringstream result;
|
||||
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
|
||||
@@ -36,29 +50,101 @@ std::string FakeQuantizeLayerTest::getTestCaseName(testing::TestParamInfo<fqLaye
|
||||
result << "LEVELS=" << levels << "_";
|
||||
result << "netPRC=" << netPrecision.name() << "_";
|
||||
result << "targetDevice=" << targetDevice;
|
||||
if (!config.first.empty()) {
|
||||
result << "_targetConfig=" << config.first;
|
||||
}
|
||||
if (!fqDirectArgs.empty()) {
|
||||
result << "_fqArgs=" << fqDirectArgs[0] << "_" << fqDirectArgs[1] << "_" << fqDirectArgs[2] << "_" << fqDirectArgs[3];
|
||||
}
|
||||
if (inputArg.size() == 3) {
|
||||
result << "_inputArg=" << inputArg[0] << "_" << inputArg[1] << "_" << inputArg[2];
|
||||
}
|
||||
return result.str();
|
||||
}
|
||||
|
||||
void FakeQuantizeLayerTest::SetUp() {
|
||||
fqSpecificParams fqParams;
|
||||
std::vector<size_t> inputShape;
|
||||
std::pair<std::string, std::map<std::string, std::string>> config;
|
||||
auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
|
||||
std::tie(fqParams, netPrecision, inputShape, targetDevice) = this->GetParam();
|
||||
std::tie(fqParams, netPrecision, inputShape, targetDevice, config) = this->GetParam();
|
||||
InferenceEngine::SizeVector kernel, stride, dilation;
|
||||
size_t levels;
|
||||
std::vector<size_t> constShape;
|
||||
std::tie(levels, constShape) = fqParams;
|
||||
std::vector<float> fqDirectArg;
|
||||
std::vector<float> inputArg;
|
||||
std::tie(levels, constShape, fqDirectArg, inputArg) = fqParams;
|
||||
if (inputArg.size() == 3) {
|
||||
inputDataMin = inputArg[0];
|
||||
inputDataMax = inputArg[1];
|
||||
inputDataResolution = inputArg[2];
|
||||
}
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
auto params = ngraph::builder::makeParams(ngPrc, {inputShape});
|
||||
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));
|
||||
|
||||
auto fq = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, levels, constShape));
|
||||
UpdateSeed();
|
||||
|
||||
std::shared_ptr<ngraph::Node> fakeQNode;
|
||||
if (fqDirectArg.empty()) {
|
||||
int32_t ngraphSeed = seed;
|
||||
if (NGRAPH_SEED != USE_CLOCK_TIME) {
|
||||
ngraphSeed = NGRAPH_SEED;
|
||||
}
|
||||
std::cout << "\033[0;32m" << "[ ] " << "\033[0;0m"
|
||||
<< "ngraphSeed = " << ngraphSeed << std::endl;
|
||||
fakeQNode = ngraph::builder::makeFakeQuantize(paramOuts[0], ngPrc, levels, constShape, ngraphSeed);
|
||||
} else {
|
||||
fakeQNode = ngraph::builder::makeFakeQuantize(
|
||||
paramOuts[0],
|
||||
ngPrc,
|
||||
levels,
|
||||
constShape,
|
||||
{fqDirectArg[0]},
|
||||
{fqDirectArg[1]},
|
||||
{fqDirectArg[2]},
|
||||
{fqDirectArg[3]});
|
||||
}
|
||||
|
||||
|
||||
auto fq = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(fakeQNode);
|
||||
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(fq)};
|
||||
function = std::make_shared<ngraph::Function>(results, params, "fakeQuantize");
|
||||
|
||||
configuration = config.second;
|
||||
}
|
||||
|
||||
// Creates the input blob for the given input. The value range, start value and resolution come from
// the inputData* members, and the current seed member is passed on so a run can be reproduced.
InferenceEngine::Blob::Ptr FakeQuantizeLayerTest::GenerateInput(const InferenceEngine::InputInfo &info) const {
    const auto valueRange = inputDataMax - inputDataMin;
    // createAndFillBlob receives the reciprocal of the resolution member as its resolution argument.
    const auto reciprocalResolution = 1 / inputDataResolution;
    return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), valueRange, inputDataMin, reciprocalResolution, seed);
}
|
||||
|
||||
void FakeQuantizeLayerTest::UpdateSeed() {
|
||||
if (BASE_SEED == USE_CLOCK_TIME) {
|
||||
seed = std::chrono::system_clock::now().time_since_epoch().count();
|
||||
} else if (BASE_SEED == USE_INCREMENTAL_SEED) {
|
||||
seed += 9999;
|
||||
} else {
|
||||
seed = BASE_SEED;
|
||||
}
|
||||
std::cout << "\033[0;32m" << "[ ] " << "\033[0;0m"
|
||||
<< "seed = " << seed << std::endl;
|
||||
}
|
||||
|
||||
// Runs the common flow once. Unless BASE_SEED was redefined to a fixed seed, it then repeats
// inference + validation with a refreshed seed (inputDataMax - inputDataMin) / inputDataResolution times.
TEST_P(FakeQuantizeLayerTest, CompareWithRefs) {
    Run();
    // Disabled tests must not issue the extra infer requests below.
    SKIP_IF_CURRENT_TEST_IS_DISABLED();

    const bool seedChangesPerIteration = (BASE_SEED == USE_CLOCK_TIME) ||
                                         (BASE_SEED == USE_INCREMENTAL_SEED);
    if (!seedChangesPerIteration) {
        // With a fixed seed every repetition would generate the same input.
        return;
    }

    const size_t nIterations = (inputDataMax - inputDataMin) / inputDataResolution;
    for (size_t iteration = 0; iteration < nIterations; ++iteration) {
        UpdateSeed();
        Infer();
        Validate();
    }
}
|
||||
} // namespace LayerTestsDefinitions
|
||||
|
||||
@@ -110,7 +110,7 @@ static void fill_data_bbox(float *data, size_t size, int height, int width, floa
|
||||
* - With k = 4 numbers resolution will 1/4 so outputs only .0 .25 .50 0.75 and etc.
|
||||
*/
|
||||
template<InferenceEngine::Precision::ePrecision PRC>
|
||||
void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, const uint32_t range = 10, int32_t start_from = 0, const int32_t k = 1) {
|
||||
void inline fill_data_random(InferenceEngine::Blob::Ptr &blob, const uint32_t range = 10, int32_t start_from = 0, const int32_t k = 1, const int seed = 1) {
|
||||
using dataType = typename InferenceEngine::PrecisionTrait<PRC>::value_type;
|
||||
testing::internal::Random random(1);
|
||||
random.Generate(range);
|
||||
@@ -144,8 +144,7 @@ void inline fill_data_consistently(InferenceEngine::Blob::Ptr &blob, const uint3
|
||||
}
|
||||
|
||||
template<InferenceEngine::Precision::ePrecision PRC>
|
||||
void inline fill_data_random_float(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k,
|
||||
const int seed = 1) {
|
||||
void inline fill_data_random_float(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k, const int seed = 1) {
|
||||
using dataType = typename InferenceEngine::PrecisionTrait<PRC>::value_type;
|
||||
std::default_random_engine random(seed);
|
||||
// 1/k is the resolution of the floating point numbers
|
||||
@@ -199,13 +198,20 @@ void inline fill_data_float_array(InferenceEngine::Blob::Ptr &blob, const float
|
||||
}
|
||||
|
||||
template<>
|
||||
void inline fill_data_random<InferenceEngine::Precision::FP32>(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k) {
|
||||
fill_data_random_float<InferenceEngine::Precision::FP32>(blob, range, start_from, k);
|
||||
void inline fill_data_random<InferenceEngine::Precision::FP32>(InferenceEngine::Blob::Ptr &blob,
|
||||
const uint32_t range,
|
||||
int32_t start_from,
|
||||
const int32_t k,
|
||||
const int seed) {
|
||||
fill_data_random_float<InferenceEngine::Precision::FP32>(blob, range, start_from, k, seed);
|
||||
}
|
||||
|
||||
template<>
|
||||
void inline fill_data_random<InferenceEngine::Precision::FP16>(InferenceEngine::Blob::Ptr &blob, const uint32_t range, int32_t start_from, const int32_t k) {
|
||||
fill_data_random_float<InferenceEngine::Precision::FP16>(blob, range, start_from, k);
|
||||
void inline fill_data_random<InferenceEngine::Precision::FP16>(InferenceEngine::Blob::Ptr &blob,
|
||||
const uint32_t range,
|
||||
int32_t start_from,
|
||||
const int32_t k, const int seed) {
|
||||
fill_data_random_float<InferenceEngine::Precision::FP16>(blob, range, start_from, k, seed);
|
||||
}
|
||||
|
||||
} // namespace CommonTestUtils
|
||||
|
||||
@@ -455,11 +455,12 @@ InferenceEngine::Blob::Ptr inline createAndFillBlobWithFloatArray(const Inferenc
|
||||
InferenceEngine::Blob::Ptr inline createAndFillBlob(const InferenceEngine::TensorDesc &td,
|
||||
const uint32_t range = 10,
|
||||
const int32_t start_from = 0,
|
||||
const int32_t resolution = 1) {
|
||||
const int32_t resolution = 1,
|
||||
const int seed = 1) {
|
||||
InferenceEngine::Blob::Ptr blob = make_blob_with_precision(td);
|
||||
blob->allocate();
|
||||
switch (td.getPrecision()) {
|
||||
#define CASE(X) case X: CommonTestUtils::fill_data_random<X>(blob, range, start_from, resolution); break;
|
||||
#define CASE(X) case X: CommonTestUtils::fill_data_random<X>(blob, range, start_from, resolution, seed); break;
|
||||
CASE(InferenceEngine::Precision::FP32)
|
||||
CASE(InferenceEngine::Precision::FP16)
|
||||
CASE(InferenceEngine::Precision::U8)
|
||||
|
||||
@@ -26,14 +26,14 @@ makeParams(const element::Type &type, const std::vector<std::pair<std::string, s
|
||||
template<typename T>
|
||||
std::shared_ptr<Node> makeConstant(const element::Type &type, const std::vector<size_t> &shape,
|
||||
const std::vector<T> &data, bool random = false,
|
||||
uint32_t upTo = 10, uint32_t startFrom = 1) {
|
||||
uint32_t upTo = 10, uint32_t startFrom = 1, const int seed = 1) {
|
||||
std::shared_ptr<ngraph::Node> weightsNode;
|
||||
|
||||
#define makeNode(TYPE) \
|
||||
case TYPE: \
|
||||
weightsNode = std::make_shared<ngraph::opset1::Constant>( \
|
||||
type, shape, \
|
||||
random ? NGraphFunctions::Utils::generateVector<TYPE>(ngraph::shape_size(shape), upTo, startFrom) : \
|
||||
random ? NGraphFunctions::Utils::generateVector<TYPE>(ngraph::shape_size(shape), upTo, startFrom, seed) : \
|
||||
NGraphFunctions::Utils::castVector<T, ngraph::helpers::nGraphTypesTrait<TYPE>::value_type >(data)); \
|
||||
break;
|
||||
switch (type) {
|
||||
@@ -274,7 +274,8 @@ std::shared_ptr<Node> makeFakeQuantize(const ngraph::Output<Node> &in,
|
||||
std::shared_ptr<Node> makeFakeQuantize(const ngraph::Output<Node> &in,
|
||||
const element::Type &type,
|
||||
std::size_t levels,
|
||||
std::vector<size_t> constShapes);
|
||||
std::vector<size_t> constShapes,
|
||||
const int32_t seed = 1);
|
||||
|
||||
std::shared_ptr<ngraph::Node> makeCumSum(const ngraph::Output<Node> &in,
|
||||
const ngraph::Output<Node> &axis,
|
||||
|
||||
@@ -17,11 +17,14 @@ namespace Utils {
|
||||
|
||||
template<ngraph::element::Type_t dType>
|
||||
std::vector<typename ngraph::helpers::nGraphTypesTrait<dType>::value_type> inline
|
||||
generateVector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1) {
|
||||
generateVector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1, int32_t seed = 1) {
|
||||
std::vector<typename ngraph::helpers::nGraphTypesTrait<dType>::value_type> res;
|
||||
|
||||
std::mt19937 gen(
|
||||
static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
|
||||
if (seed == 1) {
|
||||
seed = static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count());
|
||||
}
|
||||
|
||||
std::mt19937 gen(seed);
|
||||
// choose values within this range to avoid type overflow (e.g. in case of I8 precision)
|
||||
std::uniform_int_distribution<unsigned long> dist(startFrom, upTo);
|
||||
|
||||
@@ -32,11 +35,14 @@ generateVector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1) {
|
||||
return res;
|
||||
}
|
||||
|
||||
std::vector<ngraph::float16> inline generateF16Vector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1) {
|
||||
std::vector<ngraph::float16> inline generateF16Vector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1, int32_t seed = 1) {
|
||||
std::vector<ngraph::float16> res;
|
||||
|
||||
std::mt19937 gen(
|
||||
static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
|
||||
if (seed == 1) {
|
||||
seed = static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count());
|
||||
}
|
||||
|
||||
std::mt19937 gen(seed);
|
||||
// choose values within this range to avoid type overflow (e.g. in case of I8 precision)
|
||||
std::uniform_int_distribution<unsigned long> dist(startFrom, upTo);
|
||||
|
||||
@@ -46,11 +52,13 @@ std::vector<ngraph::float16> inline generateF16Vector(size_t vec_len, uint32_t u
|
||||
return res;
|
||||
}
|
||||
|
||||
std::vector<ngraph::bfloat16> inline generateBF16Vector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1) {
|
||||
std::vector<ngraph::bfloat16> inline generateBF16Vector(size_t vec_len, uint32_t upTo = 10, uint32_t startFrom = 1, int32_t seed = 1) {
|
||||
std::vector<ngraph::bfloat16> res;
|
||||
|
||||
std::mt19937 gen(
|
||||
static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
|
||||
if (seed == 1) {
|
||||
seed = static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count());
|
||||
}
|
||||
std::mt19937 gen(seed);
|
||||
// choose values within this range to avoid type overflow (e.g. in case of I8 precision)
|
||||
std::uniform_int_distribution<unsigned long> dist(startFrom, upTo);
|
||||
|
||||
|
||||
@@ -32,18 +32,19 @@ std::shared_ptr<Node> makeFakeQuantize(const ngraph::Output<Node> &in,
|
||||
std::shared_ptr<ngraph::Node> makeFakeQuantize(const ngraph::Output<ngraph::Node> &in,
|
||||
const ngraph::element::Type &type,
|
||||
std::size_t levels,
|
||||
std::vector<size_t> constShapes) {
|
||||
std::vector<size_t> constShapes,
|
||||
const int32_t seed) {
|
||||
size_t constDataSize = ngraph::shape_size(constShapes);
|
||||
std::vector<float> inputLowData, inputHighData, outputLowData, outputHighData;
|
||||
inputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
|
||||
inputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
|
||||
if (levels != 2) {
|
||||
inputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
|
||||
outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
|
||||
outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
|
||||
inputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
|
||||
outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
|
||||
outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
|
||||
} else {
|
||||
inputHighData = inputLowData;
|
||||
outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
|
||||
outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize);
|
||||
outputLowData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
|
||||
outputHighData = NGraphFunctions::Utils::generateVector<ngraph::element::Type_t::f32>(constDataSize, 10, 1, seed);
|
||||
|
||||
for (int i = 0; i < constDataSize; i++) {
|
||||
if (outputLowData[i] > outputHighData[i]) {
|
||||
@@ -70,10 +71,10 @@ std::shared_ptr<ngraph::Node> makeFakeQuantize(const ngraph::Output<ngraph::Node
|
||||
outputHighData[i] += 1;
|
||||
}
|
||||
|
||||
auto inputLowNode = ngraph::builder::makeConstant(type, constShapes, inputLowData, inputLowData.empty());
|
||||
auto inputHighNode = ngraph::builder::makeConstant(type, constShapes, inputHighData, inputHighData.empty());
|
||||
auto outputLowNode = ngraph::builder::makeConstant(type, constShapes, outputLowData, outputLowData.empty());
|
||||
auto outputHighNode = ngraph::builder::makeConstant(type, constShapes, outputHighData, outputHighData.empty());
|
||||
auto inputLowNode = ngraph::builder::makeConstant(type, constShapes, inputLowData, inputLowData.empty(), seed);
|
||||
auto inputHighNode = ngraph::builder::makeConstant(type, constShapes, inputHighData, inputHighData.empty(), seed);
|
||||
auto outputLowNode = ngraph::builder::makeConstant(type, constShapes, outputLowData, outputLowData.empty(), seed);
|
||||
auto outputHighNode = ngraph::builder::makeConstant(type, constShapes, outputHighData, outputHighData.empty(), seed);
|
||||
|
||||
auto fq = std::make_shared<ngraph::opset1::FakeQuantize>(in, inputLowNode, inputHighNode, outputLowNode, outputHighNode, levels);
|
||||
|
||||
|
||||
@@ -224,7 +224,8 @@ void GNAPropagateMatcher :: match() {
|
||||
ASSERT_NO_THROW_IE_EXCEPTION(network = CNNNetwork(_env.ngraph_model));
|
||||
ASSERT_NO_FATAL_FAILURE(loadCNNNetwork(network));
|
||||
#ifdef GNA_DEBUG
|
||||
network.serialize("CNNNetworkFromNgraphModel.xml", "CNNNetworkFromNgraphModel.bin");
|
||||
// TODO: the call below currently crashes in activation tests on the addOutput call
|
||||
// network.serialize("CNNNetworkFromNgraphModel.xml", "CNNNetworkFromNgraphModel.bin");
|
||||
#endif
|
||||
}
|
||||
else if (!_env.importedModelFileName.empty()) {
|
||||
|
||||
Reference in New Issue
Block a user