Gna namespace (#14877)

* [GNA] Cleanup intel_dnn

* [GNA] Replace GNAPluginNS

* [GNA] Rename headers
Szymon Irzabek 2023-01-03 16:06:23 +01:00 committed by GitHub
parent f2d93f4a79
commit c683a72400
178 changed files with 1928 additions and 1702 deletions


@ -164,7 +164,7 @@ inline std::istream& operator>>(std::istream& is, HWGeneration& hw_generation) {
static constexpr Property<ExecutionMode> execution_mode{"GNA_DEVICE_MODE"};
/**
* @brief The option to override the GNA HW execution target. May be one of GNA_2_0, GNA_3_0.
* @brief The option to override the GNA HW execution target. May be one of GNA_2_0, GNA_3_0, GNA_3_5.
* By default (in case of no value set) the behavior depends on GNA HW availability:
* If GNA HW is present, use the option corresponding to this HW.
* If HW is not present, use the option corresponding to the latest fully supported GNA HW generation.
@ -175,7 +175,7 @@ static constexpr Property<ExecutionMode> execution_mode{"GNA_DEVICE_MODE"};
static constexpr Property<HWGeneration> execution_target{"GNA_HW_EXECUTION_TARGET"};
/**
* @brief The option to override the GNA HW compile target. May be one of GNA_2_0, GNA_3_0.
* @brief The option to override the GNA HW compile target. May be one of GNA_2_0, GNA_3_0, GNA_3_5.
* By default the same as execution_target.
* @ingroup ov_runtime_gna_prop_cpp_api
*/
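For context, these properties (together with execution_mode above) belong to the public ov::intel_gna C++ API and can be passed when compiling a model for the "GNA" device. A minimal, hypothetical usage sketch, assuming the property helpers from openvino/runtime/intel_gna/properties.hpp and a placeholder model path (neither is taken from this commit):

#include <openvino/openvino.hpp>
#include <openvino/runtime/intel_gna/properties.hpp>

int main() {
    ov::Core core;
    // Placeholder IR path, for illustration only.
    auto model = core.read_model("model.xml");
    // Override the HW execution and compile targets documented above.
    auto compiled = core.compile_model(model,
                                       "GNA",
                                       ov::intel_gna::execution_target(ov::intel_gna::HWGeneration::GNA_3_0),
                                       ov::intel_gna::compile_target(ov::intel_gna::HWGeneration::GNA_3_5));
    (void)compiled;
    return 0;
}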


@ -22,11 +22,10 @@
#include "memory/gna_memory_util.hpp"
#include "log/log.hpp"
#include "log/dump.hpp"
#include "backend/dnn.hpp"
#include "backend/am_intel_dnn.hpp"
#include "backend/dnn_types.h"
#include "backend/dnn_types.hpp"
#include "gna/gna_config.hpp"
#include "backend/gna_types.h"
#include "backend/gna_types.hpp"
#include "backend/gna_limitations.hpp"
#include "layers/gna_convolution_layer.hpp"
#include "memory/gna_memory.hpp"
@ -46,20 +45,18 @@
*/
#define LIGHT_DUMP
using namespace GNAPluginNS::backend;
using namespace ov::intel_gna;
using gna_convolution_layer::outputFromConv;
using gna_convolution_layer::outputFromPooling;
using GNAPluginNS::GNAConvolutionLayer::outputFromConv;
using GNAPluginNS::GNAConvolutionLayer::outputFromPooling;
using GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy;
namespace ov {
namespace intel_gna {
namespace backend {
using GNAPluginNS::memory::GNAMemoryInterface;
void GNAPluginNS::backend::AMIntelDNN::BeginNewWrite(uint32_t index) {
void backend::AMIntelDNN::BeginNewWrite(uint32_t index) {
dump_write_index = index;
}
void GNAPluginNS::backend::AMIntelDNN::Init(GNAMemoryInterface* memoryInterface,
void backend::AMIntelDNN::Init(memory::GNAMemoryInterface* memoryInterface,
intel_dnn_number_type_t compute_precision,
float scale_factor) {
memory = memoryInterface;
@ -68,25 +65,13 @@ void GNAPluginNS::backend::AMIntelDNN::Init(GNAMemoryInterface* memoryInterface,
ptr_active_outputs_ = nullptr;
num_active_outputs_ = 0;
num_left_context = 0;
num_right_context = 0;
softmax_type = kSoftmaxNone;
ptr_sumgroup_sizes = nullptr;
num_sumgroup_sizes = 0;
ptr_priors = nullptr;
}
GNAPluginNS::backend::AMIntelDNN::~AMIntelDNN() {
backend::AMIntelDNN::~AMIntelDNN() {
component.clear();
if (ptr_sumgroup_sizes != NULL) {
_mm_free(ptr_sumgroup_sizes);
}
if (ptr_priors != NULL) {
_mm_free(ptr_priors);
}
}
void GNAPluginNS::backend::AMIntelDNN::InitActiveList(uint32_t *ptr_active_list) {
void backend::AMIntelDNN::InitActiveList(uint32_t *ptr_active_list) {
ptr_active_outputs_ = ptr_active_list;
if (ptr_active_list == nullptr) {
if (component[component.size() - 1].orientation_out == kDnnInterleavedOrientation) {
@ -100,7 +85,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitActiveList(uint32_t *ptr_active_list)
}
void GNAPluginNS::backend::AMIntelDNN::InitAffineComponentPrivate(intel_dnn_component_t &comp,
void backend::AMIntelDNN::InitAffineComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in,
uint32_t num_columns,
uint32_t num_rows_out,
@ -123,7 +108,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitAffineComponentPrivate(intel_dnn_comp
comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = isDiag ? kDnnDiagonalOp : kDnnAffineOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnInterleavedOrientation;
comp.orientation_out = kDnnInterleavedOrientation;
comp.op.affine.num_bytes_per_weight = num_bytes_per_weight;
@ -145,23 +129,23 @@ void GNAPluginNS::backend::AMIntelDNN::InitAffineComponentPrivate(intel_dnn_comp
}
void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_columns_in,
uint32_t num_columns_out,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
uint32_t num_bytes_per_weight,
uint32_t num_bytes_per_bias,
uint32_t num_filters,
uint32_t num_filter_coefficients,
const uint32_t convStride,
float weight_scale_factor,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
void *&ptr_filters,
void *&ptr_biases,
bool postInitMem) {
void backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_columns_in,
uint32_t num_columns_out,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
uint32_t num_bytes_per_weight,
uint32_t num_bytes_per_bias,
uint32_t num_filters,
uint32_t num_filter_coefficients,
const uint32_t convStride,
float weight_scale_factor,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
void *&ptr_filters,
void *&ptr_biases,
bool postInitMem) {
comp.num_rows_in = 1;
comp.num_columns_in = num_columns_in;
comp.num_rows_out = 1;
@ -169,7 +153,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnConvolutional1dOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnNonInterleavedOrientation;
comp.orientation_out = kDnnNonInterleavedOrientation;
comp.ptr_inputs = ptr_inputs;
@ -199,9 +182,9 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << num_columns_in <<
") is not a multiply by 8";
}
if (num_filters < GNALimitations::convMinFiltersNum ||
num_filters > GNALimitations::convMaxFiltersNum ||
num_filters % GNALimitations::convFiltersNumDivider != 0) {
if (num_filters < limitations::convMinFiltersNum ||
num_filters > limitations::convMaxFiltersNum ||
num_filters % limitations::convFiltersNumDivider != 0) {
THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << num_filters;
}
auto max_number_of_out_elements = outputFromConv(num_columns_in, num_filter_coefficients, convStride);
@ -210,26 +193,25 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
}
}
void GNAPluginNS::backend::AMIntelDNN::InitConvolutional2DComponentPrivate(intel_dnn_component_t& comp,
OvGnaTensor inputTensor,
OvGnaTensor outputTensor,
OvGnaTensor filterTensor,
OvGnaTensor biasTensor,
std::array<uint32_t, 2> convStride,
std::array<uint32_t, 2> zeroPadding,
float weight_scale_factor,
float output_scale_factor,
void*& ptr_inputs,
void*& ptr_outputs,
void*& ptr_filters,
void*& ptr_biases) {
void backend::AMIntelDNN::InitConvolutional2DComponentPrivate(intel_dnn_component_t& comp,
OvGnaTensor inputTensor,
OvGnaTensor outputTensor,
OvGnaTensor filterTensor,
OvGnaTensor biasTensor,
std::array<uint32_t, 2> convStride,
std::array<uint32_t, 2> zeroPadding,
float weight_scale_factor,
float output_scale_factor,
void*& ptr_inputs,
void*& ptr_outputs,
void*& ptr_filters,
void*& ptr_biases) {
comp.tensors.clear();
comp.tensors.push_back(inputTensor);
comp.tensors.push_back(outputTensor);
comp.tensors.push_back(filterTensor);
comp.tensors.push_back(biasTensor);
comp.operation = kDnnConvolutional2dOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnNonInterleavedOrientation;
comp.orientation_out = kDnnNonInterleavedOrientation;
comp.ptr_inputs = ptr_inputs;
@ -246,7 +228,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional2DComponentPrivate(intel
ptr_outputs = &comp.ptr_outputs;
}
bool GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(const Gna2Operation& op) {
bool backend::AMIntelDNN::isOperationCnnLegacySpecific(const Gna2Operation& op) {
// GNA compile target GNA_TARGET_3_0 does not support pooling window < pooling stride
return op.Type == Gna2OperationTypeConvolution &&
op.NumberOfParameters > std::max(PoolStrideParamIdx, PoolWinParamIdx) &&
@ -256,7 +238,7 @@ bool GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(const Gna2Op
static_cast<Gna2Shape*>(op.Parameters[PoolStrideParamIdx])->Dimensions[0] > static_cast<Gna2Shape*>(op.Parameters[PoolWinParamIdx])->Dimensions[0];
}
void GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(Gna2Model& gnaModel, bool useLegacyFormula) {
void backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(Gna2Model& gnaModel, bool useLegacyFormula) {
IE_ASSERT(gnaModel.Operations != nullptr || gnaModel.NumberOfOperations == 0);
for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) {
auto& gnaOp = gnaModel.Operations[i];
@ -277,10 +259,10 @@ void GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(Gna
const auto fltStride = fltStrideShape.Dimensions[0];
const auto inVecCnt = inputShape.Dimensions[1];
const auto nFltSize = gnaOp.Operands[FilterOpIdx]->Shape.Dimensions[1];
const auto outFromConv = GNAPluginNS::GNAConvolutionLayer::outputFromConv(inVecCnt, nFltSize, fltStride);
const auto outFromConv = gna_convolution_layer::outputFromConv(inVecCnt, nFltSize, fltStride);
const auto& poolWindow = *static_cast<Gna2Shape*>(gnaOp.Parameters[PoolWinParamIdx]);
const auto& poolStride = *static_cast<Gna2Shape*>(gnaOp.Parameters[PoolStrideParamIdx]);
const auto numberOfOutputs = GNAPluginNS::GNAConvolutionLayer::outputFromPooling(
const auto numberOfOutputs = gna_convolution_layer::outputFromPooling(
outFromConv, poolWindow.Dimensions[0], poolStride.Dimensions[0],
useLegacyFormula || isOperationCnnLegacySpecific(gnaOp));
auto& outputTensor = *gnaOp.Operands[OutOpIdx];
@ -289,21 +271,20 @@ void GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(Gna
}
}
void GNAPluginNS::backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_component_t &comp,
std::array<uint32_t, 3> inCHW,
std::array<uint32_t, 3> outCHW,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
std::array<uint32_t, 2> poolingWindowXY,
std::array<uint32_t, 2> poolingStrideXY,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem) {
void backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_component_t &comp,
std::array<uint32_t, 3> inCHW,
std::array<uint32_t, 3> outCHW,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
std::array<uint32_t, 2> poolingWindowXY,
std::array<uint32_t, 2> poolingStrideXY,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem) {
comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnMaxPoolOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnNonInterleavedOrientation;
comp.orientation_out = kDnnNonInterleavedOrientation;
comp.op.maxpool.inCHW = inCHW;
@ -321,20 +302,20 @@ void GNAPluginNS::backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_com
}
}
void GNAPluginNS::backend::AMIntelDNN::InitCopyComponentPrivate(intel_dnn_component_t &comp,
intel_dnn_orientation_t orientation,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_rows_out,
uint32_t num_columns_out,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
uint32_t num_copy_rows,
uint32_t num_copy_columns,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem) {
void backend::AMIntelDNN::InitCopyComponentPrivate(intel_dnn_component_t &comp,
intel_dnn_orientation_t orientation,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_rows_out,
uint32_t num_columns_out,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
uint32_t num_copy_rows,
uint32_t num_copy_columns,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem) {
comp.num_rows_in = num_rows_in;
comp.num_columns_in = num_columns_in;
comp.num_rows_out = num_rows_out;
@ -342,7 +323,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitCopyComponentPrivate(intel_dnn_compon
comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnCopyOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = orientation;
comp.orientation_out = orientation;
comp.ptr_inputs = ptr_inputs;
@ -361,20 +341,20 @@ void GNAPluginNS::backend::AMIntelDNN::InitCopyComponentPrivate(intel_dnn_compon
}
}
void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel_dnn_component_t &comp,
const DnnActivation& function_id,
intel_dnn_orientation_t orientation,
uint32_t num_rows,
uint32_t num_columns,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
uint32_t num_segments,
float output_scale_factor,
float input_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
gna_pwl_segment_t *ptr_segments,
bool postInitMem) {
void backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel_dnn_component_t &comp,
const DnnActivation& function_id,
intel_dnn_orientation_t orientation,
uint32_t num_rows,
uint32_t num_columns,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
uint32_t num_segments,
float output_scale_factor,
float input_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
gna_pwl_segment_t *ptr_segments,
bool postInitMem) {
comp.num_rows_in = num_rows;
comp.num_columns_in = num_columns;
comp.num_rows_out = num_rows;
@ -382,7 +362,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel
comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnPiecewiselinearOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = orientation;
comp.orientation_out = orientation;
comp.op.pwl.func_id = function_id;
@ -404,15 +383,15 @@ void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel
}
}
void GNAPluginNS::backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem) {
void backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem) {
comp.num_rows_in = num_rows_in;
comp.num_columns_in = num_columns_in;
comp.num_rows_out = num_columns_in;
@ -420,7 +399,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_
comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnInterleaveOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnNonInterleavedOrientation;
comp.orientation_out = kDnnInterleavedOrientation;
comp.output_scale_factor = output_scale_factor;
@ -434,15 +412,15 @@ void GNAPluginNS::backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_
}
}
void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem) {
void backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in,
uint32_t num_columns_in,
uint32_t num_bytes_per_input,
uint32_t num_bytes_per_output,
float output_scale_factor,
void *&ptr_inputs,
void *&ptr_outputs,
bool postInitMem) {
comp.num_rows_in = num_rows_in;
comp.num_columns_in = num_columns_in;
comp.num_rows_out = num_columns_in;
@ -450,7 +428,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dn
comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnDeinterleaveOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnInterleavedOrientation;
comp.orientation_out = kDnnInterleavedOrientation;
comp.output_scale_factor = output_scale_factor;
@ -464,7 +441,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dn
}
}
float GNAPluginNS::backend::AMIntelDNN::OutputScaleFactor(intel_dnn_component_t &comp) {
float backend::AMIntelDNN::OutputScaleFactor(intel_dnn_component_t &comp) {
return comp.output_scale_factor;
}
@ -476,7 +453,7 @@ struct InputEndPoint {
InputEndPoint(int nidx, size_t sz, size_t esize) : idx(nidx), size(sz), num_bytes_per_output(esize) {}
};
void GNAPluginNS::backend::AMIntelDNN::WriteGraphWizModel(const char *filename) {
void backend::AMIntelDNN::WriteGraphWizModel(const char *filename) {
auto & components = component;
#define IS_AFFINE(k)\
@ -743,12 +720,12 @@ void PrintTensors(std::ofstream& out, T tensors) {
}
}
void GNAPluginNS::backend::AMIntelDNN::PrintOffset(std::ofstream& out, const std::string& type, void* ptr) {
void backend::AMIntelDNN::PrintOffset(std::ofstream& out, const std::string& type, void* ptr) {
const auto queue = memory->getQueue(ptr);
std::string typeOfRegion = "UNKNOWN_QUEUE";
auto offset = std::numeric_limits<uint32_t>::max();
if (queue != nullptr) {
typeOfRegion = GNAPluginNS::memory::rRegionToStr(queue->regionType());
typeOfRegion = memory::rRegionToStr(queue->regionType());
offset = queue->getOffset(ptr).second;
}
out << "<memory_region_type> " << typeOfRegion << "\n";
@ -756,9 +733,9 @@ void GNAPluginNS::backend::AMIntelDNN::PrintOffset(std::ofstream& out, const std
<< "0x" << std::setfill('0') << std::setw(8) << std::hex << offset << "\n";
}
void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision) {
void backend::AMIntelDNN::WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision) {
if ((compute_precision_ == kDnnFloat) && (logging_precision == kDnnInt)) {
fprintf(stderr, "Error trying to write floating point DNN as integer in GNAPluginNS::backend::AMIntelDNN::WriteDnnText().\n");
fprintf(stderr, "Error trying to write floating point DNN as integer in backend::AMIntelDNN::WriteDnnText().\n");
fprintf(stderr, " Please convert to integer first.\n");
throw -1;
}
@ -777,8 +754,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
out_file << "<intel_dnn_file>\n";
out_file << "<number_type> " << intel_dnn_number_type_name[logging_precision] << "\n";
out_file << "<softmax_type> " << intel_dnn_softmax_name[softmax_type] << "\n";
const auto& regionsMap = GNAPluginNS::memory::GetAllRegionsToStrMap();
const auto& regionsMap = memory::GetAllRegionsToStrMap();
for (const auto& regionPair : regionsMap) {
out_file << "<memory_region_type> " << std::dec << regionPair.second << "\n";
out_file << "<num_memory_region_bytes> " << std::dec << memory->getRegionBytes(regionPair.first) << "\n";
@ -818,7 +794,6 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
layer++;
}
out_file << "<component_operation> " << intel_dnn_operation_name[component[i].operation] << "\n";
out_file << "<macro_operation> " << intel_dnn_macro_operation_name[component[i].macro_operation] << "\n";
out_file << "<num_rows_in> " << std::dec << num_rows_in << "\n";
out_file << "<num_columns_in> " << std::dec << num_columns_in << "\n";
out_file << "<num_rows_out> " << std::dec << num_rows_out << "\n";
@ -1383,7 +1358,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
}
}
uint32_t GNAPluginNS::backend::AMIntelDNN::CountLayers() {
uint32_t backend::AMIntelDNN::CountLayers() {
uint32_t n = 0;
for (auto && c : component) {
if (c.operation == kDnnAffineOp
@ -1401,7 +1376,7 @@ uint32_t GNAPluginNS::backend::AMIntelDNN::CountLayers() {
return n;
}
void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const std::string& gnaCompileTarget) {
void backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const std::string& gnaCompileTarget) {
Gna2Operation * gnaOperation;
if (gnaModel == nullptr)
THROW_GNA_EXCEPTION << "Invalid input parameter";
@ -1409,12 +1384,12 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const
THROW_GNA_EXCEPTION << "InitGNAStruct can't work on preallocated layers array";
if (component.empty())
THROW_GNA_EXCEPTION << "empty model in GNAPluginNS::backend::AMIntelDNN::InitGNAStruct()";
THROW_GNA_EXCEPTION << "empty model in backend::AMIntelDNN::InitGNAStruct()";
gnaModel->NumberOfOperations = CountLayers();
gnaModel->Operations = reinterpret_cast<Gna2Operation*>(gnaUserAllocator(gnaModel->NumberOfOperations * sizeof(Gna2Operation)));
if (gnaModel->Operations == nullptr)
THROW_GNA_EXCEPTION << "out of memory in GNAPluginNS::backend::AMIntelDNN::InitGNAStruct()";
THROW_GNA_EXCEPTION << "out of memory in backend::AMIntelDNN::InitGNAStruct()";
memset(gnaModel->Operations, 0, gnaModel->NumberOfOperations * sizeof(Gna2Operation));
gnaOperation = gnaModel->Operations;
for (int i = 0; i < component.size(); i++) {
@ -1666,7 +1641,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const
gnaModel->NumberOfOperations = static_cast<uint32_t>(std::distance(gnaModel->Operations, gnaOperation));
}
void GNAPluginNS::backend::AMIntelDNN::DestroyGNAStruct(Gna2Model *gnaModel) {
void backend::AMIntelDNN::DestroyGNAStruct(Gna2Model *gnaModel) {
if (gnaModel->Operations != nullptr) {
for (uint32_t i = 0; i < gnaModel->NumberOfOperations; i++) {
switch (gnaModel->Operations[i].Type) {
@ -1686,7 +1661,7 @@ void GNAPluginNS::backend::AMIntelDNN::DestroyGNAStruct(Gna2Model *gnaModel) {
gnaModel->NumberOfOperations = 0;
}
void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputTextGNA(const Gna2Model & model) {
void backend::AMIntelDNN::WriteInputAndOutputTextGNA(const Gna2Model & model) {
#ifdef LIGHT_DUMP
dump::WriteInputAndOutputTextGNAImpl(
model,
@ -1695,7 +1670,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputTextGNA(const Gna2Mode
#endif
}
void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputText() {
void backend::AMIntelDNN::WriteInputAndOutputText() {
#ifdef LIGHT_DUMP
for (uint32_t i = 0; i < num_components(); i++) {
std::stringstream out_file_name;
@ -1791,11 +1766,11 @@ void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputText() {
#endif
}
uint32_t GNAPluginNS::backend::AMIntelDNN::num_components() {
uint32_t backend::AMIntelDNN::num_components() {
return static_cast<uint32_t>(component.size());
}
uint32_t GNAPluginNS::backend::AMIntelDNN::num_gna_layers() {
uint32_t backend::AMIntelDNN::num_gna_layers() {
uint32_t num_layers = 0;
std::set<intel_dnn_operation_t> gna_layers({ kDnnAffineOp,
kDnnDiagonalOp,
@ -1812,27 +1787,27 @@ uint32_t GNAPluginNS::backend::AMIntelDNN::num_gna_layers() {
return num_layers;
}
uint32_t GNAPluginNS::backend::AMIntelDNN::num_group_in() {
uint32_t backend::AMIntelDNN::num_group_in() {
return ((!component.empty()) ? ((component[0].orientation_in == kDnnInterleavedOrientation)
? component[0].num_columns_in : component[0].num_rows_in) : 0);
}
uint32_t GNAPluginNS::backend::AMIntelDNN::num_group_out() {
uint32_t backend::AMIntelDNN::num_group_out() {
return ((!component.empty()) ? ((component[component.size() - 1].orientation_out == kDnnInterleavedOrientation)
? component[component.size() - 1].num_columns_out : component[component.size() -
1].num_rows_out) : 0);
}
uint32_t GNAPluginNS::backend::AMIntelDNN::num_inputs() {
uint32_t backend::AMIntelDNN::num_inputs() {
return component.empty() ? 0 : component[0].num_rows_in;
}
uint32_t GNAPluginNS::backend::AMIntelDNN::num_outputs() {
uint32_t backend::AMIntelDNN::num_outputs() {
return (component[component.size() - 1].orientation_out == kDnnInterleavedOrientation) ? component[
component.size() - 1].num_rows_out : component[component.size() - 1].num_columns_out;
}
std::string GNAPluginNS::backend::AMIntelDNN::getDumpFilePrefix(const std::string& folder) {
std::string backend::AMIntelDNN::getDumpFilePrefix(const std::string& folder) {
const char pathSeparator =
#ifdef _WIN32
'\\';
@ -1842,14 +1817,18 @@ std::string GNAPluginNS::backend::AMIntelDNN::getDumpFilePrefix(const std::strin
return std::string(".") + pathSeparator + folder + pathSeparator + std::to_string(dump_write_index) + pathSeparator;
}
std::string GNAPluginNS::backend::AMIntelDNN::getDumpFilePrefixGNA() {
std::string backend::AMIntelDNN::getDumpFilePrefixGNA() {
return getDumpFilePrefix("gna_layers");
}
std::string GNAPluginNS::backend::AMIntelDNN::getDumpFolderName() {
std::string backend::AMIntelDNN::getDumpFolderName() {
return getDumpFilePrefix("layers");
}
std::string GNAPluginNS::backend::AMIntelDNN::getRefFolderName() {
std::string backend::AMIntelDNN::getRefFolderName() {
return getDumpFilePrefix("ref_layers");
}
} // namespace backend
} // namespace intel_gna
} // namespace ov


@ -8,8 +8,8 @@
#include <string>
#include <vector>
#include "dnn_types.h"
#include "gna_types.h"
#include "dnn_types.hpp"
#include "gna_types.hpp"
#include "gna/gna_config.hpp"
#include "log/debug.hpp"
@ -17,9 +17,8 @@
#include "memory/gna_memory.hpp"
#include <gna2-model-api.h>
using GNAPluginNS::memory::GNAMemoryInterface;
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace backend {
class AMIntelDNN {
@ -28,24 +27,15 @@ public:
: ptr_active_outputs_(NULL),
num_active_outputs_(0),
input_scale_factor_(1.0),
num_left_context(0),
num_right_context(0),
do_rotate_input(false),
do_rotate_output(false),
num_rotate_rows(0),
num_rotate_columns(0),
num_rotate_output_rows(0),
num_rotate_output_columns(0),
softmax_type(kSoftmaxNone),
ptr_sumgroup_sizes(NULL),
num_sumgroup_sizes(0),
ptr_priors(NULL),
compute_precision_(kDnnNumNumberType) {
}
~AMIntelDNN();
void Init(GNAMemoryInterface * memoryInterface,
void Init(memory::GNAMemoryInterface * memoryInterface,
intel_dnn_number_type_t compute_precision,
float scale_factor);
@ -284,9 +274,31 @@ public:
true);
}
template <class T>
void AdvanceOperationIfAllApplied(const std::vector<intel_dnn_component_t>& cmp, int i, T*& operation) {
if (i == cmp.size() - 1 || cmp[i + 1].operation != kDnnPiecewiselinearOp) {
++operation;
}
}
float OutputScaleFactor(uint32_t component_index) {
return OutputScaleFactor(component[component_index]);
template <class T>
void AdvanceCnnOperationIfAllApplied(const std::vector<intel_dnn_component_t>& cmp, int i, T*& operation) {
if (i == cmp.size() - 1 ||
((cmp[i + 1].operation != kDnnMaxPoolOp) && (cmp[i + 1].operation != kDnnPiecewiselinearOp))) {
operation++;
}
}
template <class T>
void AdvancePwlOperationIfAllApplied(const std::vector<intel_dnn_component_t>& cmp, int i, T*& operation) {
if (i == cmp.size() - 1 ||
((cmp[i + 1].operation != kDnnMaxPoolOp) && (cmp[i + 1].operation != kDnnPiecewiselinearOp))) {
operation++;
}
}
float OutputScaleFactor(uint32_t cmp_index) {
return OutputScaleFactor(component[cmp_index]);
}
float OutputScaleFactor(intel_dnn_component_t &comp);
@ -318,19 +330,10 @@ public:
uint32_t num_outputs();
std::vector<intel_dnn_component_t> component;
uint32_t num_left_context;
uint32_t num_right_context;
uint32_t new_num_conv_columns = 0;
bool do_rotate_input;
bool do_rotate_output;
uint32_t num_rotate_rows = 0;
uint32_t num_rotate_columns = 0;
uint32_t num_rotate_output_rows = 0;
uint32_t num_rotate_output_columns = 0;
DnnSoftmaxType softmax_type;
uint32_t *ptr_sumgroup_sizes;
uint32_t num_sumgroup_sizes;
float *ptr_priors;
void WriteInputAndOutputText();
@ -339,7 +342,7 @@ public:
void BeginNewWrite(uint32_t index);
private:
GNAMemoryInterface* memory = nullptr;
memory::GNAMemoryInterface* memory = nullptr;
uint32_t *ptr_active_outputs_;
uint32_t num_active_outputs_;
intel_dnn_number_type_t compute_precision_;
@ -442,6 +445,20 @@ private:
void*& ptr_filters,
void*& ptr_biases);
static void InitDWSCComponentPrivate(intel_dnn_component_t& comp,
OvGnaTensor inputTensor,
OvGnaTensor outputTensor,
OvGnaTensor filterTensor,
OvGnaTensor biasTensor,
std::array<uint32_t, 2> convStride,
std::array<uint32_t, 2> zeroPadding,
float weight_scale_factor,
float output_scale_factor,
void*& ptr_inputs,
void*& ptr_outputs,
void*& ptr_filters,
void*& ptr_biases);
static void InitAffineComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in,
uint32_t num_columns,
@ -464,5 +481,7 @@ private:
std::string getDumpFolderName();
std::string getRefFolderName();
};
} // namespace backend
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov


@ -1,73 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <cstdio>
#include <cmath>
#include <gna2-model-api.h>
#include "gna2_model_helper.hpp"
#include "log/dump.hpp"
#ifndef _NO_MKL_
#include <mkl_dnn.h>
#endif
#include "runtime/floatmath.h"
#include "dnn.hpp"
#include "runtime/pwl.h"
#include "runtime/cnn.h"
void GNAPluginNS::backend::ClearScoreError(intel_score_error_t *error) {
error->num_scores = 0;
error->num_errors = 0;
error->max_error = 0.0;
error->sum_error = 0.0;
error->sum_squared_error = 0.0;
error->max_rel_error = 0.0;
error->sum_rel_error = 0.0;
error->sum_squared_rel_error = 0.0;
}
void GNAPluginNS::backend::UpdateScoreError(intel_score_error_t *error, intel_score_error_t *total_error) {
total_error->num_errors += error->num_errors;
total_error->num_scores += error->num_scores;
total_error->sum_error += error->sum_error;
total_error->sum_squared_error += error->sum_squared_error;
if (error->max_error > total_error->max_error) {
total_error->max_error = error->max_error;
}
total_error->sum_rel_error += error->sum_rel_error;
total_error->sum_squared_rel_error += error->sum_squared_rel_error;
if (error->max_rel_error > total_error->max_rel_error) {
total_error->max_rel_error = error->max_rel_error;
}
}
void GNAPluginNS::backend::SoftmaxGoogle(float *ptr_output, float *ptr_input, const uint32_t num_outputs, const uint32_t num_inputs) {
// Assumes input vector contains log likelihoods
// This computes x[i] = x[i] - log(sum_j exp(x[j]))
// This normalizes the likelihoods by the sum of likelihoods but stores them as log likelihoods
float max_score = ptr_input[0];
float sum = 0.0;
float diff;
// find max score for normalization to [0,1]
for (uint32_t i = 0; i < num_inputs; i++) {
if (ptr_input[i] > max_score) {
max_score = ptr_input[i];
}
}
for (uint32_t i = 0; i < num_inputs; i++) {
sum += exp(ptr_input[i] - max_score);
}
if (sum < 1.0e-20) {
fprintf(stderr, "Warning: attempt to take log(0) in SoftmaxGoogle()!\n");
sum = 1.0e-20f;
}
diff = max_score + std::log(sum);
for (uint32_t i = 0; i < num_outputs; i++) {
ptr_output[i] = ptr_input[i] - diff;
}
}
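For reference, the removed SoftmaxGoogle applies the standard log-sum-exp trick: subtracting the running maximum before exponentiating keeps the sum in floating-point range without changing the result, since

\[
x_i - \log\sum_j e^{x_j} \;=\; x_i - \Bigl(m + \log\sum_j e^{x_j - m}\Bigr), \qquad m = \max_j x_j,
\]

which is exactly the "diff" value computed above.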


@ -1,66 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cstdlib>
#include <cstdio>
#include <memory.h>
#include <xmmintrin.h>
#include <fstream>
#include <sstream>
#include <string>
#include <iomanip>
#include <type_traits>
#include <vector>
#include "am_intel_dnn.hpp"
#include "dnn_types.h"
#include <gna2-model-api.h>
#define DNN_MAX_BATCH_SIZE 8
#define DNN_MAX_INPUTS 3072
#define DNN_MAX_OUTPUTS 8192
#define DNN_MAX_ERROR 1.0e-4f
#define DNN_NUM_BYTES_INT_BIAS 4
#define DNN_NUM_BYTES_INT_AFFINE_OUT 4
#define DNN_RAND_INT8_AMPLITUDE 127.0f
#define DNN_RAND_INT16_AMPLITUDE 16384.0f
#define DNN_RAND_INT32_AMPLITUDE 1048576.0f
#define DNN_RAND_FLOAT32_AMPLITUDE 8.0f
namespace GNAPluginNS {
namespace backend {
void PlotFloatIntDnn(GNAPluginNS::backend::AMIntelDNN *dnn, GNAPluginNS::backend::AMIntelDNN *dnn_int);
void ClearScoreError(intel_score_error_t *error);
void UpdateScoreError(intel_score_error_t *error, intel_score_error_t *total_error);
void SoftmaxGoogle(float *ptr_output, float *ptr_input, const uint32_t num_outputs, const uint32_t num_inputs);
template <class T>
void AdvanceOperationIfAllApplied(const std::vector<intel_dnn_component_t>& component, int i, T*& operation) {
if (i == component.size() - 1 || component[i + 1].operation != kDnnPiecewiselinearOp) {
++operation;
}
}
template <class T>
void AdvanceCnnOperationIfAllApplied(const std::vector<intel_dnn_component_t>& component, int i, T*& operation) {
if (i == component.size() - 1 || ((component[i + 1].operation != kDnnMaxPoolOp)
&& (component[i + 1].operation != kDnnPiecewiselinearOp))) {
operation++;
}
}
template <class T>
void AdvancePwlOperationIfAllApplied(const std::vector<intel_dnn_component_t>& component, int i, T*& operation) {
if (i == component.size() - 1 || ((component[i + 1].operation != kDnnMaxPoolOp)
&& (component[i + 1].operation != kDnnPiecewiselinearOp))) {
operation++;
}
}
} // namespace backend
} // namespace GNAPluginNS


@ -14,17 +14,18 @@
#include "dnn_components.hpp"
#include "log/log.hpp"
using namespace ov::intel_gna;
using namespace GNAPluginNS;
using namespace GNAPluginNS::backend;
namespace ov {
namespace intel_gna {
namespace backend {
intel_dnn_component_t & DnnComponents::addComponent(const std::string layerName, const std::string layerMetaType) {
intel_dnn_component_t& DnnComponents::addComponent(const std::string layerName, const std::string layerMetaType) {
auto isDelayed = InferenceEngine::details::CaselessEq<std::string>()(layerMetaType, DelayedCopyLayerName);
delayedOperations += isDelayed ? 1 : 0;
components.emplace_back(DnnComponentExtra{layerName, {}, isDelayed});
auto &currentComponent = components.back().dnnComponent;
auto& currentComponent = components.back().dnnComponent;
log::trace() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << components.size() - 1 << std::endl;
log::trace() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_"
<< components.size() - 1 << std::endl;
currentComponent.original_layer_name = components.back().name.c_str();
int execOrder = 0;
@ -32,10 +33,11 @@ intel_dnn_component_t & DnnComponents::addComponent(const std::string layerName,
execOrder = static_cast<int>(components.size() - 1 - delayedOperations);
} else {
// todo: not perfect - propose to create mapping table that will be printed out by extra request
execOrder = - static_cast<int>(delayedOperations);
execOrder = -static_cast<int>(delayedOperations);
}
log::debug() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << execOrder << std::endl;
log::debug() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << execOrder
<< std::endl;
return currentComponent;
}
@ -47,7 +49,7 @@ intel_dnn_component_t* DnnComponents::findComponent(InferenceEngine::CNNLayerPtr
return nullptr;
}
intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(const std::string& layerName) {
intel_dnn_component_t* DnnComponents::findComponent(const std::string& layerName) {
auto component = std::find_if(begin(components), end(components), [&](const storage_type ::value_type& comp) {
return comp.name == layerName;
});
@ -57,8 +59,7 @@ intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(const
return nullptr;
}
const intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(
const InferenceEngine::CNNLayerPtr layer) const {
const intel_dnn_component_t* DnnComponents::findComponent(const InferenceEngine::CNNLayerPtr layer) const {
if (layer) {
return findComponent(layer->name);
}
@ -66,7 +67,7 @@ const intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(
return nullptr;
}
const intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(const std::string& layerName) const {
const intel_dnn_component_t* DnnComponents::findComponent(const std::string& layerName) const {
auto component = std::find_if(begin(components), end(components), [&](const storage_type ::value_type& comp) {
return comp.name == layerName;
});
@ -82,10 +83,14 @@ std::vector<intel_dnn_component_t> DnnComponents::getExecutionOrder() {
uint32_t direct_id = 0;
uint32_t delayed_id = static_cast<uint32_t>(components.size() - delayedOperations);
for (auto &&c : components) {
uint32_t &id = c.isDelayed ? delayed_id : direct_id;
for (auto&& c : components) {
uint32_t& id = c.isDelayed ? delayed_id : direct_id;
result[id] = c.dnnComponent;
id++;
}
return result;
}
} // namespace backend
} // namespace intel_gna
} // namespace ov


@ -11,11 +11,13 @@
#include <ie_common.h>
#include <legacy/ie_layers.h>
#include "dnn_types.h"
#include "dnn_types.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace backend {
struct DnnComponentExtra {
std::string name;
intel_dnn_component_t dnnComponent;
@ -71,5 +73,7 @@ struct DnnComponents {
private:
uint32_t delayedOperations = 0;
};
} // namespace backend
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov


@ -1,92 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// dnn_traits.hpp : c++ trait approach to define dnn objects
//
#pragma once
#include "dnn_types.h"
template<intel_dnn_operation_t layer>
struct DnnTrait {};
template<>
struct DnnTrait<kDnnDiagonalOp> {
using Type = intel_affine_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.affine;
}
};
template<>
struct DnnTrait<kDnnPiecewiselinearOp> {
using Type = intel_piecewiselinear_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.pwl;
}
};
template<>
struct DnnTrait<kDnnAffineOp> {
using Type = intel_affine_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.affine;
}
};
template<>
struct DnnTrait<kDnnConvolutional1dOp> {
using Type = intel_convolutionalD_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.conv1D;
}
};
template<>
struct DnnTrait<kDnnMaxPoolOp> {
using Type = intel_maxpool_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.maxpool;
}
};
template<>
struct DnnTrait<kDnnRecurrentOp> {
using Type = intel_recurrent_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.recurrent;
}
};
template<>
struct DnnTrait<kDnnInterleaveOp> {
using Type = intel_interleave_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.interleave;
}
};
template<>
struct DnnTrait<kDnnDeinterleaveOp> {
using Type = intel_deinterleave_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.deinterleave;
}
};
template<>
struct DnnTrait<kDnnCopyOp> {
using Type = intel_copy_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.copy;
}
};
template<>
struct DnnTrait<kDnnNullOp> {
using Type = void;
static Type *getLayer(intel_dnn_component_t &component) {
return nullptr;
}
};
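For reference, the deleted dnn_traits.hpp mapped each intel_dnn_operation_t value to the matching member of the component's "op" union at compile time. A hypothetical usage sketch, not taken from the plugin sources, assuming dnn_traits.hpp and its dependencies are included:

intel_dnn_component_t comp{};
comp.operation = kDnnAffineOp;
// DnnTrait<kDnnAffineOp>::Type is intel_affine_t; getLayer() returns &comp.op.affine.
intel_affine_t* affine = DnnTrait<kDnnAffineOp>::getLayer(comp);
affine->num_bytes_per_weight = 2;  // illustrative field access only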


@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "dnn_types.h"
#include "dnn_types.hpp"
const char *intel_dnn_activation_name[kActNumType] = {
"kActNone",
@ -25,13 +25,6 @@ const char *intel_dnn_activation_name[kActNumType] = {
"kActPwl"
};
const char *intel_dnn_softmax_name[kSoftmaxNumType] = {
"kSoftmaxNone",
"kSoftmaxKaldiSumGroup",
"kSoftmaxKaldiApplyLog",
"kSoftmaxGoogle"
};
const char* intel_dnn_operation_name[kDnnNumOp] = {
"kDnnNullOp",
"kDnnAffineOp",
@ -46,12 +39,6 @@ const char* intel_dnn_operation_name[kDnnNumOp] = {
"kDnnCopyOp"
};
const char *intel_dnn_macro_operation_name[kDnnNumMacroOp] = {
"kDnnMacroOpNone",
"kDnnMacroOpLstm",
"kDnnMacroOpBiLstm"
};
const char *intel_dnn_number_type_name[kDnnNumNumberType] = {
"kDnnFloat",
"kDnnInt"


@ -10,7 +10,7 @@
#include <string>
#include <type_traits>
#include "gna_types.h"
#include "gna_types.hpp"
#include "log/debug.hpp"
enum DnnActivationType : uint8_t {
@ -83,16 +83,6 @@ static_assert(std::is_trivial<DnnActivation>::value, "DnnActivation is not triva
extern const char *intel_dnn_activation_name[kActNumType];
typedef enum DnnSoftmaxType {
kSoftmaxNone,
kSoftmaxKaldiSumgroup,
kSoftmaxEesen,
kSoftmaxGoogle,
kSoftmaxNumType
} intel_dnn_softmax_type_t;
extern const char *intel_dnn_softmax_name[kSoftmaxNumType];
typedef enum {
kDnnUnknownOrientation = 100,
kDnnInterleavedOrientation,
@ -117,15 +107,6 @@ typedef enum {
extern const char* intel_dnn_operation_name[kDnnNumOp];
typedef enum {
kDnnMacroOpNone,
kDnnMacroOpLstm,
kDnnMacroOpBiLstm,
kDnnNumMacroOp
} intel_dnn_macro_operation_t;
extern const char *intel_dnn_macro_operation_name[kDnnNumMacroOp];
typedef enum {
kDnnFloat,
kDnnInt,
@ -262,7 +243,6 @@ struct intel_dnn_component_t {
uint32_t num_bytes_per_input;
uint32_t num_bytes_per_output;
intel_dnn_operation_t operation;
intel_dnn_macro_operation_t macro_operation;
intel_dnn_orientation_t orientation_in;
intel_dnn_orientation_t orientation_out;
union operation_struct_t {


@ -15,11 +15,10 @@
#include "gna_limitations.hpp"
#include "gna/gna_config.hpp"
using namespace ov::intel_gna;
namespace GNAPluginNS {
namespace GNALimitations {
namespace Cnn2D {
namespace ov {
namespace intel_gna {
namespace limitations {
namespace cnn2d {
bool IsEqualToLimit::isValid(const uint32_t val) const {
return val == compared_value;
@ -353,7 +352,7 @@ bool AbstractValidator::ValidationSuccesful(const bool throwOnError,
return error.empty();
}
} // namespace Cnn2D
} // namespace cnn2d
IE_SUPPRESS_DEPRECATED_START
static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) {
@ -370,7 +369,7 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st
auto isFusableWithConv = [](InferenceEngine::CNNLayerPtr ptr) {
return (LayerInfo(ptr).isFusableWithConv() || LayerInfo(ptr).isNonFunctional() ||
(LayerInfo(ptr).isPermute() && ((ptr->input()->getLayout() == InferenceEngine::Layout::NCHW &&
ptr->GetParamAsInts("order") == GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)) ||
ptr->GetParamAsInts("order") == permute::GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)) ||
(ptr->input()->getLayout() == InferenceEngine::Layout::CHW &&
ptr->GetParamAsInts("order") == std::vector<int32_t>{0, 2, 1} /* NCW to NWC */))));
};
@ -505,12 +504,12 @@ bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concat_layer) {
break;
// Convert dims to NHWC layout to allow later verification
auto new_order = GetPermuteOrder(concat_layout, InferenceEngine::Layout::NHWC);
auto new_order = permute::GetPermuteOrder(concat_layout, InferenceEngine::Layout::NHWC);
InferenceEngine::SizeVector new_dims;
for (size_t i = 0; i < dims_size; ++i) {
new_dims.push_back(in_dims[new_order[i]]);
}
concat_axis = GetPermuteOrder(InferenceEngine::Layout::NHWC, concat_layout)[concat_axis];
concat_axis = permute::GetPermuteOrder(InferenceEngine::Layout::NHWC, concat_layout)[concat_axis];
// Looking for any axis with dimension > 1 before concatenation axis;
// in general such concatenation is unsupported
@ -565,7 +564,7 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
startLayer,
[&](const InferenceEngine::CNNLayerPtr layer) {
LayerInfo info(layer);
if (GNAPluginNS::LayerTypeFromStr(layer->type) == GNAPluginNS::LayerType::NO_TYPE) {
if (LayerTypeFromStr(layer->type) == LayerType::NO_TYPE) {
errMessage = "The plugin does not support layer: " + layer->name + ":" + layer->type + "\n";
check_result = false;
}
@ -591,5 +590,6 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
}
IE_SUPPRESS_DEPRECATED_END
} // namespace GNALimitations
} // namespace GNAPluginNS
} // namespace limitations
} // namespace intel_gna
} // namespace ov


@ -4,15 +4,16 @@
#pragma once
#include "dnn_types.h"
#include "dnn_types.hpp"
#include <cstdint>
#include <cpp/ie_cnn_network.h>
#include <ie_algorithm.hpp>
#include <legacy/ie_layers.h>
#include "gna_lib_ver_selector.hpp"
namespace GNAPluginNS {
namespace GNALimitations {
namespace ov {
namespace intel_gna {
namespace limitations {
constexpr uint32_t bufferMaxSize = 65528;
@ -65,7 +66,8 @@ inline bool IsTransposeSupported(const std::vector<size_t>& shape) {
return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize;
}
namespace Cnn2D {
namespace cnn2d {
struct IsEqualToLimit {
uint32_t compared_value;
std::string what;
@ -118,11 +120,10 @@ struct VectorOrSquareLimit {
};
struct RectLimitByChannels {
std::vector<std::pair<uint32_t, RectLimit> > limitPerChannel;
std::vector<std::pair<uint32_t, RectLimit>> limitPerChannel;
RectLimit GetByChannels(const uint32_t channels) const;
bool isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w,
const uint32_t channels, std::string what) const;
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, const uint32_t channels, std::string what) const;
};
struct RectLimitByChannelsAndPrecision {
@ -130,8 +131,11 @@ struct RectLimitByChannelsAndPrecision {
RectLimitByChannels limit_for_int16;
RectLimitByChannels GetByPrecision(const OvGnaType precision) const;
bool isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w,
const OvGnaType precision, const uint32_t channels, std::string what) const;
std::string GetErrorOrEmpty(const uint32_t h,
const uint32_t w,
const OvGnaType precision,
const uint32_t channels,
std::string what) const;
};
class AbstractValidator {
@ -144,29 +148,51 @@ protected:
public:
virtual ~AbstractValidator() = default;
virtual bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
OvGnaType inPrecision, bool exception = true) const = 0;
virtual bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const = 0;
virtual bool ValidatePooling2D(const std::string& name,
const uint32_t windowH, const uint32_t windowW,
const uint32_t strideH, const uint32_t strideW,
bool exception = true) const = 0;
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const = 0;
virtual bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin, const uint32_t pad_h_end,
const uint32_t pad_w_begin, const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const = 0;
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const = 0;
virtual bool ShouldUseOnlyConv2DGnaIface() const = 0;
virtual bool ValidateCnn1D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
OvGnaType inPrecision, bool exception = true) const = 0;
virtual bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const = 0;
static std::unique_ptr<AbstractValidator> Create(const std::string&);
};
@ -184,29 +210,51 @@ class Validator_30 : public AbstractValidator {
public:
Validator_30() = default;
bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
OvGnaType inPrecision, bool exception = true) const override;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH, const uint32_t windowW,
const uint32_t strideH, const uint32_t strideW,
bool exception = true) const override;
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin, const uint32_t pad_h_end,
const uint32_t pad_w_begin, const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
OvGnaType inPrecision, bool exception = true) const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
class Validator_35 : public AbstractValidator {
@ -228,7 +276,30 @@ class Validator_35 : public AbstractValidator {
static const CnnLimits kCnn1DLimits;
std::string ValidateCnn(const CnnLimits& limits,
const std::string& name,
const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision) const;
std::string ValidatePooling(const CnnLimits& limits,
const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW) const;
public:
Validator_35() = default;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
@ -239,43 +310,43 @@ class Validator_35 : public AbstractValidator {
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision) const;
OvGnaType inPrecision,
bool exception = true) const override;
std::string ValidatePooling(const CnnLimits& limits,
const std::string& name,
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW) const;
public:
Validator_35() = default;
bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
OvGnaType inPrecision, bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH, const uint32_t windowW,
const uint32_t strideH, const uint32_t strideW,
bool exception = true) const override;
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin, const uint32_t pad_h_end,
const uint32_t pad_w_begin, const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
OvGnaType inPrecision, bool exception = true) const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
} // namespace Cnn2D
} // namespace cnn2d
bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
@ -293,5 +364,6 @@ IE_SUPPRESS_DEPRECATED_START
bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concatLayer);
IE_SUPPRESS_DEPRECATED_END
} // namespace GNALimitations
} // namespace GNAPluginNS
} // namespace limitations
} // namespace intel_gna
} // namespace ov


@ -10,9 +10,9 @@
#include "runtime/pwl.h"
#include "make_pwl.hpp"
#include "gna_slope_scale.h"
#include "dnn_types.h"
#include "backend/gna_types.h"
#include "gna_slope_scale.hpp"
#include "dnn_types.hpp"
#include "backend/gna_types.hpp"
#include "common/numerical_utils.hpp"
#include "pwl_input_params.hpp"
#include "pwl_segments_creator_factory.hpp"


@ -4,7 +4,7 @@
#pragma once
#include "backend/dnn_types.h"
#include "backend/dnn_types.hpp"
namespace ov {
namespace intel_gna {


@ -7,7 +7,7 @@
#include <memory>
#include <vector>
#include "backend/gna_types.h"
#include "backend/gna_types.hpp"
#include "pwl_border_values_counter.hpp"
namespace ov {


@ -7,7 +7,7 @@
#include <functional>
#include <unordered_map>
#include "backend/dnn_types.h"
#include "backend/dnn_types.hpp"
#include "pwl_border_values_counter_identity.hpp"
#include "pwl_segments_creator_identity.hpp"


@ -6,7 +6,7 @@
#include "log/debug.hpp"
#include "log/log.hpp"
#include "gna_slope_scale.h"
#include "gna_slope_scale.hpp"
#include "pwl_input_params.hpp"
#include "pwl_tools.hpp"
#include "runtime/pwl.h"


@ -4,7 +4,7 @@
#include "pwl_tools.hpp"
#include "gna_slope_scale.h"
#include "gna_slope_scale.hpp"
#include "common/numerical_utils.hpp"
#include "runtime/pwl.h"


@ -4,7 +4,7 @@
#pragma once
#include "backend/gna_types.h"
#include "backend/gna_types.hpp"
namespace ov {
namespace intel_gna {


@ -4,13 +4,17 @@
#pragma once
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace common {
static constexpr const char* kGnaTargetUnspecified = "";
static constexpr const char* kGnaTarget2_0 = "GNA_TARGET_2_0";
static constexpr const char* kGnaTarget3_0 = "GNA_TARGET_3_0";
static constexpr const char* kGnaTarget3_1 = "GNA_TARGET_3_1";
static constexpr const char* kGnaTarget3_5 = "GNA_TARGET_3_5";
static constexpr const char* kGnaDefaultTarget = kGnaTarget3_0;
} // namespace common
} // namespace GNAPluginNS
} // namespace common
} // namespace intel_gna
} // namespace ov


@ -6,7 +6,9 @@
#include <legacy/ie_layers.h>
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
struct ConnectionDetails {
InferenceEngine::CNNLayerPtr input;
bool needTransposeWeights = false;
@ -19,4 +21,6 @@ struct ConnectionDetails {
, permute(permute) {
}
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov


@ -13,10 +13,11 @@
#include "ie_input_info.hpp"
#include "ie_algorithm.hpp"
#include "backend/dnn_types.h"
#include "backend/dnn_types.hpp"
#include "gna_plugin_config.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
/*
* This base structure accumulates all required information for network inputs and outputs
@ -27,15 +28,15 @@ struct GnaDesc {
std::unordered_set<std::string> tensor_names = {};
InferenceEngine::Layout model_layout = InferenceEngine::Layout::ANY;
InferenceEngine::SizeVector dims = {};
InferenceEngine::Precision model_precision = InferenceEngine::Precision::UNSPECIFIED;
InferenceEngine::Precision model_precision = InferenceEngine::Precision::UNSPECIFIED;
InferenceEngine::Precision tensor_precision = InferenceEngine::Precision::UNSPECIFIED;
// gna specific properties
double scale_factor = GNAPluginNS::kScaleFactorDefault;
double scale_factor = kScaleFactorDefault;
intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
uint32_t num_elements = 0;
uint32_t allocated_size = 0;
std::vector<void *> ptrs = {}; // ptr per each infer request
std::vector<void*> ptrs = {}; // ptr per each infer request
// helper methods
uint32_t get_required_size() const {
@ -53,25 +54,27 @@ struct GnaDesc {
// helps to get the precision for gna layers, because they use num_bytes instead of precision values
void set_precision(uint32_t num_bytes) {
switch (num_bytes) {
case sizeof(int8_t) : {
set_precision(InferenceEngine::Precision::I8);
break;
}
case sizeof(int16_t) : {
set_precision(InferenceEngine::Precision::I16);
break;
}
case sizeof(int32_t) : {
set_precision(InferenceEngine::Precision::I32);
break;
}
default :
set_precision(InferenceEngine::Precision::UNSPECIFIED);
case sizeof(int8_t): {
set_precision(InferenceEngine::Precision::I8);
break;
}
case sizeof(int16_t): {
set_precision(InferenceEngine::Precision::I16);
break;
}
case sizeof(int32_t): {
set_precision(InferenceEngine::Precision::I32);
break;
}
default:
set_precision(InferenceEngine::Precision::UNSPECIFIED);
}
}
InferenceEngine::DataPtr to_ie_data() {
return std::make_shared<InferenceEngine::Data>(name, InferenceEngine::TensorDesc(model_precision, dims, model_layout));
return std::make_shared<InferenceEngine::Data>(
name,
InferenceEngine::TensorDesc(model_precision, dims, model_layout));
}
};
@ -79,7 +82,9 @@ struct GnaDesc {
 * This structure accumulates all required information for one network input
*/
struct InputDesc : GnaDesc {
InputDesc(const std::string &name) { this->name = name; }
InputDesc(const std::string& name) {
this->name = name;
}
void Update(const InferenceEngine::InputInfo::Ptr inputInfo) {
this->model_precision = inputInfo->getPrecision();
@ -101,7 +106,9 @@ struct InputDesc : GnaDesc {
* This structure accumulates all required information for one network output
*/
struct OutputDesc : GnaDesc {
OutputDesc(const std::string &name) { this->name = name; }
OutputDesc(const std::string& name) {
this->name = name;
}
void Update(const InferenceEngine::DataPtr outputData) {
this->model_precision = outputData->getPrecision();
@ -123,9 +130,9 @@ private:
std::vector<T> infos_;
public:
GnaNetworkInfo(): infos_({}) { }
GnaNetworkInfo() : infos_({}) {}
const T& at(const std::string &key) const {
const T& at(const std::string& key) const {
if (key.empty()) {
throw std::invalid_argument("The key cannot be empty");
}
@ -136,8 +143,8 @@ public:
return *desc_it;
}
T& at(const std::string &key) {
return const_cast<T&>( static_cast<const GnaNetworkInfo&>(*this).at(key) );
T& at(const std::string& key) {
return const_cast<T&>(static_cast<const GnaNetworkInfo&>(*this).at(key));
}
typename std::vector<T>::iterator end() {
@ -156,11 +163,13 @@ public:
});
}
T& operator[](const std::string &key) {
T& operator[](const std::string& key) {
if (key.empty()) {
throw std::invalid_argument("The key cannot be empty");
}
auto desc_it = std::find_if(infos_.begin(), infos_.end(), [&key](const T& desc){return desc.name == key;});
auto desc_it = std::find_if(infos_.begin(), infos_.end(), [&key](const T& desc) {
return desc.name == key;
});
if (desc_it == infos_.end()) {
infos_.push_back(T(key));
return infos_.back();
@ -168,16 +177,25 @@ public:
return *desc_it;
}
size_t size() const { return infos_.size(); }
size_t size() const {
return infos_.size();
}
bool empty() const { return infos_.empty(); }
bool empty() const {
return infos_.empty();
}
const std::vector<T>& Get() const { return infos_; }
const std::vector<T>& Get() const {
return infos_;
}
std::vector<T>& Get() { return infos_; }
std::vector<T>& Get() {
return infos_;
}
};
typedef GnaNetworkInfo<InputDesc> GnaInputs;
typedef GnaNetworkInfo<OutputDesc> GnaOutputs;
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
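
For reference, a minimal usage sketch of the GnaNetworkInfo container above (the key "data" and the scale factor are made-up values, not taken from this commit):

// Hypothetical usage of the GnaInputs typedef; operator[] inserts a default descriptor on a miss.
ov::intel_gna::GnaInputs inputs;
inputs["data"].scale_factor = 16.0;   // creates the InputDesc named "data" on first access
inputs["data"].set_precision(2);      // 2 bytes map to InferenceEngine::Precision::I16 per the switch above
const auto& in = inputs.at("data");   // at() throws for an empty or unknown key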

View File

@ -8,7 +8,9 @@
#include "openvino/runtime/intel_gna/properties.hpp"
#include "gna/gna_config.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
struct GNAFlags {
uint8_t num_requests = 1;
bool compact_mode = true;
@ -22,4 +24,6 @@ struct GNAFlags {
bool input_low_precision = false;
ov::log::Level log_level = ov::log::Level::NO;
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -4,12 +4,10 @@
#include "layer_quantizer.hpp"
#include "weights_converter.hpp"
#include "backend/gna_types.h"
#include "backend/gna_types.hpp"
#include "common/gna_target.hpp"
#include "gna_graph_tools.hpp"
using namespace GNAPluginNS;
namespace ov {
namespace intel_gna {
namespace frontend {
@ -252,7 +250,7 @@ void LayerQuantizer::QuantizeWeightsBiases(InferenceEngine::WeightableLayer& wl)
QuantizationData common_data{
num_rows,
num_columns,
GNAPluginNS::kScaleFactorDefault,
kScaleFactorDefault,
quant_layer_params->_weights_quant
};

View File

@ -13,8 +13,6 @@ namespace ov {
namespace intel_gna {
namespace frontend {
using namespace GNAPluginNS;
/**
* @brief Returns layer's target input precision
* @return layer's target input precision

View File

@ -9,7 +9,7 @@
#include "log/debug.hpp"
#include "log/log.hpp"
#include "layers/gna_fake_quantize_layer.hpp"
#include "backend/gna_types.h"
#include "backend/gna_types.hpp"
#include "quantization.hpp"
namespace ov {

View File

@ -10,7 +10,7 @@
#include <vector>
#include <cstdint>
#include "quantized_layer_params.hpp"
#include "backend/gna_types.h"
#include "backend/gna_types.hpp"
namespace ov {
namespace intel_gna {

View File

@ -3,7 +3,7 @@
//
#include "scale_factor_calc.hpp"
#include "gna_slope_scale.h"
#include "gna_slope_scale.hpp"
#include "common/numerical_utils.hpp"
#include "layer_quantizer.hpp"
#include "gna_upstream_iterator.hpp"
@ -323,7 +323,7 @@ bool ScaleFactorCalculator::requantizeInput(InferenceEngine::CNNLayerPtr input,
*/
float ScaleFactorCalculator::adjustScaleFactor(float sf,
InferenceEngine::CNNLayer const* cnnLayer,
GNAPluginNS::LayerInfo const& layer,
LayerInfo const& layer,
QuantizedLayerParams* quantizedParams) const {
auto get_rank = [](uint32_t value) {
uint8_t rank = 0;
@ -365,7 +365,7 @@ float ScaleFactorCalculator::adjustScaleFactor(float sf,
}
float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const* cnnLayer,
GNAPluginNS::LayerInfo const& layer,
LayerInfo const& layer,
int inputsSize,
const bool fake_quantized) const {
auto quantizedParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(*cnnLayer);
@ -420,9 +420,9 @@ float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const*
double offset = 0;
auto powerLayer = dynamic_cast<InferenceEngine::PowerLayer const*>(cnnLayer);
if (!powerLayer) {
std::shared_ptr<ov::intel_gna::op::Pwl> pwl_node;
std::shared_ptr<op::Pwl> pwl_node;
if (!cnnLayer->getNode() ||
!(pwl_node = std::dynamic_pointer_cast<ov::intel_gna::op::Pwl>(cnnLayer->getNode()))) {
!(pwl_node = std::dynamic_pointer_cast<op::Pwl>(cnnLayer->getNode()))) {
IE_THROW() << "Incorrect Power Layer pointer \n";
} else {
auto powerIE = std::dynamic_pointer_cast<ngraph::op::PowerIE>(pwl_node->get_base_node());
@ -587,7 +587,7 @@ float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const*
bool ScaleFactorCalculator::ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cnnLayer,
ScaleFactorUpdateResult& result,
int infiniteLoopCount,
const GNAPluginNS::Config& gna_config) const {
const Config& gna_config) const {
if ( !cnnLayer ) {
IE_THROW() << "Incorrect Layer pointer \n";
}
@ -1234,7 +1234,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerWeightable(InferenceEngine::Weigh
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
if (conv && !LayerInfo(conv).isConvolutionFilter()) {
const auto inDepth = GetDataDimByName(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
weights_reducer = GNAConvolutionLayer::getWeightsReducer(*conv);
weights_reducer = gna_convolution_layer::getWeightsReducer(*conv);
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
weights_reducer = std::max(1.0, weights_reducer);
}

View File

@ -49,13 +49,13 @@ struct ScaleFactorUpdateResult {
class ScaleFactorCalculator {
using Cnt = std::vector<InferenceEngine::CNNLayerPtr>;
Cnt net;
const GNAPluginNS::Config& gna_config;
const Config& gna_config;
const bool fake_quantized;
mutable Cnt::const_iterator idx;
mutable bool needRestart = false;
int infiniteLoopCount = 0;
std::vector<double> getPWLSlopes(const GNAPluginNS::LayerInfo& info) const;
std::vector<double> getPWLSlopes(const LayerInfo& info) const;
static float selectBestOutputScaleFactors(float inScale,
std::vector<float> outScales,
const std::vector<double>& slopes);
@ -71,35 +71,35 @@ class ScaleFactorCalculator {
int infiniteLoopCount);
float adjustScaleFactor(float sf,
InferenceEngine::CNNLayer const* cnnLayer,
GNAPluginNS::LayerInfo const& layer,
LayerInfo const& layer,
QuantizedLayerParams* quantizedParams) const;
float getActivationScale(InferenceEngine::CNNLayer const* cnnLayer,
GNAPluginNS::LayerInfo const& layer,
LayerInfo const& layer,
int inputsSize,
const bool fakeQuantize) const;
bool ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cnnLayer,
ScaleFactorUpdateResult& result,
int infiniteLoopCount,
const GNAPluginNS::Config& gna_config) const;
const Config& gna_config) const;
bool ScaleFactorPerLayerConcat(InferenceEngine::ConcatLayer* concatLayer,
ScaleFactorUpdateResult& result,
int infiniteLoopCount,
const GNAPluginNS::Config& gna_config) const;
const Config& gna_config) const;
bool ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseLayer* eltwiseLayer,
ScaleFactorUpdateResult& result,
int infiniteLoopCount,
const GNAPluginNS::Config& gna_config) const;
const Config& gna_config) const;
bool ScaleFactorPerLayerGemm(InferenceEngine::GemmLayer* gemmLayer,
ScaleFactorUpdateResult& result,
int infiniteLoopCount,
const GNAPluginNS::Config& gna_config) const;
const Config& gna_config) const;
bool ScaleFactorPerLayerWeightable(InferenceEngine::WeightableLayer* wl,
ScaleFactorUpdateResult& result,
int infiniteLoopCount,
const GNAPluginNS::Config& gna_config) const;
const Config& gna_config) const;
public:
ScaleFactorCalculator(Cnt& net, const GNAPluginNS::Config& gna_config, const bool fake_quantized)
ScaleFactorCalculator(Cnt& net, const Config& gna_config, const bool fake_quantized)
: net(net),
gna_config(gna_config),
fake_quantized(fake_quantized) {
@ -120,7 +120,7 @@ class ScaleFactorCalculator {
bool CalculateScaleFactor(InferenceEngine::CNNLayerPtr layer_ptr) const {
ScaleFactorUpdateResult result;
needRestart = false;
auto layer_info = GNAPluginNS::LayerInfo(layer_ptr);
auto layer_info = LayerInfo(layer_ptr);
if (layer_info.isConcat()) {
if (!ScaleFactorPerLayerConcat(dynamic_cast<InferenceEngine::ConcatLayer*>(layer_ptr.get()),

View File

@ -42,7 +42,7 @@ InferenceEngine::Blob::Ptr make_fp32_blob(InferenceEngine::Blob::Ptr fp16_blob)
}
void convert_blobs_precision(InferenceEngine::CNNLayer& layer) {
auto layer_info = GNAPluginNS::LayerInfo(layer);
auto layer_info = LayerInfo(layer);
if (layer_info.isWeightable()) {
InferenceEngine::WeightableLayer& wl = dynamic_cast<InferenceEngine::WeightableLayer&>(layer);

View File

@ -91,8 +91,8 @@ std::vector<char> GetStringAsTlv(Gna2TlvType type, const std::string& s) {
Gna2DeviceVersion getEmbeddedTargetFromCompileTarget(const std::string compileTarget) {
static const std::map<std::string, Gna2DeviceVersion> targetMap = {
{GNAPluginNS::common::kGnaTarget3_1, Gna2DeviceVersionEmbedded3_1},
{GNAPluginNS::common::kGnaTarget3_5, Gna2DeviceVersionEmbedded3_5},
{common::kGnaTarget3_1, Gna2DeviceVersionEmbedded3_1},
{common::kGnaTarget3_5, Gna2DeviceVersionEmbedded3_5},
};
auto found = targetMap.find(compileTarget);
if (found == targetMap.end()) {

View File

@ -6,7 +6,7 @@
#include <gna2-common-api.h>
#include <gna2-model-api.h>
#include "backend/dnn_types.h"
#include "backend/dnn_types.hpp"
#include <cstdint>

View File

@ -29,10 +29,12 @@ static inline bool FoundPartToTranspose(const std::vector<TranspositionInfo> &tr
return partToTranspose != std::end(transpositionInfo);
}
namespace GNAPluginNS {
using gna_memory_type = GNAPluginNS::memory::GNAMemoryInterface;
using gna_memory_float = GNAPluginNS::memory::GNAMemory<memory::GNAFloatAllocator>;
using gna_memory_device = GNAPluginNS::memory::GNAMemory<>;
namespace ov {
namespace intel_gna {
using gna_memory_type = memory::GNAMemoryInterface;
using gna_memory_float = memory::GNAMemory<memory::GNAFloatAllocator>;
using gna_memory_device = memory::GNAMemory<>;
using DnnComponentsForLayer = std::list<std::pair<std::string, intel_dnn_component_t>>;
using MemoryConnection = std::list<std::pair<std::string, GNAMemoryLayer>>;
@ -40,4 +42,6 @@ namespace GNAPluginNS {
using SplitConnection = std::unordered_map<std::string, GNASplitLayer>;
using CropConnection = std::unordered_map<std::string, GNACropLayer>;
using ConstConnections = std::unordered_map<std::string, void*>;
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -80,9 +80,9 @@ uint8_t* GNADeviceHelper::alloc(uint32_t size_requested, uint32_t *size_granted)
return static_cast<uint8_t *>(memPtr);
}
void GNADeviceHelper::tagMemoryRegion(void* memPtr, const GNAPluginNS::memory::rRegion tag) {
void GNADeviceHelper::tagMemoryRegion(void* memPtr, const memory::rRegion tag) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
using GNAPluginNS::memory::rRegion;
using memory::rRegion;
static const std::map<rRegion, Gna2MemoryTag> tagMap {
{rRegion::REGION_INPUTS, Gna2MemoryTagInput},
{rRegion::REGION_OUTPUTS, Gna2MemoryTagOutput},
@ -192,7 +192,7 @@ void GNADeviceHelper::enforceLegacyCnns(Gna2Model& gnaModel) {
void GNADeviceHelper::enforceLegacyCnnsWhenNeeded(Gna2Model& gnaModel) {
for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) {
auto& op = gnaModel.Operations[i];
if (GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(op)) {
if (backend::AMIntelDNN::isOperationCnnLegacySpecific(op)) {
enforceLegacyCnn(op);
}
}
@ -207,7 +207,7 @@ uint32_t GNADeviceHelper::createModel(Gna2Model& gnaModel) const {
}
enforceLegacyCnnsWhenNeeded(gnaModel);
GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(gnaModel, legacyExecTarget);
backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(gnaModel, legacyExecTarget);
if (per_model_diagnostics) {
std::string path =
@ -240,10 +240,10 @@ bool GNADeviceHelper::enforceLegacyCnnNeeded() const {
Gna2DeviceVersion GNADeviceHelper::parseTarget(const std::string& target) {
static const std::map<std::string, Gna2DeviceVersion> targetMap {
{GNAPluginNS::common::kGnaTarget2_0, Gna2DeviceVersion2_0},
{GNAPluginNS::common::kGnaTarget3_0, Gna2DeviceVersion3_0},
{GNAPluginNS::common::kGnaTarget3_5, Gna2DeviceVersion3_5},
{GNAPluginNS::common::kGnaTargetUnspecified, Gna2DeviceVersionSoftwareEmulation},
{common::kGnaTarget2_0, Gna2DeviceVersion2_0},
{common::kGnaTarget3_0, Gna2DeviceVersion3_0},
{common::kGnaTarget3_5, Gna2DeviceVersion3_5},
{common::kGnaTargetUnspecified, Gna2DeviceVersionSoftwareEmulation},
};
const auto f = targetMap.find(target);
if (f != targetMap.end()) {
@ -254,13 +254,13 @@ Gna2DeviceVersion GNADeviceHelper::parseTarget(const std::string& target) {
Gna2DeviceVersion GNADeviceHelper::getDefaultTarget() const {
if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation)
return parseTarget(GNAPluginNS::common::kGnaDefaultTarget);
return parseTarget(common::kGnaDefaultTarget);
return detectedGnaDevVersion;
}
Gna2DeviceVersion GNADeviceHelper::getTargetDevice(const bool execTarget) const {
const auto declared = execTarget ? executionTarget : compileTarget;
if (declared == GNAPluginNS::common::kGnaTargetUnspecified) {
if (declared == common::kGnaTargetUnspecified) {
return execTarget ? getDefaultTarget() : getTargetDevice(true);
}
return parseTarget(declared);
@ -465,15 +465,15 @@ const std::map <const std::pair<Gna2OperationType, int32_t>, const std::string>
{{Gna2OperationTypeThreshold, 1}, "Output"}
};
GNAPluginNS::RequestStatus GNADeviceHelper::waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds) {
RequestStatus GNADeviceHelper::waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
const auto status = Gna2RequestWait(requestID, static_cast<uint32_t>(timeoutMilliseconds));
if (status == Gna2StatusWarningDeviceBusy) {
return GNAPluginNS::RequestStatus::kPending;
return RequestStatus::kPending;
}
unwaitedRequestIds.erase(requestID);
if (status == Gna2StatusDriverQoSTimeoutExceeded) {
return GNAPluginNS::RequestStatus::kAborted;
return RequestStatus::kAborted;
}
if (per_request_diagnostics) {
@ -485,7 +485,7 @@ GNAPluginNS::RequestStatus GNADeviceHelper::waitForRequest(uint32_t requestID, i
// handle error case after updating statistics data.
checkGna2Status(status, "Gna2RequestWait");
return GNAPluginNS::RequestStatus::kCompleted;
return RequestStatus::kCompleted;
}
GNADeviceHelper::DumpResult GNADeviceHelper::dumpXnn(const uint32_t modelId) {
@ -559,7 +559,7 @@ void GNADeviceHelper::close() {
for (auto requestId : requestsToClose)
try {
if (waitForRequest(requestId) == GNAPluginNS::RequestStatus::kPending)
if (waitForRequest(requestId) == RequestStatus::kPending)
log::warning() << "Request with Id " << requestId << " is still pending";
} catch (...) {
log::warning() << "Request with Id " << requestId << " was not awaited successfully";
@ -598,10 +598,10 @@ void GNADeviceHelper::getGnaPerfCounters(std::map<std::string, InferenceEngine::
std::string GNADeviceHelper::GetCompileTarget() const {
static const std::map<Gna2DeviceVersion, std::string> targetMap = {
{Gna2DeviceVersion2_0, GNAPluginNS::common::kGnaTarget2_0},
{Gna2DeviceVersion3_0, GNAPluginNS::common::kGnaTarget3_0},
{Gna2DeviceVersion3_5, GNAPluginNS::common::kGnaTarget3_5},
{Gna2DeviceVersionEmbedded3_5, GNAPluginNS::common::kGnaTarget3_5},
{Gna2DeviceVersion2_0, common::kGnaTarget2_0},
{Gna2DeviceVersion3_0, common::kGnaTarget3_0},
{Gna2DeviceVersion3_5, common::kGnaTarget3_5},
{Gna2DeviceVersionEmbedded3_5, common::kGnaTarget3_5},
};
const auto target = getTargetDevice(false);
auto found = targetMap.find(target);
@ -616,7 +616,7 @@ uint32_t GNADeviceHelper::maxLayersCount() const {
}
uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
using namespace GNAPluginNS::GNALimitations;
using namespace limitations;
switch (getTargetDevice(true)) {
case Gna2DeviceVersion1_0:

View File

@ -34,7 +34,7 @@
/**
 * holds a GNA-style handle in a RAII manner
*/
class GNADeviceHelper : public GNAPluginNS::GNADevice {
class GNADeviceHelper : public ov::intel_gna::GNADevice {
using UnwaitedRequestIds = std::set<uint32_t>;
static std::mutex acrossPluginsSync;
static std::string decoratedGnaLibVersion() {
@ -92,7 +92,7 @@ public:
void dumpAllAllocations(uint64_t idx, const std::string& infix) const;
uint8_t *alloc(uint32_t size_requested, uint32_t *size_granted);
void tagMemoryRegion(void* memPtr, const GNAPluginNS::memory::rRegion memoryTag);
void tagMemoryRegion(void* memPtr, const ov::intel_gna::memory::rRegion memoryTag);
void releaseModel(const uint32_t model_id);
static uint32_t getNumberOfGnaDevices();
@ -155,7 +155,7 @@ public:
/**
* @see GNADevice::waitForRequest()
*/
GNAPluginNS::RequestStatus waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds = MAX_TIMEOUT) override;
ov::intel_gna::RequestStatus waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds = MAX_TIMEOUT) override;
/**
* @see GNADevice::maxLayersCount()

View File

@ -20,7 +20,7 @@
#include "memory/gna_mem_regions.hpp"
#include "gna_lib_ver_selector.hpp"
using GNAPluginNS::memory::rRegion;
using ov::intel_gna::memory::rRegion;
struct GnaAllocation {
void* ptr = nullptr;

View File

@ -13,10 +13,12 @@
enum Gna2AccelerationMode;
class Gna2Model;
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
// Interface name is different from the file name due to legacy reasons.
// 1. Implementation file names should be changed in the next PR.
// 2. Implementation of interface should be moved to GNAPluginNS namespace
// 2. Implementation of interface should be moved to ov::intel_gna namespace
/**
* @interface Interface for invoking operation on GNA device.
@ -57,10 +59,10 @@ public:
* @brief Wait for request to be finished.
* @param requestID id of request enqueued on device
* @param timeoutMilliseconds maximum timeout to be used for waiting
* @return status of request given to the method. @see GNAPluginNS::RequestStatus.
* @return status of request given to the method. @see RequestStatus.
* @throw Exception in case of error
*/
virtual GNAPluginNS::RequestStatus waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds) = 0;
virtual RequestStatus waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds) = 0;
/**
* @brief Return maximum number of layers supported by device.
@ -74,4 +76,5 @@ public:
virtual void close() {}
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
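
As a rough illustration of the waitForRequest() contract described above (not code from this commit), a caller could poll a concrete GNADevice implementation such as GNADeviceHelper like this:

// Hypothetical caller; "device" and "requestId" come from an earlier enqueue and are assumed here.
using ov::intel_gna::RequestStatus;
const auto status = device.waitForRequest(requestId, /*timeoutMilliseconds=*/50);
if (status == RequestStatus::kPending) {
    // device still busy: the request has not finished within the timeout, try again later
} else if (status == RequestStatus::kAborted) {
    // driver QoS timeout exceeded: results must be discarded
}  // RequestStatus::kCompleted: outputs are ready to be read back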

View File

@ -15,7 +15,8 @@
#include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp>
#include <ie_icore.hpp>
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
class GNAExecutableNetwork : public InferenceEngine::IExecutableNetworkInternal {
std::shared_ptr<GNAPlugin> plg;
@ -135,4 +136,5 @@ class GNAExecutableNetwork : public InferenceEngine::IExecutableNetworkInternal
}
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -10,7 +10,9 @@
#include "gna_graph_tools.hpp"
#include "layers/gna_layer_helpers.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
/**
* @brief Modify child layers walking order to maintain strict ordering required for gna_fuse logic
*/
@ -99,4 +101,5 @@ inline FuzedLayersContainer make_fuzed_order(InferenceEngine::CNNLayer* origin)
return fusedCnt;
}
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -43,12 +43,11 @@
using namespace InferenceEngine;
using namespace std;
using namespace ov::intel_gna;
using namespace GNAPluginNS;
using namespace ov::intel_gna::frontend;
using namespace ov::intel_gna::common;
using namespace memory;
static bool CheckIFLastComponentIsPrecededByConv2D(const GNAPluginNS::backend::DnnComponents::storage_type& components,
static bool CheckIFLastComponentIsPrecededByConv2D(const backend::DnnComponents::storage_type& components,
bool verify_with_pooling = true) {
bool proceded_by_conv2D = false;
auto last_element = components.rbegin();
@ -71,15 +70,15 @@ static bool CheckIFLastComponentIsPrecededByConv2D(const GNAPluginNS::backend::D
GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config) : gna_config(gna_config) {}
void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<GNAPluginNS::gna_memory_type> gnaMemPtr) {
void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr) {
this->gnamem = std::move(gnaMemPtr);
}
void GNAGraphCompiler::setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr) {
void GNAGraphCompiler::setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr) {
this->dnn = std::move(dnnPtr);
}
void GNAGraphCompiler::setInputsPtr(std::shared_ptr<GNAPluginNS::GnaInputs> inputsPtr) {
void GNAGraphCompiler::setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr) {
this->inputs_ptr_ = std::move(inputsPtr);
}
@ -110,7 +109,7 @@ void GNAGraphCompiler::fillMemoryConnections(std::unordered_map<std::string,
void GNAGraphCompiler::fillConcatConnections(InferenceEngine::CNNLayerPtr layer) {
// creating a connection for each layer's outputs in the form of an extra map
GNAPluginNS::GNAConcatLayer layerInfoItem(layer);
GNAConcatLayer layerInfoItem(layer);
size_t concat_size = 0;
std::string& id = layer->name;
@ -148,7 +147,7 @@ void GNAGraphCompiler::fillConcatConnections(InferenceEngine::CNNLayerPtr layer)
void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) {
// creating a connection for each layer's inputs in the form of an extra map
GNAPluginNS::GNASplitLayer layerInfoItem(layer);
GNASplitLayer layerInfoItem(layer);
size_t split_size = 0;
std::string& id = layer->name;
IE_ASSERT(!layer->insData.empty());
@ -214,16 +213,16 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
split_connection.emplace(id, layerInfoItem);
}
void GNAPluginNS::GNAGraphCompiler::SetValidatorTarget(const std::string& target) {
auto temp = GNALimitations::Cnn2D::AbstractValidator::Create(target);
void GNAGraphCompiler::SetValidatorTarget(const std::string& target) {
auto temp = limitations::cnn2d::AbstractValidator::Create(target);
cnn2dValidator.reset(temp.release());
}
bool GNAPluginNS::GNAGraphCompiler::ShouldUseOnlyConv2DGnaIface() const {
bool GNAGraphCompiler::ShouldUseOnlyConv2DGnaIface() const {
return cnn2dValidator && cnn2dValidator->ShouldUseOnlyConv2DGnaIface();
}
void GNAPluginNS::GNAGraphCompiler::ValidateCnn2D(const std::string& name,
void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
@ -245,7 +244,7 @@ void GNAPluginNS::GNAGraphCompiler::ValidateCnn2D(const std::string& name,
}
}
void GNAPluginNS::GNAGraphCompiler::ValidatePooling2D(const std::string& name,
void GNAGraphCompiler::ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
@ -280,9 +279,8 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer)
}
void GNAGraphCompiler::assertConvolutionLayoutProper(const InferenceEngine::DataPtr& data) {
if (data->getLayout() != Layout::NHWC &&
data->getLayout() != Layout::NCHW &&
data->getLayout() != Layout::NC) {
if (data->getLayout() != InferenceEngine::Layout::NHWC && data->getLayout() != InferenceEngine::Layout::NCHW &&
data->getLayout() != InferenceEngine::Layout::NC) {
THROW_GNA_EXCEPTION << "layer: \"Convolution\" with layout " << data->getLayout() << " isn't currently supported on GNA";
}
}
@ -338,10 +336,10 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
// Map 2d convolution to 1d if it's possible.
if (!ShouldUseOnlyConv2DGnaIface() &&
GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, in_channels,
gna_convolution_layer::isMappableFrom2DTo1D(in_height, in_width, in_channels,
convolution._kernel_y, convolution._kernel_x,
convolution._stride_y, convolution._stride_x)) {
transpose_h_w = GNAConvolutionLayer::should_transpose_h_w(in_height,
transpose_h_w = gna_convolution_layer::should_transpose_h_w(in_height,
convolution._kernel_y,
in_channels,
convolution._stride_y);
@ -382,7 +380,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
}
if (ShouldUseOnlyConv2DGnaIface() ||
GNAConvolutionLayer::is3DInputOr2DKernel(in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x) ||
gna_convolution_layer::is3DInputOr2DKernel(in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x) ||
in_height != 1) {
// TensorFlow default layout is NHWC
// OpenVINO default layout is NCHW
@ -518,7 +516,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
// Keep both variants of kaldi models working:
// Old one has layout which is different from NHWC
// New one has layout NHWC, but it is mapped from 2d by H
if (inputs->getLayout() == Layout::NHWC && !transpose_h_w) {
if (inputs->getLayout() == InferenceEngine::Layout::NHWC && !transpose_h_w) {
currentComponent.orientation_in = kDnnInterleavedOrientation;
currentComponent.orientation_out = kDnnInterleavedOrientation;
}
@ -536,7 +534,8 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
// TODO: convolution might not be the first layer in sorted order but connected via split, for example - we don't know how Kaldi will handle that
if (!dnn->do_rotate_input) {
if ((inputs->getLayout() != Layout::NHWC || transpose_h_w) && LayerInfo(connectedInputLayer).isInput()) {
if ((inputs->getLayout() != InferenceEngine::Layout::NHWC || transpose_h_w) &&
LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features have the opposite orientation
dnn->do_rotate_input = true;
dnn->num_rotate_rows = effectiveStride;
@ -699,7 +698,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
currentComponent.num_bytes_per_input = inputs->getPrecision().size();
currentComponent.num_bytes_per_output = outputs->getPrecision().size();
if (inputs->getLayout() == Layout::NHWC) {
if (inputs->getLayout() == InferenceEngine::Layout::NHWC) {
currentComponent.orientation_in = kDnnInterleavedOrientation;
currentComponent.orientation_out = kDnnInterleavedOrientation;
}
@ -713,7 +712,8 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
// TODO: convolution might not be the first layer in sorted order but connected via split, for example - we don't know how Kaldi will handle that
if (!dnn->do_rotate_input && inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
if (!dnn->do_rotate_input && inputs->getLayout() != InferenceEngine::Layout::NHWC &&
LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features have the opposite orientation
dnn->do_rotate_input = true;
dnn->num_rotate_rows = in_channels;
@ -766,9 +766,9 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto input = layer->insData[0].lock();
auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(input, GNALimitations::GetMinBatchToFitInBuffer(input), 8)->getDims();
auto reshaped_dims = Get2DReshapedData(input, limitations::GetMinBatchToFitInBuffer(input), 8)->getDims();
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in;
@ -914,10 +914,10 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
const uint32_t c_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::C);
if (w_dim_in == 1) { // swap dimensions if needed to support swapped 1D case
swap(h_dim_in, w_dim_in);
swap(h_dim_out, w_dim_out);
swap(pooling._kernel[X_AXIS], pooling._kernel[Y_AXIS]);
swap(pooling._stride[X_AXIS], pooling._stride[Y_AXIS]);
std::swap(h_dim_in, w_dim_in);
std::swap(h_dim_out, w_dim_out);
std::swap(pooling._kernel[X_AXIS], pooling._kernel[Y_AXIS]);
std::swap(pooling._stride[X_AXIS], pooling._stride[Y_AXIS]);
}
void* ptr_inputs = nullptr;
@ -968,9 +968,9 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
// but as it does not use any specific new GNA features, it should be correct to import and run using previous GNA HW
if (!is2DPooling) {
const auto hLegacy =
GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy(h_dim_in, pooling._stride[X_AXIS]);
gna_convolution_layer::outputFromPoolingLegacy(h_dim_in, pooling._stride[X_AXIS]);
const auto wLegacy =
GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy(w_dim_in, pooling._stride[Y_AXIS]);
gna_convolution_layer::outputFromPoolingLegacy(w_dim_in, pooling._stride[Y_AXIS]);
if (num_data_bytes_out < hLegacy * wLegacy * c_dim_out) {
num_data_bytes_out = hLegacy * wLegacy * c_dim_out;
}
@ -1007,7 +1007,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto inputs = layer->insData.begin()->lock();
auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims();
auto reshaped_dims = Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims();
uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in;
@ -1068,7 +1068,7 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
}
// Concat axis validation
if (!GNALimitations::ValidateConvConcatAxis(concatLayer)) {
if (!limitations::ValidateConvConcatAxis(concatLayer)) {
std::ostringstream in_dims_oss;
auto in_dims = concatLayer->insData[0].lock()->getDims();
std::copy(in_dims.begin(), in_dims.end(), std::ostream_iterator<size_t>(in_dims_oss, ","));
@ -1147,7 +1147,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
if (!LayerInfo(cropLayer).isCropAffined()) {
// leave crop as it is
GNAPluginNS::GNACropLayer cropLayerInfoItem(layer);
GNACropLayer cropLayerInfoItem(layer);
std::string& id = layer->name;
crop_connection.emplace(id, cropLayerInfoItem);
auto cropLayerInfo = crop_connection.find(cropLayer->name);
@ -1178,7 +1178,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
uint32_t num_rows_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()));
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
void* ptr_inputs = nullptr;
@ -1234,7 +1234,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
auto& eltwise = dynamic_cast<EltwiseLayer&>(*layer.get());
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
// for eltwise sum/sub in 16-bit precision one input should be 4 bytes and one 2 bytes - detecting that below
// the names of variables are left for clarity although not always reflecting the real precision/size
@ -1414,7 +1414,7 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto input_2 = layer->insData[1].lock(); // the second input corresponds to ptr_weights in component
auto outputs = *layer->outData.begin();
auto inputPrecision = quantized ? Precision(Precision::I16) : input_1->getPrecision();
uint32_t noOfInputsDivisor = GNALimitations::noOfInputsDivisor;
uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;
auto in_dims = input_1->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
@ -1478,7 +1478,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
auto outputs = *layer->outData.begin();
const auto out_dims = outputs->getDims();
Precision inputPrecision;
uint32_t noOfInputsDivisor = GNALimitations::noOfInputsDivisor;
uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;
if (!quantized) {
inputPrecision = inputs->getPrecision();
@ -1486,11 +1486,11 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
inputPrecision = Precision(Precision::I16);
} else {
inputPrecision = Precision(Precision::I8);
noOfInputsDivisor = GNALimitations::noOfInputsLowPrecDivisor;
noOfInputsDivisor = limitations::noOfInputsLowPrecDivisor;
}
auto input_data = HasTo2DReshapeData(layer) ?
Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8) : inputs;
Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8) : inputs;
auto in_dims = input_data->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size;
@ -1690,7 +1690,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
auto inputs = layer->insData.begin()->lock();
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
uint32_t num_columns_in = GetDimFromBack(inputs->getDims(), 2);
uint32_t num_rows_out = GetDimFromBack(outputs->getDims(), 1);
uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
@ -1826,7 +1826,7 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
auto inputs = layer->insData.begin()->lock();
const auto noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
const uint32_t orginalInputSize =
InferenceEngine::details::product(std::next(inputs->getDims().begin()), inputs->getDims().end());
const uint32_t orginalOutputSize =
@ -1842,7 +1842,7 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth;
const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, noOfInputsDivisor);
auto numOutputs = GNAConvolutionLayer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride);
auto numOutputs = gna_convolution_layer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride);
numOutputs *= numberOfFilters;
const auto& biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
auto& currentComponent = dnnComponents.addComponent(layer->name, "affine");
@ -2154,7 +2154,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
}
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
// now this can be run on GNA
if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case
@ -2359,7 +2359,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
// find this input in vector sum all outputs in primitive
auto it = std::find_if(concatLayerInfoItem.concatInputLayers.begin(),
concatLayerInfoItem.concatInputLayers.end(),
[&name](GNAPluginNS::GNAConcatLayer::ConcatConnectedLayerInfo &item) {
[&name](GNAConcatLayer::ConcatConnectedLayerInfo &item) {
return item.name == name;
});
if (it != concatLayerInfoItem.concatInputLayers.end()) {
@ -2371,11 +2371,11 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
std::find_if(concat_connection.begin(),
concat_connection.end(),
[&concatLayerInfo]
(const std::pair<std::string, GNAPluginNS::GNAConcatLayer> &concatItem) -> bool {
(const std::pair<std::string, GNAConcatLayer> &concatItem) -> bool {
auto it = std::find_if(concatItem.second.concatInputLayers.begin(),
concatItem.second.concatInputLayers.end(),
[&concatLayerInfo]
(const GNAPluginNS::GNAConcatLayer::ConcatConnectedLayerInfo &item) -> bool {
(const GNAConcatLayer::ConcatConnectedLayerInfo &item) -> bool {
return item.name == concatLayerInfo->first;
});
return it != concatItem.second.concatInputLayers.end();
@ -2384,9 +2384,9 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
auto outputSize = std::max(concatLayerInfoItem.reserved_size, num_data_bytes_out * 2);
gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(outputSize), 64);
std::function<void(GNAConcatLayer, GNAPluginNS::GnaInputs&, ConcatConnection&)> allocate_input_recursively =
std::function<void(GNAConcatLayer, GnaInputs&, ConcatConnection&)> allocate_input_recursively =
[&allocate_input_recursively](GNAConcatLayer clayer,
GNAPluginNS::GnaInputs &inputs,
GnaInputs &inputs,
ConcatConnection& concat_connection) {
size_t concatInputIdx = 0;
for (auto &&inputLayer : clayer.concatInputLayers) {
@ -2437,7 +2437,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
gnamem->getQueue(mem_region)->reserve_ptr(layer, ptr, ALIGN64(num_data_bytes_out), 64);
}
GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
void *ptr,
size_t num_data_bytes_in,
int32_t offset,
@ -2465,7 +2465,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
auto minInput = inputs_ptr_->at(prevLayer->name).get_required_size();
if (num_data_bytes_in < minInput) {
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
log::debug() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" << ALIGN(minInput, noOfInputsDivisor);
num_data_bytes_in = ALIGN(minInput, noOfInputsDivisor);
}
@ -2528,7 +2528,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// find this input in vector sum all outputs in primitive
auto it = std::find_if(splitLayerInfoItem.splitOutputLayers.begin(),
splitLayerInfoItem.splitOutputLayers.end(),
[&idx, &layer](GNAPluginNS::GNASplitLayer::SplitConnectedLayerInfo &item) {
[&idx, &layer](GNASplitLayer::SplitConnectedLayerInfo &item) {
return item.connectedTo == layer && item.insDataIdx == idx;
});

View File

@ -15,7 +15,6 @@
#include "descriptions/gna_desc.hpp"
#include "descriptions/gna_flags.hpp"
#include "connection_details.hpp"
#include "backend/dnn.hpp"
#include "memory/gna_memory.hpp"
#include "layers/gna_memory_layer.hpp"
#include "layers/gna_concat_layer.hpp"
@ -27,12 +26,14 @@
#include "gna_device.hpp"
#include "gna_data_types.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
class GNAGraphCompiler {
private:
std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnn;
std::shared_ptr<GNAPluginNS::gna_memory_type> gnamem;
std::shared_ptr<GNAPluginNS::GnaInputs> inputs_ptr_;
std::shared_ptr<backend::AMIntelDNN> dnn;
std::shared_ptr<gna_memory_type> gnamem;
std::shared_ptr<GnaInputs> inputs_ptr_;
// layers with extra storage for connections and additional
// non-trivial processing
@ -49,20 +50,20 @@ private:
static void assertConvolutionLayoutProper(const InferenceEngine::DataPtr&);
std::vector<uint8_t> static transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols);
std::unique_ptr<const GNALimitations::Cnn2D::AbstractValidator> cnn2dValidator;
std::unique_ptr<const limitations::cnn2d::AbstractValidator> cnn2dValidator;
bool ShouldUseOnlyConv2DGnaIface() const;
public:
GNAPluginNS::backend::DnnComponents dnnComponents;
backend::DnnComponents dnnComponents;
MemoryConnection memory_connection;
ConcatConnection concat_connection;
ConstConnections const_connections;
GNAGraphCompiler(const Config& gna_config);
void setGNAMemoryPtr(std::shared_ptr<GNAPluginNS::gna_memory_type> gnaMemPtr);
void setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr);
void setInputsPtr(std::shared_ptr<GNAPluginNS::GnaInputs> inputsPtr);
void setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr);
void setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr);
void setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr);
void fillMemoryConnections(std::unordered_map<std::string,
std::vector<InferenceEngine::CNNLayerPtr>> &memoryPairs);
@ -102,7 +103,7 @@ public:
* in case we would like to use zero offset and connect from a pointer, set this to a negative value
* @return layer used as input
*/
GNAPluginNS::ConnectionDetails connectInput(InferenceEngine::CNNLayerPtr layer,
ConnectionDetails connectInput(InferenceEngine::CNNLayerPtr layer,
void *pVoid,
size_t num_data_bytes_in,
int32_t offset = 0,
@ -149,4 +150,6 @@ public:
void Reset();
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -11,7 +11,8 @@
#include "layers/gna_layer_info.hpp"
#include "ops/util/util.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
/**
* @brief checks if it's a reshape from 4d to 3d tensor
@ -104,7 +105,7 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
const auto layout = next->outData[0]->getLayout();
const auto order = next->GetParamAsInts("order");
if (layout != InferenceEngine::Layout::NCHW && layout != InferenceEngine::Layout::CHW ||
order != GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC) &&
order != permute::GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC) &&
order != std::vector<int32_t>{0, 2, 1} /* NCW to NWC */) {
return std::make_pair(nullptr, nullptr);
}
@ -155,7 +156,7 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
const auto layout = prev->outData[0]->getLayout();
const auto order = prev->GetParamAsInts("order");
if (layout != InferenceEngine::Layout::NCHW && layout != InferenceEngine::Layout::CHW ||
order != GetPermuteOrder(InferenceEngine::Layout::NHWC, InferenceEngine::Layout::NCHW) &&
order != permute::GetPermuteOrder(InferenceEngine::Layout::NHWC, InferenceEngine::Layout::NCHW) &&
order != std::vector<int32_t>{0, 2, 1} /* NWC to NCW */) {
return std::make_pair(nullptr, nullptr);
}
@ -427,4 +428,5 @@ inline std::vector<TranspositionInfo> FindTranspositionInfoFromNextLayers(Infere
return findTranspositionInfoRecursive(layer);
}
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -8,7 +8,9 @@
#include "gna_graph_tools.hpp"
#include "layers/gna_layer_info.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
/**
* @brief returns a pointer to 2D reshaped data to satisfy the maximum size of dimension zero
* @param input a pointer to data to be reshaped
@ -47,14 +49,15 @@ inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input
* @param layer
*/
inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
if (GNAPluginNS::LayerInfo(layer).isPower() || GNAPluginNS::LayerInfo(layer).isCopy())
if (LayerInfo(layer).isPower() || LayerInfo(layer).isCopy())
return true;
if (!GNAPluginNS::LayerInfo(layer).isSyntheticScaleShift())
if (!LayerInfo(layer).isSyntheticScaleShift())
return false;
// Don't reshape diagonal layers with bias connection
return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
return !LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
}
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
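
The helper above is used by the graph compiler in the pattern below; this is only a sketch of that call shape, with "layer" assumed to be a valid InferenceEngine::CNNLayerPtr:

// Sketch of the typical call site (compare AffinePrimitive in gna_graph_compiler.cpp).
auto inputs = layer->insData.front().lock();
auto input_data = ov::intel_gna::HasTo2DReshapeData(layer)
    ? ov::intel_gna::Get2DReshapedData(inputs, ov::intel_gna::limitations::GetMinBatchToFitInBuffer(inputs), 8)
    : inputs;
auto in_dims = input_data->getDims();  // reshaped to [batch, rest] when the reshape applies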

View File

@ -6,7 +6,9 @@
#include "gna_plugin.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
GNAInferRequest::GNAInferRequest(const std::shared_ptr<GNAPlugin>& plg,
const std::vector<std::shared_ptr<const ov::Node>>& inputs,
@ -170,4 +172,5 @@ void GNAInferRequest::CreateInferRequest() {
}
}
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -9,7 +9,9 @@
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
#include "request_status.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
class GNAPlugin;
class GNAInferRequest : public InferenceEngine::IInferRequestInternal {
@ -60,4 +62,6 @@ private:
uint32_t _infer_request_idx = kRequestIndexInvalid;
std::shared_ptr<GNAPlugin> plg;
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -11,11 +11,15 @@
#include <openvino/itt.hpp>
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(GNAPlugin);
OV_ITT_DOMAIN(GNA_LT);
}
}
}
} // namespace domains
} // namespace itt
} // namespace intel_gna
} // namespace ov

View File

@ -28,12 +28,15 @@
*/
#define ALIGN64(number) ALIGN(number, 64)
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace tools {
template <typename T, typename... Args>
std::unique_ptr<T> make_unique(Args&&... args) {
return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}
} // namespace tools
} // namespace GNAPluginNS
} // namespace tools
} // namespace intel_gna
} // namespace ov
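
A quick numeric illustration of the alignment helpers and the make_unique wrapper above; ALIGN's definition is outside this hunk and is assumed to round its argument up to the nearest multiple of the second parameter:

// Illustration only, under the assumed round-up semantics of ALIGN(n, s).
const uint32_t padded   = ALIGN(70, 8);    // 72
const uint32_t padded64 = ALIGN64(100);    // 128
auto vec = ov::intel_gna::tools::make_unique<std::vector<int>>(10, 0);  // unique_ptr to a vector of ten zeros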

View File

@ -29,7 +29,7 @@
#include "serial/headers/latest/gna_model_header.hpp"
#include "common/versioning.hpp"
using namespace GNAPluginNS;
using namespace ov::intel_gna;
inline void writeNBytes(const void *ptr, uint32_t size, std::ostream & os) {
os.write(static_cast<const char*>(ptr), size);
@ -108,7 +108,7 @@ std::string GNAVersionSerializer::Import(std::istream& is) const {
const int gna_header_magic = is_little_endian() ? 0x4d414e47 : 0x474e414d;
GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
header_latest::ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
is.exceptions(std::istream::failbit);
auto startPos = is.tellg();
if (startPos == -1) {
@ -122,11 +122,11 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
stream_len -= startPos;
is.seekg(startPos, is.beg);
HeaderLatest::ModelHeader header;
header_latest::ModelHeader header;
header.version.major = 0u;
header.version.minor = 0u;
auto size_of_headers_header = sizeof(HeaderLatest::ModelHeader::gnam) + sizeof(HeaderLatest::ModelHeader::headerSize)
+ sizeof(HeaderLatest::ModelHeader::Version);
auto size_of_headers_header = sizeof(header_latest::ModelHeader::gnam) + sizeof(header_latest::ModelHeader::headerSize)
+ sizeof(header_latest::ModelHeader::Version);
if (stream_len > size_of_headers_header) {
readNBytes(&header, static_cast<uint32_t>(size_of_headers_header), is);
} else {
@ -142,34 +142,34 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
}
is.seekg(startPos, is.beg);
Header2dot1::ModelHeader tempHeader2dot1;
header_2_dot_1::ModelHeader tempheader_2_dot_1;
switch (header.version.major) {
case 2:
switch (header.version.minor) {
case 1:
readBits(tempHeader2dot1, is);
header = HeaderLatest::ModelHeader(tempHeader2dot1);
readBits(tempheader_2_dot_1, is);
header = header_latest::ModelHeader(tempheader_2_dot_1);
break;
case 2:
case 3:
{
Header2dot3::ModelHeader tempHeader2dot3;
readBits(tempHeader2dot3, is);
header = HeaderLatest::ModelHeader(tempHeader2dot3);
header_2_dot_3::ModelHeader tempheader_2_dot_3;
readBits(tempheader_2_dot_3, is);
header = header_latest::ModelHeader(tempheader_2_dot_3);
break;
}
case 4:
{
Header2dot4::ModelHeader tempHeader2dot4;
readBits(tempHeader2dot4, is);
header = HeaderLatest::ModelHeader(tempHeader2dot4);
header_2_dot_4::ModelHeader tempheader_2_dot_4;
readBits(tempheader_2_dot_4, is);
header = header_latest::ModelHeader(tempheader_2_dot_4);
break;
}
case 5:
case 6:
case 7:
case 8:
readNBytes(&header, sizeof(HeaderLatest::ModelHeader), is);
readNBytes(&header, sizeof(header_latest::ModelHeader), is);
break;
default:
THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 8 and is: " << header.version.minor;
@ -190,10 +190,10 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
return header;
}
GNAPluginNS::HeaderLatest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::istream &is) {
header_latest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::istream &is) {
is.exceptions(std::istream::failbit);
HeaderLatest::RuntimeEndPoint endPoint;
header_latest::RuntimeEndPoint endPoint;
switch (model_header_.version.major) {
case 2:
switch (model_header_.version.minor) {
@ -204,20 +204,20 @@ GNAPluginNS::HeaderLatest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::ist
case 5:
case 6:
{
Header2dot6::RuntimeEndPoint tempEndPoint2dot6;
header_2_dot_6::RuntimeEndPoint tempEndPoint2dot6;
readBits(tempEndPoint2dot6, is);
endPoint = HeaderLatest::RuntimeEndPoint(tempEndPoint2dot6, model_header_.nGroup);
endPoint = header_latest::RuntimeEndPoint(tempEndPoint2dot6, model_header_.nGroup);
break;
}
case 7:
{
Header2dot7::RuntimeEndPoint tempEndPoint2dot7;
header_2_dot_7::RuntimeEndPoint tempEndPoint2dot7;
readBits(tempEndPoint2dot7, is);
endPoint = HeaderLatest::RuntimeEndPoint(tempEndPoint2dot7);
endPoint = header_latest::RuntimeEndPoint(tempEndPoint2dot7);
break;
}
case 8:
readNBytes(&endPoint, sizeof(HeaderLatest::RuntimeEndPoint), is);
readNBytes(&endPoint, sizeof(header_latest::RuntimeEndPoint), is);
break;
default:
THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 8 and is: "
@ -259,8 +259,8 @@ static const std::map<Gna2OperationType, std::vector<uint32_t>> GnaParamSize{
void GNAModelSerial::Import(void *basePointer,
size_t gnaGraphSize,
std::istream &is,
GNAPluginNS::GnaInputs &inputs,
GNAPluginNS::GnaOutputs &outputs,
GnaInputs &inputs,
GnaOutputs &outputs,
TranspositionInfoMap &inputsTranspositionInfo,
TranspositionInfoMap &outputsTranspositionInfo,
std::string & libVersionFromFile) {
@ -269,7 +269,7 @@ void GNAModelSerial::Import(void *basePointer,
if (model_header_.version.major == 2) {
for (auto inputIndex = 0; inputIndex < model_header_.nInputs; inputIndex++) {
std::string name = (model_header_.version.minor >= 3) ? readString(is) : std::string("input" + std::to_string(inputIndex));
inputs[name] = GNAPluginNS::InputDesc(name);
inputs[name] = InputDesc(name);
}
if (model_header_.version.minor >= 5) {
// 3. Read transposition input info
@ -294,7 +294,7 @@ void GNAModelSerial::Import(void *basePointer,
if (model_header_.version.major == 2) {
for (auto outputIndex = 0; outputIndex < model_header_.nOutputs; outputIndex++) {
std::string name = (model_header_.version.minor >= 3) ? readString(is) : std::string("output" + std::to_string(outputIndex));
outputs[name] = GNAPluginNS::OutputDesc(name);
outputs[name] = OutputDesc(name);
}
}
// 7. Read outputs
@ -416,8 +416,8 @@ void GNAModelSerial::Export(const GnaAllocations& allocations, std::ostream& os)
return out;
};
auto convert_to_serial = [&allocationsOrdered](const GNAPluginNS::GnaDesc& desc) {
HeaderLatest::RuntimeEndPoint ep;
auto convert_to_serial = [&allocationsOrdered](const GnaDesc& desc) {
header_latest::RuntimeEndPoint ep;
ep.elements_count = desc.num_elements;
ep.scaleFactor = desc.scale_factor;
ep.element_size = desc.tensor_precision.size();
@ -441,12 +441,12 @@ void GNAModelSerial::Export(const GnaAllocations& allocations, std::ostream& os)
/**
* writing header
*/
HeaderLatest::ModelHeader header;
header_latest::ModelHeader header;
header.gnam[0] = 'G';
header.gnam[1] = 'N';
header.gnam[2] = 'A';
header.gnam[3] = 'M';
header.headerSize = sizeof(HeaderLatest::ModelHeader);
header.headerSize = sizeof(header_latest::ModelHeader);
header.gnaMemSize = gnaGraphSize;
header.layersCount = layers.size();
header.nGroup = 1; // just to support the old models
@ -561,9 +561,9 @@ void GNAModelSerial::Export(const GnaAllocations& allocations, std::ostream& os)
version_.Export(os);
}
void GNAModelSerial::ImportInputs(std::istream &is, void* basePtr, GNAPluginNS::GnaInputs &inputs) {
void GNAModelSerial::ImportInputs(std::istream &is, void* basePtr, GnaInputs &inputs) {
for (auto &input : inputs.Get()) {
HeaderLatest::RuntimeEndPoint ep = ReadEndPoint(is);
header_latest::RuntimeEndPoint ep = ReadEndPoint(is);
input.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + ep.descriptor_offset));
input.orientation = ep.orientation;
@ -589,9 +589,9 @@ void GNAModelSerial::ImportInputs(std::istream &is, void* basePtr, GNAPluginNS::
}
}
void GNAModelSerial::ImportOutputs(std::istream &is, void* basePtr, GNAPluginNS::GnaOutputs &outputs) {
void GNAModelSerial::ImportOutputs(std::istream &is, void* basePtr, GnaOutputs &outputs) {
for (auto &output : outputs.Get()) {
HeaderLatest::RuntimeEndPoint ep = ReadEndPoint(is);
header_latest::RuntimeEndPoint ep = ReadEndPoint(is);
output.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + ep.descriptor_offset));
output.orientation = ep.orientation;
@ -648,9 +648,9 @@ void GNAModelSerial::ExportTranspositionInfo(std::ostream &os,
}
void GNAModelSerial::AppendTensorNameIfNeeded(GnaDesc& nodeDesc) const {
static constexpr Header2dot8::ModelHeader::Version kHasTensorNamesVersion;
static constexpr header_2_dot_8::ModelHeader::Version kHasTensorNamesVersion;
if (HeaderLatest::IsFirstVersionLower(model_header_.version, kHasTensorNamesVersion) &&
if (header_latest::IsFirstVersionLower(model_header_.version, kHasTensorNamesVersion) &&
nodeDesc.tensor_names.empty()) {
nodeDesc.tensor_names.insert(nodeDesc.name);
}

View File

@ -34,16 +34,16 @@ public:
private:
Gna2Model * gna2model_;
MemoryType states, *pstates_ = nullptr;
GNAPluginNS::GnaInputs inputs_;
GNAPluginNS::GnaOutputs outputs_;
ov::intel_gna::GnaInputs inputs_;
ov::intel_gna::GnaOutputs outputs_;
TranspositionInfoMap inputs_transpose_info_;
TranspositionInfoMap outputs_transpose_info_;
GNAPluginNS::HeaderLatest::ModelHeader model_header_;
ov::intel_gna::header_latest::ModelHeader model_header_;
GNAVersionSerializer version_;
void ImportInputs(std::istream &is, void* basePtr, GNAPluginNS::GnaInputs &inputs);
void ImportInputs(std::istream &is, void* basePtr, ov::intel_gna::GnaInputs &inputs);
void ImportOutputs(std::istream &is, void* basePtr, GNAPluginNS::GnaOutputs &outputs);
void ImportOutputs(std::istream &is, void* basePtr, ov::intel_gna::GnaOutputs &outputs);
void ImportTranspositionInfo(std::istream &is, std::string &name, std::vector<TranspositionInfo> &transpositionInfo);
@ -53,7 +53,7 @@ private:
* @brief Update input or output description to support importing of < 2.8 format where tensor_names were not present
* @param nodeDesc input or output description to be appended
*/
void AppendTensorNameIfNeeded(GNAPluginNS::GnaDesc& nodeDesc) const;
void AppendTensorNameIfNeeded(ov::intel_gna::GnaDesc& nodeDesc) const;
public:
GNAModelSerial(Gna2Model* model, MemoryType& states_holder)
@ -62,14 +62,14 @@ private:
}
GNAModelSerial(Gna2Model* model,
GNAPluginNS::GnaInputs& inputs,
GNAPluginNS::GnaOutputs& outputs)
ov::intel_gna::GnaInputs& inputs,
ov::intel_gna::GnaOutputs& outputs)
: gna2model_(model),
inputs_(inputs),
outputs_(outputs) {
}
void setHeader(GNAPluginNS::HeaderLatest::ModelHeader header) {
void setHeader(ov::intel_gna::header_latest::ModelHeader header) {
model_header_ = header;
}
@ -100,9 +100,9 @@ private:
* @param is - opened input stream
* @return
*/
static GNAPluginNS::HeaderLatest::ModelHeader ReadHeader(std::istream &is);
static ov::intel_gna::header_latest::ModelHeader ReadHeader(std::istream &is);
GNAPluginNS::HeaderLatest::RuntimeEndPoint ReadEndPoint(std::istream &is);
ov::intel_gna::header_latest::RuntimeEndPoint ReadEndPoint(std::istream &is);
/**
* @brief Import model from FS into preallocated buffer,
@ -114,8 +114,8 @@ private:
void Import(void *basePointer,
size_t gnaGraphSize,
std::istream &is,
GNAPluginNS::GnaInputs &inputs,
GNAPluginNS::GnaOutputs &outputs,
ov::intel_gna::GnaInputs &inputs,
ov::intel_gna::GnaOutputs &outputs,
TranspositionInfoMap& inputstranspositionInfo,
TranspositionInfoMap& outputstranspositionInfo,
std::string& modelLibVersion);

View File

@ -125,8 +125,8 @@ inline uint32_t ToByteSize(const Gna2DataType type) {
using namespace std;
using namespace InferenceEngine;
using namespace InferenceEngine::details;
using namespace GNAPluginNS;
using namespace GNAPluginNS::memory;
using namespace ov::intel_gna::memory;
using namespace ov::intel_gna::frontend;
namespace InferenceEngine {
@ -355,9 +355,9 @@ GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) :
void GNAPlugin::Init() {
OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "Init");
dnn = std::make_shared<backend::AMIntelDNN>(backend::AMIntelDNN());
gnaFlags = std::make_shared<GNAPluginNS::GNAFlags>(GNAPluginNS::GNAFlags());
inputs_ptr_ = std::make_shared<GNAPluginNS::GnaInputs>(GNAPluginNS::GnaInputs());
outputs_ = GNAPluginNS::GnaOutputs();
gnaFlags = std::make_shared<GNAFlags>(GNAFlags());
inputs_ptr_ = std::make_shared<GnaInputs>(GnaInputs());
outputs_ = GnaOutputs();
graphCompiler.setDNNPtr(dnn);
graphCompiler.setInputsPtr(inputs_ptr_);
@ -508,7 +508,7 @@ bool GNAPlugin::TryToInitOutput(const std::string &portName, InferenceEngine::CN
outputs_.at(portName).ptrs.resize(gnaFlags->num_requests);
outputs_.at(portName).orientation = orientation;
outputs_.at(portName).set_precision(numBytesPerElem);
outputs_.at(portName).scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : GNAPluginNS::kScaleFactorDefault;
outputs_.at(portName).scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : kScaleFactorDefault;
outputs_.at(portName).num_elements = numElem;
// binding ptr for first infer request - then others will be setup during relocation
@ -787,7 +787,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// Check the network
std::string error;
if (!GNAPluginNS::GNALimitations::AreLayersSupported(network, error)) {
if (!limitations::AreLayersSupported(network, error)) {
THROW_GNA_EXCEPTION << error.c_str();
}
@ -1082,7 +1082,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// update orientation of model input layer
for (auto& inputLayer : inputLayers) {
if (LayerInfo(inputLayer).isInput()) {
ov::intela_gna::helpers::updateModelInputOrientationWithoutConvolution(*inputLayer,
ov::intel_gna::helpers::updateModelInputOrientationWithoutConvolution(*inputLayer,
graphCompiler.dnnComponents,
*inputs_ptr_);
}
@ -1092,7 +1092,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
for (auto&& outPort : outputs_data_map_) {
auto outLayer = getCreatorLayer(outPort.second).lock();
if (outLayer && LayerInfo(outLayer).isOutput()) {
ov::intela_gna::helpers::updateModelOutputOrientation(outPort.first,
ov::intel_gna::helpers::updateModelOutputOrientation(outPort.first,
outLayer->name,
graphCompiler.dnnComponents,
outputs_);
@ -1113,11 +1113,11 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
#endif
}
bool GNAPluginNS::GNAPlugin::isFP32ModeActive() const {
bool GNAPlugin::isFP32ModeActive() const {
return gnaFlags->sw_fp32 || !gnadevice;
}
std::string GNAPluginNS::GNAPlugin::effectiveGnaCompileTarget() const {
std::string GNAPlugin::effectiveGnaCompileTarget() const {
if (gnadevice) {
return gnadevice->GetCompileTarget();
} else if (!config.gnaCompileTarget.empty()) {
@ -1161,7 +1161,7 @@ std::shared_ptr<request::ModelWrapper> GNAPlugin::createModelWrapperForLoadNetwo
THROW_GNA_EXCEPTION << "dnn is nullptr cannot load network";
}
std::weak_ptr<GNAPluginNS::backend::AMIntelDNN> weakDnn = dnn;
std::weak_ptr<backend::AMIntelDNN> weakDnn = dnn;
auto compileTarget = effectiveGnaCompileTarget();
auto initializer = [weakDnn, compileTarget](Gna2Model* model) {
if (auto dnn = weakDnn.lock()) {
@ -1174,7 +1174,7 @@ std::shared_ptr<request::ModelWrapper> GNAPlugin::createModelWrapperForLoadNetwo
return request::ModelWrapperFactory::createInitialized(std::move(initializer));
}
std::shared_ptr<request::ModelWrapper> GNAPluginNS::GNAPlugin::createModelWrapperForImportNetwork(
std::shared_ptr<request::ModelWrapper> GNAPlugin::createModelWrapperForImportNetwork(
uint32_t numberOfOperations) {
return request::ModelWrapperFactory::createWithNumberOfEmptyOperations(numberOfOperations);
}
@ -1238,20 +1238,21 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap& inputs, Infer
int inputNum = 0;
for (auto& input : inputs) {
auto inputLayout = input.second->getTensorDesc().getLayout();
if (inputLayout != Layout::C && inputLayout != Layout::NC && inputLayout != Layout::CN &&
inputLayout != Layout::CHW && inputLayout != Layout::NCHW) {
if (inputLayout != InferenceEngine::Layout::C && inputLayout != InferenceEngine::Layout::NC &&
inputLayout != InferenceEngine::Layout::CN && inputLayout != InferenceEngine::Layout::CHW &&
inputLayout != InferenceEngine::Layout::NCHW) {
THROW_GNA_EXCEPTION << "Expected input blob to have Layout::C, Layout::NC, Layout::CN, Layout::NCHW or "
"Layout::CHW. But was: "
<< input.second->getTensorDesc().getLayout();
}
if (inputLayout == Layout::NCHW || inputLayout == Layout::CHW) {
if (inputLayout == InferenceEngine::Layout::NCHW || inputLayout == InferenceEngine::Layout::CHW) {
// specific case that can be squeezed to 2d
inputLayout = Layout::NC;
inputLayout = InferenceEngine::Layout::NC;
}
auto is1D = input.second->getTensorDesc().getLayout() == Layout::C;
auto is3D = input.second->getTensorDesc().getLayout() == Layout::CHW;
auto is1D = input.second->getTensorDesc().getLayout() == InferenceEngine::Layout::C;
auto is3D = input.second->getTensorDesc().getLayout() == InferenceEngine::Layout::CHW;
if (inputs_ptr_->at(input.first).ptrs.empty()) {
// should not happen in user code, however it might happen if there is an integration based on a non-executable network
@ -1297,7 +1298,7 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap& inputs, Infer
ImportFrames(inputs_ptr_->at(input.first).ptrs[index],
input.second->cbuffer().as<float*>(),
input.second->getTensorDesc().getPrecision(),
gnaFlags->sw_fp32 ? GNAPluginNS::kScaleFactorDefault : inputs_ptr_->at(input.first).scale_factor,
gnaFlags->sw_fp32 ? kScaleFactorDefault : inputs_ptr_->at(input.first).scale_factor,
inputOrientation,
importedFrames,
targetGroups,
@ -1394,21 +1395,21 @@ RequestStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
for (auto&& outputBlobIt : requestResult) {
auto& outputBlob = outputBlobIt.second;
auto& outputDesc = outputs_.at(outputBlobIt.first);
if (outputBlob->getTensorDesc().getLayout() != Layout::C &&
outputBlob->getTensorDesc().getLayout() != Layout::NC &&
outputBlob->getTensorDesc().getLayout() != Layout::CN &&
outputBlob->getTensorDesc().getLayout() != Layout::NCHW &&
outputBlob->getTensorDesc().getLayout() != Layout::CHW &&
outputBlob->getTensorDesc().getLayout() != Layout::SCALAR) {
if (outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::C &&
outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::NC &&
outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::CN &&
outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::NCHW &&
outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::CHW &&
outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR) {
THROW_GNA_EXCEPTION << "Expected output blob to have Layout::C, Layout::NC, Layout::CN, Layout::NCHW or "
"Layout::CHW. But was "
<< outputBlob->getTensorDesc().getLayout();
}
auto dims = outputBlob->getTensorDesc().getDims();
auto is1D = outputBlob->getTensorDesc().getLayout() == Layout::C;
auto isScalar = outputBlob->getTensorDesc().getLayout() == Layout::SCALAR;
auto is3D = outputBlob->getTensorDesc().getLayout() == Layout::CHW;
auto is1D = outputBlob->getTensorDesc().getLayout() == InferenceEngine::Layout::C;
auto isScalar = outputBlob->getTensorDesc().getLayout() == InferenceEngine::Layout::SCALAR;
auto is3D = outputBlob->getTensorDesc().getLayout() == InferenceEngine::Layout::CHW;
auto batchSize = (is1D || isScalar || is3D) ? 1 : dims[0];
auto elementsPerBatch =
isScalar ? 1
@ -1635,7 +1636,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
SetNetworkInputs();
SetNetworkOutputs();
ov::intela_gna::helpers::ApplyInputScaleFactors(config, header, *inputs_ptr_);
ov::intel_gna::helpers::ApplyInputScaleFactors(config, header, *inputs_ptr_);
auto getOrientation = [](Gna2Operation& gnaOperation) {
return gnaOperation.Type == Gna2OperationTypeConvolution ? kDnnNonInterleavedOrientation

View File

@ -26,8 +26,10 @@
#include <legacy/ie_util_internal.hpp>
#include <gna2-model-api.h>
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace request {
class ModelWrapper;
class WorkerPool;
class Worker;
@ -38,13 +40,13 @@ protected:
std::string _pluginName = "GNA";
Config config {};
std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnn;
std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlags;
std::shared_ptr<GNAPluginNS::gna_memory_type> gnamem;
std::shared_ptr<GNAPluginNS::GnaInputs> inputs_ptr_;
GNAPluginNS::GnaOutputs outputs_;
std::shared_ptr<backend::AMIntelDNN> dnn;
std::shared_ptr<GNAFlags> gnaFlags;
std::shared_ptr<gna_memory_type> gnamem;
std::shared_ptr<GnaInputs> inputs_ptr_;
GnaOutputs outputs_;
GNAPluginNS::GNAGraphCompiler graphCompiler;
GNAGraphCompiler graphCompiler;
uint32_t activeLayerIndex = 0xffffffff;
TranspositionInfoMap transpose_inputs_info;
@ -237,4 +239,5 @@ protected:
#endif
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -19,10 +19,11 @@
using namespace InferenceEngine;
using namespace InferenceEngine::details;
using namespace ov::intel_gna;
using namespace ov::intel_gna::common;
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
const uint8_t Config::max_num_requests;
OPENVINO_SUPPRESS_DEPRECATED_START
@ -127,7 +128,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
check_scale_factor(scale_factor);
// missing scale factors are set to be 1.0f
if (inputScaleFactors.size() <= input_index) {
inputScaleFactors.resize(input_index + 1, GNAPluginNS::kScaleFactorDefault);
inputScaleFactors.resize(input_index + 1, kScaleFactorDefault);
}
inputScaleFactors[input_index] = InferenceEngine::CNNLayer::ie_parse_float(value);
} else if (key == GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE) || key == ov::intel_gna::firmware_model_image_path) {
@ -414,4 +415,6 @@ std::vector<std::string> Config::GetSupportedKeys() const {
}
return result;
}
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -14,7 +14,8 @@
#include <map>
#include <mutex>
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
static const float kScaleFactorDefault = 1.f;
@ -76,4 +77,5 @@ struct Config {
static const uint8_t max_num_requests = 127;
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -7,7 +7,6 @@
using namespace InferenceEngine;
using namespace std;
using namespace GNAPluginNS;
static const Version gnaPluginDescription = {
{2, 1},

View File

@ -13,7 +13,8 @@
#include "gna_plugin_config.hpp"
#include <legacy/ie_util_internal.hpp>
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
class GNAPluginInternal : public InferenceEngine::IInferencePlugin {
private:
@ -104,4 +105,5 @@ public:
}
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -13,7 +13,6 @@
#include <unordered_map>
#include <memory>
using namespace GNAPluginNS;
using namespace InferenceEngine;
using namespace InferenceEngine::PluginConfigParams;

View File

@ -5,7 +5,7 @@
#include <cstdint>
#include <limits>
#include "gna_slope_scale.h"
#include "gna_slope_scale.hpp"
pwl_gna_slope_scale_t gna_slope(const double slope,
const double in_scale,

View File

@ -5,7 +5,8 @@
#include <ie_memcpy.h>
#include "gna_data_types.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
/**
* @brief convert a tensor or its parts from NCHW to NHWC order based on the transposition information.
@ -79,4 +80,5 @@ inline void ConvertTensorFromNCHWToNHWC(size_t precision, size_t rows, size_t co
}
}
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -9,7 +9,9 @@
#include <string>
#include "gna_graph_tools.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
/**
* @brief implements upstream search for BFS routine
*/
@ -113,5 +115,5 @@ inline UpstreamLayersContainer make_upstream_order(InferenceEngine::CNNLayer* or
return fusedCnt;
}
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -9,7 +9,9 @@
#include <legacy/ie_layers.h>
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
class GNAConcatLayer {
InferenceEngine::CNNLayerPtr concatLayer;
@ -46,4 +48,6 @@ public:
std::vector<ConcatConnectedLayerInfo> concatInputLayers;
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -14,8 +14,10 @@
#include "gna_graph_tools.hpp"
#include "log/debug.hpp"
namespace GNAPluginNS {
namespace GNAConvolutionLayer {
namespace ov {
namespace intel_gna {
namespace gna_convolution_layer {
bool should_transpose_h_w(const uint32_t in_height,
const uint32_t kernel_height,
const uint32_t in_channels,
@ -23,9 +25,13 @@ bool should_transpose_h_w(const uint32_t in_height,
return in_height == kernel_height && in_channels == 1 && stride_height == 1;
}
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t in_channels,
const uint32_t kernelHeight, const uint32_t kernelWidth,
const uint32_t strideHeight, const uint32_t strideWidth) {
bool isMappableFrom2DTo1D(const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t in_channels,
const uint32_t kernelHeight,
const uint32_t kernelWidth,
const uint32_t strideHeight,
const uint32_t strideWidth) {
if (inHeight <= 1 || inWidth <= 1) {
// Mapping not needed since input is already 1D
return false;
@ -34,8 +40,11 @@ bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const
should_transpose_h_w(inHeight, kernelHeight, in_channels, strideHeight);
}
bool is3DInputOr2DKernel(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
const uint32_t kernelHeight, const uint32_t kernelWidth) {
bool is3DInputOr2DKernel(const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inDepth,
const uint32_t kernelHeight,
const uint32_t kernelWidth) {
return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1);
}
@ -46,18 +55,27 @@ double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
// for kernelSize >= 14 -> 1.7
// for kernelSize >= 9 -> 1.3
// for kernelSize in {7, 8} -> 1.2
const std::vector< KRT > reducers{ {49, 3.0}, {36, 2.6}, {21, 2.3}, {14, 1.7}, {9, 1.3}, {7, 1.2} };
const std::vector<KRT> reducers{{49, 3.0}, {36, 2.6}, {21, 2.3}, {14, 1.7}, {9, 1.3}, {7, 1.2}};
auto reducer = 1.0;
const auto inDepth = InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
const auto inDepth =
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
const auto inHeight =
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
const auto inWidth =
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
if (is3DInputOr2DKernel(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
!isMappableFrom2DTo1D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x, conv._stride_y, conv._stride_x)) {
!isMappableFrom2DTo1D(inHeight,
inWidth,
inDepth,
conv._kernel_y,
conv._kernel_x,
conv._stride_y,
conv._stride_x)) {
const auto kernelSize = conv._kernel_x * conv._kernel_y;
auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
[](const KRT& l, const KRT::first_type& r) {return l.first > r; });
auto r =
std::lower_bound(reducers.begin(), reducers.end(), kernelSize, [](const KRT& l, const KRT::first_type& r) {
return l.first > r;
});
if (r != reducers.end())
reducer = r->second;
}
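For readability, a minimal standalone sketch of the reducer lookup used above; the helper name `weights_reducer_for` and the sample kernel sizes are illustrative, not part of the plugin.

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

using KRT = std::pair<uint32_t, double>;  // {kernel-size threshold, reducer}

// Same descending-threshold lookup as in getWeightsReducer: take the reducer of the
// first threshold the kernel size reaches, or 1.0 when the kernel is too small.
static double weights_reducer_for(uint32_t kernelSize) {
    const std::vector<KRT> reducers{{49, 3.0}, {36, 2.6}, {21, 2.3}, {14, 1.7}, {9, 1.3}, {7, 1.2}};
    auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
                              [](const KRT& l, const KRT::first_type& v) { return l.first > v; });
    return r != reducers.end() ? r->second : 1.0;
}

int main() {
    // 3x3 kernel -> 9 -> 1.3, 5x5 kernel -> 25 -> 2.3, 2x2 kernel -> 4 -> 1.0
    std::cout << weights_reducer_for(9) << " " << weights_reducer_for(25) << " " << weights_reducer_for(4) << "\n";
}
```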
@ -80,7 +98,8 @@ uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint3
if (window > in || window == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, window, stride) = (" << in << "," << window << "," << stride << ")";
}
if (window == in) return 1;
if (window == in)
return 1;
return (in - window - 1) / stride + 2;
}
@ -94,5 +113,6 @@ uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride) {
return (in - 1) / stride + 1;
}
} // namespace GNAConvolutionLayer
} // namespace GNAPluginNS
} // namespace gna_convolution_layer
} // namespace intel_gna
} // namespace ov
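The output-size arithmetic from this file can likewise be checked in isolation; the following sketch restates the two pooling formulas with made-up sample values.

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>

// Restatement of outputFromPooling: number of pooling windows for a 1D input.
static uint32_t output_from_pooling(uint32_t in, uint32_t window, uint32_t stride) {
    if (window > in || window == 0 || stride == 0)
        throw std::invalid_argument("invalid (input, window, stride)");
    if (window == in)
        return 1;                           // the window covers the whole input
    return (in - window - 1) / stride + 2;  // also counts a trailing partial window
}

// Restatement of outputFromPoolingLegacy, which ignores the window size.
static uint32_t output_from_pooling_legacy(uint32_t in, uint32_t stride) {
    return (in - 1) / stride + 1;
}

int main() {
    // 16 inputs, window 2, stride 2 -> 8 outputs; the legacy formula also yields 8 here
    std::cout << output_from_pooling(16, 2, 2) << " " << output_from_pooling_legacy(16, 2) << "\n";
}
```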

View File

@ -8,20 +8,28 @@
#include <legacy/ie_layers.h>
namespace GNAPluginNS {
namespace GNAConvolutionLayer {
namespace ov {
namespace intel_gna {
namespace gna_convolution_layer {
bool should_transpose_h_w(const uint32_t in_height,
const uint32_t kernel_height,
const uint32_t in_channels,
const uint32_t stride_height);
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inChannels,
const uint32_t kernelHeight, const uint32_t kernelWidth,
const uint32_t strideHeight, const uint32_t strideWidth);
bool isMappableFrom2DTo1D(const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kernelHeight,
const uint32_t kernelWidth,
const uint32_t strideHeight,
const uint32_t strideWidth);
bool is3DInputOr2DKernel(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
const uint32_t kernelHeight, const uint32_t kernelWidth);
bool is3DInputOr2DKernel(const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inDepth,
const uint32_t kernelHeight,
const uint32_t kernelWidth);
double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv);
@ -31,5 +39,6 @@ uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint3
uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride);
} // namespace GNAConvolutionLayer
} // namespace GNAPluginNS
} // namespace gna_convolution_layer
} // namespace intel_gna
} // namespace ov

View File

@ -4,7 +4,9 @@
#pragma once
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
/**
* GNA primitive created in sorting order for this copy layer
*/
@ -14,4 +16,5 @@ static constexpr auto CopyLayerName = "Copy";
*/
static constexpr auto DelayedCopyLayerName = "DelayedCopy";
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -7,9 +7,8 @@
#include "log/log.hpp"
#include "log/debug.hpp"
using namespace ov::intel_gna;
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
SimpleCrop get_crop_params(const std::vector<int32_t>& axis_in,
const std::vector<int32_t>& offset_in,
@ -57,4 +56,5 @@ SimpleCrop GetCropParams(InferenceEngine::CropLayer* cropLayer) {
return out_val;
}
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -8,7 +8,9 @@
#include <cstdint>
#include <vector>
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
class GNACropLayer {
InferenceEngine::CNNLayerPtr cropLayer;
@ -41,4 +43,5 @@ SimpleCrop get_crop_params(const std::vector<int32_t>& axis_in,
SimpleCrop GetCropParams(InferenceEngine::CropLayer* cropLayer);
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -12,12 +12,14 @@
using ov::intel_gna::frontend::make_fp32_blob;
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
class GNAFakeQuantizeLayer {
InferenceEngine::CNNLayerPtr fqLayer;
public :
GNAFakeQuantizeLayer(InferenceEngine::CNNLayerPtr fqLayer)
: fqLayer(fqLayer) {
public:
GNAFakeQuantizeLayer(InferenceEngine::CNNLayerPtr fqLayer) : fqLayer(fqLayer) {
if (!LayerInfo(fqLayer).isFakeQuantize()) {
THROW_GNA_LAYER_EXCEPTION(fqLayer) << "cannot parse as fake quantize";
}
@ -30,7 +32,7 @@ class GNAFakeQuantizeLayer {
DnnActivation fqActivation{};
fqActivation.fqParams.levels = fqLayer->GetParamAsSizeT("levels");
auto inputShape = getShapeForRange(fqLayer, 1);
auto inputShape = getShapeForRange(fqLayer, 1);
auto outputShape = getShapeForRange(fqLayer, 3);
// TODO: check shapes broadcasting to shape of input at 0
@ -40,26 +42,27 @@ class GNAFakeQuantizeLayer {
fqActivation.fqParams.set = true;
fqActivation.fqParams.inputPerChannel = inputRangeSize != 1;
fqActivation.fqParams.input_low = getParamFromInputAsFloats(fqLayer, 1);
fqActivation.fqParams.input_high = getParamFromInputAsFloats(fqLayer, 2);
fqActivation.fqParams.input_low = getParamFromInputAsFloats(fqLayer, 1);
fqActivation.fqParams.input_high = getParamFromInputAsFloats(fqLayer, 2);
fqActivation.fqParams.outputPerChannel = outputRangeSize != 1;
fqActivation.fqParams.output_low = getParamFromInputAsFloats(fqLayer, 3);
fqActivation.fqParams.output_low = getParamFromInputAsFloats(fqLayer, 3);
fqActivation.fqParams.output_high = getParamFromInputAsFloats(fqLayer, 4);
fqActivation.type = kActFakeQuantize;
return fqActivation;
}
}
/**
* @brief Retrieve input blob for FQ layer that connected to const layer
*/
InferenceEngine::Blob::Ptr getConstInputData() const {
return LayerUtils::getParamFromInputAsBlob(fqLayer, 0);
return layer_utils::getParamFromInputAsBlob(fqLayer, 0);
}
/**
* @brief Fake quantize has 5 input layers, while 4 of them always constant layer, and 1 might be a tensor - connection
* @brief Fake quantize has 5 input layers, while 4 of them always constant layer, and 1 might be a tensor -
* connection
*/
InferenceEngine::CNNLayerPtr getInputLayer() const {
return getInputLayerAt(fqLayer, 0);
@ -77,24 +80,24 @@ class GNAFakeQuantizeLayer {
return getRange(fqLayer, 3);
}
operator InferenceEngine::CNNLayerPtr () const {
operator InferenceEngine::CNNLayerPtr() const {
return fqLayer;
}
InferenceEngine::CNNLayerPtr operator -> () const {
InferenceEngine::CNNLayerPtr operator->() const {
return fqLayer;
}
InferenceEngine::CNNLayerPtr operator * () const {
InferenceEngine::CNNLayerPtr operator*() const {
return fqLayer;
}
protected :
protected:
static std::pair<std::vector<float>, std::vector<float>> getRange(InferenceEngine::CNNLayerPtr input, size_t idx) {
auto shape = getShapeForRange(input, idx);
auto shape = getShapeForRange(input, idx);
auto rangeSize = InferenceEngine::details::product(shape.begin(), shape.end());
auto dataMin = LayerUtils::getParamFromInputAsBlob(input, idx);
auto dataMax = LayerUtils::getParamFromInputAsBlob(input, idx + 1);
auto dataMin = layer_utils::getParamFromInputAsBlob(input, idx);
auto dataMax = layer_utils::getParamFromInputAsBlob(input, idx + 1);
std::vector<float> minValues(rangeSize), maxValues(rangeSize);
switch (dataMin->getTensorDesc().getPrecision()) {
case InferenceEngine::Precision::FP32: {
@ -112,46 +115,46 @@ class GNAFakeQuantizeLayer {
}
default:
THROW_GNA_LAYER_EXCEPTION(input) << "cannot cast custom blob to type FP32, since it is of type: "
<< dataMin->getTensorDesc().getPrecision();
<< dataMin->getTensorDesc().getPrecision();
break;
}
return {minValues, maxValues};
}
static float* getParamFromInputAsFloats(InferenceEngine::CNNLayerPtr input, size_t idx) {
auto data = LayerUtils::getParamFromInputAsBlob(input, idx);
static float* getParamFromInputAsFloats(InferenceEngine::CNNLayerPtr input, size_t idx) {
auto data = layer_utils::getParamFromInputAsBlob(input, idx);
if (data->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP32) {
THROW_GNA_LAYER_EXCEPTION(input) << "cannot cast custom blob to type FP32, since it is of type: "
<< data->getTensorDesc().getPrecision();
<< data->getTensorDesc().getPrecision();
}
return data->buffer().as<float*>();
}
static InferenceEngine::SizeVector getShapeFromInput(InferenceEngine::CNNLayerPtr input, size_t idx) {
auto data = LayerUtils::getParamFromInputAsBlob(input, idx);
static InferenceEngine::SizeVector getShapeFromInput(InferenceEngine::CNNLayerPtr input, size_t idx) {
auto data = layer_utils::getParamFromInputAsBlob(input, idx);
return data->getTensorDesc().getDims();
}
static InferenceEngine::CNNLayerPtr getInputLayerAt(InferenceEngine::CNNLayerPtr input, size_t idx) {
static InferenceEngine::CNNLayerPtr getInputLayerAt(InferenceEngine::CNNLayerPtr input, size_t idx) {
if (input->insData.size() <= idx) {
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << "input";
}
auto iLayerData = input->insData[idx].lock();
if (!iLayerData) {
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx
<< ", input: cannot dereference data weak-pointer";
THROW_GNA_LAYER_EXCEPTION(input)
<< "cannot get data from " << idx << ", input: cannot dereference data weak-pointer";
}
auto iLayer = getCreatorLayer(iLayerData).lock();
if (!iLayer) {
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx
<< ", input: cannot dereference creator layer weak-pointer";
THROW_GNA_LAYER_EXCEPTION(input)
<< "cannot get data from " << idx << ", input: cannot dereference creator layer weak-pointer";
}
return iLayer;
}
static InferenceEngine::SizeVector getShapeForRange(InferenceEngine::CNNLayerPtr input, size_t idx) {
auto lowShape = getShapeFromInput(input, idx);
auto lowShape = getShapeFromInput(input, idx);
auto highShape = getShapeFromInput(input, idx + 1);
if (lowShape.size() != highShape.size()) {
THROW_GNA_LAYER_EXCEPTION(input) << "shapes mismatch for " << idx << " and " << idx + 1 << " inputs";
@ -162,6 +165,8 @@ class GNAFakeQuantizeLayer {
}
}
return lowShape;
}
}
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -6,8 +6,10 @@
#include "gna_layer_info.hpp"
namespace GNAPluginNS {
namespace LayerUtils {
namespace ov {
namespace intel_gna {
namespace layer_utils {
/**
* @brief retrieves the blob from the const layer connected to a certain layer
* @param input
@ -38,5 +40,7 @@ inline InferenceEngine::Blob::Ptr getParamFromInputAsBlob(InferenceEngine::CNNLa
return iLayer->blobs["custom"];
}
} // namespace LayerUtils
} // namespace GNAPluginNS
} // namespace layer_utils
} // namespace intel_gna
} // namespace ov

View File

@ -10,7 +10,7 @@
#include <legacy/ie_layers.h>
#include "caseless.hpp"
#include "ie_algorithm.hpp"
#include "backend/gna_types.h"
#include "backend/gna_types.hpp"
#include "gna_permute.hpp"
#include "gna_lib_ver_selector.hpp"
#include "gna_copy_layer.hpp"
@ -21,7 +21,8 @@
#include "backend/gna_limitations.hpp"
#include "transformations/rt_info/gna_transpose_fusable.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
/**
* @brief detecting of const pointer for dynamic cast operations
@ -321,7 +322,7 @@ class LayerInfo {
auto inputs = layer->insData.begin()->lock();
auto inputsOrder = inputs->getTensorDesc().getDims();
return GNAPluginNS::isTrivialPermute(std::vector<int64_t>{begin(layerOrder), end(layerOrder)},
return permute::isTrivialPermute(std::vector<int64_t>{begin(layerOrder), end(layerOrder)},
inputsOrder);
}
bool isNonValuesChangable() const {
@ -356,7 +357,7 @@ class LayerInfo {
auto cropLayer = dynamic_cast<InferenceEngine::CropLayer *> (layer);
if (cropLayer != nullptr && !cropLayer->offset.empty()) {
const auto crop_params = GetCropParams(cropLayer);
return GNAPluginNS::GNALimitations::isCropAffinedOffset(crop_params.start_offset);
return limitations::isCropAffinedOffset(crop_params.start_offset);
}
return false;
}
@ -425,4 +426,5 @@ inline std::ostream & operator <<(std::ostream &os, const LayerInfo & info) {
return os;
}
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -8,10 +8,16 @@
#include "gna_layer_type.hpp"
#include "gna_layer_info.hpp"
GNAPluginNS::LayerType GNAPluginNS::LayerTypeFromStr(const std::string &str) {
namespace ov {
namespace intel_gna {
LayerType LayerTypeFromStr(const std::string& str) {
auto it = LayerNameToType.find(str);
if (it != LayerNameToType.end())
return it->second;
else
return LayerType::NO_TYPE;
}
} // namespace intel_gna
} // namespace ov

View File

@ -9,9 +9,11 @@
#include <caseless.hpp>
#include "backend/dnn_types.h"
#include "backend/dnn_types.hpp"
namespace ov {
namespace intel_gna {
namespace GNAPluginNS {
enum class LayerType {
Input,
Convolution,
@ -54,7 +56,7 @@ enum class LayerType {
NO_TYPE
};
static const InferenceEngine::details::caseless_map<std::string, GNAPluginNS::LayerType> LayerNameToType = {
static const InferenceEngine::details::caseless_map<std::string, LayerType> LayerNameToType = {
{ "Input" , LayerType::Input },
{ "Convolution" , LayerType::Convolution },
{ "ReLU" , LayerType::ReLU },
@ -94,5 +96,7 @@ static const InferenceEngine::details::caseless_map<std::string, GNAPluginNS::La
{"Gemm", LayerType::Gemm},
};
GNAPluginNS::LayerType LayerTypeFromStr(const std::string &str);
} // namespace GNAPluginNS
LayerType LayerTypeFromStr(const std::string &str);
} // namespace intel_gna
} // namespace ov

View File

@ -7,7 +7,9 @@
#include "legacy/ie_layers.h"
#include "debug.h"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
/**
* maps the type of connection to input and output layers and also stores the gna_pointer for the alloc request
*/
@ -63,4 +65,6 @@ public:
*/
float scale_factor = 1.0f;
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -9,7 +9,10 @@
#include "ie_common.h"
#include "log/debug.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace permute {
template <class T>
class PermuteSequence {
public:
@ -20,14 +23,14 @@ private:
cnt_type permutes;
public:
explicit PermuteSequence(std::vector<T> && orderVecIn) : orderVec(std::move(orderVecIn)) {
explicit PermuteSequence(std::vector<T>&& orderVecIn) : orderVec(std::move(orderVecIn)) {
std::vector<bool> counter(orderVec.size());
for (auto && x : this->orderVec) {
for (auto&& x : this->orderVec) {
if (x < 0) {
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be >= 0";
}
if (x >= counter.size()) {
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be < "<< counter.size();
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be < " << counter.size();
}
if (counter[x]) {
THROW_GNA_EXCEPTION << "invalid order: element " << x << " present more than once";
@ -65,13 +68,13 @@ public:
i++;
}
for (auto && cycle : permuteCycles) {
for (auto&& cycle : permuteCycles) {
for (int i = 0; i + 1 < cycle.size(); i++) {
permutes.push_back(cycle[i]);
}
}
}
const cnt_type & cnt() const noexcept {
const cnt_type& cnt() const noexcept {
return permutes;
}
};
@ -83,22 +86,22 @@ public:
*/
template <class Iterator>
inline typename PermuteSequence<typename std::iterator_traits<Iterator>::value_type>::cnt_type genPermutations(
Iterator beg, Iterator en) {
static_assert(
std::is_same<std::random_access_iterator_tag,
typename std::iterator_traits<Iterator>::iterator_category>::value,
"The genPermutations() function only accepts random access iterators or raw pointers to an array.\n");
Iterator beg,
Iterator en) {
static_assert(std::is_same<std::random_access_iterator_tag,
typename std::iterator_traits<Iterator>::iterator_category>::value,
"The genPermutations() function only accepts random access iterators or raw pointers to an array.\n");
using value_type = typename std::iterator_traits<Iterator>::value_type;
std::vector<value_type> v;
for (; beg != en; beg++) {
v.push_back(*beg);
}
auto permute = PermuteSequence<value_type> (std::move(v));
auto permute = PermuteSequence<value_type>(std::move(v));
return permute.cnt();
}
template <class T>
inline typename PermuteSequence<T>::cnt_type genPermutations(const std::initializer_list<T> & lst) {
inline typename PermuteSequence<T>::cnt_type genPermutations(const std::initializer_list<T>& lst) {
return genPermutations(lst.begin(), lst.end());
}
@ -121,14 +124,12 @@ inline bool isTrivialPermute(const std::vector<int64_t> order, const std::vector
// cases when all permutations happened either between 1 and X shape where no other dims in between
auto transpose_seq = genPermutations(order.begin(), order.end());
auto input_order_transformed = input_shape;
for (auto && transp : transpose_seq) {
for (auto&& transp : transpose_seq) {
// check dims of transposed
if (input_order_transformed[transp.first] == 1 &&
input_order_transformed[transp.second] == 1) {
if (input_order_transformed[transp.first] == 1 && input_order_transformed[transp.second] == 1) {
return true;
}
if (input_order_transformed[transp.first] != 1 &&
input_order_transformed[transp.second] != 1) {
if (input_order_transformed[transp.first] != 1 && input_order_transformed[transp.second] != 1) {
return false;
}
// check dims in between
@ -143,4 +144,6 @@ inline bool isTrivialPermute(const std::vector<int64_t> order, const std::vector
return true;
}
} // namespace GNAPluginNS
} // namespace permute
} // namespace intel_gna
} // namespace ov
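A small illustration of the property that `isTrivialPermute` relies on: swapping an axis with a neighbouring size-1 axis does not move any data, so the permute can be skipped. The helper below is a demonstration sketch only, not the plugin's implementation.

```cpp
#include <array>
#include <cstddef>
#include <iostream>

// Flat (row-major) offset of index (n, c, w) within a 3D shape.
static size_t flat_index(const std::array<size_t, 3>& idx, const std::array<size_t, 3>& shape) {
    return (idx[0] * shape[1] + idx[1]) * shape[2] + idx[2];
}

int main() {
    // Transposing shape {1, 8, 1} with order {0, 2, 1} swaps the size-8 axis with a
    // neighbouring size-1 axis: every element keeps its flat offset, so no copy is needed.
    const std::array<size_t, 3> in_shape{1, 8, 1};
    const std::array<size_t, 3> out_shape{1, 1, 8};  // shape after applying order {0, 2, 1}
    bool identical = true;
    for (size_t c = 0; c < in_shape[1]; ++c) {
        const size_t src = flat_index({0, c, 0}, in_shape);
        const size_t dst = flat_index({0, 0, c}, out_shape);  // the element's position after the swap
        identical = identical && (src == dst);
    }
    std::cout << (identical ? "trivial permute" : "real data movement") << "\n";
}
```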

View File

@ -9,7 +9,9 @@
#include <legacy/ie_layers.h>
#include "backend/gna_limitations.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
// Split, Slice
class GNASplitLayer {
InferenceEngine::CNNLayerPtr splitLayer;
@ -48,7 +50,7 @@ public:
};
// @brief Returns the sizes of split outputs used to split the input tensor into aligned parts not greater than the specified size
static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = GNALimitations::inputByteAlignment) {
static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = limitations::inputByteAlignment) {
std::vector<uint32_t> splitSizes;
uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment);
uint32_t usedSize = 0;
@ -68,7 +70,7 @@ static std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
IE_ASSERT(firstValuableDim != std::end(dims));
auto splittedElementsSize = *firstValuableDim;
auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim);
auto alignment = GNALimitations::inputByteAlignment;
auto alignment = limitations::inputByteAlignment;
// Split output size should be a multiple of 64 to avoid insertion of alignment filters,
// but we need to check if our input size to split exceeds 64; if not, we can always
@ -81,8 +83,9 @@ static std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
}
}
splitSizes = GetAlignedSplitSizes(splittedElementsSize,
GNALimitations::bufferMaxSize * splittedElementsSize / totalElementsSize, alignment);
limitations::bufferMaxSize * splittedElementsSize / totalElementsSize, alignment);
return {splittedDimIx, splitSizes};
}
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
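A minimal sketch of the aligned-split idea, assuming the elided loop simply keeps emitting chunks of `maxAlignedSplitSize` until the total size is consumed; the alignment value 64 stands in for `limitations::inputByteAlignment`.

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Sketch: cut totalSize into chunks no larger than maxSplitSize, each chunk
// (except possibly the last) rounded down to a multiple of `alignment`.
static std::vector<uint32_t> aligned_split_sizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = 64) {
    std::vector<uint32_t> sizes;
    const uint32_t maxAligned = std::max(maxSplitSize - maxSplitSize % alignment, alignment);
    uint32_t used = 0;
    while (used < totalSize) {
        const uint32_t chunk = std::min(maxAligned, totalSize - used);  // last chunk may be smaller
        sizes.push_back(chunk);
        used += chunk;
    }
    return sizes;
}

int main() {
    // Splitting 200 elements with a 70-element cap gives 64 + 64 + 64 + 8.
    for (auto s : aligned_split_sizes(200, 70))
        std::cout << s << " ";
    std::cout << "\n";
}
```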

View File

@ -11,7 +11,9 @@
#include <caseless.hpp>
#include "gna_graph_compiler.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
class LayersBuilder {
using CreatorFnc = std::function<void(GNAGraphCompiler*, InferenceEngine::CNNLayerPtr)>;
@ -26,4 +28,6 @@ public:
return LayerBuilder;
}
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -12,7 +12,8 @@
#include "gna_device.hpp"
#include "memory/gna_mem_requests.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace memory {
/**
* wrap GNA interface into c++ allocator friendly one
@ -35,9 +36,11 @@ class GNAAllocator {
void deallocate(uint8_t *p, std::size_t n) {
_device->free(p);
}
void setTag(void* memPtr, GNAPluginNS::memory::rRegion tagValue) {
void setTag(void* memPtr, memory::rRegion tagValue) {
_device->tagMemoryRegion(memPtr, tagValue);
}
};
} // namespace memory
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -9,7 +9,8 @@
#include "log/debug.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace memory {
/**
@ -47,4 +48,5 @@ inline std::string rRegionToStr(const rRegion region) {
}
} // namespace memory
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -10,7 +10,8 @@
#include "gna_mem_regions.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace memory {
enum rType : uint8_t {
@ -126,5 +127,7 @@ struct MemRequest {
_initializer(initializer) {
}
};
} // namespace memory
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -19,7 +19,8 @@
using namespace ov::intel_gna;
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace memory {
/**
@ -200,7 +201,7 @@ public:
}
template<class T>
void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) {
void iterate_binded(memory::MemRequest & reference, const T & visitor) {
for (auto &re : _mem_requests) {
if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) {
log::trace() << " [binded=" << re._type << ", ptr=" << re._ptr_out <<"]\n";
@ -284,4 +285,5 @@ public:
};
} // namespace memory
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -26,12 +26,13 @@
#include <iomanip>
#endif
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace memory {
class GNAFloatAllocator : public std::allocator < uint8_t > {
public:
void setTag(void*, GNAPluginNS::memory::rRegion) {
void setTag(void*, memory::rRegion) {
}
};
@ -154,7 +155,7 @@ protected:
}
template<class T>
void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) {
void iterate_binded(memory::MemRequest & reference, const T & visitor) {
for (auto &re : getQueue(REGION_AUTO)->_mem_requests) {
if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) {
// log::trace() << " [binded=" << rTypeToStr(re._type) << ", ptr=" << re._ptr_out <<"]\n";
@ -291,4 +292,5 @@ protected:
};
} // namespace memory
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -8,123 +8,127 @@
#include "ie_layouts.h"
#include "gna_graph_tools.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace memory {
void GNAVariableState::Reset() {
state->Reset();
}
void GNAVariableState::Reset() {
state->Reset();
}
InferenceEngine::Precision GNAVariableState::getPrecision() const {
InferenceEngine::Precision state_precision;
InferenceEngine::Precision GNAVariableState::getPrecision() const {
InferenceEngine::Precision state_precision;
if (state->getInput()) {
state_precision = state->getInput()->precision;
} else {
auto element_size = state->elementSizeBytes();
switch (element_size) {
case 4:
state_precision = InferenceEngine::Precision::FP32;
break;
case 2:
state_precision = InferenceEngine::Precision::I16;
break;
default:
THROW_GNA_EXCEPTION << "Incorrect state element size " << element_size <<
" to determine precision for VariableState " << name;
}
}
return state_precision;
}
void GNAVariableState::SetState(const InferenceEngine::Blob::Ptr& newState) {
IE_ASSERT(newState != nullptr);
auto data_ptr = newState->cbuffer().as<void*>();
IE_ASSERT(data_ptr != nullptr);
auto data_size = newState->byteSize();
auto data_elements = data_size / newState->element_size();
if (ALIGN64(state->reserved_size) != ALIGN64((data_size / (newState->element_size() / state->elementSizeBytes())))) {
THROW_GNA_EXCEPTION << "Failed to SetState. Sizes of new and old states do not match. ("
<< state->reserved_size << " != " << (newState->element_size() / state->elementSizeBytes()) << ")";
}
InferenceEngine::Precision state_precision = getPrecision();
auto new_state_precision = newState->getTensorDesc().getPrecision();
if (state->gna_ptr == data_ptr) {
return;
}
if (new_state_precision == state_precision) {
std::memcpy(state->gna_ptr, data_ptr, data_size);
return;
}
switch (state_precision) {
case InferenceEngine::Precision::I16: {
if (new_state_precision == InferenceEngine::Precision::FP32) {
auto quantized =
InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput());
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor;
GNAPluginNS::ConvertToInt16(static_cast<int16_t*>(state->gna_ptr),
newState->buffer().as<float*>(),
1,
data_elements,
scale_factor);
} else {
THROW_GNA_EXCEPTION << "Failed to SetState for VariableState " << name
<< ". If old state precision is I16 only I16 and FP32 are allowed as new state precisions."
<< " Old state: " << state_precision << " New state: " << new_state_precision;
}
if (state->getInput()) {
state_precision = state->getInput()->precision;
} else {
auto element_size = state->elementSizeBytes();
switch (element_size) {
case 4:
state_precision = InferenceEngine::Precision::FP32;
break;
case 2:
state_precision = InferenceEngine::Precision::I16;
break;
}
default:
THROW_GNA_EXCEPTION << "Failed to SetState for VariableState " << name
<< ". Incorrect new/old precision pair"
<< " Old state: " << state_precision << " New state: " << new_state_precision;
THROW_GNA_EXCEPTION << "Incorrect state element size " << element_size
<< " to determine precision for VariableState " << name;
}
}
InferenceEngine::Blob::CPtr GNAVariableState::GetState() const {
auto elements = state->reserved_size / state->elementSizeBytes();
InferenceEngine::Precision state_precision = getPrecision();
return state_precision;
}
if (state->getInput() && state_precision == InferenceEngine::Precision::I16) {
void GNAVariableState::SetState(const InferenceEngine::Blob::Ptr& newState) {
IE_ASSERT(newState != nullptr);
auto data_ptr = newState->cbuffer().as<void*>();
IE_ASSERT(data_ptr != nullptr);
auto data_size = newState->byteSize();
auto data_elements = data_size / newState->element_size();
if (ALIGN64(state->reserved_size) !=
ALIGN64((data_size / (newState->element_size() / state->elementSizeBytes())))) {
THROW_GNA_EXCEPTION << "Failed to SetState. Sizes of new and old states do not match. (" << state->reserved_size
<< " != " << (newState->element_size() / state->elementSizeBytes()) << ")";
}
InferenceEngine::Precision state_precision = getPrecision();
auto new_state_precision = newState->getTensorDesc().getPrecision();
if (state->gna_ptr == data_ptr) {
return;
}
if (new_state_precision == state_precision) {
std::memcpy(state->gna_ptr, data_ptr, data_size);
return;
}
switch (state_precision) {
case InferenceEngine::Precision::I16: {
if (new_state_precision == InferenceEngine::Precision::FP32) {
auto quantized =
InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput());
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor;
auto result_blob = make_blob_with_precision(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32,
InferenceEngine::SizeVector({ 1, elements }),
InferenceEngine::NC));
result_blob->allocate();
auto buffer = result_blob->buffer().as<float*>();
auto new_gna_ptr = static_cast<int16_t*>(state->gna_ptr);
for (int i = 0; i < elements; i++) {
buffer[i] = new_gna_ptr[i] / scale_factor;
}
return result_blob;
ConvertToInt16(static_cast<int16_t*>(state->gna_ptr),
newState->buffer().as<float*>(),
1,
data_elements,
scale_factor);
} else {
auto result_blob = make_blob_with_precision(InferenceEngine::TensorDesc(state_precision,
InferenceEngine::SizeVector({ 1, elements }),
InferenceEngine::NC));
result_blob->allocate();
std::memcpy(result_blob->buffer(), state->gna_ptr, state->reserved_size);
return result_blob;
THROW_GNA_EXCEPTION
<< "Failed to SetState for VariableState " << name
<< ". If old state precision is I16 only I16 and FP32 are allowed as new state precisions."
<< " Old state: " << state_precision << " New state: " << new_state_precision;
}
break;
}
default:
THROW_GNA_EXCEPTION << "Failed to SetState for VariableState " << name << ". Incorrect new/old precision pair"
<< " Old state: " << state_precision << " New state: " << new_state_precision;
}
}
float GNAVariableState::GetScaleFactor() const {
InferenceEngine::Blob::CPtr GNAVariableState::GetState() const {
auto elements = state->reserved_size / state->elementSizeBytes();
InferenceEngine::Precision state_precision = getPrecision();
if (state->getInput() && state_precision == InferenceEngine::Precision::I16) {
auto quantized =
InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput());
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor;
return scale_factor;
auto result_blob =
make_blob_with_precision(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32,
InferenceEngine::SizeVector({1, elements}),
InferenceEngine::NC));
result_blob->allocate();
auto buffer = result_blob->buffer().as<float*>();
auto new_gna_ptr = static_cast<int16_t*>(state->gna_ptr);
for (int i = 0; i < elements; i++) {
buffer[i] = new_gna_ptr[i] / scale_factor;
}
return result_blob;
} else {
auto result_blob =
make_blob_with_precision(InferenceEngine::TensorDesc(state_precision,
InferenceEngine::SizeVector({1, elements}),
InferenceEngine::NC));
result_blob->allocate();
std::memcpy(result_blob->buffer(), state->gna_ptr, state->reserved_size);
return result_blob;
}
}
float GNAVariableState::GetScaleFactor() const {
auto quantized = InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput());
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor;
return scale_factor;
}
} // namespace memory
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
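An illustrative sketch of the dequantization step performed by the I16 branch of `GetState`; the scale factor and buffer contents below are made up.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    // The I16 branch of GetState divides each quantized element by the state's scale
    // factor to recover FP32 values; the factor 16384 here is only an illustration.
    const float scale_factor = 16384.0f;
    const std::vector<int16_t> gna_state = {16384, -8192, 4096, 0};

    std::vector<float> fp32_state(gna_state.size());
    for (size_t i = 0; i < gna_state.size(); ++i)
        fp32_state[i] = gna_state[i] / scale_factor;

    for (float v : fp32_state)
        std::cout << v << " ";  // prints: 1 -0.5 0.25 0
    std::cout << "\n";
}
```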

View File

@ -9,8 +9,10 @@
#include <cpp_interfaces/interface/ie_ivariable_state_internal.hpp>
#include "gna_plugin.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace memory {
class GNAVariableState : public InferenceEngine::IVariableStateInternal {
public:
GNAVariableState(std::string name, std::shared_ptr<GNAMemoryLayer> state)
@ -33,5 +35,7 @@ private:
*/
InferenceEngine::Precision getPrecision() const;
};
} // namespace memory
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -7,13 +7,17 @@
#include <cstdint>
#include "log/debug.hpp"
int32_t GNAPluginNS::memory::MemoryOffset(void *ptr_target, void *ptr_base) {
namespace ov {
namespace intel_gna {
namespace memory {
int32_t MemoryOffset(void* ptr_target, void* ptr_base) {
auto target = reinterpret_cast<uintptr_t>(ptr_target);
auto base = reinterpret_cast<uintptr_t>(ptr_base);
if (target == 0) { // handle NULL pointers separately
return (-1);
} else if (target < base) {
THROW_GNA_EXCEPTION << "Target address value " << target << " is less than base address " << base;
THROW_GNA_EXCEPTION << "Target address value " << target << " is less than base address " << base;
} else {
uint64_t diff = target - base;
if (diff > 0x7fffffff) {
@ -23,3 +27,6 @@ int32_t GNAPluginNS::memory::MemoryOffset(void *ptr_target, void *ptr_base) {
}
}
} // namespace memory
} // namespace intel_gna
} // namespace ov
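A standalone sketch of the same pointer-offset computation, assuming the elided branch rejects offsets that do not fit into int32, as the surrounding check suggests; error handling is reduced to plain exceptions.

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <vector>

// Distance of ptr_target from ptr_base in bytes, -1 for a null target, and an
// error when the target lies below the base or the offset exceeds int32 range.
static int32_t memory_offset(const void* ptr_target, const void* ptr_base) {
    const auto target = reinterpret_cast<uintptr_t>(ptr_target);
    const auto base = reinterpret_cast<uintptr_t>(ptr_base);
    if (target == 0)
        return -1;
    if (target < base)
        throw std::runtime_error("target address is less than base address");
    const uint64_t diff = target - base;
    if (diff > 0x7fffffff)
        throw std::runtime_error("offset does not fit into int32");
    return static_cast<int32_t>(diff);
}

int main() {
    std::vector<uint8_t> arena(256);
    std::cout << memory_offset(arena.data() + 100, arena.data()) << "\n";  // 100
    std::cout << memory_offset(nullptr, arena.data()) << "\n";             // -1
}
```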

View File

@ -6,10 +6,12 @@
#include <cstdint>
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace memory {
int32_t MemoryOffset(void *ptr_target, void *ptr_base);
} // namespace memory
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -84,7 +84,7 @@ static bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
if (std::dynamic_pointer_cast<ngraph::opset8::Split>(input_op) || std::dynamic_pointer_cast<ngraph::opset8::VariadicSplit>(input_op)) {
for (size_t index = 0; index < input_op_out_index; index++) {
size_t outputSize = ngraph::shape_size(input_op->get_output_shape(index));
offset += outputSize * GNAPluginNS::GNALimitations::bytesPerSplitElement;
offset += outputSize * limitations::bytesPerSplitElement;
}
}
return (offset == ALIGN64(offset));
@ -93,7 +93,7 @@ static bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
static bool is_crop_affined(std::shared_ptr<ngraph::Node> node) {
auto crop = std::dynamic_pointer_cast<ngraph::op::CropIE>(node);
if (crop != nullptr && !crop->offset.empty()) {
return GNAPluginNS::GNALimitations::isCropAffinedOffset(crop->offset.back());
return limitations::isCropAffinedOffset(crop->offset.back());
}
return false;
}
@ -117,7 +117,7 @@ static bool is_trivial_transpose(std::shared_ptr<ngraph::Node> node) {
auto input = transpose->input(0).get_source_output().get_node_shared_ptr();
auto input_order = transpose->get_input_shape(0);
return GNAPluginNS::isTrivialPermute(node_order, input_order);
return permute::isTrivialPermute(node_order, input_order);
}
inline std::shared_ptr<ov::Node> get_prev_node_skipping_certain(const std::shared_ptr<ngraph::Node>& node,

View File

@ -24,7 +24,7 @@
#include <legacy/net_pass.h>
#include <layers/gna_copy_layer.hpp>
#include "backend/dnn_types.h"
#include "backend/dnn_types.hpp"
#include "log/debug.hpp"
#include "log/log.hpp"
#include "frontend/quantization.hpp"
@ -46,10 +46,12 @@
using namespace InferenceEngine;
using namespace InferenceEngine::details;
using namespace GNAPluginNS;
using namespace ov::intel_gna::frontend;
using namespace ov::intel_gna::common;
namespace ov {
namespace intel_gna {
#define pass_trace() log::debug() << "[" << getName() << "] "
std::shared_ptr<IPassManager> BasePass::getPassManager() {
@ -98,14 +100,14 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
});
IE_ASSERT(inputLayer != nullptr);
size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() :
Get2DReshapedData(nextLayer->outData[0], GNALimitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)->getDims()[1];
Get2DReshapedData(nextLayer->outData[0], limitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)->getDims()[1];
std::vector<float> weightsValues(weightsSize, fillValue);
IE_ASSERT(diagLayer != nullptr);
diagLayer->_weights = make_shared_blob<float>(
TensorDesc(
nextLayer->outData[0]->getTensorDesc().getPrecision(),
SizeVector({weightsValues.size()}),
Layout::C));
InferenceEngine::Layout::C));
diagLayer->_weights->allocate();
CopyVectorToBlob(diagLayer->_weights, weightsValues);
auto dataPtr = std::make_shared<Data>(diagName, nextLayer->outData[0]->getTensorDesc());
@ -666,7 +668,7 @@ void RemovePermutationsNHWCToNCHWPass::run() {
}
// HWC layout enum is used here as the only available in CNNNetwork for 3D vectors,
// but the real layout is NCW and it's the one used in order vector later
return dims_size == 4 ? Layout::NHWC : Layout::HWC;
return dims_size == 4 ? InferenceEngine::Layout::NHWC : InferenceEngine::Layout::HWC;
};
auto setTransposedOrder = [getTransposedLayout](InferenceEngine::DataPtr data) {
@ -677,13 +679,17 @@ void RemovePermutationsNHWCToNCHWPass::run() {
if (LayerInfo(current_layer).isConcat()) {
auto concat_layer = dynamic_cast<InferenceEngine::ConcatLayer*> (current_layer.get());
auto dims_size = data->getDims().size();
concat_layer->_axis = (dims_size == 4 ? GetPermuteOrder(Layout::NHWC, Layout::NCHW) :
concat_layer->_axis = (dims_size == 4 ? permute::GetPermuteOrder(InferenceEngine::Layout::NHWC,
InferenceEngine::Layout::NCHW)
:
std::vector<int32_t>{0, 2, 1})[concat_layer->_axis];
}
// NWC->NCW layouts are used here for order vector, see comments a few lines above
auto dims = data->getDims();
auto order = dims.size() == 4 ? GetPermuteOrder(Layout::NCHW, Layout::NHWC) :
auto order = dims.size() == 4
? permute::GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)
:
std::vector<int32_t>{0, 2, 1};
InferenceEngine::SizeVector new_dims;
for (int i = 0; i < dims.size(); ++i) {
@ -1074,7 +1080,7 @@ void FlattenTrivialConcatPass::run() {
auto concatInput = getLayerByIndex(input_idx, concatLayer);
auto tensor = InferenceEngine::TensorDesc(concatInput->getTensorDesc());
tensor.reshape(SizeVector({1, total_sizes[input_idx]}), Layout::NC);
tensor.reshape(SizeVector({1, total_sizes[input_idx]}), InferenceEngine::Layout::NC);
auto reshapeName = l->name + "_input_"+ std::to_string(input_idx) +"_reshape";
auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
@ -1091,7 +1097,7 @@ void FlattenTrivialConcatPass::run() {
auto total_size = std::accumulate(dims.begin(), dims.end(), size_t(1), std::multiplies<size_t>());
auto new_tensor = output->getTensorDesc();
new_tensor.reshape(SizeVector({1, total_size}), Layout::NC);
new_tensor.reshape(SizeVector({1, total_size}), InferenceEngine::Layout::NC);
auto new_output = CNNReplaceDataWithChangedTensorDescription(output, new_tensor);
log::debug() << "\tChanged " << output->getName() << " dims to 2D" << std::endl;
@ -1197,7 +1203,7 @@ void InsertConcatAligningFilterPass::run() {
TensorDesc(
concatInput->getTensorDesc().getPrecision(),
SizeVector({filterWeights.size()}),
Layout::C));
InferenceEngine::Layout::C));
concatAligningFilter->_weights->allocate();
if (!concatAligningFilter->_weights->buffer().as<float*>()) {
THROW_GNA_EXCEPTION << "Failed to allocate weights of size " << filterWeights.size() << " for " << filterName;
@ -1208,10 +1214,10 @@ void InsertConcatAligningFilterPass::run() {
// modifying output rows to be used - to avoid modifying the original concat we store the number of elements in params
dims[1] = num_rows_out;
if ((concatInput->getLayout() == Layout::NC && dims[0] > 8) ||
(concatInput->getLayout() == Layout::CN && dims[1] > 8)) {
THROW_GNA_EXCEPTION << "unsupported batch number '" <<
(concatInput->getLayout() == Layout::NC ? dims[0] : dims[1]) <<
if ((concatInput->getLayout() == InferenceEngine::Layout::NC && dims[0] > 8) ||
(concatInput->getLayout() == InferenceEngine::Layout::CN && dims[1] > 8)) {
THROW_GNA_EXCEPTION << "unsupported batch number '" << (concatInput->getLayout() == InferenceEngine::Layout::NC ? dims[0] : dims[1])
<<
"' in layer '" << concatLayer->name << "'";
}
@ -1312,8 +1318,7 @@ void ReorderConcatInputsPass::run() {
auto linkOutData = std::make_shared<Data>(linkName,
TensorDesc(Precision::FP32,
SizeVector({ 1 }),
Layout::C));
SizeVector({ 1 }), InferenceEngine::Layout::C));
getCreatorLayer(linkOutData) = link;
link->outData.push_back(linkOutData);
@ -1340,7 +1345,7 @@ void InsertSplitAligningFilterPass::run() {
}
auto outFunctionalLayers = CNNNetGetAllNextLayersSkipCertain(l, -1, [](CNNLayerPtr next_layer) {
return GNAPluginNS::LayerInfo(next_layer).isNonFunctional();
return LayerInfo(next_layer).isNonFunctional();
});
size_t padding = 0;
for (auto &&outFunctionalLayer : outFunctionalLayers) {
@ -1387,16 +1392,16 @@ void InsertSplitAligningFilterPass::run() {
IE_ASSERT(filterLayer != nullptr);
// encodes offset to beginning of split layer input
filterLayer->params["offset"] = std::to_string(aligned64_offset / GNALimitations::bytesPerSplitElement);
filterLayer->params["offset"] = std::to_string(aligned64_offset / limitations::bytesPerSplitElement);
auto dims = splitOutput->getTensorDesc().getDims();
if (dims.size() > 3) {
THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
}
const auto offsetOfUnalignment = (currentOffset - aligned64_offset) / GNALimitations::bytesPerSplitElement;
const auto offsetOfUnalignment = (currentOffset - aligned64_offset) / limitations::bytesPerSplitElement;
// TODO: consider using a different number of filters to decrease the number of trailing zeros (additionalPaddingOfFilter)
const auto numberOfFilters = GNALimitations::convMinFiltersNum;
const auto filterSize = ALIGN(offsetOfUnalignment + numberOfFilters, GNALimitations::convFilterSizeDivider);
const auto numberOfFilters = limitations::convMinFiltersNum;
const auto filterSize = ALIGN(offsetOfUnalignment + numberOfFilters, limitations::convFilterSizeDivider);
// filterWeights: numberOfFilters X (offsetOfUnalignment + additionalPaddingOfFilter + numberOfFilters)
// offsetOfUnalignment - the leading zeros in the filter
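// Worked example (constants assumed for illustration, e.g. convMinFiltersNum == 4 and
// convFilterSizeDivider == 8): with offsetOfUnalignment == 3, filterSize = ALIGN(3 + 4, 8) == 8,
// so each of the 4 filters holds 3 leading zeros, one selecting tap and trailing zero padding.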
@ -1424,7 +1429,7 @@ void InsertSplitAligningFilterPass::run() {
filterLayer->_weights = make_shared_blob<float>(TensorDesc(
inputData->getTensorDesc().getPrecision(),
SizeVector({filterWeights.size()}),
Layout::C));
InferenceEngine::Layout::C));
filterLayer->_weights->allocate();
CopyVectorToBlob(filterLayer->_weights, filterWeights);
@ -1433,7 +1438,7 @@ void InsertSplitAligningFilterPass::run() {
filterLayer->_biases = make_shared_blob<float>(TensorDesc(
inputData->getTensorDesc().getPrecision(),
SizeVector({ biasWeights.size() }),
Layout::C));
InferenceEngine::Layout::C));
filterLayer->_biases->allocate();
CopyVectorToBlob(filterLayer->_biases, biasWeights);
@ -1452,7 +1457,7 @@ void InsertSplitAligningFilterPass::run() {
}
// search data that starts from unaligned location
currentOffset += outputSize * GNALimitations::bytesPerSplitElement;
currentOffset += outputSize * limitations::bytesPerSplitElement;
splitOutIndex++;
}
}
@ -1490,7 +1495,7 @@ void EltwiseSplitOverChannelsPass::run() {
auto oData = l->outData.front();
auto oDims = oData->getDims();
auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims));
if (totalElementsSize <= GNALimitations::bufferMaxSize) {
if (totalElementsSize <= limitations::bufferMaxSize) {
continue;
}
auto splitSizesPerAxis = AlignedSplitSizesPerAxis(oDims);
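// Worked example (sizes assumed for illustration): with oDims == {1, 100000} the element count is
// 100000; once it exceeds limitations::bufferMaxSize, AlignedSplitSizesPerAxis() picks chunk sizes
// along one axis so that every split part fits the GNA buffer and the eltwise runs per chunk.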
@ -1602,7 +1607,7 @@ void SubstituteScaleShiftBroadCastPass::run() {
dataDims = reshaped_data[insData->getName()];
} else {
dataDims = HasTo2DReshapeData(l) ?
Get2DReshapedData(insData, GNALimitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() :
Get2DReshapedData(insData, limitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() :
insData->getDims();
}
@ -1634,7 +1639,7 @@ void SubstituteScaleShiftBroadCastPass::run() {
}
auto tensor = InferenceEngine::TensorDesc(insData->getTensorDesc());
tensor.reshape(SizeVector{ batchSize, nElements }, Layout::NC);
tensor.reshape(SizeVector{batchSize, nElements}, InferenceEngine::Layout::NC);
auto reshapeName = scaleShift->name + "_input_" + std::to_string(0) + "_reshape";
auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
auto layer_before_scale_shift = getCreatorLayer(insData);
@ -1949,7 +1954,7 @@ void FuseFQIntoWeightsPass::run() {
<< LAYER_NAME(weightableLayer) << "\n";
auto biases = weightableLayer->insData.size() == 3 ?
LayerUtils::getParamFromInputAsBlob(weightableLayer, biasesIdx) : nullptr;
layer_utils::getParamFromInputAsBlob(weightableLayer, biasesIdx) : nullptr;
auto quantizedWeights = gnaFakeQuantizeLayer.getConstInputData();
// 1. broke existing connections - by detaching fq subgraph from rest of graph
@ -2032,7 +2037,8 @@ void FuseFQIntoWeightsPass::run() {
transform->func_id = gnaFakeQuantizeLayer.parseAsActivation();
auto quantizedWeightsData = quantizedWeights->buffer();
auto dequantizedWeights = make_shared_blob<float>(TensorDesc(Precision::FP32, { outputSize }, Layout::C));
auto dequantizedWeights =
make_shared_blob<float>(TensorDesc(Precision::FP32, {outputSize}, InferenceEngine::Layout::C));
dequantizedWeights->allocate();
auto resultBuffer = dequantizedWeights->buffer();
@ -2460,3 +2466,6 @@ int PassManager::run(int index) {
}
return index;
}
} // namespace intel_gna
} // namespace ov

View File

@ -9,7 +9,9 @@
#include <map>
#include <ie_common.h>
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
/**
* @brief Interface for a GNA pass, a special transformer that is run on the input network in order to generate a GNABlob
*/
@ -244,4 +246,5 @@ public:
int run(int index = 0);
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -9,12 +9,12 @@
#include <gna_graph_tools.hpp>
namespace ov {
namespace intela_gna {
namespace intel_gna {
namespace helpers {
void updateModelInputOrientationWithoutConvolution(const InferenceEngine::CNNLayer& inputLayer,
const GNAPluginNS::backend::DnnComponents& components,
GNAPluginNS::GnaInputs& inputs) {
const backend::DnnComponents& components,
GnaInputs& inputs) {
// does not make sense to go further if there is no input to set
auto input = inputs.find(inputLayer.name);
@ -84,8 +84,8 @@ void updateModelInputOrientationWithoutConvolution(const InferenceEngine::CNNLay
void updateModelOutputOrientation(const std::string& outputName,
const std::string& cnnlayerName,
const GNAPluginNS::backend::DnnComponents& components,
GNAPluginNS::GnaOutputs& outputs) {
const backend::DnnComponents& components,
GnaOutputs& outputs) {
// if there is no output to set, it does not make sense to go further
auto output = outputs.find(outputName);
if (output == outputs.end()) {
@ -99,5 +99,5 @@ void updateModelOutputOrientation(const std::string& outputName,
}
}
} // namespace helpers
} // namespace intela_gna
} // namespace intel_gna
} // namespace ov

View File

@ -13,7 +13,8 @@
#include "descriptions/gna_desc.hpp"
namespace ov {
namespace intela_gna {
namespace intel_gna {
/**
* @namespace helpers contains helper tools for the GNA plugin.
*/
@ -38,8 +39,8 @@ namespace helpers {
* @throws if orientations of input for multiple layers are different
*/
void updateModelInputOrientationWithoutConvolution(const InferenceEngine::CNNLayer& inputLayer,
const GNAPluginNS::backend::DnnComponents& components,
GNAPluginNS::GnaInputs& inputs);
const backend::DnnComponents& components,
GnaInputs& inputs);
/**
* @brief Update expected orientation for model output of given \p outputName. It is needed to recognize if extra
@ -60,9 +61,9 @@ void updateModelInputOrientationWithoutConvolution(const InferenceEngine::CNNLay
*/
void updateModelOutputOrientation(const std::string& outputName,
const std::string& cnnlayerName,
const GNAPluginNS::backend::DnnComponents& components,
GNAPluginNS::GnaOutputs& outputs);
const backend::DnnComponents& components,
GnaOutputs& outputs);
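// Illustrative call sites (the surrounding variables are assumptions, not code from the plugin):
// helpers::updateModelInputOrientationWithoutConvolution(*inputLayer, dnnComponents, inputs);
// helpers::updateModelOutputOrientation(portName, cnnLayer->name, dnnComponents, outputs);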
} // namespace helpers
} // namespace intela_gna
} // namespace intel_gna
} // namespace ov

View File

@ -4,7 +4,10 @@
#include "preprocessing.hpp"
int16_t GNAPluginNS::ConvertFloatToInt16(float src) {
namespace ov {
namespace intel_gna {
int16_t ConvertFloatToInt16(float src) {
float rounding_value = (src > 0) ? 0.5f : -0.5f;
float value = src + rounding_value;
if (value > 32767.0) {
@ -15,7 +18,7 @@ int16_t GNAPluginNS::ConvertFloatToInt16(float src) {
return (int16_t)value;
}
int8_t GNAPluginNS::ConvertFloatToInt8(float src) {
int8_t ConvertFloatToInt8(float src) {
float rounding_value = (src > 0) ? 0.5f : -0.5f;
float value = src + rounding_value;
if (value > 127.0) {
@ -26,15 +29,18 @@ int8_t GNAPluginNS::ConvertFloatToInt8(float src) {
return (int8_t)value;
}
void GNAPluginNS::ConvertToInt16(int16_t *ptr_dst,
const float *ptr_src,
const uint32_t num_rows,
const uint32_t num_columns,
const float scale_factor) {
void ConvertToInt16(int16_t* ptr_dst,
const float* ptr_src,
const uint32_t num_rows,
const uint32_t num_columns,
const float scale_factor) {
if (!ptr_dst || !ptr_src) {
return;
}
for (uint32_t i = 0; i < num_rows*num_columns; i++) {
ptr_dst[i] = ConvertFloatToInt16(ptr_src[i]*scale_factor);
for (uint32_t i = 0; i < num_rows * num_columns; i++) {
ptr_dst[i] = ConvertFloatToInt16(ptr_src[i] * scale_factor);
}
}
} // namespace intel_gna
} // namespace ov
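A minimal usage sketch of the converted conversion helpers (values chosen for illustration; the
saturation result assumes the elided clamp returns INT16_MAX):

const float src[4] = {0.25f, -0.5f, 1.0f, 2.0f};
int16_t dst[4] = {};
ov::intel_gna::ConvertToInt16(dst, src, 1, 4, 16384.0f);
// dst == {4096, -8192, 16384, 32767}; only the last element hits the saturation path.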

View File

@ -6,7 +6,8 @@
#include <cstdint>
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
void ConvertToInt16(int16_t *ptr_dst,
const float *ptr_src,
@ -32,4 +33,5 @@ inline void UnscaleAndCast(T2 *ptr_dst, T1 *ptr_src, const uint32_t num_rows, co
}
}
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -6,7 +6,8 @@
#include "gna2_model_helper.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace request {
ModelWrapper::ModelWrapper(ConstructionPassKey) {
@ -33,4 +34,5 @@ const Gna2Model& ModelWrapper::object() const {
}
} // namespace request
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -6,8 +6,10 @@
#include <gna2-model-api.h>
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace request {
class ModelWrapperFactory;
/**
@ -58,4 +60,5 @@ private:
};
} // namespace request
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -9,7 +9,8 @@
#include "backend/am_intel_dnn.hpp"
#include "gna2_model_helper.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace request {
std::shared_ptr<ModelWrapper> ModelWrapperFactory::createTrivial() {
@ -49,4 +50,5 @@ std::shared_ptr<ModelWrapper> ModelWrapperFactory::createInitialized(ModelInitia
}
} // namespace request
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
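A short usage sketch under the new namespace (variable names are illustrative only):

auto wrapper = ov::intel_gna::request::ModelWrapperFactory::createTrivial();
const Gna2Model& model = wrapper->object();  // the wrapped Gna2Model owned by this ModelWrapper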

View File

@ -9,7 +9,8 @@
#include "model_wrapper.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace request {
class ModelWrapperFactory {
@ -22,4 +23,5 @@ public:
};
} // namespace request
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -9,7 +9,8 @@
#include "request_status.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace request {
/**
@ -27,7 +28,7 @@ public:
* @brief Callback invoked by wait operation.
* @param requestID id of request to be used for wait
* @param timeoutMilliseconds timeout of wait in milliseconds
* @return Status of subrequest @see GNAPluginNS::RequestStatus
* @return Status of subrequest @see RequestStatus
*
*/
using WaitHandler = std::function<RequestStatus(uint32_t requestID, int64_t timeoutMilliseconds)>;
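// Illustrative handler only (the device object and the RequestStatus values are assumptions, not plugin code):
// WaitHandler waitHandler = [&device](uint32_t requestID, int64_t timeoutMilliseconds) {
//     return device.wait(requestID, timeoutMilliseconds) ? RequestStatus::kCompleted : RequestStatus::kPending;
// };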
@ -37,7 +38,7 @@ public:
/**
* @brief Wait until the subrequest finishes or the given timeout expires.
* @param timeoutMilliseconds timeout in milliseconds
* @return status of execution of subrequest @see GNAPluginNS::RequestStatus
* @return status of execution of subrequest @see RequestStatus
*/
virtual RequestStatus wait(int64_t timeoutMilliseconds) = 0;
@ -69,4 +70,5 @@ public:
};
} // namespace request
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -9,7 +9,8 @@
#include "log/debug.hpp"
#include "log/log.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace request {
SubrequestImpl::SubrequestImpl(EnqueueHandler enqueueHandler, WaitHandler waitHandler)
@ -64,4 +65,5 @@ bool SubrequestImpl::isCompleted() const {
}
} // namespace request
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -6,7 +6,8 @@
#include "subrequest.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace request {
/**
@ -34,7 +35,7 @@ public:
/**
* @brief Wait until the subrequest finishes or the given timeout expires.
* @param timeoutMilliseconds timeout in milliseconds
* @return status of execution of subrequest @see GNAPluginNS::RequestStatus
* @return status of execution of subrequest @see RequestStatus
*/
RequestStatus wait(int64_t timeoutMilliseconds) override;
@ -72,4 +73,5 @@ private:
};
} // namespace request
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -12,7 +12,8 @@
#include "request_status.hpp"
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace request {
class ModelWrapper;
@ -46,7 +47,7 @@ public:
/**
* @brief Wait until the request finishes or the given timeout expires.
* @param timeoutMilliseconds timeout in milliseconds
* @return status of execution of ongoing request. @see GNAPluginNS::RequestStatus
* @return status of execution of ongoing request. @see RequestStatus
*/
virtual RequestStatus wait(int64_t timeoutMilliseconds) = 0;
@ -85,4 +86,5 @@ public:
};
} // namespace request
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

Some files were not shown because too many files have changed in this diff.