Gna namespace (#14877)

* [GNA] Cleanup intel_dnn

* [GNA] Replace GNAPluginNS

* [GNA] Rename headers
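
The practical effect of the namespace change is that symbols previously qualified with the flat GNAPluginNS namespace are now nested under ov::intel_gna. A minimal sketch of the before/after layout, using the AMIntelDNN class touched by this diff (surrounding declarations elided):

// Before this commit: plugin code lived in a flat, plugin-private namespace.
namespace GNAPluginNS {
namespace backend {
class AMIntelDNN;
} // namespace backend
} // namespace GNAPluginNS

// After this commit: the same code sits under the OpenVINO vendor namespace.
namespace ov {
namespace intel_gna {
namespace backend {
class AMIntelDNN;
} // namespace backend
} // namespace intel_gna
} // namespace ov

// Call sites change accordingly:
//   GNAPluginNS::backend::AMIntelDNN  ->  ov::intel_gna::backend::AMIntelDNN
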
This commit is contained in:
Szymon Irzabek 2023-01-03 16:06:23 +01:00 committed by GitHub
parent f2d93f4a79
commit c683a72400
178 changed files with 1928 additions and 1702 deletions

View File

@@ -164,7 +164,7 @@ inline std::istream& operator>>(std::istream& is, HWGeneration& hw_generation) {
 static constexpr Property<ExecutionMode> execution_mode{"GNA_DEVICE_MODE"};
 
 /**
- * @brief The option to override the GNA HW execution target. May be one of GNA_2_0, GNA_3_0.
+ * @brief The option to override the GNA HW execution target. May be one of GNA_2_0, GNA_3_0, GNA_3_5.
  * By default (in case of no value set) the behavior depends on GNA HW availability:
  * If GNA HW is present, use the option corresponding to this HW.
  * If HW is not present, use the option corresponding to the latest fully supported GNA HW generation.
@@ -175,7 +175,7 @@ static constexpr Property<ExecutionMode> execution_mode{"GNA_DEVICE_MODE"};
 static constexpr Property<HWGeneration> execution_target{"GNA_HW_EXECUTION_TARGET"};
 
 /**
- * @brief The option to override the GNA HW compile target. May be one of GNA_2_0, GNA_3_0.
+ * @brief The option to override the GNA HW compile target. May be one of GNA_2_0, GNA_3_0, GNA_3_5.
  * By default the same as execution_target.
  * @ingroup ov_runtime_gna_prop_cpp_api
  */
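
The two properties documented above are set through the regular OpenVINO property API. A hedged usage sketch (the model path is a placeholder, and acceptance of the GNA_3_5 target by the installed package is an assumption):

#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_gna/properties.hpp"

int main() {
    ov::Core core;
    // "model.xml" is a placeholder path for an IR prepared for the GNA plugin.
    auto model = core.read_model("model.xml");

    // Pin the HW execution and compile targets described in the comments above;
    // after this change GNA_3_5 is documented as an accepted value.
    auto compiled = core.compile_model(model,
                                       "GNA",
                                       ov::intel_gna::execution_target(ov::intel_gna::HWGeneration::GNA_3_5),
                                       ov::intel_gna::compile_target(ov::intel_gna::HWGeneration::GNA_3_5));
    return 0;
}
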

View File

@@ -22,11 +22,10 @@
 #include "memory/gna_memory_util.hpp"
 #include "log/log.hpp"
 #include "log/dump.hpp"
-#include "backend/dnn.hpp"
 #include "backend/am_intel_dnn.hpp"
-#include "backend/dnn_types.h"
+#include "backend/dnn_types.hpp"
 #include "gna/gna_config.hpp"
-#include "backend/gna_types.h"
+#include "backend/gna_types.hpp"
 #include "backend/gna_limitations.hpp"
 #include "layers/gna_convolution_layer.hpp"
 #include "memory/gna_memory.hpp"
@@ -46,20 +45,18 @@
  */
 #define LIGHT_DUMP
 
-using namespace GNAPluginNS::backend;
-using namespace ov::intel_gna;
-using GNAPluginNS::GNAConvolutionLayer::outputFromConv;
-using GNAPluginNS::GNAConvolutionLayer::outputFromPooling;
-using GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy;
-using GNAPluginNS::memory::GNAMemoryInterface;
+using gna_convolution_layer::outputFromConv;
+using gna_convolution_layer::outputFromPooling;
+
+namespace ov {
+namespace intel_gna {
+namespace backend {
 
-void GNAPluginNS::backend::AMIntelDNN::BeginNewWrite(uint32_t index) {
+void backend::AMIntelDNN::BeginNewWrite(uint32_t index) {
     dump_write_index = index;
 }
 
-void GNAPluginNS::backend::AMIntelDNN::Init(GNAMemoryInterface* memoryInterface,
-                                            intel_dnn_number_type_t compute_precision,
-                                            float scale_factor) {
+void backend::AMIntelDNN::Init(memory::GNAMemoryInterface* memoryInterface,
+                               intel_dnn_number_type_t compute_precision,
+                               float scale_factor) {
     memory = memoryInterface;
@@ -68,25 +65,13 @@ void GNAPluginNS::backend::AMIntelDNN::Init(GNAMemoryInterface* memoryInterface,
     ptr_active_outputs_ = nullptr;
     num_active_outputs_ = 0;
-    num_left_context = 0;
-    num_right_context = 0;
-    softmax_type = kSoftmaxNone;
-    ptr_sumgroup_sizes = nullptr;
-    num_sumgroup_sizes = 0;
-    ptr_priors = nullptr;
 }
 
-GNAPluginNS::backend::AMIntelDNN::~AMIntelDNN() {
+backend::AMIntelDNN::~AMIntelDNN() {
     component.clear();
-    if (ptr_sumgroup_sizes != NULL) {
-        _mm_free(ptr_sumgroup_sizes);
-    }
-    if (ptr_priors != NULL) {
-        _mm_free(ptr_priors);
-    }
 }
void GNAPluginNS::backend::AMIntelDNN::InitActiveList(uint32_t *ptr_active_list) { void backend::AMIntelDNN::InitActiveList(uint32_t *ptr_active_list) {
ptr_active_outputs_ = ptr_active_list; ptr_active_outputs_ = ptr_active_list;
if (ptr_active_list == nullptr) { if (ptr_active_list == nullptr) {
if (component[component.size() - 1].orientation_out == kDnnInterleavedOrientation) { if (component[component.size() - 1].orientation_out == kDnnInterleavedOrientation) {
@ -100,7 +85,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitActiveList(uint32_t *ptr_active_list)
} }
void GNAPluginNS::backend::AMIntelDNN::InitAffineComponentPrivate(intel_dnn_component_t &comp, void backend::AMIntelDNN::InitAffineComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in, uint32_t num_rows_in,
uint32_t num_columns, uint32_t num_columns,
uint32_t num_rows_out, uint32_t num_rows_out,
@ -123,7 +108,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitAffineComponentPrivate(intel_dnn_comp
comp.num_bytes_per_input = num_bytes_per_input; comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output; comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = isDiag ? kDnnDiagonalOp : kDnnAffineOp; comp.operation = isDiag ? kDnnDiagonalOp : kDnnAffineOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnInterleavedOrientation; comp.orientation_in = kDnnInterleavedOrientation;
comp.orientation_out = kDnnInterleavedOrientation; comp.orientation_out = kDnnInterleavedOrientation;
comp.op.affine.num_bytes_per_weight = num_bytes_per_weight; comp.op.affine.num_bytes_per_weight = num_bytes_per_weight;
@ -145,7 +129,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitAffineComponentPrivate(intel_dnn_comp
} }
void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel_dnn_component_t &comp, void backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_columns_in, uint32_t num_columns_in,
uint32_t num_columns_out, uint32_t num_columns_out,
uint32_t num_bytes_per_input, uint32_t num_bytes_per_input,
@ -169,7 +153,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
comp.num_bytes_per_input = num_bytes_per_input; comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output; comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnConvolutional1dOp; comp.operation = kDnnConvolutional1dOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnNonInterleavedOrientation; comp.orientation_in = kDnnNonInterleavedOrientation;
comp.orientation_out = kDnnNonInterleavedOrientation; comp.orientation_out = kDnnNonInterleavedOrientation;
comp.ptr_inputs = ptr_inputs; comp.ptr_inputs = ptr_inputs;
@ -199,9 +182,9 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << num_columns_in << THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << num_columns_in <<
") is not a multiply by 8"; ") is not a multiply by 8";
} }
if (num_filters < GNALimitations::convMinFiltersNum || if (num_filters < limitations::convMinFiltersNum ||
num_filters > GNALimitations::convMaxFiltersNum || num_filters > limitations::convMaxFiltersNum ||
num_filters % GNALimitations::convFiltersNumDivider != 0) { num_filters % limitations::convFiltersNumDivider != 0) {
THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << num_filters; THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << num_filters;
} }
auto max_number_of_out_elements = outputFromConv(num_columns_in, num_filter_coefficients, convStride); auto max_number_of_out_elements = outputFromConv(num_columns_in, num_filter_coefficients, convStride);
@ -210,7 +193,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
} }
} }
void GNAPluginNS::backend::AMIntelDNN::InitConvolutional2DComponentPrivate(intel_dnn_component_t& comp, void backend::AMIntelDNN::InitConvolutional2DComponentPrivate(intel_dnn_component_t& comp,
OvGnaTensor inputTensor, OvGnaTensor inputTensor,
OvGnaTensor outputTensor, OvGnaTensor outputTensor,
OvGnaTensor filterTensor, OvGnaTensor filterTensor,
@ -229,7 +212,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional2DComponentPrivate(intel
comp.tensors.push_back(filterTensor); comp.tensors.push_back(filterTensor);
comp.tensors.push_back(biasTensor); comp.tensors.push_back(biasTensor);
comp.operation = kDnnConvolutional2dOp; comp.operation = kDnnConvolutional2dOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnNonInterleavedOrientation; comp.orientation_in = kDnnNonInterleavedOrientation;
comp.orientation_out = kDnnNonInterleavedOrientation; comp.orientation_out = kDnnNonInterleavedOrientation;
comp.ptr_inputs = ptr_inputs; comp.ptr_inputs = ptr_inputs;
@ -246,7 +228,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional2DComponentPrivate(intel
ptr_outputs = &comp.ptr_outputs; ptr_outputs = &comp.ptr_outputs;
} }
bool GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(const Gna2Operation& op) { bool backend::AMIntelDNN::isOperationCnnLegacySpecific(const Gna2Operation& op) {
// GNA compile target GNA_TARGET_3_0 does not support pooling window < pooling stride // GNA compile target GNA_TARGET_3_0 does not support pooling window < pooling stride
return op.Type == Gna2OperationTypeConvolution && return op.Type == Gna2OperationTypeConvolution &&
op.NumberOfParameters > std::max(PoolStrideParamIdx, PoolWinParamIdx) && op.NumberOfParameters > std::max(PoolStrideParamIdx, PoolWinParamIdx) &&
@ -256,7 +238,7 @@ bool GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(const Gna2Op
static_cast<Gna2Shape*>(op.Parameters[PoolStrideParamIdx])->Dimensions[0] > static_cast<Gna2Shape*>(op.Parameters[PoolWinParamIdx])->Dimensions[0]; static_cast<Gna2Shape*>(op.Parameters[PoolStrideParamIdx])->Dimensions[0] > static_cast<Gna2Shape*>(op.Parameters[PoolWinParamIdx])->Dimensions[0];
} }
void GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(Gna2Model& gnaModel, bool useLegacyFormula) { void backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(Gna2Model& gnaModel, bool useLegacyFormula) {
IE_ASSERT(gnaModel.Operations != nullptr || gnaModel.NumberOfOperations == 0); IE_ASSERT(gnaModel.Operations != nullptr || gnaModel.NumberOfOperations == 0);
for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) { for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) {
auto& gnaOp = gnaModel.Operations[i]; auto& gnaOp = gnaModel.Operations[i];
@ -277,10 +259,10 @@ void GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(Gna
const auto fltStride = fltStrideShape.Dimensions[0]; const auto fltStride = fltStrideShape.Dimensions[0];
const auto inVecCnt = inputShape.Dimensions[1]; const auto inVecCnt = inputShape.Dimensions[1];
const auto nFltSize = gnaOp.Operands[FilterOpIdx]->Shape.Dimensions[1]; const auto nFltSize = gnaOp.Operands[FilterOpIdx]->Shape.Dimensions[1];
const auto outFromConv = GNAPluginNS::GNAConvolutionLayer::outputFromConv(inVecCnt, nFltSize, fltStride); const auto outFromConv = gna_convolution_layer::outputFromConv(inVecCnt, nFltSize, fltStride);
const auto& poolWindow = *static_cast<Gna2Shape*>(gnaOp.Parameters[PoolWinParamIdx]); const auto& poolWindow = *static_cast<Gna2Shape*>(gnaOp.Parameters[PoolWinParamIdx]);
const auto& poolStride = *static_cast<Gna2Shape*>(gnaOp.Parameters[PoolStrideParamIdx]); const auto& poolStride = *static_cast<Gna2Shape*>(gnaOp.Parameters[PoolStrideParamIdx]);
const auto numberOfOutputs = GNAPluginNS::GNAConvolutionLayer::outputFromPooling( const auto numberOfOutputs = gna_convolution_layer::outputFromPooling(
outFromConv, poolWindow.Dimensions[0], poolStride.Dimensions[0], outFromConv, poolWindow.Dimensions[0], poolStride.Dimensions[0],
useLegacyFormula || isOperationCnnLegacySpecific(gnaOp)); useLegacyFormula || isOperationCnnLegacySpecific(gnaOp));
auto& outputTensor = *gnaOp.Operands[OutOpIdx]; auto& outputTensor = *gnaOp.Operands[OutOpIdx];
@ -289,7 +271,7 @@ void GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(Gna
} }
} }
void GNAPluginNS::backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_component_t &comp, void backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_component_t &comp,
std::array<uint32_t, 3> inCHW, std::array<uint32_t, 3> inCHW,
std::array<uint32_t, 3> outCHW, std::array<uint32_t, 3> outCHW,
uint32_t num_bytes_per_input, uint32_t num_bytes_per_input,
@ -303,7 +285,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_com
comp.num_bytes_per_input = num_bytes_per_input; comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output; comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnMaxPoolOp; comp.operation = kDnnMaxPoolOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnNonInterleavedOrientation; comp.orientation_in = kDnnNonInterleavedOrientation;
comp.orientation_out = kDnnNonInterleavedOrientation; comp.orientation_out = kDnnNonInterleavedOrientation;
comp.op.maxpool.inCHW = inCHW; comp.op.maxpool.inCHW = inCHW;
@ -321,7 +302,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_com
} }
} }
void GNAPluginNS::backend::AMIntelDNN::InitCopyComponentPrivate(intel_dnn_component_t &comp, void backend::AMIntelDNN::InitCopyComponentPrivate(intel_dnn_component_t &comp,
intel_dnn_orientation_t orientation, intel_dnn_orientation_t orientation,
uint32_t num_rows_in, uint32_t num_rows_in,
uint32_t num_columns_in, uint32_t num_columns_in,
@ -342,7 +323,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitCopyComponentPrivate(intel_dnn_compon
comp.num_bytes_per_input = num_bytes_per_input; comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output; comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnCopyOp; comp.operation = kDnnCopyOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = orientation; comp.orientation_in = orientation;
comp.orientation_out = orientation; comp.orientation_out = orientation;
comp.ptr_inputs = ptr_inputs; comp.ptr_inputs = ptr_inputs;
@ -361,7 +341,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitCopyComponentPrivate(intel_dnn_compon
} }
} }
void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel_dnn_component_t &comp, void backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel_dnn_component_t &comp,
const DnnActivation& function_id, const DnnActivation& function_id,
intel_dnn_orientation_t orientation, intel_dnn_orientation_t orientation,
uint32_t num_rows, uint32_t num_rows,
@ -382,7 +362,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel
comp.num_bytes_per_input = num_bytes_per_input; comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output; comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnPiecewiselinearOp; comp.operation = kDnnPiecewiselinearOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = orientation; comp.orientation_in = orientation;
comp.orientation_out = orientation; comp.orientation_out = orientation;
comp.op.pwl.func_id = function_id; comp.op.pwl.func_id = function_id;
@ -404,7 +383,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel
} }
} }
void GNAPluginNS::backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_component_t &comp, void backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in, uint32_t num_rows_in,
uint32_t num_columns_in, uint32_t num_columns_in,
uint32_t num_bytes_per_input, uint32_t num_bytes_per_input,
@ -420,7 +399,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_
comp.num_bytes_per_input = num_bytes_per_input; comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output; comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnInterleaveOp; comp.operation = kDnnInterleaveOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnNonInterleavedOrientation; comp.orientation_in = kDnnNonInterleavedOrientation;
comp.orientation_out = kDnnInterleavedOrientation; comp.orientation_out = kDnnInterleavedOrientation;
comp.output_scale_factor = output_scale_factor; comp.output_scale_factor = output_scale_factor;
@ -434,7 +412,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_
} }
} }
void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dnn_component_t &comp, void backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in, uint32_t num_rows_in,
uint32_t num_columns_in, uint32_t num_columns_in,
uint32_t num_bytes_per_input, uint32_t num_bytes_per_input,
@ -450,7 +428,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dn
comp.num_bytes_per_input = num_bytes_per_input; comp.num_bytes_per_input = num_bytes_per_input;
comp.num_bytes_per_output = num_bytes_per_output; comp.num_bytes_per_output = num_bytes_per_output;
comp.operation = kDnnDeinterleaveOp; comp.operation = kDnnDeinterleaveOp;
comp.macro_operation = kDnnMacroOpNone;
comp.orientation_in = kDnnInterleavedOrientation; comp.orientation_in = kDnnInterleavedOrientation;
comp.orientation_out = kDnnInterleavedOrientation; comp.orientation_out = kDnnInterleavedOrientation;
comp.output_scale_factor = output_scale_factor; comp.output_scale_factor = output_scale_factor;
@ -464,7 +441,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dn
} }
} }
float GNAPluginNS::backend::AMIntelDNN::OutputScaleFactor(intel_dnn_component_t &comp) { float backend::AMIntelDNN::OutputScaleFactor(intel_dnn_component_t &comp) {
return comp.output_scale_factor; return comp.output_scale_factor;
} }
@ -476,7 +453,7 @@ struct InputEndPoint {
InputEndPoint(int nidx, size_t sz, size_t esize) : idx(nidx), size(sz), num_bytes_per_output(esize) {} InputEndPoint(int nidx, size_t sz, size_t esize) : idx(nidx), size(sz), num_bytes_per_output(esize) {}
}; };
void GNAPluginNS::backend::AMIntelDNN::WriteGraphWizModel(const char *filename) { void backend::AMIntelDNN::WriteGraphWizModel(const char *filename) {
auto & components = component; auto & components = component;
#define IS_AFFINE(k)\ #define IS_AFFINE(k)\
@ -743,12 +720,12 @@ void PrintTensors(std::ofstream& out, T tensors) {
} }
} }
void GNAPluginNS::backend::AMIntelDNN::PrintOffset(std::ofstream& out, const std::string& type, void* ptr) { void backend::AMIntelDNN::PrintOffset(std::ofstream& out, const std::string& type, void* ptr) {
const auto queue = memory->getQueue(ptr); const auto queue = memory->getQueue(ptr);
std::string typeOfRegion = "UNKNOWN_QUEUE"; std::string typeOfRegion = "UNKNOWN_QUEUE";
auto offset = std::numeric_limits<uint32_t>::max(); auto offset = std::numeric_limits<uint32_t>::max();
if (queue != nullptr) { if (queue != nullptr) {
typeOfRegion = GNAPluginNS::memory::rRegionToStr(queue->regionType()); typeOfRegion = memory::rRegionToStr(queue->regionType());
offset = queue->getOffset(ptr).second; offset = queue->getOffset(ptr).second;
} }
out << "<memory_region_type> " << typeOfRegion << "\n"; out << "<memory_region_type> " << typeOfRegion << "\n";
@ -756,9 +733,9 @@ void GNAPluginNS::backend::AMIntelDNN::PrintOffset(std::ofstream& out, const std
<< "0x" << std::setfill('0') << std::setw(8) << std::hex << offset << "\n"; << "0x" << std::setfill('0') << std::setw(8) << std::hex << offset << "\n";
} }
void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision) { void backend::AMIntelDNN::WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision) {
if ((compute_precision_ == kDnnFloat) && (logging_precision == kDnnInt)) { if ((compute_precision_ == kDnnFloat) && (logging_precision == kDnnInt)) {
fprintf(stderr, "Error trying to write floating point DNN as integer in GNAPluginNS::backend::AMIntelDNN::WriteDnnText().\n"); fprintf(stderr, "Error trying to write floating point DNN as integer in backend::AMIntelDNN::WriteDnnText().\n");
fprintf(stderr, " Please convert to integer first.\n"); fprintf(stderr, " Please convert to integer first.\n");
throw -1; throw -1;
} }
@ -777,8 +754,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
out_file << "<intel_dnn_file>\n"; out_file << "<intel_dnn_file>\n";
out_file << "<number_type> " << intel_dnn_number_type_name[logging_precision] << "\n"; out_file << "<number_type> " << intel_dnn_number_type_name[logging_precision] << "\n";
out_file << "<softmax_type> " << intel_dnn_softmax_name[softmax_type] << "\n"; const auto& regionsMap = memory::GetAllRegionsToStrMap();
const auto& regionsMap = GNAPluginNS::memory::GetAllRegionsToStrMap();
for (const auto& regionPair : regionsMap) { for (const auto& regionPair : regionsMap) {
out_file << "<memory_region_type> " << std::dec << regionPair.second << "\n"; out_file << "<memory_region_type> " << std::dec << regionPair.second << "\n";
out_file << "<num_memory_region_bytes> " << std::dec << memory->getRegionBytes(regionPair.first) << "\n"; out_file << "<num_memory_region_bytes> " << std::dec << memory->getRegionBytes(regionPair.first) << "\n";
@ -818,7 +794,6 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
layer++; layer++;
} }
out_file << "<component_operation> " << intel_dnn_operation_name[component[i].operation] << "\n"; out_file << "<component_operation> " << intel_dnn_operation_name[component[i].operation] << "\n";
out_file << "<macro_operation> " << intel_dnn_macro_operation_name[component[i].macro_operation] << "\n";
out_file << "<num_rows_in> " << std::dec << num_rows_in << "\n"; out_file << "<num_rows_in> " << std::dec << num_rows_in << "\n";
out_file << "<num_columns_in> " << std::dec << num_columns_in << "\n"; out_file << "<num_columns_in> " << std::dec << num_columns_in << "\n";
out_file << "<num_rows_out> " << std::dec << num_rows_out << "\n"; out_file << "<num_rows_out> " << std::dec << num_rows_out << "\n";
@ -1383,7 +1358,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
} }
} }
uint32_t GNAPluginNS::backend::AMIntelDNN::CountLayers() { uint32_t backend::AMIntelDNN::CountLayers() {
uint32_t n = 0; uint32_t n = 0;
for (auto && c : component) { for (auto && c : component) {
if (c.operation == kDnnAffineOp if (c.operation == kDnnAffineOp
@ -1401,7 +1376,7 @@ uint32_t GNAPluginNS::backend::AMIntelDNN::CountLayers() {
return n; return n;
} }
void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const std::string& gnaCompileTarget) { void backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const std::string& gnaCompileTarget) {
Gna2Operation * gnaOperation; Gna2Operation * gnaOperation;
if (gnaModel == nullptr) if (gnaModel == nullptr)
THROW_GNA_EXCEPTION << "Invalid input parameter"; THROW_GNA_EXCEPTION << "Invalid input parameter";
@ -1409,12 +1384,12 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const
THROW_GNA_EXCEPTION << "InitGNAStruct can't work on preallocated layers array"; THROW_GNA_EXCEPTION << "InitGNAStruct can't work on preallocated layers array";
if (component.empty()) if (component.empty())
THROW_GNA_EXCEPTION << "empty model in GNAPluginNS::backend::AMIntelDNN::InitGNAStruct()"; THROW_GNA_EXCEPTION << "empty model in backend::AMIntelDNN::InitGNAStruct()";
gnaModel->NumberOfOperations = CountLayers(); gnaModel->NumberOfOperations = CountLayers();
gnaModel->Operations = reinterpret_cast<Gna2Operation*>(gnaUserAllocator(gnaModel->NumberOfOperations * sizeof(Gna2Operation))); gnaModel->Operations = reinterpret_cast<Gna2Operation*>(gnaUserAllocator(gnaModel->NumberOfOperations * sizeof(Gna2Operation)));
if (gnaModel->Operations == nullptr) if (gnaModel->Operations == nullptr)
THROW_GNA_EXCEPTION << "out of memory in GNAPluginNS::backend::AMIntelDNN::InitGNAStruct()"; THROW_GNA_EXCEPTION << "out of memory in backend::AMIntelDNN::InitGNAStruct()";
memset(gnaModel->Operations, 0, gnaModel->NumberOfOperations * sizeof(Gna2Operation)); memset(gnaModel->Operations, 0, gnaModel->NumberOfOperations * sizeof(Gna2Operation));
gnaOperation = gnaModel->Operations; gnaOperation = gnaModel->Operations;
for (int i = 0; i < component.size(); i++) { for (int i = 0; i < component.size(); i++) {
@ -1666,7 +1641,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const
gnaModel->NumberOfOperations = static_cast<uint32_t>(std::distance(gnaModel->Operations, gnaOperation)); gnaModel->NumberOfOperations = static_cast<uint32_t>(std::distance(gnaModel->Operations, gnaOperation));
} }
void GNAPluginNS::backend::AMIntelDNN::DestroyGNAStruct(Gna2Model *gnaModel) { void backend::AMIntelDNN::DestroyGNAStruct(Gna2Model *gnaModel) {
if (gnaModel->Operations != nullptr) { if (gnaModel->Operations != nullptr) {
for (uint32_t i = 0; i < gnaModel->NumberOfOperations; i++) { for (uint32_t i = 0; i < gnaModel->NumberOfOperations; i++) {
switch (gnaModel->Operations[i].Type) { switch (gnaModel->Operations[i].Type) {
@ -1686,7 +1661,7 @@ void GNAPluginNS::backend::AMIntelDNN::DestroyGNAStruct(Gna2Model *gnaModel) {
gnaModel->NumberOfOperations = 0; gnaModel->NumberOfOperations = 0;
} }
void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputTextGNA(const Gna2Model & model) { void backend::AMIntelDNN::WriteInputAndOutputTextGNA(const Gna2Model & model) {
#ifdef LIGHT_DUMP #ifdef LIGHT_DUMP
dump::WriteInputAndOutputTextGNAImpl( dump::WriteInputAndOutputTextGNAImpl(
model, model,
@ -1695,7 +1670,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputTextGNA(const Gna2Mode
#endif #endif
} }
void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputText() { void backend::AMIntelDNN::WriteInputAndOutputText() {
#ifdef LIGHT_DUMP #ifdef LIGHT_DUMP
for (uint32_t i = 0; i < num_components(); i++) { for (uint32_t i = 0; i < num_components(); i++) {
std::stringstream out_file_name; std::stringstream out_file_name;
@ -1791,11 +1766,11 @@ void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputText() {
#endif #endif
} }
uint32_t GNAPluginNS::backend::AMIntelDNN::num_components() { uint32_t backend::AMIntelDNN::num_components() {
return static_cast<uint32_t>(component.size()); return static_cast<uint32_t>(component.size());
} }
uint32_t GNAPluginNS::backend::AMIntelDNN::num_gna_layers() { uint32_t backend::AMIntelDNN::num_gna_layers() {
uint32_t num_layers = 0; uint32_t num_layers = 0;
std::set<intel_dnn_operation_t> gna_layers({ kDnnAffineOp, std::set<intel_dnn_operation_t> gna_layers({ kDnnAffineOp,
kDnnDiagonalOp, kDnnDiagonalOp,
@ -1812,27 +1787,27 @@ uint32_t GNAPluginNS::backend::AMIntelDNN::num_gna_layers() {
return num_layers; return num_layers;
} }
uint32_t GNAPluginNS::backend::AMIntelDNN::num_group_in() { uint32_t backend::AMIntelDNN::num_group_in() {
return ((!component.empty()) ? ((component[0].orientation_in == kDnnInterleavedOrientation) return ((!component.empty()) ? ((component[0].orientation_in == kDnnInterleavedOrientation)
? component[0].num_columns_in : component[0].num_rows_in) : 0); ? component[0].num_columns_in : component[0].num_rows_in) : 0);
} }
uint32_t GNAPluginNS::backend::AMIntelDNN::num_group_out() { uint32_t backend::AMIntelDNN::num_group_out() {
return ((!component.empty()) ? ((component[component.size() - 1].orientation_out == kDnnInterleavedOrientation) return ((!component.empty()) ? ((component[component.size() - 1].orientation_out == kDnnInterleavedOrientation)
? component[component.size() - 1].num_columns_out : component[component.size() - ? component[component.size() - 1].num_columns_out : component[component.size() -
1].num_rows_out) : 0); 1].num_rows_out) : 0);
} }
uint32_t GNAPluginNS::backend::AMIntelDNN::num_inputs() { uint32_t backend::AMIntelDNN::num_inputs() {
return component.empty() ? 0 : component[0].num_rows_in; return component.empty() ? 0 : component[0].num_rows_in;
} }
uint32_t GNAPluginNS::backend::AMIntelDNN::num_outputs() { uint32_t backend::AMIntelDNN::num_outputs() {
return (component[component.size() - 1].orientation_out == kDnnInterleavedOrientation) ? component[ return (component[component.size() - 1].orientation_out == kDnnInterleavedOrientation) ? component[
component.size() - 1].num_rows_out : component[component.size() - 1].num_columns_out; component.size() - 1].num_rows_out : component[component.size() - 1].num_columns_out;
} }
std::string GNAPluginNS::backend::AMIntelDNN::getDumpFilePrefix(const std::string& folder) { std::string backend::AMIntelDNN::getDumpFilePrefix(const std::string& folder) {
const char pathSeparator = const char pathSeparator =
#ifdef _WIN32 #ifdef _WIN32
'\\'; '\\';
@ -1842,14 +1817,18 @@ std::string GNAPluginNS::backend::AMIntelDNN::getDumpFilePrefix(const std::strin
return std::string(".") + pathSeparator + folder + pathSeparator + std::to_string(dump_write_index) + pathSeparator; return std::string(".") + pathSeparator + folder + pathSeparator + std::to_string(dump_write_index) + pathSeparator;
} }
std::string GNAPluginNS::backend::AMIntelDNN::getDumpFilePrefixGNA() { std::string backend::AMIntelDNN::getDumpFilePrefixGNA() {
return getDumpFilePrefix("gna_layers"); return getDumpFilePrefix("gna_layers");
} }
std::string GNAPluginNS::backend::AMIntelDNN::getDumpFolderName() { std::string backend::AMIntelDNN::getDumpFolderName() {
return getDumpFilePrefix("layers"); return getDumpFilePrefix("layers");
} }
std::string GNAPluginNS::backend::AMIntelDNN::getRefFolderName() { std::string backend::AMIntelDNN::getRefFolderName() {
return getDumpFilePrefix("ref_layers"); return getDumpFilePrefix("ref_layers");
} }
} // namespace backend
} // namespace intel_gna
} // namespace ov

View File

@@ -8,8 +8,8 @@
 #include <string>
 #include <vector>
 
-#include "dnn_types.h"
-#include "gna_types.h"
+#include "dnn_types.hpp"
+#include "gna_types.hpp"
 
 #include "gna/gna_config.hpp"
 #include "log/debug.hpp"
@@ -17,9 +17,8 @@
 #include "memory/gna_memory.hpp"
 #include <gna2-model-api.h>
 
-using GNAPluginNS::memory::GNAMemoryInterface;
-
-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
 namespace backend {
 
 class AMIntelDNN {
@@ -28,24 +27,15 @@ public:
         : ptr_active_outputs_(NULL),
           num_active_outputs_(0),
           input_scale_factor_(1.0),
-          num_left_context(0),
-          num_right_context(0),
           do_rotate_input(false),
-          do_rotate_output(false),
           num_rotate_rows(0),
           num_rotate_columns(0),
-          num_rotate_output_rows(0),
-          num_rotate_output_columns(0),
-          softmax_type(kSoftmaxNone),
-          ptr_sumgroup_sizes(NULL),
-          num_sumgroup_sizes(0),
-          ptr_priors(NULL),
           compute_precision_(kDnnNumNumberType) {
     }
 
     ~AMIntelDNN();
 
-    void Init(GNAMemoryInterface * memoryInterface,
+    void Init(memory::GNAMemoryInterface * memoryInterface,
               intel_dnn_number_type_t compute_precision,
               float scale_factor);
@@ -284,9 +274,31 @@ public:
                                    true);
     }
 
-    float OutputScaleFactor(uint32_t component_index) {
-        return OutputScaleFactor(component[component_index]);
+    template <class T>
+    void AdvanceOperationIfAllApplied(const std::vector<intel_dnn_component_t>& cmp, int i, T*& operation) {
+        if (i == cmp.size() - 1 || cmp[i + 1].operation != kDnnPiecewiselinearOp) {
+            ++operation;
+        }
+    }
+
+    template <class T>
+    void AdvanceCnnOperationIfAllApplied(const std::vector<intel_dnn_component_t>& cmp, int i, T*& operation) {
+        if (i == cmp.size() - 1 ||
+            ((cmp[i + 1].operation != kDnnMaxPoolOp) && (cmp[i + 1].operation != kDnnPiecewiselinearOp))) {
+            operation++;
+        }
+    }
+
+    template <class T>
+    void AdvancePwlOperationIfAllApplied(const std::vector<intel_dnn_component_t>& cmp, int i, T*& operation) {
+        if (i == cmp.size() - 1 ||
+            ((cmp[i + 1].operation != kDnnMaxPoolOp) && (cmp[i + 1].operation != kDnnPiecewiselinearOp))) {
+            operation++;
+        }
+    }
+
+    float OutputScaleFactor(uint32_t cmp_index) {
+        return OutputScaleFactor(component[cmp_index]);
     }
 
     float OutputScaleFactor(intel_dnn_component_t &comp);
@@ -318,19 +330,10 @@ public:
     uint32_t num_outputs();
 
     std::vector<intel_dnn_component_t> component;
-    uint32_t num_left_context;
-    uint32_t num_right_context;
     uint32_t new_num_conv_columns = 0;
     bool do_rotate_input;
-    bool do_rotate_output;
     uint32_t num_rotate_rows = 0;
     uint32_t num_rotate_columns = 0;
-    uint32_t num_rotate_output_rows = 0;
-    uint32_t num_rotate_output_columns = 0;
-    DnnSoftmaxType softmax_type;
-    uint32_t *ptr_sumgroup_sizes;
-    uint32_t num_sumgroup_sizes;
-    float *ptr_priors;
 
     void WriteInputAndOutputText();
@ -339,7 +342,7 @@ public:
void BeginNewWrite(uint32_t index); void BeginNewWrite(uint32_t index);
private: private:
GNAMemoryInterface* memory = nullptr; memory::GNAMemoryInterface* memory = nullptr;
uint32_t *ptr_active_outputs_; uint32_t *ptr_active_outputs_;
uint32_t num_active_outputs_; uint32_t num_active_outputs_;
intel_dnn_number_type_t compute_precision_; intel_dnn_number_type_t compute_precision_;
@ -442,6 +445,20 @@ private:
void*& ptr_filters, void*& ptr_filters,
void*& ptr_biases); void*& ptr_biases);
static void InitDWSCComponentPrivate(intel_dnn_component_t& comp,
OvGnaTensor inputTensor,
OvGnaTensor outputTensor,
OvGnaTensor filterTensor,
OvGnaTensor biasTensor,
std::array<uint32_t, 2> convStride,
std::array<uint32_t, 2> zeroPadding,
float weight_scale_factor,
float output_scale_factor,
void*& ptr_inputs,
void*& ptr_outputs,
void*& ptr_filters,
void*& ptr_biases);
static void InitAffineComponentPrivate(intel_dnn_component_t &comp, static void InitAffineComponentPrivate(intel_dnn_component_t &comp,
uint32_t num_rows_in, uint32_t num_rows_in,
uint32_t num_columns, uint32_t num_columns,
@@ -464,5 +481,7 @@ private:
     std::string getDumpFolderName();
     std::string getRefFolderName();
 };
+
 } // namespace backend
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov
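
The Advance*IfAllApplied helpers moved into this class encode the fusion rule used when components are flattened into GNA operations: the destination operation pointer advances only once the activation (and, for convolutions, pooling) components that fuse into the current operation have been consumed. A self-contained toy sketch of that rule, using simplified stand-in types rather than the plugin's intel_dnn_component_t:

#include <cstdio>
#include <vector>

// Simplified stand-ins for intel_dnn_operation_t / intel_dnn_component_t.
enum Op { kAffine, kPwl };
struct Component { Op operation; };

// Same shape as AMIntelDNN::AdvanceOperationIfAllApplied: stay on the current
// operation while the next component is a piecewise-linear activation.
template <class T>
void AdvanceOperationIfAllApplied(const std::vector<Component>& cmp, int i, T*& operation) {
    if (i == static_cast<int>(cmp.size()) - 1 || cmp[i + 1].operation != kPwl) {
        ++operation;
    }
}

int main() {
    std::vector<Component> cmp = {{kAffine}, {kPwl}, {kAffine}};  // affine + fused PWL, then a plain affine
    int ops[4] = {};  // stand-in for the Gna2Operation array
    int* cur = ops;
    for (int i = 0; i < static_cast<int>(cmp.size()); ++i) {
        AdvanceOperationIfAllApplied(cmp, i, cur);
    }
    // The affine and its PWL share ops[0]; the second affine uses ops[1].
    std::printf("operations used: %d\n", static_cast<int>(cur - ops));
    return 0;
}
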

View File

@@ -1,73 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <cstdio>
#include <cmath>
#include <gna2-model-api.h>
#include "gna2_model_helper.hpp"
#include "log/dump.hpp"
#ifndef _NO_MKL_
#include <mkl_dnn.h>
#endif
#include "runtime/floatmath.h"
#include "dnn.hpp"
#include "runtime/pwl.h"
#include "runtime/cnn.h"
void GNAPluginNS::backend::ClearScoreError(intel_score_error_t *error) {
error->num_scores = 0;
error->num_errors = 0;
error->max_error = 0.0;
error->sum_error = 0.0;
error->sum_squared_error = 0.0;
error->max_rel_error = 0.0;
error->sum_rel_error = 0.0;
error->sum_squared_rel_error = 0.0;
}
void GNAPluginNS::backend::UpdateScoreError(intel_score_error_t *error, intel_score_error_t *total_error) {
total_error->num_errors += error->num_errors;
total_error->num_scores += error->num_scores;
total_error->sum_error += error->sum_error;
total_error->sum_squared_error += error->sum_squared_error;
if (error->max_error > total_error->max_error) {
total_error->max_error = error->max_error;
}
total_error->sum_rel_error += error->sum_rel_error;
total_error->sum_squared_rel_error += error->sum_squared_rel_error;
if (error->max_rel_error > total_error->max_rel_error) {
total_error->max_rel_error = error->max_rel_error;
}
}
void GNAPluginNS::backend::SoftmaxGoogle(float *ptr_output, float *ptr_input, const uint32_t num_outputs, const uint32_t num_inputs) {
// Assumes input vector contains log likelihoods
// The computes x[i] = x[i] - log(sum_j exp(x[j]))
// This normalizes the likelihoods by the sum of likelihoods but stores them as log likelihoods
float max_score = ptr_input[0];
float sum = 0.0;
float diff;
// find max score for normalization to [0,1]
for (uint32_t i = 0; i < num_inputs; i++) {
if (ptr_input[i] > max_score) {
max_score = ptr_input[i];
}
}
for (uint32_t i = 0; i < num_inputs; i++) {
sum += exp(ptr_input[i] - max_score);
}
if (sum < 1.0e-20) {
fprintf(stderr, "Warning: attempt to take log(0) in SoftmaxGoogle()!\n");
sum = 1.0e-20f;
}
diff = max_score + std::log(sum);
for (uint32_t i = 0; i < num_outputs; i++) {
ptr_output[i] = ptr_input[i] - diff;
}
}
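
For reference, the removed SoftmaxGoogle helper computed a log-softmax with the standard max-shift (log-sum-exp) trick for numerical stability; with m = \max_j x_j it returned

    y_i = x_i - \log \sum_j e^{x_j} = x_i - \Big( m + \log \sum_j e^{x_j - m} \Big),

which is exactly the max_score / sum / diff sequence in the deleted body above.
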

View File

@@ -1,66 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cstdlib>
#include <cstdio>
#include <memory.h>
#include <xmmintrin.h>
#include <fstream>
#include <sstream>
#include <string>
#include <iomanip>
#include <type_traits>
#include <vector>
#include "am_intel_dnn.hpp"
#include "dnn_types.h"
#include <gna2-model-api.h>
#define DNN_MAX_BATCH_SIZE 8
#define DNN_MAX_INPUTS 3072
#define DNN_MAX_OUTPUTS 8192
#define DNN_MAX_ERROR 1.0e-4f
#define DNN_NUM_BYTES_INT_BIAS 4
#define DNN_NUM_BYTES_INT_AFFINE_OUT 4
#define DNN_RAND_INT8_AMPLITUDE 127.0f
#define DNN_RAND_INT16_AMPLITUDE 16384.0f
#define DNN_RAND_INT32_AMPLITUDE 1048576.0f
#define DNN_RAND_FLOAT32_AMPLITUDE 8.0f
namespace GNAPluginNS {
namespace backend {
void PlotFloatIntDnn(GNAPluginNS::backend::AMIntelDNN *dnn, GNAPluginNS::backend::AMIntelDNN *dnn_int);
void ClearScoreError(intel_score_error_t *error);
void UpdateScoreError(intel_score_error_t *error, intel_score_error_t *total_error);
void SoftmaxGoogle(float *ptr_output, float *ptr_input, const uint32_t num_outputs, const uint32_t num_inputs);
template <class T>
void AdvanceOperationIfAllApplied(const std::vector<intel_dnn_component_t>& component, int i, T*& operation) {
if (i == component.size() - 1 || component[i + 1].operation != kDnnPiecewiselinearOp) {
++operation;
}
}
template <class T>
void AdvanceCnnOperationIfAllApplied(const std::vector<intel_dnn_component_t>& component, int i, T*& operation) {
if (i == component.size() - 1 || ((component[i + 1].operation != kDnnMaxPoolOp)
&& (component[i + 1].operation != kDnnPiecewiselinearOp))) {
operation++;
}
}
template <class T>
void AdvancePwlOperationIfAllApplied(const std::vector<intel_dnn_component_t>& component, int i, T*& operation) {
if (i == component.size() - 1 || ((component[i + 1].operation != kDnnMaxPoolOp)
&& (component[i + 1].operation != kDnnPiecewiselinearOp))) {
operation++;
}
}
} // namespace backend
} // namespace GNAPluginNS

View File

@@ -14,17 +14,18 @@
 #include "dnn_components.hpp"
 #include "log/log.hpp"
 
-using namespace ov::intel_gna;
-using namespace GNAPluginNS;
-using namespace GNAPluginNS::backend;
+namespace ov {
+namespace intel_gna {
+namespace backend {
 
-intel_dnn_component_t & DnnComponents::addComponent(const std::string layerName, const std::string layerMetaType) {
+intel_dnn_component_t& DnnComponents::addComponent(const std::string layerName, const std::string layerMetaType) {
     auto isDelayed = InferenceEngine::details::CaselessEq<std::string>()(layerMetaType, DelayedCopyLayerName);
     delayedOperations += isDelayed ? 1 : 0;
     components.emplace_back(DnnComponentExtra{layerName, {}, isDelayed});
-    auto &currentComponent = components.back().dnnComponent;
+    auto& currentComponent = components.back().dnnComponent;
 
-    log::trace() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << components.size() - 1 << std::endl;
+    log::trace() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_"
+                 << components.size() - 1 << std::endl;
 
     currentComponent.original_layer_name = components.back().name.c_str();
     int execOrder = 0;
@@ -32,10 +33,11 @@ intel_dnn_component_t & DnnComponents::addComponent(const std::string layerName,
         execOrder = static_cast<int>(components.size() - 1 - delayedOperations);
     } else {
         // todo: not perfect - propose to create mapping table that will be printed out by extra request
-        execOrder = - static_cast<int>(delayedOperations);
+        execOrder = -static_cast<int>(delayedOperations);
     }
 
-    log::debug() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << execOrder << std::endl;
+    log::debug() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << execOrder
+                 << std::endl;
 
     return currentComponent;
 }
@@ -47,7 +49,7 @@ intel_dnn_component_t* DnnComponents::findComponent(InferenceEngine::CNNLayerPtr
     return nullptr;
 }
 
-intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(const std::string& layerName) {
+intel_dnn_component_t* DnnComponents::findComponent(const std::string& layerName) {
     auto component = std::find_if(begin(components), end(components), [&](const storage_type ::value_type& comp) {
         return comp.name == layerName;
     });
@@ -57,8 +59,7 @@ intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(const
     return nullptr;
 }
 
-const intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(
-    const InferenceEngine::CNNLayerPtr layer) const {
+const intel_dnn_component_t* DnnComponents::findComponent(const InferenceEngine::CNNLayerPtr layer) const {
     if (layer) {
         return findComponent(layer->name);
     }
@@ -66,7 +67,7 @@ const intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(
     return nullptr;
 }
 
-const intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(const std::string& layerName) const {
+const intel_dnn_component_t* DnnComponents::findComponent(const std::string& layerName) const {
     auto component = std::find_if(begin(components), end(components), [&](const storage_type ::value_type& comp) {
         return comp.name == layerName;
     });
@@ -82,10 +83,14 @@ std::vector<intel_dnn_component_t> DnnComponents::getExecutionOrder() {
     uint32_t direct_id = 0;
     uint32_t delayed_id = static_cast<uint32_t>(components.size() - delayedOperations);
-    for (auto &&c : components) {
-        uint32_t &id = c.isDelayed ? delayed_id : direct_id;
+    for (auto&& c : components) {
+        uint32_t& id = c.isDelayed ? delayed_id : direct_id;
         result[id] = c.dnnComponent;
         id++;
     }
 
     return result;
 }
+
+} // namespace backend
+} // namespace intel_gna
+} // namespace ov

View File

@@ -11,11 +11,13 @@
 #include <ie_common.h>
 #include <legacy/ie_layers.h>
 
-#include "dnn_types.h"
+#include "dnn_types.hpp"
 
-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
 namespace backend {
 
 struct DnnComponentExtra {
     std::string name;
     intel_dnn_component_t dnnComponent;
@@ -71,5 +73,7 @@ struct DnnComponents {
 private:
     uint32_t delayedOperations = 0;
 };
+
 } // namespace backend
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov

View File

@@ -1,92 +0,0 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
// dnn_traits.hpp : c++ trait approach to define dnn objects
//
#pragma once
#include "dnn_types.h"
template<intel_dnn_operation_t layer>
struct DnnTrait {};
template<>
struct DnnTrait<kDnnDiagonalOp> {
using Type = intel_affine_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.affine;
}
};
template<>
struct DnnTrait<kDnnPiecewiselinearOp> {
using Type = intel_piecewiselinear_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.pwl;
}
};
template<>
struct DnnTrait<kDnnAffineOp> {
using Type = intel_affine_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.affine;
}
};
template<>
struct DnnTrait<kDnnConvolutional1dOp> {
using Type = intel_convolutionalD_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.conv1D;
}
};
template<>
struct DnnTrait<kDnnMaxPoolOp> {
using Type = intel_maxpool_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.maxpool;
}
};
template<>
struct DnnTrait<kDnnRecurrentOp> {
using Type = intel_recurrent_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.recurrent;
}
};
template<>
struct DnnTrait<kDnnInterleaveOp> {
using Type = intel_interleave_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.interleave;
}
};
template<>
struct DnnTrait<kDnnDeinterleaveOp> {
using Type = intel_deinterleave_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.deinterleave;
}
};
template<>
struct DnnTrait<kDnnCopyOp> {
using Type = intel_copy_t;
static Type *getLayer(intel_dnn_component_t &component) {
return &component.op.copy;
}
};
template<>
struct DnnTrait<kDnnNullOp> {
using Type = void;
static Type *getLayer(intel_dnn_component_t &component) {
return nullptr;
}
};

View File

@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "dnn_types.h"
+#include "dnn_types.hpp"
 
 const char *intel_dnn_activation_name[kActNumType] = {
     "kActNone",
@@ -25,13 +25,6 @@ const char *intel_dnn_activation_name[kActNumType] = {
     "kActPwl"
 };
 
-const char *intel_dnn_softmax_name[kSoftmaxNumType] = {
-    "kSoftmaxNone",
-    "kSoftmaxKaldiSumGroup",
-    "kSoftmaxKaldiApplyLog",
-    "kSoftmaxGoogle"
-};
-
 const char* intel_dnn_operation_name[kDnnNumOp] = {
     "kDnnNullOp",
     "kDnnAffineOp",
@@ -46,12 +39,6 @@ const char* intel_dnn_operation_name[kDnnNumOp] = {
     "kDnnCopyOp"
 };
 
-const char *intel_dnn_macro_operation_name[kDnnNumMacroOp] = {
-    "kDnnMacroOpNone",
-    "kDnnMacroOpLstm",
-    "kDnnMacroOpBiLstm"
-};
-
 const char *intel_dnn_number_type_name[kDnnNumNumberType] = {
     "kDnnFloat",
     "kDnnInt"

View File

@@ -10,7 +10,7 @@
 #include <string>
 #include <type_traits>
 
-#include "gna_types.h"
+#include "gna_types.hpp"
 #include "log/debug.hpp"
 
 enum DnnActivationType : uint8_t {
@@ -83,16 +83,6 @@ static_assert(std::is_trivial<DnnActivation>::value, "DnnActivation is not triva
 
 extern const char *intel_dnn_activation_name[kActNumType];
 
-typedef enum DnnSoftmaxType {
-    kSoftmaxNone,
-    kSoftmaxKaldiSumgroup,
-    kSoftmaxEesen,
-    kSoftmaxGoogle,
-    kSoftmaxNumType
-} intel_dnn_softmax_type_t;
-
-extern const char *intel_dnn_softmax_name[kSoftmaxNumType];
-
 typedef enum {
     kDnnUnknownOrientation = 100,
     kDnnInterleavedOrientation,
@@ -117,15 +107,6 @@ typedef enum {
 
 extern const char* intel_dnn_operation_name[kDnnNumOp];
 
-typedef enum {
-    kDnnMacroOpNone,
-    kDnnMacroOpLstm,
-    kDnnMacroOpBiLstm,
-    kDnnNumMacroOp
-} intel_dnn_macro_operation_t;
-
-extern const char *intel_dnn_macro_operation_name[kDnnNumMacroOp];
-
 typedef enum {
     kDnnFloat,
     kDnnInt,
@@ -262,7 +243,6 @@ struct intel_dnn_component_t {
     uint32_t num_bytes_per_input;
     uint32_t num_bytes_per_output;
     intel_dnn_operation_t operation;
-    intel_dnn_macro_operation_t macro_operation;
    intel_dnn_orientation_t orientation_in;
     intel_dnn_orientation_t orientation_out;
     union operation_struct_t {

View File

@@ -15,11 +15,10 @@
 #include "gna_limitations.hpp"
 #include "gna/gna_config.hpp"
 
-using namespace ov::intel_gna;
-
-namespace GNAPluginNS {
-namespace GNALimitations {
-namespace Cnn2D {
+namespace ov {
+namespace intel_gna {
+namespace limitations {
+namespace cnn2d {
 
 bool IsEqualToLimit::isValid(const uint32_t val) const {
     return val == compared_value;
@@ -353,7 +352,7 @@ bool AbstractValidator::ValidationSuccesful(const bool throwOnError,
     return error.empty();
 }
 
-} // namespace Cnn2D
+} // namespace cnn2d
IE_SUPPRESS_DEPRECATED_START IE_SUPPRESS_DEPRECATED_START
static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) { static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) {
@ -370,7 +369,7 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st
auto isFusableWithConv = [](InferenceEngine::CNNLayerPtr ptr) { auto isFusableWithConv = [](InferenceEngine::CNNLayerPtr ptr) {
return (LayerInfo(ptr).isFusableWithConv() || LayerInfo(ptr).isNonFunctional() || return (LayerInfo(ptr).isFusableWithConv() || LayerInfo(ptr).isNonFunctional() ||
(LayerInfo(ptr).isPermute() && ((ptr->input()->getLayout() == InferenceEngine::Layout::NCHW && (LayerInfo(ptr).isPermute() && ((ptr->input()->getLayout() == InferenceEngine::Layout::NCHW &&
ptr->GetParamAsInts("order") == GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)) || ptr->GetParamAsInts("order") == permute::GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)) ||
(ptr->input()->getLayout() == InferenceEngine::Layout::CHW && (ptr->input()->getLayout() == InferenceEngine::Layout::CHW &&
ptr->GetParamAsInts("order") == std::vector<int32_t>{0, 2, 1} /* NCW to NWC */)))); ptr->GetParamAsInts("order") == std::vector<int32_t>{0, 2, 1} /* NCW to NWC */))));
}; };
@ -505,12 +504,12 @@ bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concat_layer) {
break; break;
// Convert dims to NHWC layout to allow later verification // Convert dims to NHWC layout to allow later verification
auto new_order = GetPermuteOrder(concat_layout, InferenceEngine::Layout::NHWC); auto new_order = permute::GetPermuteOrder(concat_layout, InferenceEngine::Layout::NHWC);
InferenceEngine::SizeVector new_dims; InferenceEngine::SizeVector new_dims;
for (size_t i = 0; i < dims_size; ++i) { for (size_t i = 0; i < dims_size; ++i) {
new_dims.push_back(in_dims[new_order[i]]); new_dims.push_back(in_dims[new_order[i]]);
} }
concat_axis = GetPermuteOrder(InferenceEngine::Layout::NHWC, concat_layout)[concat_axis]; concat_axis = permute::GetPermuteOrder(InferenceEngine::Layout::NHWC, concat_layout)[concat_axis];
// Looking for any axis with dimension > 1 before concatentaion axis; // Looking for any axis with dimension > 1 before concatentaion axis;
// in general such concatenation is unsupported // in general such concatenation is unsupported
@ -565,7 +564,7 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
startLayer, startLayer,
[&](const InferenceEngine::CNNLayerPtr layer) { [&](const InferenceEngine::CNNLayerPtr layer) {
LayerInfo info(layer); LayerInfo info(layer);
if (GNAPluginNS::LayerTypeFromStr(layer->type) == GNAPluginNS::LayerType::NO_TYPE) { if (LayerTypeFromStr(layer->type) == LayerType::NO_TYPE) {
errMessage = "The plugin does not support layer: " + layer->name + ":" + layer->type + "\n"; errMessage = "The plugin does not support layer: " + layer->name + ":" + layer->type + "\n";
check_result = false; check_result = false;
} }
@ -591,5 +590,6 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
} }
IE_SUPPRESS_DEPRECATED_END IE_SUPPRESS_DEPRECATED_END
-} // namespace GNALimitations
-} // namespace GNAPluginNS
+} // namespace limitations
+} // namespace intel_gna
+} // namespace ov

View File

@@ -4,15 +4,16 @@
 #pragma once
 
-#include "dnn_types.h"
+#include "dnn_types.hpp"
 
 #include <cstdint>
 #include <cpp/ie_cnn_network.h>
 #include <ie_algorithm.hpp>
 #include <legacy/ie_layers.h>
 #include "gna_lib_ver_selector.hpp"
 
-namespace GNAPluginNS {
-namespace GNALimitations {
+namespace ov {
+namespace intel_gna {
+namespace limitations {
 
 constexpr uint32_t bufferMaxSize = 65528;
 
@@ -65,7 +66,8 @@ inline bool IsTransposeSupported(const std::vector<size_t>& shape) {
     return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize;
 }
 
-namespace Cnn2D {
+namespace cnn2d {
struct IsEqualToLimit { struct IsEqualToLimit {
uint32_t compared_value; uint32_t compared_value;
std::string what; std::string what;
@@ -118,11 +120,10 @@ struct VectorOrSquareLimit {
 };
 
 struct RectLimitByChannels {
-    std::vector<std::pair<uint32_t, RectLimit> > limitPerChannel;
+    std::vector<std::pair<uint32_t, RectLimit>> limitPerChannel;
     RectLimit GetByChannels(const uint32_t channels) const;
     bool isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
-    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w,
-        const uint32_t channels, std::string what) const;
+    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, const uint32_t channels, std::string what) const;
 };
struct RectLimitByChannelsAndPrecision { struct RectLimitByChannelsAndPrecision {
@@ -130,8 +131,11 @@ struct RectLimitByChannelsAndPrecision {
     RectLimitByChannels limit_for_int16;
     RectLimitByChannels GetByPrecision(const OvGnaType precision) const;
     bool isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
-    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w,
-        const OvGnaType precision, const uint32_t channels, std::string what) const;
+    std::string GetErrorOrEmpty(const uint32_t h,
+                                const uint32_t w,
+                                const OvGnaType precision,
+                                const uint32_t channels,
+                                std::string what) const;
 };
class AbstractValidator { class AbstractValidator {
@ -144,29 +148,51 @@ protected:
public: public:
virtual ~AbstractValidator() = default; virtual ~AbstractValidator() = default;
virtual bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth, virtual bool ValidateCnn2D(const std::string& name,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t inHeight,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW, const uint32_t inWidth,
OvGnaType inPrecision, bool exception = true) const = 0; const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const = 0;
virtual bool ValidatePooling2D(const std::string& name, virtual bool ValidatePooling2D(const std::string& name,
const uint32_t windowH, const uint32_t windowW, const uint32_t windowH,
const uint32_t strideH, const uint32_t strideW, const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const = 0; bool exception = true) const = 0;
virtual bool ValidateInputPadding(const std::string& name, virtual bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin, const uint32_t pad_h_end, const uint32_t pad_h_begin,
const uint32_t pad_w_begin, const uint32_t pad_w_end, const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h, const uint32_t kernel_h,
const uint32_t kernel_w, const uint32_t kernel_w,
const bool throwOnError = true) const = 0; const bool throwOnError = true) const = 0;
virtual bool ShouldUseOnlyConv2DGnaIface() const = 0; virtual bool ShouldUseOnlyConv2DGnaIface() const = 0;
virtual bool ValidateCnn1D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth, virtual bool ValidateCnn1D(const std::string& name,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t inHeight,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW, const uint32_t inWidth,
OvGnaType inPrecision, bool exception = true) const = 0; const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const = 0;
static std::unique_ptr<AbstractValidator> Create(const std::string&); static std::unique_ptr<AbstractValidator> Create(const std::string&);
}; };
@ -184,29 +210,51 @@ class Validator_30 : public AbstractValidator {
public: public:
Validator_30() = default; Validator_30() = default;
bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth, bool ValidateCnn2D(const std::string& name,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t inHeight,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW, const uint32_t inWidth,
OvGnaType inPrecision, bool exception = true) const override; const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name, bool ValidatePooling2D(const std::string& name,
const uint32_t windowH, const uint32_t windowW, const uint32_t windowH,
const uint32_t strideH, const uint32_t strideW, const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override; bool exception = true) const override;
bool ValidateInputPadding(const std::string& name, bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin, const uint32_t pad_h_end, const uint32_t pad_h_begin,
const uint32_t pad_w_begin, const uint32_t pad_w_end, const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h, const uint32_t kernel_h,
const uint32_t kernel_w, const uint32_t kernel_w,
const bool throwOnError = true) const override; const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override; bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth, bool ValidateCnn1D(const std::string& name,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t inHeight,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW, const uint32_t inWidth,
OvGnaType inPrecision, bool exception = true) const override; const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
}; };
class Validator_35 : public AbstractValidator { class Validator_35 : public AbstractValidator {
@ -251,31 +299,54 @@ class Validator_35 : public AbstractValidator {
public: public:
Validator_35() = default; Validator_35() = default;
bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth, bool ValidateCnn2D(const std::string& name,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t inHeight,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW, const uint32_t inWidth,
OvGnaType inPrecision, bool exception = true) const override; const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name, bool ValidatePooling2D(const std::string& name,
const uint32_t windowH, const uint32_t windowW, const uint32_t windowH,
const uint32_t strideH, const uint32_t strideW, const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override; bool exception = true) const override;
bool ValidateInputPadding(const std::string& name, bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin, const uint32_t pad_h_end, const uint32_t pad_h_begin,
const uint32_t pad_w_begin, const uint32_t pad_w_end, const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h, const uint32_t kernel_h,
const uint32_t kernel_w, const uint32_t kernel_w,
const bool throwOnError = true) const override; const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override; bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth, bool ValidateCnn1D(const std::string& name,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t inHeight,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW, const uint32_t inWidth,
OvGnaType inPrecision, bool exception = true) const override; const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
}; };
} // namespace Cnn2D
} // namespace cnn2d
bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage); bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
@ -293,5 +364,6 @@ IE_SUPPRESS_DEPRECATED_START
bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concatLayer); bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concatLayer);
IE_SUPPRESS_DEPRECATED_END IE_SUPPRESS_DEPRECATED_END
} // namespace GNALimitations } // namespace limitations
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov
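For orientation, a minimal usage sketch of the renamed validator factory declared above; the target string is one of the constants from common/gna_target.hpp, while the layer geometry and the OvGnaTypeInt16 enumerator are illustrative assumptions rather than values taken from this commit:

    #include "backend/gna_limitations.hpp"

    using namespace ov::intel_gna::limitations::cnn2d;

    void check_conv_example() {
        // Create() picks the validator matching the compile target string.
        auto validator = AbstractValidator::Create("GNA_TARGET_3_5");
        if (validator) {
            // Returns false (or throws when exception == true) if the 2D convolution
            // geometry exceeds the limits of the selected GNA generation.
            validator->ValidateCnn2D("conv_1",
                                     /*inHeight*/ 16, /*inWidth*/ 16, /*inChannels*/ 8,
                                     /*kH*/ 3, /*kW*/ 3, /*kN*/ 16,
                                     /*strideH*/ 1, /*strideW*/ 1,
                                     /*dilationH*/ 1, /*dilationW*/ 1,
                                     OvGnaTypeInt16, /*exception*/ false);
        }
    }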
View File
@ -10,9 +10,9 @@
#include "runtime/pwl.h" #include "runtime/pwl.h"
#include "make_pwl.hpp" #include "make_pwl.hpp"
#include "gna_slope_scale.h" #include "gna_slope_scale.hpp"
#include "dnn_types.h" #include "dnn_types.hpp"
#include "backend/gna_types.h" #include "backend/gna_types.hpp"
#include "common/numerical_utils.hpp" #include "common/numerical_utils.hpp"
#include "pwl_input_params.hpp" #include "pwl_input_params.hpp"
#include "pwl_segments_creator_factory.hpp" #include "pwl_segments_creator_factory.hpp"
View File
@ -4,7 +4,7 @@
#pragma once #pragma once
#include "backend/dnn_types.h" #include "backend/dnn_types.hpp"
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
View File
@ -7,7 +7,7 @@
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "backend/gna_types.h" #include "backend/gna_types.hpp"
#include "pwl_border_values_counter.hpp" #include "pwl_border_values_counter.hpp"
namespace ov { namespace ov {
View File
@ -7,7 +7,7 @@
#include <functional> #include <functional>
#include <unordered_map> #include <unordered_map>
#include "backend/dnn_types.h" #include "backend/dnn_types.hpp"
#include "pwl_border_values_counter_identity.hpp" #include "pwl_border_values_counter_identity.hpp"
#include "pwl_segments_creator_identity.hpp" #include "pwl_segments_creator_identity.hpp"
View File
@ -6,7 +6,7 @@
#include "log/debug.hpp" #include "log/debug.hpp"
#include "log/log.hpp" #include "log/log.hpp"
#include "gna_slope_scale.h" #include "gna_slope_scale.hpp"
#include "pwl_input_params.hpp" #include "pwl_input_params.hpp"
#include "pwl_tools.hpp" #include "pwl_tools.hpp"
#include "runtime/pwl.h" #include "runtime/pwl.h"
View File
@ -4,7 +4,7 @@
#include "pwl_tools.hpp" #include "pwl_tools.hpp"
#include "gna_slope_scale.h" #include "gna_slope_scale.hpp"
#include "common/numerical_utils.hpp" #include "common/numerical_utils.hpp"
#include "runtime/pwl.h" #include "runtime/pwl.h"
View File
@ -4,7 +4,7 @@
#pragma once #pragma once
#include "backend/gna_types.h" #include "backend/gna_types.hpp"
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
View File
@ -4,13 +4,17 @@
#pragma once #pragma once
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
namespace common { namespace common {
static constexpr const char* kGnaTargetUnspecified = ""; static constexpr const char* kGnaTargetUnspecified = "";
static constexpr const char* kGnaTarget2_0 = "GNA_TARGET_2_0"; static constexpr const char* kGnaTarget2_0 = "GNA_TARGET_2_0";
static constexpr const char* kGnaTarget3_0 = "GNA_TARGET_3_0"; static constexpr const char* kGnaTarget3_0 = "GNA_TARGET_3_0";
static constexpr const char* kGnaTarget3_1 = "GNA_TARGET_3_1"; static constexpr const char* kGnaTarget3_1 = "GNA_TARGET_3_1";
static constexpr const char* kGnaTarget3_5 = "GNA_TARGET_3_5"; static constexpr const char* kGnaTarget3_5 = "GNA_TARGET_3_5";
static constexpr const char* kGnaDefaultTarget = kGnaTarget3_0; static constexpr const char* kGnaDefaultTarget = kGnaTarget3_0;
} // namespace common } // namespace common
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov
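As a quick illustration of how these renamed constants are combined elsewhere in the plugin (the actual resolution lives in the device helper further below; this is only a sketch):

    #include <string>
    #include "common/gna_target.hpp"

    // Fall back to the default target when no target string was configured.
    inline std::string resolve_target(const std::string& requested) {
        using namespace ov::intel_gna::common;
        return requested == kGnaTargetUnspecified ? kGnaDefaultTarget   // "GNA_TARGET_3_0"
                                                  : requested;
    }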
View File
@ -6,7 +6,9 @@
#include <legacy/ie_layers.h> #include <legacy/ie_layers.h>
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
struct ConnectionDetails { struct ConnectionDetails {
InferenceEngine::CNNLayerPtr input; InferenceEngine::CNNLayerPtr input;
bool needTransposeWeights = false; bool needTransposeWeights = false;
@ -19,4 +21,6 @@ struct ConnectionDetails {
, permute(permute) { , permute(permute) {
} }
}; };
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
View File
@ -13,10 +13,11 @@
#include "ie_input_info.hpp" #include "ie_input_info.hpp"
#include "ie_algorithm.hpp" #include "ie_algorithm.hpp"
#include "backend/dnn_types.h" #include "backend/dnn_types.hpp"
#include "gna_plugin_config.hpp" #include "gna_plugin_config.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
/* /*
* This base structure accumulates all required information for network inputs and outputs * This base structure accumulates all required information for network inputs and outputs
@ -31,11 +32,11 @@ struct GnaDesc {
InferenceEngine::Precision tensor_precision = InferenceEngine::Precision::UNSPECIFIED; InferenceEngine::Precision tensor_precision = InferenceEngine::Precision::UNSPECIFIED;
// gna specific properties // gna specific properties
double scale_factor = GNAPluginNS::kScaleFactorDefault; double scale_factor = kScaleFactorDefault;
intel_dnn_orientation_t orientation = kDnnUnknownOrientation; intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
uint32_t num_elements = 0; uint32_t num_elements = 0;
uint32_t allocated_size = 0; uint32_t allocated_size = 0;
std::vector<void *> ptrs = {}; // ptr per each infer request std::vector<void*> ptrs = {}; // ptr per each infer request
// help methods // help methods
uint32_t get_required_size() const { uint32_t get_required_size() const {
@ -53,25 +54,27 @@ struct GnaDesc {
// helps to get the precision for gna layers, because they use num_bytes instead of precision values // helps to get the precision for gna layers, because they use num_bytes instead of precision values
void set_precision(uint32_t num_bytes) { void set_precision(uint32_t num_bytes) {
switch (num_bytes) { switch (num_bytes) {
case sizeof(int8_t) : { case sizeof(int8_t): {
set_precision(InferenceEngine::Precision::I8); set_precision(InferenceEngine::Precision::I8);
break; break;
} }
case sizeof(int16_t) : { case sizeof(int16_t): {
set_precision(InferenceEngine::Precision::I16); set_precision(InferenceEngine::Precision::I16);
break; break;
} }
case sizeof(int32_t) : { case sizeof(int32_t): {
set_precision(InferenceEngine::Precision::I32); set_precision(InferenceEngine::Precision::I32);
break; break;
} }
default : default:
set_precision(InferenceEngine::Precision::UNSPECIFIED); set_precision(InferenceEngine::Precision::UNSPECIFIED);
} }
} }
InferenceEngine::DataPtr to_ie_data() { InferenceEngine::DataPtr to_ie_data() {
return std::make_shared<InferenceEngine::Data>(name, InferenceEngine::TensorDesc(model_precision, dims, model_layout)); return std::make_shared<InferenceEngine::Data>(
name,
InferenceEngine::TensorDesc(model_precision, dims, model_layout));
} }
}; };
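A short fragment showing how the byte-width overload documented above resolves (illustrative only; it assumes the struct is visible in the current scope):

    // GnaDesc maps a GNA element size in bytes onto an InferenceEngine precision.
    ov::intel_gna::GnaDesc desc;
    desc.set_precision(sizeof(int16_t));   // -> InferenceEngine::Precision::I16
    desc.set_precision(3);                 // unrecognized width -> Precision::UNSPECIFIED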
@ -79,7 +82,9 @@ struct GnaDesc {
* This structure accumulates all required information for one network input * This structure accumulates all required information for one network input
*/ */
struct InputDesc : GnaDesc { struct InputDesc : GnaDesc {
InputDesc(const std::string &name) { this->name = name; } InputDesc(const std::string& name) {
this->name = name;
}
void Update(const InferenceEngine::InputInfo::Ptr inputInfo) { void Update(const InferenceEngine::InputInfo::Ptr inputInfo) {
this->model_precision = inputInfo->getPrecision(); this->model_precision = inputInfo->getPrecision();
@ -101,7 +106,9 @@ struct InputDesc : GnaDesc {
* This structure accumulates all required information for one network output * This structure accumulates all required information for one network output
*/ */
struct OutputDesc : GnaDesc { struct OutputDesc : GnaDesc {
OutputDesc(const std::string &name) { this->name = name; } OutputDesc(const std::string& name) {
this->name = name;
}
void Update(const InferenceEngine::DataPtr outputData) { void Update(const InferenceEngine::DataPtr outputData) {
this->model_precision = outputData->getPrecision(); this->model_precision = outputData->getPrecision();
@ -123,9 +130,9 @@ private:
std::vector<T> infos_; std::vector<T> infos_;
public: public:
GnaNetworkInfo(): infos_({}) { } GnaNetworkInfo() : infos_({}) {}
const T& at(const std::string &key) const { const T& at(const std::string& key) const {
if (key.empty()) { if (key.empty()) {
throw std::invalid_argument("The key cannot be empty"); throw std::invalid_argument("The key cannot be empty");
} }
@ -136,8 +143,8 @@ public:
return *desc_it; return *desc_it;
} }
T& at(const std::string &key) { T& at(const std::string& key) {
return const_cast<T&>( static_cast<const GnaNetworkInfo&>(*this).at(key) ); return const_cast<T&>(static_cast<const GnaNetworkInfo&>(*this).at(key));
} }
typename std::vector<T>::iterator end() { typename std::vector<T>::iterator end() {
@ -156,11 +163,13 @@ public:
}); });
} }
T& operator[](const std::string &key) { T& operator[](const std::string& key) {
if (key.empty()) { if (key.empty()) {
throw std::invalid_argument("The key cannot be empty"); throw std::invalid_argument("The key cannot be empty");
} }
auto desc_it = std::find_if(infos_.begin(), infos_.end(), [&key](const T& desc){return desc.name == key;}); auto desc_it = std::find_if(infos_.begin(), infos_.end(), [&key](const T& desc) {
return desc.name == key;
});
if (desc_it == infos_.end()) { if (desc_it == infos_.end()) {
infos_.push_back(T(key)); infos_.push_back(T(key));
return infos_.back(); return infos_.back();
@ -168,16 +177,25 @@ public:
return *desc_it; return *desc_it;
} }
size_t size() const { return infos_.size(); } size_t size() const {
return infos_.size();
}
bool empty() const { return infos_.empty(); } bool empty() const {
return infos_.empty();
}
const std::vector<T>& Get() const { return infos_; } const std::vector<T>& Get() const {
return infos_;
}
std::vector<T>& Get() { return infos_; } std::vector<T>& Get() {
return infos_;
}
}; };
typedef GnaNetworkInfo<InputDesc> GnaInputs; typedef GnaNetworkInfo<InputDesc> GnaInputs;
typedef GnaNetworkInfo<OutputDesc> GnaOutputs; typedef GnaNetworkInfo<OutputDesc> GnaOutputs;
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov
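A minimal sketch of how the containers above are typically driven ("Input_0" is an arbitrary example name, not one used by the plugin):

    ov::intel_gna::GnaInputs inputs;
    inputs["Input_0"].scale_factor = 2.0;    // operator[] creates the InputDesc on first use
    auto& in = inputs.at("Input_0");         // at() returns the existing entry by name
    // in.scale_factor == 2.0 and inputs.size() == 1 at this point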
View File
@ -8,7 +8,9 @@
#include "openvino/runtime/intel_gna/properties.hpp" #include "openvino/runtime/intel_gna/properties.hpp"
#include "gna/gna_config.hpp" #include "gna/gna_config.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
struct GNAFlags { struct GNAFlags {
uint8_t num_requests = 1; uint8_t num_requests = 1;
bool compact_mode = true; bool compact_mode = true;
@ -22,4 +24,6 @@ struct GNAFlags {
bool input_low_precision = false; bool input_low_precision = false;
ov::log::Level log_level = ov::log::Level::NO; ov::log::Level log_level = ov::log::Level::NO;
}; };
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
View File
@ -4,12 +4,10 @@
#include "layer_quantizer.hpp" #include "layer_quantizer.hpp"
#include "weights_converter.hpp" #include "weights_converter.hpp"
#include "backend/gna_types.h" #include "backend/gna_types.hpp"
#include "common/gna_target.hpp" #include "common/gna_target.hpp"
#include "gna_graph_tools.hpp" #include "gna_graph_tools.hpp"
using namespace GNAPluginNS;
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
namespace frontend { namespace frontend {
@ -252,7 +250,7 @@ void LayerQuantizer::QuantizeWeightsBiases(InferenceEngine::WeightableLayer& wl)
QuantizationData common_data{ QuantizationData common_data{
num_rows, num_rows,
num_columns, num_columns,
GNAPluginNS::kScaleFactorDefault, kScaleFactorDefault,
quant_layer_params->_weights_quant quant_layer_params->_weights_quant
}; };
View File
@ -13,8 +13,6 @@ namespace ov {
namespace intel_gna { namespace intel_gna {
namespace frontend { namespace frontend {
using namespace GNAPluginNS;
/** /**
* @brief Returns layer's target input precision * @brief Returns layer's target input precision
* @return layer's target input precision * @return layer's target input precision
View File
@ -9,7 +9,7 @@
#include "log/debug.hpp" #include "log/debug.hpp"
#include "log/log.hpp" #include "log/log.hpp"
#include "layers/gna_fake_quantize_layer.hpp" #include "layers/gna_fake_quantize_layer.hpp"
#include "backend/gna_types.h" #include "backend/gna_types.hpp"
#include "quantization.hpp" #include "quantization.hpp"
namespace ov { namespace ov {
View File
@ -10,7 +10,7 @@
#include <vector> #include <vector>
#include <cstdint> #include <cstdint>
#include "quantized_layer_params.hpp" #include "quantized_layer_params.hpp"
#include "backend/gna_types.h" #include "backend/gna_types.hpp"
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
View File
@ -3,7 +3,7 @@
// //
#include "scale_factor_calc.hpp" #include "scale_factor_calc.hpp"
#include "gna_slope_scale.h" #include "gna_slope_scale.hpp"
#include "common/numerical_utils.hpp" #include "common/numerical_utils.hpp"
#include "layer_quantizer.hpp" #include "layer_quantizer.hpp"
#include "gna_upstream_iterator.hpp" #include "gna_upstream_iterator.hpp"
@ -323,7 +323,7 @@ bool ScaleFactorCalculator::requantizeInput(InferenceEngine::CNNLayerPtr input,
*/ */
float ScaleFactorCalculator::adjustScaleFactor(float sf, float ScaleFactorCalculator::adjustScaleFactor(float sf,
InferenceEngine::CNNLayer const* cnnLayer, InferenceEngine::CNNLayer const* cnnLayer,
GNAPluginNS::LayerInfo const& layer, LayerInfo const& layer,
QuantizedLayerParams* quantizedParams) const { QuantizedLayerParams* quantizedParams) const {
auto get_rank = [](uint32_t value) { auto get_rank = [](uint32_t value) {
uint8_t rank = 0; uint8_t rank = 0;
@ -365,7 +365,7 @@ float ScaleFactorCalculator::adjustScaleFactor(float sf,
} }
float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const* cnnLayer, float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const* cnnLayer,
GNAPluginNS::LayerInfo const& layer, LayerInfo const& layer,
int inputsSize, int inputsSize,
const bool fake_quantized) const { const bool fake_quantized) const {
auto quantizedParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(*cnnLayer); auto quantizedParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(*cnnLayer);
@ -420,9 +420,9 @@ float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const*
double offset = 0; double offset = 0;
auto powerLayer = dynamic_cast<InferenceEngine::PowerLayer const*>(cnnLayer); auto powerLayer = dynamic_cast<InferenceEngine::PowerLayer const*>(cnnLayer);
if (!powerLayer) { if (!powerLayer) {
std::shared_ptr<ov::intel_gna::op::Pwl> pwl_node; std::shared_ptr<op::Pwl> pwl_node;
if (!cnnLayer->getNode() || if (!cnnLayer->getNode() ||
!(pwl_node = std::dynamic_pointer_cast<ov::intel_gna::op::Pwl>(cnnLayer->getNode()))) { !(pwl_node = std::dynamic_pointer_cast<op::Pwl>(cnnLayer->getNode()))) {
IE_THROW() << "Incorrect Power Layer pointer \n"; IE_THROW() << "Incorrect Power Layer pointer \n";
} else { } else {
auto powerIE = std::dynamic_pointer_cast<ngraph::op::PowerIE>(pwl_node->get_base_node()); auto powerIE = std::dynamic_pointer_cast<ngraph::op::PowerIE>(pwl_node->get_base_node());
@ -587,7 +587,7 @@ float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const*
bool ScaleFactorCalculator::ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cnnLayer, bool ScaleFactorCalculator::ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cnnLayer,
ScaleFactorUpdateResult& result, ScaleFactorUpdateResult& result,
int infiniteLoopCount, int infiniteLoopCount,
const GNAPluginNS::Config& gna_config) const { const Config& gna_config) const {
if ( !cnnLayer ) { if ( !cnnLayer ) {
IE_THROW() << "Incorrect Layer pointer \n"; IE_THROW() << "Incorrect Layer pointer \n";
} }
@ -1234,7 +1234,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerWeightable(InferenceEngine::Weigh
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl); auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
if (conv && !LayerInfo(conv).isConvolutionFilter()) { if (conv && !LayerInfo(conv).isConvolutionFilter()) {
const auto inDepth = GetDataDimByName(conv->insData.front().lock(), InferenceEngine::DataDimName::C); const auto inDepth = GetDataDimByName(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
weights_reducer = GNAConvolutionLayer::getWeightsReducer(*conv); weights_reducer = gna_convolution_layer::getWeightsReducer(*conv);
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max(); weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
weights_reducer = std::max(1.0, weights_reducer); weights_reducer = std::max(1.0, weights_reducer);
} }
View File
@ -49,13 +49,13 @@ struct ScaleFactorUpdateResult {
class ScaleFactorCalculator { class ScaleFactorCalculator {
using Cnt = std::vector<InferenceEngine::CNNLayerPtr>; using Cnt = std::vector<InferenceEngine::CNNLayerPtr>;
Cnt net; Cnt net;
const GNAPluginNS::Config& gna_config; const Config& gna_config;
const bool fake_quantized; const bool fake_quantized;
mutable Cnt::const_iterator idx; mutable Cnt::const_iterator idx;
mutable bool needRestart = false; mutable bool needRestart = false;
int infiniteLoopCount = 0; int infiniteLoopCount = 0;
std::vector<double> getPWLSlopes(const GNAPluginNS::LayerInfo& info) const; std::vector<double> getPWLSlopes(const LayerInfo& info) const;
static float selectBestOutputScaleFactors(float inScale, static float selectBestOutputScaleFactors(float inScale,
std::vector<float> outScales, std::vector<float> outScales,
const std::vector<double>& slopes); const std::vector<double>& slopes);
@ -71,35 +71,35 @@ class ScaleFactorCalculator {
int infiniteLoopCount); int infiniteLoopCount);
float adjustScaleFactor(float sf, float adjustScaleFactor(float sf,
InferenceEngine::CNNLayer const* cnnLayer, InferenceEngine::CNNLayer const* cnnLayer,
GNAPluginNS::LayerInfo const& layer, LayerInfo const& layer,
QuantizedLayerParams* quantizedParams) const; QuantizedLayerParams* quantizedParams) const;
float getActivationScale(InferenceEngine::CNNLayer const* cnnLayer, float getActivationScale(InferenceEngine::CNNLayer const* cnnLayer,
GNAPluginNS::LayerInfo const& layer, LayerInfo const& layer,
int inputsSize, int inputsSize,
const bool fakeQuantize) const; const bool fakeQuantize) const;
bool ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cnnLayer, bool ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cnnLayer,
ScaleFactorUpdateResult& result, ScaleFactorUpdateResult& result,
int infiniteLoopCount, int infiniteLoopCount,
const GNAPluginNS::Config& gna_config) const; const Config& gna_config) const;
bool ScaleFactorPerLayerConcat(InferenceEngine::ConcatLayer* concatLayer, bool ScaleFactorPerLayerConcat(InferenceEngine::ConcatLayer* concatLayer,
ScaleFactorUpdateResult& result, ScaleFactorUpdateResult& result,
int infiniteLoopCount, int infiniteLoopCount,
const GNAPluginNS::Config& gna_config) const; const Config& gna_config) const;
bool ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseLayer* eltwiseLayer, bool ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseLayer* eltwiseLayer,
ScaleFactorUpdateResult& result, ScaleFactorUpdateResult& result,
int infiniteLoopCount, int infiniteLoopCount,
const GNAPluginNS::Config& gna_config) const; const Config& gna_config) const;
bool ScaleFactorPerLayerGemm(InferenceEngine::GemmLayer* gemmLayer, bool ScaleFactorPerLayerGemm(InferenceEngine::GemmLayer* gemmLayer,
ScaleFactorUpdateResult& result, ScaleFactorUpdateResult& result,
int infiniteLoopCount, int infiniteLoopCount,
const GNAPluginNS::Config& gna_config) const; const Config& gna_config) const;
bool ScaleFactorPerLayerWeightable(InferenceEngine::WeightableLayer* wl, bool ScaleFactorPerLayerWeightable(InferenceEngine::WeightableLayer* wl,
ScaleFactorUpdateResult& result, ScaleFactorUpdateResult& result,
int infiniteLoopCount, int infiniteLoopCount,
const GNAPluginNS::Config& gna_config) const; const Config& gna_config) const;
public: public:
ScaleFactorCalculator(Cnt& net, const GNAPluginNS::Config& gna_config, const bool fake_quantized) ScaleFactorCalculator(Cnt& net, const Config& gna_config, const bool fake_quantized)
: net(net), : net(net),
gna_config(gna_config), gna_config(gna_config),
fake_quantized(fake_quantized) { fake_quantized(fake_quantized) {
@ -120,7 +120,7 @@ class ScaleFactorCalculator {
bool CalculateScaleFactor(InferenceEngine::CNNLayerPtr layer_ptr) const { bool CalculateScaleFactor(InferenceEngine::CNNLayerPtr layer_ptr) const {
ScaleFactorUpdateResult result; ScaleFactorUpdateResult result;
needRestart = false; needRestart = false;
auto layer_info = GNAPluginNS::LayerInfo(layer_ptr); auto layer_info = LayerInfo(layer_ptr);
if (layer_info.isConcat()) { if (layer_info.isConcat()) {
if (!ScaleFactorPerLayerConcat(dynamic_cast<InferenceEngine::ConcatLayer*>(layer_ptr.get()), if (!ScaleFactorPerLayerConcat(dynamic_cast<InferenceEngine::ConcatLayer*>(layer_ptr.get()),
View File
@ -42,7 +42,7 @@ InferenceEngine::Blob::Ptr make_fp32_blob(InferenceEngine::Blob::Ptr fp16_blob)
} }
void convert_blobs_precision(InferenceEngine::CNNLayer& layer) { void convert_blobs_precision(InferenceEngine::CNNLayer& layer) {
auto layer_info = GNAPluginNS::LayerInfo(layer); auto layer_info = LayerInfo(layer);
if (layer_info.isWeightable()) { if (layer_info.isWeightable()) {
InferenceEngine::WeightableLayer& wl = dynamic_cast<InferenceEngine::WeightableLayer&>(layer); InferenceEngine::WeightableLayer& wl = dynamic_cast<InferenceEngine::WeightableLayer&>(layer);
View File
@ -91,8 +91,8 @@ std::vector<char> GetStringAsTlv(Gna2TlvType type, const std::string& s) {
Gna2DeviceVersion getEmbeddedTargetFromCompileTarget(const std::string compileTarget) { Gna2DeviceVersion getEmbeddedTargetFromCompileTarget(const std::string compileTarget) {
static const std::map<std::string, Gna2DeviceVersion> targetMap = { static const std::map<std::string, Gna2DeviceVersion> targetMap = {
{GNAPluginNS::common::kGnaTarget3_1, Gna2DeviceVersionEmbedded3_1}, {common::kGnaTarget3_1, Gna2DeviceVersionEmbedded3_1},
{GNAPluginNS::common::kGnaTarget3_5, Gna2DeviceVersionEmbedded3_5}, {common::kGnaTarget3_5, Gna2DeviceVersionEmbedded3_5},
}; };
auto found = targetMap.find(compileTarget); auto found = targetMap.find(compileTarget);
if (found == targetMap.end()) { if (found == targetMap.end()) {
View File
@ -6,7 +6,7 @@
#include <gna2-common-api.h> #include <gna2-common-api.h>
#include <gna2-model-api.h> #include <gna2-model-api.h>
#include "backend/dnn_types.h" #include "backend/dnn_types.hpp"
#include <cstdint> #include <cstdint>
View File
@ -29,10 +29,12 @@ static inline bool FoundPartToTranspose(const std::vector<TranspositionInfo> &tr
return partToTranspose != std::end(transpositionInfo); return partToTranspose != std::end(transpositionInfo);
} }
namespace GNAPluginNS { namespace ov {
using gna_memory_type = GNAPluginNS::memory::GNAMemoryInterface; namespace intel_gna {
using gna_memory_float = GNAPluginNS::memory::GNAMemory<memory::GNAFloatAllocator>;
using gna_memory_device = GNAPluginNS::memory::GNAMemory<>; using gna_memory_type = memory::GNAMemoryInterface;
using gna_memory_float = memory::GNAMemory<memory::GNAFloatAllocator>;
using gna_memory_device = memory::GNAMemory<>;
using DnnComponentsForLayer = std::list<std::pair<std::string, intel_dnn_component_t>>; using DnnComponentsForLayer = std::list<std::pair<std::string, intel_dnn_component_t>>;
using MemoryConnection = std::list<std::pair<std::string, GNAMemoryLayer>>; using MemoryConnection = std::list<std::pair<std::string, GNAMemoryLayer>>;
@ -40,4 +42,6 @@ namespace GNAPluginNS {
using SplitConnection = std::unordered_map<std::string, GNASplitLayer>; using SplitConnection = std::unordered_map<std::string, GNASplitLayer>;
using CropConnection = std::unordered_map<std::string, GNACropLayer>; using CropConnection = std::unordered_map<std::string, GNACropLayer>;
using ConstConnections = std::unordered_map<std::string, void*>; using ConstConnections = std::unordered_map<std::string, void*>;
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
View File
@ -80,9 +80,9 @@ uint8_t* GNADeviceHelper::alloc(uint32_t size_requested, uint32_t *size_granted)
return static_cast<uint8_t *>(memPtr); return static_cast<uint8_t *>(memPtr);
} }
void GNADeviceHelper::tagMemoryRegion(void* memPtr, const GNAPluginNS::memory::rRegion tag) { void GNADeviceHelper::tagMemoryRegion(void* memPtr, const memory::rRegion tag) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync }; std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
using GNAPluginNS::memory::rRegion; using memory::rRegion;
static const std::map<rRegion, Gna2MemoryTag> tagMap { static const std::map<rRegion, Gna2MemoryTag> tagMap {
{rRegion::REGION_INPUTS, Gna2MemoryTagInput}, {rRegion::REGION_INPUTS, Gna2MemoryTagInput},
{rRegion::REGION_OUTPUTS, Gna2MemoryTagOutput}, {rRegion::REGION_OUTPUTS, Gna2MemoryTagOutput},
@ -192,7 +192,7 @@ void GNADeviceHelper::enforceLegacyCnns(Gna2Model& gnaModel) {
void GNADeviceHelper::enforceLegacyCnnsWhenNeeded(Gna2Model& gnaModel) { void GNADeviceHelper::enforceLegacyCnnsWhenNeeded(Gna2Model& gnaModel) {
for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) { for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) {
auto& op = gnaModel.Operations[i]; auto& op = gnaModel.Operations[i];
if (GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(op)) { if (backend::AMIntelDNN::isOperationCnnLegacySpecific(op)) {
enforceLegacyCnn(op); enforceLegacyCnn(op);
} }
} }
@ -207,7 +207,7 @@ uint32_t GNADeviceHelper::createModel(Gna2Model& gnaModel) const {
} }
enforceLegacyCnnsWhenNeeded(gnaModel); enforceLegacyCnnsWhenNeeded(gnaModel);
GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(gnaModel, legacyExecTarget); backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(gnaModel, legacyExecTarget);
if (per_model_diagnostics) { if (per_model_diagnostics) {
std::string path = std::string path =
@ -240,10 +240,10 @@ bool GNADeviceHelper::enforceLegacyCnnNeeded() const {
Gna2DeviceVersion GNADeviceHelper::parseTarget(const std::string& target) { Gna2DeviceVersion GNADeviceHelper::parseTarget(const std::string& target) {
static const std::map<std::string, Gna2DeviceVersion> targetMap { static const std::map<std::string, Gna2DeviceVersion> targetMap {
{GNAPluginNS::common::kGnaTarget2_0, Gna2DeviceVersion2_0}, {common::kGnaTarget2_0, Gna2DeviceVersion2_0},
{GNAPluginNS::common::kGnaTarget3_0, Gna2DeviceVersion3_0}, {common::kGnaTarget3_0, Gna2DeviceVersion3_0},
{GNAPluginNS::common::kGnaTarget3_5, Gna2DeviceVersion3_5}, {common::kGnaTarget3_5, Gna2DeviceVersion3_5},
{GNAPluginNS::common::kGnaTargetUnspecified, Gna2DeviceVersionSoftwareEmulation}, {common::kGnaTargetUnspecified, Gna2DeviceVersionSoftwareEmulation},
}; };
const auto f = targetMap.find(target); const auto f = targetMap.find(target);
if (f != targetMap.end()) { if (f != targetMap.end()) {
@ -254,13 +254,13 @@ Gna2DeviceVersion GNADeviceHelper::parseTarget(const std::string& target) {
Gna2DeviceVersion GNADeviceHelper::getDefaultTarget() const { Gna2DeviceVersion GNADeviceHelper::getDefaultTarget() const {
if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation) if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation)
return parseTarget(GNAPluginNS::common::kGnaDefaultTarget); return parseTarget(common::kGnaDefaultTarget);
return detectedGnaDevVersion; return detectedGnaDevVersion;
} }
Gna2DeviceVersion GNADeviceHelper::getTargetDevice(const bool execTarget) const { Gna2DeviceVersion GNADeviceHelper::getTargetDevice(const bool execTarget) const {
const auto declared = execTarget ? executionTarget : compileTarget; const auto declared = execTarget ? executionTarget : compileTarget;
if (declared == GNAPluginNS::common::kGnaTargetUnspecified) { if (declared == common::kGnaTargetUnspecified) {
return execTarget ? getDefaultTarget() : getTargetDevice(true); return execTarget ? getDefaultTarget() : getTargetDevice(true);
} }
return parseTarget(declared); return parseTarget(declared);
@ -465,15 +465,15 @@ const std::map <const std::pair<Gna2OperationType, int32_t>, const std::string>
{{Gna2OperationTypeThreshold, 1}, "Output"} {{Gna2OperationTypeThreshold, 1}, "Output"}
}; };
GNAPluginNS::RequestStatus GNADeviceHelper::waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds) { RequestStatus GNADeviceHelper::waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds) {
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync }; std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
const auto status = Gna2RequestWait(requestID, static_cast<uint32_t>(timeoutMilliseconds)); const auto status = Gna2RequestWait(requestID, static_cast<uint32_t>(timeoutMilliseconds));
if (status == Gna2StatusWarningDeviceBusy) { if (status == Gna2StatusWarningDeviceBusy) {
return GNAPluginNS::RequestStatus::kPending; return RequestStatus::kPending;
} }
unwaitedRequestIds.erase(requestID); unwaitedRequestIds.erase(requestID);
if (status == Gna2StatusDriverQoSTimeoutExceeded) { if (status == Gna2StatusDriverQoSTimeoutExceeded) {
return GNAPluginNS::RequestStatus::kAborted; return RequestStatus::kAborted;
} }
if (per_request_diagnostics) { if (per_request_diagnostics) {
@ -485,7 +485,7 @@ GNAPluginNS::RequestStatus GNADeviceHelper::waitForRequest(uint32_t requestID, i
// handle error case after updating statistics data. // handle error case after updating statistics data.
checkGna2Status(status, "Gna2RequestWait"); checkGna2Status(status, "Gna2RequestWait");
return GNAPluginNS::RequestStatus::kCompleted; return RequestStatus::kCompleted;
} }
GNADeviceHelper::DumpResult GNADeviceHelper::dumpXnn(const uint32_t modelId) { GNADeviceHelper::DumpResult GNADeviceHelper::dumpXnn(const uint32_t modelId) {
@ -559,7 +559,7 @@ void GNADeviceHelper::close() {
for (auto requestId : requestsToClose) for (auto requestId : requestsToClose)
try { try {
if (waitForRequest(requestId) == GNAPluginNS::RequestStatus::kPending) if (waitForRequest(requestId) == RequestStatus::kPending)
log::warning() << "Request with Id " << requestId << " is still pending"; log::warning() << "Request with Id " << requestId << " is still pending";
} catch (...) { } catch (...) {
log::warning() << "Request with Id " << requestId << " was not awaited successfully"; log::warning() << "Request with Id " << requestId << " was not awaited successfully";
@ -598,10 +598,10 @@ void GNADeviceHelper::getGnaPerfCounters(std::map<std::string, InferenceEngine::
std::string GNADeviceHelper::GetCompileTarget() const { std::string GNADeviceHelper::GetCompileTarget() const {
static const std::map<Gna2DeviceVersion, std::string> targetMap = { static const std::map<Gna2DeviceVersion, std::string> targetMap = {
{Gna2DeviceVersion2_0, GNAPluginNS::common::kGnaTarget2_0}, {Gna2DeviceVersion2_0, common::kGnaTarget2_0},
{Gna2DeviceVersion3_0, GNAPluginNS::common::kGnaTarget3_0}, {Gna2DeviceVersion3_0, common::kGnaTarget3_0},
{Gna2DeviceVersion3_5, GNAPluginNS::common::kGnaTarget3_5}, {Gna2DeviceVersion3_5, common::kGnaTarget3_5},
{Gna2DeviceVersionEmbedded3_5, GNAPluginNS::common::kGnaTarget3_5}, {Gna2DeviceVersionEmbedded3_5, common::kGnaTarget3_5},
}; };
const auto target = getTargetDevice(false); const auto target = getTargetDevice(false);
auto found = targetMap.find(target); auto found = targetMap.find(target);
@ -616,7 +616,7 @@ uint32_t GNADeviceHelper::maxLayersCount() const {
} }
uint32_t GNADeviceHelper::retrieveMaxLayersCount() { uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
using namespace GNAPluginNS::GNALimitations; using namespace limitations;
switch (getTargetDevice(true)) { switch (getTargetDevice(true)) {
case Gna2DeviceVersion1_0: case Gna2DeviceVersion1_0:
View File
@ -34,7 +34,7 @@
/** /**
* holds gna - style handle in RAII way * holds gna - style handle in RAII way
*/ */
class GNADeviceHelper : public GNAPluginNS::GNADevice { class GNADeviceHelper : public ov::intel_gna::GNADevice {
using UnwaitedRequestIds = std::set<uint32_t>; using UnwaitedRequestIds = std::set<uint32_t>;
static std::mutex acrossPluginsSync; static std::mutex acrossPluginsSync;
static std::string decoratedGnaLibVersion() { static std::string decoratedGnaLibVersion() {
@ -92,7 +92,7 @@ public:
void dumpAllAllocations(uint64_t idx, const std::string& infix) const; void dumpAllAllocations(uint64_t idx, const std::string& infix) const;
uint8_t *alloc(uint32_t size_requested, uint32_t *size_granted); uint8_t *alloc(uint32_t size_requested, uint32_t *size_granted);
void tagMemoryRegion(void* memPtr, const GNAPluginNS::memory::rRegion memoryTag); void tagMemoryRegion(void* memPtr, const ov::intel_gna::memory::rRegion memoryTag);
void releaseModel(const uint32_t model_id); void releaseModel(const uint32_t model_id);
static uint32_t getNumberOfGnaDevices(); static uint32_t getNumberOfGnaDevices();
@ -155,7 +155,7 @@ public:
/** /**
* @see GNADevice::waitForRequest() * @see GNADevice::waitForRequest()
*/ */
GNAPluginNS::RequestStatus waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds = MAX_TIMEOUT) override; ov::intel_gna::RequestStatus waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds = MAX_TIMEOUT) override;
/** /**
* @see GNADevice::maxLayersCount() * @see GNADevice::maxLayersCount()

View File
#include "memory/gna_mem_regions.hpp" #include "memory/gna_mem_regions.hpp"
#include "gna_lib_ver_selector.hpp" #include "gna_lib_ver_selector.hpp"
using GNAPluginNS::memory::rRegion; using ov::intel_gna::memory::rRegion;
struct GnaAllocation { struct GnaAllocation {
void* ptr = nullptr; void* ptr = nullptr;
View File
@ -13,10 +13,12 @@
enum Gna2AccelerationMode; enum Gna2AccelerationMode;
class Gna2Model; class Gna2Model;
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
// Interface name is different to the file name due to legacy reasons. // Interface name is different to the file name due to legacy reasons.
// 1. Implementation file names should be changed in next PR. // 1. Implementation file names should be changed in next PR.
// 2. Implementation of interface should be moved to GNAPluginNS namespace // 2. Implementation of interface should be moved to ov::intel_gna namespace
/** /**
* @interface Interface for invoking operation on GNA device. * @interface Interface for invoking operation on GNA device.
@ -57,10 +59,10 @@ public:
* @brief Wait for request to be finished. * @brief Wait for request to be finished.
* @param requestID id of request enqueued on device * @param requestID id of request enqueued on device
* @param timeoutMilliseconds maximum timeout to be used for waiting * @param timeoutMilliseconds maximum timeout to be used for waiting
* @return status of request given to the method. @see GNAPluginNS::RequestStatus. * @return status of request given to the method. @see RequestStatus.
* @throw Exception in case of error * @throw Exception in case of error
*/ */
virtual GNAPluginNS::RequestStatus waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds) = 0; virtual RequestStatus waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds) = 0;
/** /**
* @brief Return maximum number of layers supported by device. * @brief Return maximum number of layers supported by device.
@ -74,4 +76,5 @@ public:
virtual void close() {} virtual void close() {}
}; };
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov
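A hedged sketch of how a caller is expected to poll this interface (device and request_id are placeholders; the enqueue step is not part of the excerpt above):

    // `device` is some concrete ov::intel_gna::GNADevice implementation and
    // `request_id` identifies an already enqueued request.
    auto status = device.waitForRequest(request_id, /*timeoutMilliseconds*/ 50);
    while (status == ov::intel_gna::RequestStatus::kPending) {
        status = device.waitForRequest(request_id, /*timeoutMilliseconds*/ 50);
    }
    if (status == ov::intel_gna::RequestStatus::kAborted) {
        // the request was aborted (e.g. driver QoS timeout exceeded); handle it here
    }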
View File
@ -15,7 +15,8 @@
#include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp> #include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp>
#include <ie_icore.hpp> #include <ie_icore.hpp>
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
class GNAExecutableNetwork : public InferenceEngine::IExecutableNetworkInternal { class GNAExecutableNetwork : public InferenceEngine::IExecutableNetworkInternal {
std::shared_ptr<GNAPlugin> plg; std::shared_ptr<GNAPlugin> plg;
@ -135,4 +136,5 @@ class GNAExecutableNetwork : public InferenceEngine::IExecutableNetworkInternal
} }
}; };
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov
View File
@ -10,7 +10,9 @@
#include "gna_graph_tools.hpp" #include "gna_graph_tools.hpp"
#include "layers/gna_layer_helpers.hpp" #include "layers/gna_layer_helpers.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
/** /**
* @brief Modify child layers walking order to maintain strict ordering required for gna_fuse logic * @brief Modify child layers walking order to maintain strict ordering required for gna_fuse logic
*/ */
@ -99,4 +101,5 @@ inline FuzedLayersContainer make_fuzed_order(InferenceEngine::CNNLayer* origin)
return fusedCnt; return fusedCnt;
} }
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov
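A small usage sketch for the helper above (purely illustrative; the element type of FuzedLayersContainer is not visible in this excerpt, so the loop only assumes the container is iterable):

    // `origin` is the InferenceEngine::CNNLayer* whose children are being fused.
    auto fused_order = ov::intel_gna::make_fuzed_order(origin);
    for (const auto& child : fused_order) {
        // visit children in the strict order required by the gna_fuse logic
        (void)child;
    }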
View File
@ -43,12 +43,11 @@
using namespace InferenceEngine; using namespace InferenceEngine;
using namespace std; using namespace std;
using namespace ov::intel_gna; using namespace ov::intel_gna;
using namespace GNAPluginNS;
using namespace ov::intel_gna::frontend; using namespace ov::intel_gna::frontend;
using namespace ov::intel_gna::common; using namespace ov::intel_gna::common;
using namespace memory; using namespace memory;
static bool CheckIFLastComponentIsPrecededByConv2D(const GNAPluginNS::backend::DnnComponents::storage_type& components, static bool CheckIFLastComponentIsPrecededByConv2D(const backend::DnnComponents::storage_type& components,
bool verify_with_pooling = true) { bool verify_with_pooling = true) {
bool proceded_by_conv2D = false; bool proceded_by_conv2D = false;
auto last_element = components.rbegin(); auto last_element = components.rbegin();
@ -71,15 +70,15 @@ static bool CheckIFLastComponentIsPrecededByConv2D(const GNAPluginNS::backend::D
GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config) : gna_config(gna_config) {} GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config) : gna_config(gna_config) {}
void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<GNAPluginNS::gna_memory_type> gnaMemPtr) { void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr) {
this->gnamem = std::move(gnaMemPtr); this->gnamem = std::move(gnaMemPtr);
} }
void GNAGraphCompiler::setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr) { void GNAGraphCompiler::setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr) {
this->dnn = std::move(dnnPtr); this->dnn = std::move(dnnPtr);
} }
void GNAGraphCompiler::setInputsPtr(std::shared_ptr<GNAPluginNS::GnaInputs> inputsPtr) { void GNAGraphCompiler::setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr) {
this->inputs_ptr_ = std::move(inputsPtr); this->inputs_ptr_ = std::move(inputsPtr);
} }
@ -110,7 +109,7 @@ void GNAGraphCompiler::fillMemoryConnections(std::unordered_map<std::string,
void GNAGraphCompiler::fillConcatConnections(InferenceEngine::CNNLayerPtr layer) { void GNAGraphCompiler::fillConcatConnections(InferenceEngine::CNNLayerPtr layer) {
// creating connection for each layer outputs as form of extramap // creating connection for each layer outputs as form of extramap
GNAPluginNS::GNAConcatLayer layerInfoItem(layer); GNAConcatLayer layerInfoItem(layer);
size_t concat_size = 0; size_t concat_size = 0;
std::string& id = layer->name; std::string& id = layer->name;
@ -148,7 +147,7 @@ void GNAGraphCompiler::fillConcatConnections(InferenceEngine::CNNLayerPtr layer)
void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) { void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) {
// creating connection for each layer inputs as form of extramap // creating connection for each layer inputs as form of extramap
GNAPluginNS::GNASplitLayer layerInfoItem(layer); GNASplitLayer layerInfoItem(layer);
size_t split_size = 0; size_t split_size = 0;
std::string& id = layer->name; std::string& id = layer->name;
IE_ASSERT(!layer->insData.empty()); IE_ASSERT(!layer->insData.empty());
@ -214,16 +213,16 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
split_connection.emplace(id, layerInfoItem); split_connection.emplace(id, layerInfoItem);
} }
void GNAPluginNS::GNAGraphCompiler::SetValidatorTarget(const std::string& target) { void GNAGraphCompiler::SetValidatorTarget(const std::string& target) {
auto temp = GNALimitations::Cnn2D::AbstractValidator::Create(target); auto temp = limitations::cnn2d::AbstractValidator::Create(target);
cnn2dValidator.reset(temp.release()); cnn2dValidator.reset(temp.release());
} }
bool GNAPluginNS::GNAGraphCompiler::ShouldUseOnlyConv2DGnaIface() const { bool GNAGraphCompiler::ShouldUseOnlyConv2DGnaIface() const {
return cnn2dValidator && cnn2dValidator->ShouldUseOnlyConv2DGnaIface(); return cnn2dValidator && cnn2dValidator->ShouldUseOnlyConv2DGnaIface();
} }
void GNAPluginNS::GNAGraphCompiler::ValidateCnn2D(const std::string& name, void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
const uint32_t inHeight, const uint32_t inHeight,
const uint32_t inWidth, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t inChannels,
@ -245,7 +244,7 @@ void GNAPluginNS::GNAGraphCompiler::ValidateCnn2D(const std::string& name,
} }
} }
void GNAPluginNS::GNAGraphCompiler::ValidatePooling2D(const std::string& name, void GNAGraphCompiler::ValidatePooling2D(const std::string& name,
const uint32_t windowH, const uint32_t windowH,
const uint32_t windowW, const uint32_t windowW,
const uint32_t strideH, const uint32_t strideH,
@ -280,9 +279,8 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer)
} }
void GNAGraphCompiler::assertConvolutionLayoutProper(const InferenceEngine::DataPtr& data) { void GNAGraphCompiler::assertConvolutionLayoutProper(const InferenceEngine::DataPtr& data) {
if (data->getLayout() != Layout::NHWC && if (data->getLayout() != InferenceEngine::Layout::NHWC && data->getLayout() != InferenceEngine::Layout::NCHW &&
data->getLayout() != Layout::NCHW && data->getLayout() != InferenceEngine::Layout::NC) {
data->getLayout() != Layout::NC) {
THROW_GNA_EXCEPTION << "layer: \"Convolution\" with layout " << data->getLayout() << " isn't currently supported on GNA"; THROW_GNA_EXCEPTION << "layer: \"Convolution\" with layout " << data->getLayout() << " isn't currently supported on GNA";
} }
} }
@ -338,10 +336,10 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
// Map 2d convolution to 1d if it's possible. // Map 2d convolution to 1d if it's possible.
if (!ShouldUseOnlyConv2DGnaIface() && if (!ShouldUseOnlyConv2DGnaIface() &&
GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, in_channels, gna_convolution_layer::isMappableFrom2DTo1D(in_height, in_width, in_channels,
convolution._kernel_y, convolution._kernel_x, convolution._kernel_y, convolution._kernel_x,
convolution._stride_y, convolution._stride_x)) { convolution._stride_y, convolution._stride_x)) {
transpose_h_w = GNAConvolutionLayer::should_transpose_h_w(in_height, transpose_h_w = gna_convolution_layer::should_transpose_h_w(in_height,
convolution._kernel_y, convolution._kernel_y,
in_channels, in_channels,
convolution._stride_y); convolution._stride_y);
@ -382,7 +380,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
} }
if (ShouldUseOnlyConv2DGnaIface() || if (ShouldUseOnlyConv2DGnaIface() ||
GNAConvolutionLayer::is3DInputOr2DKernel(in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x) || gna_convolution_layer::is3DInputOr2DKernel(in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x) ||
in_height != 1) { in_height != 1) {
// TensorFlow default layout is NHWC // TensorFlow default layout is NHWC
// OpenVino Default layout is NCHW // OpenVino Default layout is NCHW
@ -518,7 +516,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
// Keep both variants of kaldi models working: // Keep both variants of kaldi models working:
// Old one has layout which is different from NHWC // Old one has layout which is different from NHWC
// New one has layout NHWC, but it is mapped from 2d by H // New one has layout NHWC, but it is mapped from 2d by H
if (inputs->getLayout() == Layout::NHWC && !transpose_h_w) { if (inputs->getLayout() == InferenceEngine::Layout::NHWC && !transpose_h_w) {
currentComponent.orientation_in = kDnnInterleavedOrientation; currentComponent.orientation_in = kDnnInterleavedOrientation;
currentComponent.orientation_out = kDnnInterleavedOrientation; currentComponent.orientation_out = kDnnInterleavedOrientation;
} }
@ -536,7 +534,8 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
// TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know how kaldi will handle that // TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know how kaldi will handle that
if (!dnn->do_rotate_input) { if (!dnn->do_rotate_input) {
if ((inputs->getLayout() != Layout::NHWC || transpose_h_w) && LayerInfo(connectedInputLayer).isInput()) { if ((inputs->getLayout() != InferenceEngine::Layout::NHWC || transpose_h_w) &&
LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features are opposite orientation // Kaldi features are opposite orientation
dnn->do_rotate_input = true; dnn->do_rotate_input = true;
dnn->num_rotate_rows = effectiveStride; dnn->num_rotate_rows = effectiveStride;
@ -699,7 +698,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
currentComponent.num_bytes_per_input = inputs->getPrecision().size(); currentComponent.num_bytes_per_input = inputs->getPrecision().size();
currentComponent.num_bytes_per_output = outputs->getPrecision().size(); currentComponent.num_bytes_per_output = outputs->getPrecision().size();
if (inputs->getLayout() == Layout::NHWC) { if (inputs->getLayout() == InferenceEngine::Layout::NHWC) {
currentComponent.orientation_in = kDnnInterleavedOrientation; currentComponent.orientation_in = kDnnInterleavedOrientation;
currentComponent.orientation_out = kDnnInterleavedOrientation; currentComponent.orientation_out = kDnnInterleavedOrientation;
} }
@ -713,7 +712,8 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input; auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
// TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know how kaldi will handle that // TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know how kaldi will handle that
if (!dnn->do_rotate_input && inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) { if (!dnn->do_rotate_input && inputs->getLayout() != InferenceEngine::Layout::NHWC &&
LayerInfo(connectedInputLayer).isInput()) {
// Kaldi features are opposite orientation // Kaldi features are opposite orientation
dnn->do_rotate_input = true; dnn->do_rotate_input = true;
dnn->num_rotate_rows = in_channels; dnn->num_rotate_rows = in_channels;
@ -766,9 +766,9 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto input = layer->insData[0].lock(); auto input = layer->insData[0].lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(input, GNALimitations::GetMinBatchToFitInBuffer(input), 8)->getDims(); auto reshaped_dims = Get2DReshapedData(input, limitations::GetMinBatchToFitInBuffer(input), 8)->getDims();
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor; limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0]; uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in; uint32_t num_rows_out = num_rows_in;
@ -914,10 +914,10 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
const uint32_t c_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::C); const uint32_t c_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::C);
if (w_dim_in == 1) { // swap dimensions if needed to support swapped 1D case if (w_dim_in == 1) { // swap dimensions if needed to support swapped 1D case
swap(h_dim_in, w_dim_in); std::swap(h_dim_in, w_dim_in);
swap(h_dim_out, w_dim_out); std::swap(h_dim_out, w_dim_out);
swap(pooling._kernel[X_AXIS], pooling._kernel[Y_AXIS]); std::swap(pooling._kernel[X_AXIS], pooling._kernel[Y_AXIS]);
swap(pooling._stride[X_AXIS], pooling._stride[Y_AXIS]); std::swap(pooling._stride[X_AXIS], pooling._stride[Y_AXIS]);
} }
void* ptr_inputs = nullptr; void* ptr_inputs = nullptr;
@ -968,9 +968,9 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
// but it does not use any specific new GNA features it should be correct to import and run using previous GNA HW // but it does not use any specific new GNA features it should be correct to import and run using previous GNA HW
if (!is2DPooling) { if (!is2DPooling) {
const auto hLegacy = const auto hLegacy =
GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy(h_dim_in, pooling._stride[X_AXIS]); gna_convolution_layer::outputFromPoolingLegacy(h_dim_in, pooling._stride[X_AXIS]);
const auto wLegacy = const auto wLegacy =
GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy(w_dim_in, pooling._stride[Y_AXIS]); gna_convolution_layer::outputFromPoolingLegacy(w_dim_in, pooling._stride[Y_AXIS]);
if (num_data_bytes_out < hLegacy * wLegacy * c_dim_out) { if (num_data_bytes_out < hLegacy * wLegacy * c_dim_out) {
num_data_bytes_out = hLegacy * wLegacy * c_dim_out; num_data_bytes_out = hLegacy * wLegacy * c_dim_out;
} }
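The fallback above reserves enough output memory for the legacy pooling shape as well as the new one. Assuming the legacy helper rounds as (in - 1) / stride + 1 (an assumption, not confirmed by this diff), the size guard amounts to:

```cpp
#include <algorithm>
#include <cstdint>

// Assumed behaviour of outputFromPoolingLegacy: stride-only rounding, ignoring the pool window.
static uint32_t output_from_pooling_legacy(uint32_t in, uint32_t stride) {
    return (in - 1) / stride + 1;
}

// Mirrors the guard above: never reserve less than the legacy HW would have produced.
static uint32_t min_output_size(uint32_t h_in, uint32_t w_in, uint32_t c_out,
                                uint32_t stride_h, uint32_t stride_w, uint32_t current) {
    const uint32_t legacy = output_from_pooling_legacy(h_in, stride_h) *
                            output_from_pooling_legacy(w_in, stride_w) * c_out;
    return std::max(current, legacy);
}
```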
@ -1007,7 +1007,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims(); auto reshaped_dims = Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims();
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0]; uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in; uint32_t num_rows_out = num_rows_in;
@ -1068,7 +1068,7 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
} }
// Concat axis validation // Concat axis validation
if (!GNALimitations::ValidateConvConcatAxis(concatLayer)) { if (!limitations::ValidateConvConcatAxis(concatLayer)) {
std::ostringstream in_dims_oss; std::ostringstream in_dims_oss;
auto in_dims = concatLayer->insData[0].lock()->getDims(); auto in_dims = concatLayer->insData[0].lock()->getDims();
std::copy(in_dims.begin(), in_dims.end(), std::ostream_iterator<size_t>(in_dims_oss, ",")); std::copy(in_dims.begin(), in_dims.end(), std::ostream_iterator<size_t>(in_dims_oss, ","));
@ -1147,7 +1147,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
if (!LayerInfo(cropLayer).isCropAffined()) { if (!LayerInfo(cropLayer).isCropAffined()) {
// leave crop as it is // leave crop as it is
GNAPluginNS::GNACropLayer cropLayerInfoItem(layer); GNACropLayer cropLayerInfoItem(layer);
std::string& id = layer->name; std::string& id = layer->name;
crop_connection.emplace(id, cropLayerInfoItem); crop_connection.emplace(id, cropLayerInfoItem);
auto cropLayerInfo = crop_connection.find(cropLayer->name); auto cropLayerInfo = crop_connection.find(cropLayer->name);
@ -1178,7 +1178,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
uint32_t num_rows_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())); uint32_t num_rows_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()));
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor; limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
void* ptr_inputs = nullptr; void* ptr_inputs = nullptr;
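Several primitives in this file derive zero padding the same way: the input row count is rounded up to a multiple of noOfInputsDivisor (8, or 16 when low-precision inputs are enabled; both values are assumptions here) and the difference becomes padding. A small sketch of that arithmetic, with a stand-in for the ALIGN macro:

```cpp
#include <cstdint>
#include <iostream>

// Stand-in for the plugin's ALIGN macro: round n up to the nearest multiple of m.
static uint32_t align_up(uint32_t n, uint32_t m) {
    return ((n + m - 1) / m) * m;
}

int main() {
    const uint32_t num_rows_in = 33;
    const uint32_t divisor = 8;            // assumed limitations::noOfInputsDivisor
    const uint32_t low_prec_divisor = 16;  // assumed limitations::noOfInputsLowPrecDivisor

    std::cout << align_up(num_rows_in, divisor) - num_rows_in << "\n";           // 7 padding rows
    std::cout << align_up(num_rows_in, low_prec_divisor) - num_rows_in << "\n";  // 15 padding rows
    return 0;
}
```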
@ -1234,7 +1234,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
auto& eltwise = dynamic_cast<EltwiseLayer&>(*layer.get()); auto& eltwise = dynamic_cast<EltwiseLayer&>(*layer.get());
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer); auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor; limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
// for eltwise sum/sub in 16-bit precision one input should be 4 bytes and one 2 bytes - detecting that below // for eltwise sum/sub in 16-bit precision one input should be 4 bytes and one 2 bytes - detecting that below
// the names of variables are left for clarity although not always reflecting the real precision/size // the names of variables are left for clarity although not always reflecting the real precision/size
@ -1414,7 +1414,7 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto input_2 = layer->insData[1].lock(); // the second input corresponds to ptr_weights in component auto input_2 = layer->insData[1].lock(); // the second input corresponds to ptr_weights in component
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto inputPrecision = quantized ? Precision(Precision::I16) : input_1->getPrecision(); auto inputPrecision = quantized ? Precision(Precision::I16) : input_1->getPrecision();
uint32_t noOfInputsDivisor = GNALimitations::noOfInputsDivisor; uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;
auto in_dims = input_1->getDims(); auto in_dims = input_1->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front(); auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
@ -1478,7 +1478,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
const auto out_dims = outputs->getDims(); const auto out_dims = outputs->getDims();
Precision inputPrecision; Precision inputPrecision;
uint32_t noOfInputsDivisor = GNALimitations::noOfInputsDivisor; uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;
if (!quantized) { if (!quantized) {
inputPrecision = inputs->getPrecision(); inputPrecision = inputs->getPrecision();
@ -1486,11 +1486,11 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
inputPrecision = Precision(Precision::I16); inputPrecision = Precision(Precision::I16);
} else { } else {
inputPrecision = Precision(Precision::I8); inputPrecision = Precision(Precision::I8);
noOfInputsDivisor = GNALimitations::noOfInputsLowPrecDivisor; noOfInputsDivisor = limitations::noOfInputsLowPrecDivisor;
} }
auto input_data = HasTo2DReshapeData(layer) ? auto input_data = HasTo2DReshapeData(layer) ?
Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8) : inputs; Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8) : inputs;
auto in_dims = input_data->getDims(); auto in_dims = input_data->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front(); auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size; uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size;
@ -1690,7 +1690,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor; limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
uint32_t num_columns_in = GetDimFromBack(inputs->getDims(), 2); uint32_t num_columns_in = GetDimFromBack(inputs->getDims(), 2);
uint32_t num_rows_out = GetDimFromBack(outputs->getDims(), 1); uint32_t num_rows_out = GetDimFromBack(outputs->getDims(), 1);
uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out; uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
@ -1826,7 +1826,7 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
const auto noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? const auto noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor; limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
const uint32_t orginalInputSize = const uint32_t orginalInputSize =
InferenceEngine::details::product(std::next(inputs->getDims().begin()), inputs->getDims().end()); InferenceEngine::details::product(std::next(inputs->getDims().begin()), inputs->getDims().end());
const uint32_t orginalOutputSize = const uint32_t orginalOutputSize =
@ -1842,7 +1842,7 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth; const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth;
const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, noOfInputsDivisor); const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, noOfInputsDivisor);
auto numOutputs = GNAConvolutionLayer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride); auto numOutputs = gna_convolution_layer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride);
numOutputs *= numberOfFilters; numOutputs *= numberOfFilters;
const auto& biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision(); const auto& biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
auto& currentComponent = dnnComponents.addComponent(layer->name, "affine"); auto& currentComponent = dnnComponents.addComponent(layer->name, "affine");
@ -2154,7 +2154,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
} }
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor; limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
// now this can be run on GNA // now this can be run on GNA
if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case
@ -2359,7 +2359,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
// find this input in vector sum all outputs in primitive // find this input in vector sum all outputs in primitive
auto it = std::find_if(concatLayerInfoItem.concatInputLayers.begin(), auto it = std::find_if(concatLayerInfoItem.concatInputLayers.begin(),
concatLayerInfoItem.concatInputLayers.end(), concatLayerInfoItem.concatInputLayers.end(),
[&name](GNAPluginNS::GNAConcatLayer::ConcatConnectedLayerInfo &item) { [&name](GNAConcatLayer::ConcatConnectedLayerInfo &item) {
return item.name == name; return item.name == name;
}); });
if (it != concatLayerInfoItem.concatInputLayers.end()) { if (it != concatLayerInfoItem.concatInputLayers.end()) {
@ -2371,11 +2371,11 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
std::find_if(concat_connection.begin(), std::find_if(concat_connection.begin(),
concat_connection.end(), concat_connection.end(),
[&concatLayerInfo] [&concatLayerInfo]
(const std::pair<std::string, GNAPluginNS::GNAConcatLayer> &concatItem) -> bool { (const std::pair<std::string, GNAConcatLayer> &concatItem) -> bool {
auto it = std::find_if(concatItem.second.concatInputLayers.begin(), auto it = std::find_if(concatItem.second.concatInputLayers.begin(),
concatItem.second.concatInputLayers.end(), concatItem.second.concatInputLayers.end(),
[&concatLayerInfo] [&concatLayerInfo]
(const GNAPluginNS::GNAConcatLayer::ConcatConnectedLayerInfo &item) -> bool { (const GNAConcatLayer::ConcatConnectedLayerInfo &item) -> bool {
return item.name == concatLayerInfo->first; return item.name == concatLayerInfo->first;
}); });
return it != concatItem.second.concatInputLayers.end(); return it != concatItem.second.concatInputLayers.end();
@ -2384,9 +2384,9 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
auto outputSize = std::max(concatLayerInfoItem.reserved_size, num_data_bytes_out * 2); auto outputSize = std::max(concatLayerInfoItem.reserved_size, num_data_bytes_out * 2);
gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(outputSize), 64); gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(outputSize), 64);
std::function<void(GNAConcatLayer, GNAPluginNS::GnaInputs&, ConcatConnection&)> allocate_input_recursively = std::function<void(GNAConcatLayer, GnaInputs&, ConcatConnection&)> allocate_input_recursively =
[&allocate_input_recursively](GNAConcatLayer clayer, [&allocate_input_recursively](GNAConcatLayer clayer,
GNAPluginNS::GnaInputs &inputs, GnaInputs &inputs,
ConcatConnection& concat_connection) { ConcatConnection& concat_connection) {
size_t concatInputIdx = 0; size_t concatInputIdx = 0;
for (auto &&inputLayer : clayer.concatInputLayers) { for (auto &&inputLayer : clayer.concatInputLayers) {
@ -2437,7 +2437,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
gnamem->getQueue(mem_region)->reserve_ptr(layer, ptr, ALIGN64(num_data_bytes_out), 64); gnamem->getQueue(mem_region)->reserve_ptr(layer, ptr, ALIGN64(num_data_bytes_out), 64);
} }
GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
void *ptr, void *ptr,
size_t num_data_bytes_in, size_t num_data_bytes_in,
int32_t offset, int32_t offset,
@ -2465,7 +2465,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
auto minInput = inputs_ptr_->at(prevLayer->name).get_required_size(); auto minInput = inputs_ptr_->at(prevLayer->name).get_required_size();
if (num_data_bytes_in < minInput) { if (num_data_bytes_in < minInput) {
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor; limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
log::debug() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" << ALIGN(minInput, noOfInputsDivisor); log::debug() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" << ALIGN(minInput, noOfInputsDivisor);
num_data_bytes_in = ALIGN(minInput, noOfInputsDivisor); num_data_bytes_in = ALIGN(minInput, noOfInputsDivisor);
} }
@ -2528,7 +2528,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// find this input in vector sum all outputs in primitive // find this input in vector sum all outputs in primitive
auto it = std::find_if(splitLayerInfoItem.splitOutputLayers.begin(), auto it = std::find_if(splitLayerInfoItem.splitOutputLayers.begin(),
splitLayerInfoItem.splitOutputLayers.end(), splitLayerInfoItem.splitOutputLayers.end(),
[&idx, &layer](GNAPluginNS::GNASplitLayer::SplitConnectedLayerInfo &item) { [&idx, &layer](GNASplitLayer::SplitConnectedLayerInfo &item) {
return item.connectedTo == layer && item.insDataIdx == idx; return item.connectedTo == layer && item.insDataIdx == idx;
}); });
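The concat allocation above recurses into nested concat inputs through a std::function that captures itself by reference. A stripped-down sketch of that idiom (the Node type and traversal are illustrative only):

```cpp
#include <functional>
#include <iostream>
#include <vector>

struct Node {                   // hypothetical stand-in for GNAConcatLayer
    std::vector<Node> inputs;
    int size = 0;
};

int main() {
    // The lambda needs a name before it can capture itself, hence std::function + reference capture.
    std::function<int(const Node&)> total_size = [&total_size](const Node& n) {
        int acc = n.size;
        for (const auto& child : n.inputs)
            acc += total_size(child);
        return acc;
    };

    Node leaf1; leaf1.size = 3;
    Node leaf2; leaf2.size = 5;
    Node root;  root.size = 1;
    root.inputs = {leaf1, leaf2};
    std::cout << total_size(root) << "\n";  // 9
    return 0;
}
```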

View File

@ -15,7 +15,6 @@
#include "descriptions/gna_desc.hpp" #include "descriptions/gna_desc.hpp"
#include "descriptions/gna_flags.hpp" #include "descriptions/gna_flags.hpp"
#include "connection_details.hpp" #include "connection_details.hpp"
#include "backend/dnn.hpp"
#include "memory/gna_memory.hpp" #include "memory/gna_memory.hpp"
#include "layers/gna_memory_layer.hpp" #include "layers/gna_memory_layer.hpp"
#include "layers/gna_concat_layer.hpp" #include "layers/gna_concat_layer.hpp"
@ -27,12 +26,14 @@
#include "gna_device.hpp" #include "gna_device.hpp"
#include "gna_data_types.hpp" #include "gna_data_types.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
class GNAGraphCompiler { class GNAGraphCompiler {
private: private:
std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnn; std::shared_ptr<backend::AMIntelDNN> dnn;
std::shared_ptr<GNAPluginNS::gna_memory_type> gnamem; std::shared_ptr<gna_memory_type> gnamem;
std::shared_ptr<GNAPluginNS::GnaInputs> inputs_ptr_; std::shared_ptr<GnaInputs> inputs_ptr_;
// layers with extra storage for connections and additional // layers with extra storage for connections and additional
// non trivial processing // non trivial processing
@ -49,20 +50,20 @@ private:
static void assertConvolutionLayoutProper(const InferenceEngine::DataPtr&); static void assertConvolutionLayoutProper(const InferenceEngine::DataPtr&);
std::vector<uint8_t> static transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols); std::vector<uint8_t> static transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols);
std::unique_ptr<const GNALimitations::Cnn2D::AbstractValidator> cnn2dValidator; std::unique_ptr<const limitations::cnn2d::AbstractValidator> cnn2dValidator;
bool ShouldUseOnlyConv2DGnaIface() const; bool ShouldUseOnlyConv2DGnaIface() const;
public: public:
GNAPluginNS::backend::DnnComponents dnnComponents; backend::DnnComponents dnnComponents;
MemoryConnection memory_connection; MemoryConnection memory_connection;
ConcatConnection concat_connection; ConcatConnection concat_connection;
ConstConnections const_connections; ConstConnections const_connections;
GNAGraphCompiler(const Config& gna_config); GNAGraphCompiler(const Config& gna_config);
void setGNAMemoryPtr(std::shared_ptr<GNAPluginNS::gna_memory_type> gnaMemPtr); void setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr);
void setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr); void setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr);
void setInputsPtr(std::shared_ptr<GNAPluginNS::GnaInputs> inputsPtr); void setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr);
void fillMemoryConnections(std::unordered_map<std::string, void fillMemoryConnections(std::unordered_map<std::string,
std::vector<InferenceEngine::CNNLayerPtr>> &memoryPairs); std::vector<InferenceEngine::CNNLayerPtr>> &memoryPairs);
@ -102,7 +103,7 @@ public:
* in case when we would like to use zero offset and connect from pointer set this to negative * in case when we would like to use zero offset and connect from pointer set this to negative
* @return layer used as input * @return layer used as input
*/ */
GNAPluginNS::ConnectionDetails connectInput(InferenceEngine::CNNLayerPtr layer, ConnectionDetails connectInput(InferenceEngine::CNNLayerPtr layer,
void *pVoid, void *pVoid,
size_t num_data_bytes_in, size_t num_data_bytes_in,
int32_t offset = 0, int32_t offset = 0,
@ -149,4 +150,6 @@ public:
void Reset(); void Reset();
}; };
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
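The pattern repeated across this commit is visible in the header above: the single GNAPluginNS namespace becomes the nested ov::intel_gna namespace, and sub-namespaces such as backend, memory, and limitations lose their old prefixes at the call sites. A minimal before/after sketch (the class name is a placeholder):

```cpp
// Before: everything lived in one flat plugin namespace.
namespace GNAPluginNS {
class ExampleCompiler {};
}  // namespace GNAPluginNS

// After: the plugin follows the nested ov::<device> convention.
namespace ov {
namespace intel_gna {
class ExampleCompiler {};
}  // namespace intel_gna
}  // namespace ov

// Callers switch from GNAPluginNS::ExampleCompiler to ov::intel_gna::ExampleCompiler,
// or pull the namespace in with a using-directive as several .cpp files in this commit do.
```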

View File

@ -11,7 +11,8 @@
#include "layers/gna_layer_info.hpp" #include "layers/gna_layer_info.hpp"
#include "ops/util/util.hpp" #include "ops/util/util.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
/** /**
* @brief checks if it's a reshape from 4d to 3d tensor * @brief checks if it's a reshape from 4d to 3d tensor
@ -104,7 +105,7 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
const auto layout = next->outData[0]->getLayout(); const auto layout = next->outData[0]->getLayout();
const auto order = next->GetParamAsInts("order"); const auto order = next->GetParamAsInts("order");
if (layout != InferenceEngine::Layout::NCHW && layout != InferenceEngine::Layout::CHW || if (layout != InferenceEngine::Layout::NCHW && layout != InferenceEngine::Layout::CHW ||
order != GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC) && order != permute::GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC) &&
order != std::vector<int32_t>{0, 2, 1} /* NCW to NWC */) { order != std::vector<int32_t>{0, 2, 1} /* NCW to NWC */) {
return std::make_pair(nullptr, nullptr); return std::make_pair(nullptr, nullptr);
} }
@ -155,7 +156,7 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
const auto layout = prev->outData[0]->getLayout(); const auto layout = prev->outData[0]->getLayout();
const auto order = prev->GetParamAsInts("order"); const auto order = prev->GetParamAsInts("order");
if (layout != InferenceEngine::Layout::NCHW && layout != InferenceEngine::Layout::CHW || if (layout != InferenceEngine::Layout::NCHW && layout != InferenceEngine::Layout::CHW ||
order != GetPermuteOrder(InferenceEngine::Layout::NHWC, InferenceEngine::Layout::NCHW) && order != permute::GetPermuteOrder(InferenceEngine::Layout::NHWC, InferenceEngine::Layout::NCHW) &&
order != std::vector<int32_t>{0, 2, 1} /* NWC to NCW */) { order != std::vector<int32_t>{0, 2, 1} /* NWC to NCW */) {
return std::make_pair(nullptr, nullptr); return std::make_pair(nullptr, nullptr);
} }
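The orders matched in these two hunks correspond to the standard NCHW/NHWC permutations, {0, 2, 3, 1} and {0, 3, 1, 2}, plus the 3D variant {0, 2, 1}. A small sketch applying such an order to a shape, assuming the order lists, for each output axis, the source axis index:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Apply a permutation order (output axis -> source axis) to a shape.
std::vector<size_t> permute_shape(const std::vector<size_t>& dims, const std::vector<int32_t>& order) {
    std::vector<size_t> out(order.size());
    for (size_t i = 0; i < order.size(); ++i)
        out[i] = dims[order[i]];
    return out;
}

int main() {
    const std::vector<size_t> nchw = {1, 3, 8, 8};
    for (size_t d : permute_shape(nchw, {0, 2, 3, 1}))  // NCHW -> NHWC: 1 8 8 3
        std::cout << d << " ";
    std::cout << "\n";
    return 0;
}
```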
@ -427,4 +428,5 @@ inline std::vector<TranspositionInfo> FindTranspositionInfoFromNextLayers(Infere
return findTranspositionInfoRecursive(layer); return findTranspositionInfoRecursive(layer);
} }
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -8,7 +8,9 @@
#include "gna_graph_tools.hpp" #include "gna_graph_tools.hpp"
#include "layers/gna_layer_info.hpp" #include "layers/gna_layer_info.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
/** /**
* @brief returns a pointer to 2D reshaped data to satisfy maximum size of zero dimension * @brief returns a pointer to 2D reshaped data to satisfy maximum size of zero dimension
* @param input a pointer to data to be reshaped * @param input a pointer to data to be reshaped
@ -47,14 +49,15 @@ inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input
* @param layer * @param layer
*/ */
inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) { inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
if (GNAPluginNS::LayerInfo(layer).isPower() || GNAPluginNS::LayerInfo(layer).isCopy()) if (LayerInfo(layer).isPower() || LayerInfo(layer).isCopy())
return true; return true;
if (!GNAPluginNS::LayerInfo(layer).isSyntheticScaleShift()) if (!LayerInfo(layer).isSyntheticScaleShift())
return false; return false;
// Don't reshape diagonallayers with bias connection // Don't reshape diagonallayers with bias connection
return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput(); return !LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
} }
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -6,7 +6,9 @@
#include "gna_plugin.hpp" #include "gna_plugin.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
GNAInferRequest::GNAInferRequest(const std::shared_ptr<GNAPlugin>& plg, GNAInferRequest::GNAInferRequest(const std::shared_ptr<GNAPlugin>& plg,
const std::vector<std::shared_ptr<const ov::Node>>& inputs, const std::vector<std::shared_ptr<const ov::Node>>& inputs,
@ -170,4 +172,5 @@ void GNAInferRequest::CreateInferRequest() {
} }
} }
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -9,7 +9,9 @@
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp" #include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
#include "request_status.hpp" #include "request_status.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
class GNAPlugin; class GNAPlugin;
class GNAInferRequest : public InferenceEngine::IInferRequestInternal { class GNAInferRequest : public InferenceEngine::IInferRequestInternal {
@ -60,4 +62,6 @@ private:
uint32_t _infer_request_idx = kRequestIndexInvalid; uint32_t _infer_request_idx = kRequestIndexInvalid;
std::shared_ptr<GNAPlugin> plg; std::shared_ptr<GNAPlugin> plg;
}; };
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -11,11 +11,15 @@
#include <openvino/itt.hpp> #include <openvino/itt.hpp>
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
namespace itt { namespace itt {
namespace domains { namespace domains {
OV_ITT_DOMAIN(GNAPlugin); OV_ITT_DOMAIN(GNAPlugin);
OV_ITT_DOMAIN(GNA_LT); OV_ITT_DOMAIN(GNA_LT);
}
} } // namespace domains
} } // namespace itt
} // namespace intel_gna
} // namespace ov

View File

@ -28,12 +28,15 @@
*/ */
#define ALIGN64(number) ALIGN(number, 64) #define ALIGN64(number) ALIGN(number, 64)
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
namespace tools { namespace tools {
template <typename T, typename... Args> template <typename T, typename... Args>
std::unique_ptr<T> make_unique(Args&&... args) { std::unique_ptr<T> make_unique(Args&&... args) {
return std::unique_ptr<T>(new T(std::forward<Args>(args)...)); return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
} }
} // namespace tools
} // namespace GNAPluginNS } // namespace tools
} // namespace intel_gna
} // namespace ov

View File

@ -29,7 +29,7 @@
#include "serial/headers/latest/gna_model_header.hpp" #include "serial/headers/latest/gna_model_header.hpp"
#include "common/versioning.hpp" #include "common/versioning.hpp"
using namespace GNAPluginNS; using namespace ov::intel_gna;
inline void writeNBytes(const void *ptr, uint32_t size, std::ostream & os) { inline void writeNBytes(const void *ptr, uint32_t size, std::ostream & os) {
os.write(static_cast<const char*>(ptr), size); os.write(static_cast<const char*>(ptr), size);
@ -108,7 +108,7 @@ std::string GNAVersionSerializer::Import(std::istream& is) const {
const int gna_header_magic = is_little_endian() ? 0x4d414e47 : 0x474e414d; const int gna_header_magic = is_little_endian() ? 0x4d414e47 : 0x474e414d;
GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &is) { header_latest::ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
is.exceptions(std::istream::failbit); is.exceptions(std::istream::failbit);
auto startPos = is.tellg(); auto startPos = is.tellg();
if (startPos == -1) { if (startPos == -1) {
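The magic constant chosen here keeps the on-disk byte sequence identical on both architectures: 0x4d414e47 stored little-endian and 0x474e414d stored big-endian both lay out as the bytes 'G' 'N' 'A' 'M'. A quick check:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
    const uint32_t magic = 0x4d414e47;   // the value picked on a little-endian host
    char bytes[5] = {};
    std::memcpy(bytes, &magic, sizeof(magic));
    std::printf("%s\n", bytes);          // prints "GNAM" on a little-endian host
    return 0;
}
```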
@ -122,11 +122,11 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
stream_len -= startPos; stream_len -= startPos;
is.seekg(startPos, is.beg); is.seekg(startPos, is.beg);
HeaderLatest::ModelHeader header; header_latest::ModelHeader header;
header.version.major = 0u; header.version.major = 0u;
header.version.minor = 0u; header.version.minor = 0u;
auto size_of_headers_header = sizeof(HeaderLatest::ModelHeader::gnam) + sizeof(HeaderLatest::ModelHeader::headerSize) auto size_of_headers_header = sizeof(header_latest::ModelHeader::gnam) + sizeof(header_latest::ModelHeader::headerSize)
+ sizeof(HeaderLatest::ModelHeader::Version); + sizeof(header_latest::ModelHeader::Version);
if (stream_len > size_of_headers_header) { if (stream_len > size_of_headers_header) {
readNBytes(&header, static_cast<uint32_t>(size_of_headers_header), is); readNBytes(&header, static_cast<uint32_t>(size_of_headers_header), is);
} else { } else {
@ -142,34 +142,34 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
} }
is.seekg(startPos, is.beg); is.seekg(startPos, is.beg);
Header2dot1::ModelHeader tempHeader2dot1; header_2_dot_1::ModelHeader tempheader_2_dot_1;
switch (header.version.major) { switch (header.version.major) {
case 2: case 2:
switch (header.version.minor) { switch (header.version.minor) {
case 1: case 1:
readBits(tempHeader2dot1, is); readBits(tempheader_2_dot_1, is);
header = HeaderLatest::ModelHeader(tempHeader2dot1); header = header_latest::ModelHeader(tempheader_2_dot_1);
break; break;
case 2: case 2:
case 3: case 3:
{ {
Header2dot3::ModelHeader tempHeader2dot3; header_2_dot_3::ModelHeader tempheader_2_dot_3;
readBits(tempHeader2dot3, is); readBits(tempheader_2_dot_3, is);
header = HeaderLatest::ModelHeader(tempHeader2dot3); header = header_latest::ModelHeader(tempheader_2_dot_3);
break; break;
} }
case 4: case 4:
{ {
Header2dot4::ModelHeader tempHeader2dot4; header_2_dot_4::ModelHeader tempheader_2_dot_4;
readBits(tempHeader2dot4, is); readBits(tempheader_2_dot_4, is);
header = HeaderLatest::ModelHeader(tempHeader2dot4); header = header_latest::ModelHeader(tempheader_2_dot_4);
break; break;
} }
case 5: case 5:
case 6: case 6:
case 7: case 7:
case 8: case 8:
readNBytes(&header, sizeof(HeaderLatest::ModelHeader), is); readNBytes(&header, sizeof(header_latest::ModelHeader), is);
break; break;
default: default:
THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 8 and is: " << header.version.minor; THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 8 and is: " << header.version.minor;
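The switch above always ends with a header_latest::ModelHeader: older layouts are read into their own POD struct and upgraded through a converting constructor. A simplified sketch of that pattern with hypothetical fields and a readBits stand-in:

```cpp
#include <cstdint>
#include <istream>

// Hypothetical trimmed-down headers; the real structs carry many more fields.
struct ModelHeaderV2dot1 { uint32_t nInputs; uint32_t nOutputs; };

struct ModelHeaderLatest {
    uint32_t nInputs = 0;
    uint32_t nOutputs = 0;
    uint32_t nTransposeInputs = 0;  // field that did not exist in 2.1
    ModelHeaderLatest() = default;
    explicit ModelHeaderLatest(const ModelHeaderV2dot1& old)
        : nInputs(old.nInputs), nOutputs(old.nOutputs), nTransposeInputs(0) {}
};

// Assumed to mirror the plugin's readBits helper: raw read of a POD object.
template <typename T>
void readBits(T& obj, std::istream& is) {
    is.read(reinterpret_cast<char*>(&obj), sizeof(T));
}

ModelHeaderLatest read_and_upgrade(std::istream& is, uint32_t minor) {
    if (minor == 1) {
        ModelHeaderV2dot1 old{};
        readBits(old, is);
        return ModelHeaderLatest(old);   // upgrade via the converting constructor
    }
    ModelHeaderLatest latest{};
    readBits(latest, is);                // already the latest layout, read as-is
    return latest;
}
```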
@ -190,10 +190,10 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
return header; return header;
} }
GNAPluginNS::HeaderLatest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::istream &is) { header_latest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::istream &is) {
is.exceptions(std::istream::failbit); is.exceptions(std::istream::failbit);
HeaderLatest::RuntimeEndPoint endPoint; header_latest::RuntimeEndPoint endPoint;
switch (model_header_.version.major) { switch (model_header_.version.major) {
case 2: case 2:
switch (model_header_.version.minor) { switch (model_header_.version.minor) {
@ -204,20 +204,20 @@ GNAPluginNS::HeaderLatest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::ist
case 5: case 5:
case 6: case 6:
{ {
Header2dot6::RuntimeEndPoint tempEndPoint2dot6; header_2_dot_6::RuntimeEndPoint tempEndPoint2dot6;
readBits(tempEndPoint2dot6, is); readBits(tempEndPoint2dot6, is);
endPoint = HeaderLatest::RuntimeEndPoint(tempEndPoint2dot6, model_header_.nGroup); endPoint = header_latest::RuntimeEndPoint(tempEndPoint2dot6, model_header_.nGroup);
break; break;
} }
case 7: case 7:
{ {
Header2dot7::RuntimeEndPoint tempEndPoint2dot7; header_2_dot_7::RuntimeEndPoint tempEndPoint2dot7;
readBits(tempEndPoint2dot7, is); readBits(tempEndPoint2dot7, is);
endPoint = HeaderLatest::RuntimeEndPoint(tempEndPoint2dot7); endPoint = header_latest::RuntimeEndPoint(tempEndPoint2dot7);
break; break;
} }
case 8: case 8:
readNBytes(&endPoint, sizeof(HeaderLatest::RuntimeEndPoint), is); readNBytes(&endPoint, sizeof(header_latest::RuntimeEndPoint), is);
break; break;
default: default:
THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 8 and is: " THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 8 and is: "
@ -259,8 +259,8 @@ static const std::map<Gna2OperationType, std::vector<uint32_t>> GnaParamSize{
void GNAModelSerial::Import(void *basePointer, void GNAModelSerial::Import(void *basePointer,
size_t gnaGraphSize, size_t gnaGraphSize,
std::istream &is, std::istream &is,
GNAPluginNS::GnaInputs &inputs, GnaInputs &inputs,
GNAPluginNS::GnaOutputs &outputs, GnaOutputs &outputs,
TranspositionInfoMap &inputsTranspositionInfo, TranspositionInfoMap &inputsTranspositionInfo,
TranspositionInfoMap &outputsTranspositionInfo, TranspositionInfoMap &outputsTranspositionInfo,
std::string & libVersionFromFile) { std::string & libVersionFromFile) {
@ -269,7 +269,7 @@ void GNAModelSerial::Import(void *basePointer,
if (model_header_.version.major == 2) { if (model_header_.version.major == 2) {
for (auto inputIndex = 0; inputIndex < model_header_.nInputs; inputIndex++) { for (auto inputIndex = 0; inputIndex < model_header_.nInputs; inputIndex++) {
std::string name = (model_header_.version.minor >= 3) ? readString(is) : std::string("input" + std::to_string(inputIndex)); std::string name = (model_header_.version.minor >= 3) ? readString(is) : std::string("input" + std::to_string(inputIndex));
inputs[name] = GNAPluginNS::InputDesc(name); inputs[name] = InputDesc(name);
} }
if (model_header_.version.minor >= 5) { if (model_header_.version.minor >= 5) {
// 3. Read transposition input info // 3. Read transposition input info
@ -294,7 +294,7 @@ void GNAModelSerial::Import(void *basePointer,
if (model_header_.version.major == 2) { if (model_header_.version.major == 2) {
for (auto outputIndex = 0; outputIndex < model_header_.nOutputs; outputIndex++) { for (auto outputIndex = 0; outputIndex < model_header_.nOutputs; outputIndex++) {
std::string name = (model_header_.version.minor >= 3) ? readString(is) : std::string("output" + std::to_string(outputIndex)); std::string name = (model_header_.version.minor >= 3) ? readString(is) : std::string("output" + std::to_string(outputIndex));
outputs[name] = GNAPluginNS::OutputDesc(name); outputs[name] = OutputDesc(name);
} }
} }
// 7. Read outputs // 7. Read outputs
@ -416,8 +416,8 @@ void GNAModelSerial::Export(const GnaAllocations& allocations, std::ostream& os)
return out; return out;
}; };
auto convert_to_serial = [&allocationsOrdered](const GNAPluginNS::GnaDesc& desc) { auto convert_to_serial = [&allocationsOrdered](const GnaDesc& desc) {
HeaderLatest::RuntimeEndPoint ep; header_latest::RuntimeEndPoint ep;
ep.elements_count = desc.num_elements; ep.elements_count = desc.num_elements;
ep.scaleFactor = desc.scale_factor; ep.scaleFactor = desc.scale_factor;
ep.element_size = desc.tensor_precision.size(); ep.element_size = desc.tensor_precision.size();
@ -441,12 +441,12 @@ void GNAModelSerial::Export(const GnaAllocations& allocations, std::ostream& os)
/** /**
* writing header * writing header
*/ */
HeaderLatest::ModelHeader header; header_latest::ModelHeader header;
header.gnam[0] = 'G'; header.gnam[0] = 'G';
header.gnam[1] = 'N'; header.gnam[1] = 'N';
header.gnam[2] = 'A'; header.gnam[2] = 'A';
header.gnam[3] = 'M'; header.gnam[3] = 'M';
header.headerSize = sizeof(HeaderLatest::ModelHeader); header.headerSize = sizeof(header_latest::ModelHeader);
header.gnaMemSize = gnaGraphSize; header.gnaMemSize = gnaGraphSize;
header.layersCount = layers.size(); header.layersCount = layers.size();
header.nGroup = 1; // just to support the old models header.nGroup = 1; // just to support the old models
@ -561,9 +561,9 @@ void GNAModelSerial::Export(const GnaAllocations& allocations, std::ostream& os)
version_.Export(os); version_.Export(os);
} }
void GNAModelSerial::ImportInputs(std::istream &is, void* basePtr, GNAPluginNS::GnaInputs &inputs) { void GNAModelSerial::ImportInputs(std::istream &is, void* basePtr, GnaInputs &inputs) {
for (auto &input : inputs.Get()) { for (auto &input : inputs.Get()) {
HeaderLatest::RuntimeEndPoint ep = ReadEndPoint(is); header_latest::RuntimeEndPoint ep = ReadEndPoint(is);
input.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + ep.descriptor_offset)); input.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + ep.descriptor_offset));
input.orientation = ep.orientation; input.orientation = ep.orientation;
@ -589,9 +589,9 @@ void GNAModelSerial::ImportInputs(std::istream &is, void* basePtr, GNAPluginNS::
} }
} }
void GNAModelSerial::ImportOutputs(std::istream &is, void* basePtr, GNAPluginNS::GnaOutputs &outputs) { void GNAModelSerial::ImportOutputs(std::istream &is, void* basePtr, GnaOutputs &outputs) {
for (auto &output : outputs.Get()) { for (auto &output : outputs.Get()) {
HeaderLatest::RuntimeEndPoint ep = ReadEndPoint(is); header_latest::RuntimeEndPoint ep = ReadEndPoint(is);
output.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + ep.descriptor_offset)); output.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + ep.descriptor_offset));
output.orientation = ep.orientation; output.orientation = ep.orientation;
@ -648,9 +648,9 @@ void GNAModelSerial::ExportTranspositionInfo(std::ostream &os,
} }
void GNAModelSerial::AppendTensorNameIfNeeded(GnaDesc& nodeDesc) const { void GNAModelSerial::AppendTensorNameIfNeeded(GnaDesc& nodeDesc) const {
static constexpr Header2dot8::ModelHeader::Version kHasTensorNamesVersion; static constexpr header_2_dot_8::ModelHeader::Version kHasTensorNamesVersion;
if (HeaderLatest::IsFirstVersionLower(model_header_.version, kHasTensorNamesVersion) && if (header_latest::IsFirstVersionLower(model_header_.version, kHasTensorNamesVersion) &&
nodeDesc.tensor_names.empty()) { nodeDesc.tensor_names.empty()) {
nodeDesc.tensor_names.insert(nodeDesc.name); nodeDesc.tensor_names.insert(nodeDesc.name);
} }

View File

@ -34,16 +34,16 @@ public:
private: private:
Gna2Model * gna2model_; Gna2Model * gna2model_;
MemoryType states, *pstates_ = nullptr; MemoryType states, *pstates_ = nullptr;
GNAPluginNS::GnaInputs inputs_; ov::intel_gna::GnaInputs inputs_;
GNAPluginNS::GnaOutputs outputs_; ov::intel_gna::GnaOutputs outputs_;
TranspositionInfoMap inputs_transpose_info_; TranspositionInfoMap inputs_transpose_info_;
TranspositionInfoMap outputs_transpose_info_; TranspositionInfoMap outputs_transpose_info_;
GNAPluginNS::HeaderLatest::ModelHeader model_header_; ov::intel_gna::header_latest::ModelHeader model_header_;
GNAVersionSerializer version_; GNAVersionSerializer version_;
void ImportInputs(std::istream &is, void* basePtr, GNAPluginNS::GnaInputs &inputs); void ImportInputs(std::istream &is, void* basePtr, ov::intel_gna::GnaInputs &inputs);
void ImportOutputs(std::istream &is, void* basePtr, GNAPluginNS::GnaOutputs &outputs); void ImportOutputs(std::istream &is, void* basePtr, ov::intel_gna::GnaOutputs &outputs);
void ImportTranspositionInfo(std::istream &is, std::string &name, std::vector<TranspositionInfo> &transpositionInfo); void ImportTranspositionInfo(std::istream &is, std::string &name, std::vector<TranspositionInfo> &transpositionInfo);
@ -53,7 +53,7 @@ private:
* @brief Update input or output description to support importing of < 2.8 format where tensor_names were not present * @brief Update input or output description to support importing of < 2.8 format where tensor_names were not present
* @param nodeDesc input or output description to be appended * @param nodeDesc input or output description to be appended
*/ */
void AppendTensorNameIfNeeded(GNAPluginNS::GnaDesc& nodeDesc) const; void AppendTensorNameIfNeeded(ov::intel_gna::GnaDesc& nodeDesc) const;
public: public:
GNAModelSerial(Gna2Model* model, MemoryType& states_holder) GNAModelSerial(Gna2Model* model, MemoryType& states_holder)
@ -62,14 +62,14 @@ private:
} }
GNAModelSerial(Gna2Model* model, GNAModelSerial(Gna2Model* model,
GNAPluginNS::GnaInputs& inputs, ov::intel_gna::GnaInputs& inputs,
GNAPluginNS::GnaOutputs& outputs) ov::intel_gna::GnaOutputs& outputs)
: gna2model_(model), : gna2model_(model),
inputs_(inputs), inputs_(inputs),
outputs_(outputs) { outputs_(outputs) {
} }
void setHeader(GNAPluginNS::HeaderLatest::ModelHeader header) { void setHeader(ov::intel_gna::header_latest::ModelHeader header) {
model_header_ = header; model_header_ = header;
} }
@ -100,9 +100,9 @@ private:
* @param is - opened input stream * @param is - opened input stream
* @return * @return
*/ */
static GNAPluginNS::HeaderLatest::ModelHeader ReadHeader(std::istream &is); static ov::intel_gna::header_latest::ModelHeader ReadHeader(std::istream &is);
GNAPluginNS::HeaderLatest::RuntimeEndPoint ReadEndPoint(std::istream &is); ov::intel_gna::header_latest::RuntimeEndPoint ReadEndPoint(std::istream &is);
/** /**
* @brief Import model from FS into preallocated buffer, * @brief Import model from FS into preallocated buffer,
@ -114,8 +114,8 @@ private:
void Import(void *basePointer, void Import(void *basePointer,
size_t gnaGraphSize, size_t gnaGraphSize,
std::istream &is, std::istream &is,
GNAPluginNS::GnaInputs &inputs, ov::intel_gna::GnaInputs &inputs,
GNAPluginNS::GnaOutputs &outputs, ov::intel_gna::GnaOutputs &outputs,
TranspositionInfoMap& inputstranspositionInfo, TranspositionInfoMap& inputstranspositionInfo,
TranspositionInfoMap& outputstranspositionInfo, TranspositionInfoMap& outputstranspositionInfo,
std::string& modelLibVersion); std::string& modelLibVersion);

View File

@ -125,8 +125,8 @@ inline uint32_t ToByteSize(const Gna2DataType type) {
using namespace std; using namespace std;
using namespace InferenceEngine; using namespace InferenceEngine;
using namespace InferenceEngine::details; using namespace InferenceEngine::details;
using namespace GNAPluginNS;
using namespace GNAPluginNS::memory; using namespace ov::intel_gna::memory;
using namespace ov::intel_gna::frontend; using namespace ov::intel_gna::frontend;
namespace InferenceEngine { namespace InferenceEngine {
@ -355,9 +355,9 @@ GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) :
void GNAPlugin::Init() { void GNAPlugin::Init() {
OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "Init"); OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "Init");
dnn = std::make_shared<backend::AMIntelDNN>(backend::AMIntelDNN()); dnn = std::make_shared<backend::AMIntelDNN>(backend::AMIntelDNN());
gnaFlags = std::make_shared<GNAPluginNS::GNAFlags>(GNAPluginNS::GNAFlags()); gnaFlags = std::make_shared<GNAFlags>(GNAFlags());
inputs_ptr_ = std::make_shared<GNAPluginNS::GnaInputs>(GNAPluginNS::GnaInputs()); inputs_ptr_ = std::make_shared<GnaInputs>(GnaInputs());
outputs_ = GNAPluginNS::GnaOutputs(); outputs_ = GnaOutputs();
graphCompiler.setDNNPtr(dnn); graphCompiler.setDNNPtr(dnn);
graphCompiler.setInputsPtr(inputs_ptr_); graphCompiler.setInputsPtr(inputs_ptr_);
@ -508,7 +508,7 @@ bool GNAPlugin::TryToInitOutput(const std::string &portName, InferenceEngine::CN
outputs_.at(portName).ptrs.resize(gnaFlags->num_requests); outputs_.at(portName).ptrs.resize(gnaFlags->num_requests);
outputs_.at(portName).orientation = orientation; outputs_.at(portName).orientation = orientation;
outputs_.at(portName).set_precision(numBytesPerElem); outputs_.at(portName).set_precision(numBytesPerElem);
outputs_.at(portName).scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : GNAPluginNS::kScaleFactorDefault; outputs_.at(portName).scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : kScaleFactorDefault;
outputs_.at(portName).num_elements = numElem; outputs_.at(portName).num_elements = numElem;
// binding ptr for first infer request - then others will be setup during relocation // binding ptr for first infer request - then others will be setup during relocation
@ -787,7 +787,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// Check the network // Check the network
std::string error; std::string error;
if (!GNAPluginNS::GNALimitations::AreLayersSupported(network, error)) { if (!limitations::AreLayersSupported(network, error)) {
THROW_GNA_EXCEPTION << error.c_str(); THROW_GNA_EXCEPTION << error.c_str();
} }
@ -1082,7 +1082,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// update orientation of model intput layer // update orientation of model intput layer
for (auto& inputLayer : inputLayers) { for (auto& inputLayer : inputLayers) {
if (LayerInfo(inputLayer).isInput()) { if (LayerInfo(inputLayer).isInput()) {
ov::intela_gna::helpers::updateModelInputOrientationWithoutConvolution(*inputLayer, ov::intel_gna::helpers::updateModelInputOrientationWithoutConvolution(*inputLayer,
graphCompiler.dnnComponents, graphCompiler.dnnComponents,
*inputs_ptr_); *inputs_ptr_);
} }
@ -1092,7 +1092,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
for (auto&& outPort : outputs_data_map_) { for (auto&& outPort : outputs_data_map_) {
auto outLayer = getCreatorLayer(outPort.second).lock(); auto outLayer = getCreatorLayer(outPort.second).lock();
if (outLayer && LayerInfo(outLayer).isOutput()) { if (outLayer && LayerInfo(outLayer).isOutput()) {
ov::intela_gna::helpers::updateModelOutputOrientation(outPort.first, ov::intel_gna::helpers::updateModelOutputOrientation(outPort.first,
outLayer->name, outLayer->name,
graphCompiler.dnnComponents, graphCompiler.dnnComponents,
outputs_); outputs_);
@ -1113,11 +1113,11 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
#endif #endif
} }
bool GNAPluginNS::GNAPlugin::isFP32ModeActive() const { bool GNAPlugin::isFP32ModeActive() const {
return gnaFlags->sw_fp32 || !gnadevice; return gnaFlags->sw_fp32 || !gnadevice;
} }
std::string GNAPluginNS::GNAPlugin::effectiveGnaCompileTarget() const { std::string GNAPlugin::effectiveGnaCompileTarget() const {
if (gnadevice) { if (gnadevice) {
return gnadevice->GetCompileTarget(); return gnadevice->GetCompileTarget();
} else if (!config.gnaCompileTarget.empty()) { } else if (!config.gnaCompileTarget.empty()) {
@ -1161,7 +1161,7 @@ std::shared_ptr<request::ModelWrapper> GNAPlugin::createModelWrapperForLoadNetwo
THROW_GNA_EXCEPTION << "dnn is nullptr cannot load network"; THROW_GNA_EXCEPTION << "dnn is nullptr cannot load network";
} }
std::weak_ptr<GNAPluginNS::backend::AMIntelDNN> weakDnn = dnn; std::weak_ptr<backend::AMIntelDNN> weakDnn = dnn;
auto compileTarget = effectiveGnaCompileTarget(); auto compileTarget = effectiveGnaCompileTarget();
auto initializer = [weakDnn, compileTarget](Gna2Model* model) { auto initializer = [weakDnn, compileTarget](Gna2Model* model) {
if (auto dnn = weakDnn.lock()) { if (auto dnn = weakDnn.lock()) {
@ -1174,7 +1174,7 @@ std::shared_ptr<request::ModelWrapper> GNAPlugin::createModelWrapperForLoadNetwo
return request::ModelWrapperFactory::createInitialized(std::move(initializer)); return request::ModelWrapperFactory::createInitialized(std::move(initializer));
} }
std::shared_ptr<request::ModelWrapper> GNAPluginNS::GNAPlugin::createModelWrapperForImportNetwork( std::shared_ptr<request::ModelWrapper> GNAPlugin::createModelWrapperForImportNetwork(
uint32_t numberOfOperations) { uint32_t numberOfOperations) {
return request::ModelWrapperFactory::createWithNumberOfEmptyOperations(numberOfOperations); return request::ModelWrapperFactory::createWithNumberOfEmptyOperations(numberOfOperations);
} }
@ -1238,20 +1238,21 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap& inputs, Infer
int inputNum = 0; int inputNum = 0;
for (auto& input : inputs) { for (auto& input : inputs) {
auto inputLayout = input.second->getTensorDesc().getLayout(); auto inputLayout = input.second->getTensorDesc().getLayout();
if (inputLayout != Layout::C && inputLayout != Layout::NC && inputLayout != Layout::CN && if (inputLayout != InferenceEngine::Layout::C && inputLayout != InferenceEngine::Layout::NC &&
inputLayout != Layout::CHW && inputLayout != Layout::NCHW) { inputLayout != InferenceEngine::Layout::CN && inputLayout != InferenceEngine::Layout::CHW &&
inputLayout != InferenceEngine::Layout::NCHW) {
THROW_GNA_EXCEPTION << "Expected input blob to have Layout::C, Layout::NC, Layout::CN, Layout::NCHW or " THROW_GNA_EXCEPTION << "Expected input blob to have Layout::C, Layout::NC, Layout::CN, Layout::NCHW or "
"Layout::CHW. But was: " "Layout::CHW. But was: "
<< input.second->getTensorDesc().getLayout(); << input.second->getTensorDesc().getLayout();
} }
if (inputLayout == Layout::NCHW || inputLayout == Layout::CHW) { if (inputLayout == InferenceEngine::Layout::NCHW || inputLayout == InferenceEngine::Layout::CHW) {
// specific case that can be squeezed to 2d // specific case that can be squeezed to 2d
inputLayout = Layout::NC; inputLayout = InferenceEngine::Layout::NC;
} }
auto is1D = input.second->getTensorDesc().getLayout() == Layout::C; auto is1D = input.second->getTensorDesc().getLayout() == InferenceEngine::Layout::C;
auto is3D = input.second->getTensorDesc().getLayout() == Layout::CHW; auto is3D = input.second->getTensorDesc().getLayout() == InferenceEngine::Layout::CHW;
if (inputs_ptr_->at(input.first).ptrs.empty()) { if (inputs_ptr_->at(input.first).ptrs.empty()) {
// should not happen in user code however might happen if there any non executable network based integration // should not happen in user code however might happen if there any non executable network based integration
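The layout handling above treats NCHW and CHW input blobs as 2D: everything except the batch dimension collapses into a single element count before import. A hedged sketch of that squeeze (not the plugin's exact code path):

```cpp
#include <cstddef>
#include <functional>
#include <numeric>
#include <utility>
#include <vector>

// Collapse an NCHW/CHW shape to {batch, elements}, the 2D view the GNA path expects.
std::pair<size_t, size_t> squeeze_to_nc(const std::vector<size_t>& dims, bool has_batch) {
    const size_t batch = has_batch ? dims.front() : 1;
    const size_t total = std::accumulate(dims.begin(), dims.end(), size_t{1}, std::multiplies<size_t>());
    return {batch, total / batch};
}
// e.g. NCHW {1, 3, 8, 8} -> {1, 192}; CHW {3, 8, 8} -> {1, 192}.
```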
@ -1297,7 +1298,7 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap& inputs, Infer
ImportFrames(inputs_ptr_->at(input.first).ptrs[index], ImportFrames(inputs_ptr_->at(input.first).ptrs[index],
input.second->cbuffer().as<float*>(), input.second->cbuffer().as<float*>(),
input.second->getTensorDesc().getPrecision(), input.second->getTensorDesc().getPrecision(),
gnaFlags->sw_fp32 ? GNAPluginNS::kScaleFactorDefault : inputs_ptr_->at(input.first).scale_factor, gnaFlags->sw_fp32 ? kScaleFactorDefault : inputs_ptr_->at(input.first).scale_factor,
inputOrientation, inputOrientation,
importedFrames, importedFrames,
targetGroups, targetGroups,
@ -1394,21 +1395,21 @@ RequestStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
for (auto&& outputBlobIt : requestResult) { for (auto&& outputBlobIt : requestResult) {
auto& outputBlob = outputBlobIt.second; auto& outputBlob = outputBlobIt.second;
auto& outputDesc = outputs_.at(outputBlobIt.first); auto& outputDesc = outputs_.at(outputBlobIt.first);
if (outputBlob->getTensorDesc().getLayout() != Layout::C && if (outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::C &&
outputBlob->getTensorDesc().getLayout() != Layout::NC && outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::NC &&
outputBlob->getTensorDesc().getLayout() != Layout::CN && outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::CN &&
outputBlob->getTensorDesc().getLayout() != Layout::NCHW && outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::NCHW &&
outputBlob->getTensorDesc().getLayout() != Layout::CHW && outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::CHW &&
outputBlob->getTensorDesc().getLayout() != Layout::SCALAR) { outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR) {
THROW_GNA_EXCEPTION << "Expected output blob to have Layout::C, Layout::NC, Layout::CN, Layout::NCHW or " THROW_GNA_EXCEPTION << "Expected output blob to have Layout::C, Layout::NC, Layout::CN, Layout::NCHW or "
"Layout::CHW. But was " "Layout::CHW. But was "
<< outputBlob->getTensorDesc().getLayout(); << outputBlob->getTensorDesc().getLayout();
} }
auto dims = outputBlob->getTensorDesc().getDims(); auto dims = outputBlob->getTensorDesc().getDims();
auto is1D = outputBlob->getTensorDesc().getLayout() == Layout::C; auto is1D = outputBlob->getTensorDesc().getLayout() == InferenceEngine::Layout::C;
auto isScalar = outputBlob->getTensorDesc().getLayout() == Layout::SCALAR; auto isScalar = outputBlob->getTensorDesc().getLayout() == InferenceEngine::Layout::SCALAR;
auto is3D = outputBlob->getTensorDesc().getLayout() == Layout::CHW; auto is3D = outputBlob->getTensorDesc().getLayout() == InferenceEngine::Layout::CHW;
auto batchSize = (is1D || isScalar || is3D) ? 1 : dims[0]; auto batchSize = (is1D || isScalar || is3D) ? 1 : dims[0];
auto elementsPerBatch = auto elementsPerBatch =
isScalar ? 1 isScalar ? 1
@ -1635,7 +1636,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
SetNetworkInputs(); SetNetworkInputs();
SetNetworkOutputs(); SetNetworkOutputs();
ov::intela_gna::helpers::ApplyInputScaleFactors(config, header, *inputs_ptr_); ov::intel_gna::helpers::ApplyInputScaleFactors(config, header, *inputs_ptr_);
auto getOrientation = [](Gna2Operation& gnaOperation) { auto getOrientation = [](Gna2Operation& gnaOperation) {
return gnaOperation.Type == Gna2OperationTypeConvolution ? kDnnNonInterleavedOrientation return gnaOperation.Type == Gna2OperationTypeConvolution ? kDnnNonInterleavedOrientation

View File

@ -26,8 +26,10 @@
#include <legacy/ie_util_internal.hpp> #include <legacy/ie_util_internal.hpp>
#include <gna2-model-api.h> #include <gna2-model-api.h>
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
namespace request { namespace request {
class ModelWrapper; class ModelWrapper;
class WorkerPool; class WorkerPool;
class Worker; class Worker;
@ -38,13 +40,13 @@ protected:
std::string _pluginName = "GNA"; std::string _pluginName = "GNA";
Config config {}; Config config {};
std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnn; std::shared_ptr<backend::AMIntelDNN> dnn;
std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlags; std::shared_ptr<GNAFlags> gnaFlags;
std::shared_ptr<GNAPluginNS::gna_memory_type> gnamem; std::shared_ptr<gna_memory_type> gnamem;
std::shared_ptr<GNAPluginNS::GnaInputs> inputs_ptr_; std::shared_ptr<GnaInputs> inputs_ptr_;
GNAPluginNS::GnaOutputs outputs_; GnaOutputs outputs_;
GNAPluginNS::GNAGraphCompiler graphCompiler; GNAGraphCompiler graphCompiler;
uint32_t activeLayerIndex = 0xffffffff; uint32_t activeLayerIndex = 0xffffffff;
TranspositionInfoMap transpose_inputs_info; TranspositionInfoMap transpose_inputs_info;
@ -237,4 +239,5 @@ protected:
#endif #endif
}; };
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -19,10 +19,11 @@
using namespace InferenceEngine; using namespace InferenceEngine;
using namespace InferenceEngine::details; using namespace InferenceEngine::details;
using namespace ov::intel_gna;
using namespace ov::intel_gna::common; using namespace ov::intel_gna::common;
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
const uint8_t Config::max_num_requests; const uint8_t Config::max_num_requests;
OPENVINO_SUPPRESS_DEPRECATED_START OPENVINO_SUPPRESS_DEPRECATED_START
@ -127,7 +128,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
check_scale_factor(scale_factor); check_scale_factor(scale_factor);
// missing scale factors are set to be 1.0f // missing scale factors are set to be 1.0f
if (inputScaleFactors.size() <= input_index) { if (inputScaleFactors.size() <= input_index) {
inputScaleFactors.resize(input_index + 1, GNAPluginNS::kScaleFactorDefault); inputScaleFactors.resize(input_index + 1, kScaleFactorDefault);
} }
inputScaleFactors[input_index] = InferenceEngine::CNNLayer::ie_parse_float(value); inputScaleFactors[input_index] = InferenceEngine::CNNLayer::ie_parse_float(value);
} else if (key == GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE) || key == ov::intel_gna::firmware_model_image_path) { } else if (key == GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE) || key == ov::intel_gna::firmware_model_image_path) {
@ -414,4 +415,6 @@ std::vector<std::string> Config::GetSupportedKeys() const {
} }
return result; return result;
} }
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -14,7 +14,8 @@
#include <map> #include <map>
#include <mutex> #include <mutex>
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
static const float kScaleFactorDefault = 1.f; static const float kScaleFactorDefault = 1.f;
@ -76,4 +77,5 @@ struct Config {
static const uint8_t max_num_requests = 127; static const uint8_t max_num_requests = 127;
}; };
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -7,7 +7,6 @@
using namespace InferenceEngine; using namespace InferenceEngine;
using namespace std; using namespace std;
using namespace GNAPluginNS;
static const Version gnaPluginDescription = { static const Version gnaPluginDescription = {
{2, 1}, {2, 1},

View File

@ -13,7 +13,8 @@
#include "gna_plugin_config.hpp" #include "gna_plugin_config.hpp"
#include <legacy/ie_util_internal.hpp> #include <legacy/ie_util_internal.hpp>
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
class GNAPluginInternal : public InferenceEngine::IInferencePlugin { class GNAPluginInternal : public InferenceEngine::IInferencePlugin {
private: private:
@ -104,4 +105,5 @@ public:
} }
}; };
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -13,7 +13,6 @@
#include <unordered_map> #include <unordered_map>
#include <memory> #include <memory>
using namespace GNAPluginNS;
using namespace InferenceEngine; using namespace InferenceEngine;
using namespace InferenceEngine::PluginConfigParams; using namespace InferenceEngine::PluginConfigParams;

View File

@ -5,7 +5,7 @@
#include <cstdint> #include <cstdint>
#include <limits> #include <limits>
#include "gna_slope_scale.h" #include "gna_slope_scale.hpp"
pwl_gna_slope_scale_t gna_slope(const double slope, pwl_gna_slope_scale_t gna_slope(const double slope,
const double in_scale, const double in_scale,

View File

@ -5,7 +5,8 @@
#include <ie_memcpy.h> #include <ie_memcpy.h>
#include "gna_data_types.hpp" #include "gna_data_types.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
/** /**
* @brief convert a tensor or its parts from NCHW to NHWC order on the base of transposition information. * @brief convert a tensor or its parts from NCHW to NHWC order on the base of transposition information.
@ -79,4 +80,5 @@ inline void ConvertTensorFromNCHWToNHWC(size_t precision, size_t rows, size_t co
} }
} }
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov
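The header above converts tensors between NCHW and NHWC order; a minimal standalone sketch of that index remapping follows. The nchw_to_nhwc helper and its signature are illustrative only and do not mirror the plugin's ConvertTensorFromNCHWToNHWC parameters.

#include <cstddef>
#include <vector>

// Copies a dense NCHW buffer into NHWC order; the element type is templated so
// the same remapping works for any precision.
template <typename T>
std::vector<T> nchw_to_nhwc(const std::vector<T>& src, std::size_t N, std::size_t C, std::size_t H, std::size_t W) {
    std::vector<T> dst(src.size());
    for (std::size_t n = 0; n < N; ++n)
        for (std::size_t c = 0; c < C; ++c)
            for (std::size_t h = 0; h < H; ++h)
                for (std::size_t w = 0; w < W; ++w)
                    // source index in NCHW, destination index in NHWC
                    dst[((n * H + h) * W + w) * C + c] = src[((n * C + c) * H + h) * W + w];
    return dst;
}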

View File

@ -9,7 +9,9 @@
#include <string> #include <string>
#include "gna_graph_tools.hpp" #include "gna_graph_tools.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
/** /**
* @brief implements upstream search for BFS routine * @brief implements upstream search for BFS routine
*/ */
@ -113,5 +115,5 @@ inline UpstreamLayersContainer make_upstream_order(InferenceEngine::CNNLayer* or
return fusedCnt; return fusedCnt;
} }
} // namespace intel_gna
} // namespace GNAPluginNS } // namespace ov

View File

@ -9,7 +9,9 @@
#include <legacy/ie_layers.h> #include <legacy/ie_layers.h>
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
class GNAConcatLayer { class GNAConcatLayer {
InferenceEngine::CNNLayerPtr concatLayer; InferenceEngine::CNNLayerPtr concatLayer;
@ -46,4 +48,6 @@ public:
std::vector<ConcatConnectedLayerInfo> concatInputLayers; std::vector<ConcatConnectedLayerInfo> concatInputLayers;
}; };
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -14,8 +14,10 @@
#include "gna_graph_tools.hpp" #include "gna_graph_tools.hpp"
#include "log/debug.hpp" #include "log/debug.hpp"
namespace GNAPluginNS { namespace ov {
namespace GNAConvolutionLayer { namespace intel_gna {
namespace gna_convolution_layer {
bool should_transpose_h_w(const uint32_t in_height, bool should_transpose_h_w(const uint32_t in_height,
const uint32_t kernel_height, const uint32_t kernel_height,
const uint32_t in_channels, const uint32_t in_channels,
@ -23,9 +25,13 @@ bool should_transpose_h_w(const uint32_t in_height,
return in_height == kernel_height && in_channels == 1 && stride_height == 1; return in_height == kernel_height && in_channels == 1 && stride_height == 1;
} }
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t in_channels, bool isMappableFrom2DTo1D(const uint32_t inHeight,
const uint32_t kernelHeight, const uint32_t kernelWidth, const uint32_t inWidth,
const uint32_t strideHeight, const uint32_t strideWidth) { const uint32_t in_channels,
const uint32_t kernelHeight,
const uint32_t kernelWidth,
const uint32_t strideHeight,
const uint32_t strideWidth) {
if (inHeight <= 1 || inWidth <= 1) { if (inHeight <= 1 || inWidth <= 1) {
// Mapping not needed since input is already 1D // Mapping not needed since input is already 1D
return false; return false;
@ -34,8 +40,11 @@ bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const
should_transpose_h_w(inHeight, kernelHeight, in_channels, strideHeight); should_transpose_h_w(inHeight, kernelHeight, in_channels, strideHeight);
} }
bool is3DInputOr2DKernel(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth, bool is3DInputOr2DKernel(const uint32_t inHeight,
const uint32_t kernelHeight, const uint32_t kernelWidth) { const uint32_t inWidth,
const uint32_t inDepth,
const uint32_t kernelHeight,
const uint32_t kernelWidth) {
return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1); return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1);
} }
@ -46,18 +55,27 @@ double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
// for kernelSize >= 14 -> 1.7 // for kernelSize >= 14 -> 1.7
// for kernelSize >= 9 -> 1.3 // for kernelSize >= 9 -> 1.3
// for kernelSize in {7, 8} -> 1.2 // for kernelSize in {7, 8} -> 1.2
const std::vector< KRT > reducers{ {49, 3.0}, {36, 2.6}, {21, 2.3}, {14, 1.7}, {9, 1.3}, {7, 1.2} }; const std::vector<KRT> reducers{{49, 3.0}, {36, 2.6}, {21, 2.3}, {14, 1.7}, {9, 1.3}, {7, 1.2}};
auto reducer = 1.0; auto reducer = 1.0;
const auto inDepth = InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::C); const auto inDepth =
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
const auto inHeight = const auto inHeight =
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::H); InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
const auto inWidth = const auto inWidth =
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::W); InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
if (is3DInputOr2DKernel(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) && if (is3DInputOr2DKernel(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
!isMappableFrom2DTo1D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x, conv._stride_y, conv._stride_x)) { !isMappableFrom2DTo1D(inHeight,
inWidth,
inDepth,
conv._kernel_y,
conv._kernel_x,
conv._stride_y,
conv._stride_x)) {
const auto kernelSize = conv._kernel_x * conv._kernel_y; const auto kernelSize = conv._kernel_x * conv._kernel_y;
auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize, auto r =
[](const KRT& l, const KRT::first_type& r) {return l.first > r; }); std::lower_bound(reducers.begin(), reducers.end(), kernelSize, [](const KRT& l, const KRT::first_type& r) {
return l.first > r;
});
if (r != reducers.end()) if (r != reducers.end())
reducer = r->second; reducer = r->second;
} }
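A standalone sketch of the reducer lookup reformatted above: the table is sorted by descending kernel-size threshold, so std::lower_bound with a "greater than" comparator returns the first entry whose threshold the kernel reaches. The function and main are illustrative only, not part of the plugin.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

using KRT = std::pair<uint32_t, double>;  // {minimal kernel size, reducer}

double reducer_for_kernel(uint32_t kernelSize) {
    static const std::vector<KRT> reducers{{49, 3.0}, {36, 2.6}, {21, 2.3}, {14, 1.7}, {9, 1.3}, {7, 1.2}};
    // First entry with threshold <= kernelSize; the range is partitioned by "first > kernelSize".
    auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
                              [](const KRT& l, const KRT::first_type& rhs) { return l.first > rhs; });
    return r != reducers.end() ? r->second : 1.0;
}

int main() {
    std::cout << reducer_for_kernel(6) << "\n";   // 1.0: below every threshold
    std::cout << reducer_for_kernel(9) << "\n";   // 1.3
    std::cout << reducer_for_kernel(25) << "\n";  // 2.3
    std::cout << reducer_for_kernel(49) << "\n";  // 3.0
}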
@ -80,7 +98,8 @@ uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint3
if (window > in || window == 0 || stride == 0) { if (window > in || window == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, window, stride) = (" << in << "," << window << "," << stride << ")"; THROW_GNA_EXCEPTION << "Invalid (input, window, stride) = (" << in << "," << window << "," << stride << ")";
} }
if (window == in) return 1; if (window == in)
return 1;
return (in - window - 1) / stride + 2; return (in - window - 1) / stride + 2;
} }
@ -94,5 +113,6 @@ uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride) {
return (in - 1) / stride + 1; return (in - 1) / stride + 1;
} }
} // namespace GNAConvolutionLayer } // namespace gna_convolution_layer
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov
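The pooling arithmetic reformatted above fits in a few lines; here is a standalone sketch of the same formula, with main added purely for illustration.

#include <cstdint>
#include <iostream>
#include <stdexcept>

uint32_t output_from_pooling(uint32_t in, uint32_t window, uint32_t stride) {
    if (window > in || window == 0 || stride == 0)
        throw std::invalid_argument("invalid (input, window, stride)");
    if (window == in)
        return 1;
    // Integer form of ceil((in - window) / stride) + 1, i.e. ceil-mode pooling.
    return (in - window - 1) / stride + 2;
}

int main() {
    std::cout << output_from_pooling(10, 3, 2) << "\n";  // 5
    std::cout << output_from_pooling(8, 2, 2) << "\n";   // 4
    std::cout << output_from_pooling(7, 7, 3) << "\n";   // 1
}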

View File

@ -8,20 +8,28 @@
#include <legacy/ie_layers.h> #include <legacy/ie_layers.h>
namespace GNAPluginNS { namespace ov {
namespace GNAConvolutionLayer { namespace intel_gna {
namespace gna_convolution_layer {
bool should_transpose_h_w(const uint32_t in_height, bool should_transpose_h_w(const uint32_t in_height,
const uint32_t kernel_height, const uint32_t kernel_height,
const uint32_t in_channels, const uint32_t in_channels,
const uint32_t stride_height); const uint32_t stride_height);
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inChannels, bool isMappableFrom2DTo1D(const uint32_t inHeight,
const uint32_t kernelHeight, const uint32_t kernelWidth, const uint32_t inWidth,
const uint32_t strideHeight, const uint32_t strideWidth); const uint32_t inChannels,
const uint32_t kernelHeight,
const uint32_t kernelWidth,
const uint32_t strideHeight,
const uint32_t strideWidth);
bool is3DInputOr2DKernel(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth, bool is3DInputOr2DKernel(const uint32_t inHeight,
const uint32_t kernelHeight, const uint32_t kernelWidth); const uint32_t inWidth,
const uint32_t inDepth,
const uint32_t kernelHeight,
const uint32_t kernelWidth);
double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv); double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv);
@ -31,5 +39,6 @@ uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint3
uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride); uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride);
} // namespace GNAConvolutionLayer } // namespace gna_convolution_layer
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -4,7 +4,9 @@
#pragma once #pragma once
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
/** /**
* GNA primitive created in sorting order for this copy layer * GNA primitive created in sorting order for this copy layer
*/ */
@ -14,4 +16,5 @@ static constexpr auto CopyLayerName = "Copy";
*/ */
static constexpr auto DelayedCopyLayerName = "DelayedCopy"; static constexpr auto DelayedCopyLayerName = "DelayedCopy";
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -7,9 +7,8 @@
#include "log/log.hpp" #include "log/log.hpp"
#include "log/debug.hpp" #include "log/debug.hpp"
using namespace ov::intel_gna; namespace ov {
namespace intel_gna {
namespace GNAPluginNS {
SimpleCrop get_crop_params(const std::vector<int32_t>& axis_in, SimpleCrop get_crop_params(const std::vector<int32_t>& axis_in,
const std::vector<int32_t>& offset_in, const std::vector<int32_t>& offset_in,
@ -57,4 +56,5 @@ SimpleCrop GetCropParams(InferenceEngine::CropLayer* cropLayer) {
return out_val; return out_val;
} }
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -8,7 +8,9 @@
#include <cstdint> #include <cstdint>
#include <vector> #include <vector>
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
class GNACropLayer { class GNACropLayer {
InferenceEngine::CNNLayerPtr cropLayer; InferenceEngine::CNNLayerPtr cropLayer;
@ -41,4 +43,5 @@ SimpleCrop get_crop_params(const std::vector<int32_t>& axis_in,
SimpleCrop GetCropParams(InferenceEngine::CropLayer* cropLayer); SimpleCrop GetCropParams(InferenceEngine::CropLayer* cropLayer);
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -12,12 +12,14 @@
using ov::intel_gna::frontend::make_fp32_blob; using ov::intel_gna::frontend::make_fp32_blob;
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
class GNAFakeQuantizeLayer { class GNAFakeQuantizeLayer {
InferenceEngine::CNNLayerPtr fqLayer; InferenceEngine::CNNLayerPtr fqLayer;
public :
GNAFakeQuantizeLayer(InferenceEngine::CNNLayerPtr fqLayer) public:
: fqLayer(fqLayer) { GNAFakeQuantizeLayer(InferenceEngine::CNNLayerPtr fqLayer) : fqLayer(fqLayer) {
if (!LayerInfo(fqLayer).isFakeQuantize()) { if (!LayerInfo(fqLayer).isFakeQuantize()) {
THROW_GNA_LAYER_EXCEPTION(fqLayer) << "cannot parse as fake quantize"; THROW_GNA_LAYER_EXCEPTION(fqLayer) << "cannot parse as fake quantize";
} }
@ -55,11 +57,12 @@ class GNAFakeQuantizeLayer {
* @brief Retrieve input blob for FQ layer that connected to const layer * @brief Retrieve input blob for FQ layer that connected to const layer
*/ */
InferenceEngine::Blob::Ptr getConstInputData() const { InferenceEngine::Blob::Ptr getConstInputData() const {
return LayerUtils::getParamFromInputAsBlob(fqLayer, 0); return layer_utils::getParamFromInputAsBlob(fqLayer, 0);
} }
/** /**
* @brief Fake quantize has 5 input layers, while 4 of them always constant layer, and 1 might be a tensor - connection * @brief Fake quantize has 5 input layers, while 4 of them always constant layer, and 1 might be a tensor -
* connection
*/ */
InferenceEngine::CNNLayerPtr getInputLayer() const { InferenceEngine::CNNLayerPtr getInputLayer() const {
return getInputLayerAt(fqLayer, 0); return getInputLayerAt(fqLayer, 0);
@ -77,24 +80,24 @@ class GNAFakeQuantizeLayer {
return getRange(fqLayer, 3); return getRange(fqLayer, 3);
} }
operator InferenceEngine::CNNLayerPtr () const { operator InferenceEngine::CNNLayerPtr() const {
return fqLayer; return fqLayer;
} }
InferenceEngine::CNNLayerPtr operator -> () const { InferenceEngine::CNNLayerPtr operator->() const {
return fqLayer; return fqLayer;
} }
InferenceEngine::CNNLayerPtr operator * () const { InferenceEngine::CNNLayerPtr operator*() const {
return fqLayer; return fqLayer;
} }
protected :
protected:
static std::pair<std::vector<float>, std::vector<float>> getRange(InferenceEngine::CNNLayerPtr input, size_t idx) { static std::pair<std::vector<float>, std::vector<float>> getRange(InferenceEngine::CNNLayerPtr input, size_t idx) {
auto shape = getShapeForRange(input, idx); auto shape = getShapeForRange(input, idx);
auto rangeSize = InferenceEngine::details::product(shape.begin(), shape.end()); auto rangeSize = InferenceEngine::details::product(shape.begin(), shape.end());
auto dataMin = LayerUtils::getParamFromInputAsBlob(input, idx); auto dataMin = layer_utils::getParamFromInputAsBlob(input, idx);
auto dataMax = LayerUtils::getParamFromInputAsBlob(input, idx + 1); auto dataMax = layer_utils::getParamFromInputAsBlob(input, idx + 1);
std::vector<float> minValues(rangeSize), maxValues(rangeSize); std::vector<float> minValues(rangeSize), maxValues(rangeSize);
switch (dataMin->getTensorDesc().getPrecision()) { switch (dataMin->getTensorDesc().getPrecision()) {
case InferenceEngine::Precision::FP32: { case InferenceEngine::Precision::FP32: {
@ -120,7 +123,7 @@ class GNAFakeQuantizeLayer {
} }
static float* getParamFromInputAsFloats(InferenceEngine::CNNLayerPtr input, size_t idx) { static float* getParamFromInputAsFloats(InferenceEngine::CNNLayerPtr input, size_t idx) {
auto data = LayerUtils::getParamFromInputAsBlob(input, idx); auto data = layer_utils::getParamFromInputAsBlob(input, idx);
if (data->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP32) { if (data->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP32) {
THROW_GNA_LAYER_EXCEPTION(input) << "cannot cast custom blob to type FP32, since it is of type: " THROW_GNA_LAYER_EXCEPTION(input) << "cannot cast custom blob to type FP32, since it is of type: "
<< data->getTensorDesc().getPrecision(); << data->getTensorDesc().getPrecision();
@ -129,7 +132,7 @@ class GNAFakeQuantizeLayer {
} }
static InferenceEngine::SizeVector getShapeFromInput(InferenceEngine::CNNLayerPtr input, size_t idx) { static InferenceEngine::SizeVector getShapeFromInput(InferenceEngine::CNNLayerPtr input, size_t idx) {
auto data = LayerUtils::getParamFromInputAsBlob(input, idx); auto data = layer_utils::getParamFromInputAsBlob(input, idx);
return data->getTensorDesc().getDims(); return data->getTensorDesc().getDims();
} }
@ -139,13 +142,13 @@ class GNAFakeQuantizeLayer {
} }
auto iLayerData = input->insData[idx].lock(); auto iLayerData = input->insData[idx].lock();
if (!iLayerData) { if (!iLayerData) {
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx THROW_GNA_LAYER_EXCEPTION(input)
<< ", input: cannot dereference data weak-pointer"; << "cannot get data from " << idx << ", input: cannot dereference data weak-pointer";
} }
auto iLayer = getCreatorLayer(iLayerData).lock(); auto iLayer = getCreatorLayer(iLayerData).lock();
if (!iLayer) { if (!iLayer) {
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx THROW_GNA_LAYER_EXCEPTION(input)
<< ", input: cannot dereference creator layer weak-pointer"; << "cannot get data from " << idx << ", input: cannot dereference creator layer weak-pointer";
} }
return iLayer; return iLayer;
} }
@ -164,4 +167,6 @@ class GNAFakeQuantizeLayer {
return lowShape; return lowShape;
} }
}; };
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -6,8 +6,10 @@
#include "gna_layer_info.hpp" #include "gna_layer_info.hpp"
namespace GNAPluginNS { namespace ov {
namespace LayerUtils { namespace intel_gna {
namespace layer_utils {
/** /**
* @brief retrievs blob from const layer connected to certain layer * @brief retrievs blob from const layer connected to certain layer
* @param input * @param input
@ -38,5 +40,7 @@ inline InferenceEngine::Blob::Ptr getParamFromInputAsBlob(InferenceEngine::CNNLa
return iLayer->blobs["custom"]; return iLayer->blobs["custom"];
} }
} // namespace LayerUtils
} // namespace GNAPluginNS } // namespace layer_utils
} // namespace intel_gna
} // namespace ov

View File

@ -10,7 +10,7 @@
#include <legacy/ie_layers.h> #include <legacy/ie_layers.h>
#include "caseless.hpp" #include "caseless.hpp"
#include "ie_algorithm.hpp" #include "ie_algorithm.hpp"
#include "backend/gna_types.h" #include "backend/gna_types.hpp"
#include "gna_permute.hpp" #include "gna_permute.hpp"
#include "gna_lib_ver_selector.hpp" #include "gna_lib_ver_selector.hpp"
#include "gna_copy_layer.hpp" #include "gna_copy_layer.hpp"
@ -21,7 +21,8 @@
#include "backend/gna_limitations.hpp" #include "backend/gna_limitations.hpp"
#include "transformations/rt_info/gna_transpose_fusable.hpp" #include "transformations/rt_info/gna_transpose_fusable.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
/** /**
* @brief detecting of const pointer for dynamic cast operations * @brief detecting of const pointer for dynamic cast operations
@ -321,7 +322,7 @@ class LayerInfo {
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
auto inputsOrder = inputs->getTensorDesc().getDims(); auto inputsOrder = inputs->getTensorDesc().getDims();
return GNAPluginNS::isTrivialPermute(std::vector<int64_t>{begin(layerOrder), end(layerOrder)}, return permute::isTrivialPermute(std::vector<int64_t>{begin(layerOrder), end(layerOrder)},
inputsOrder); inputsOrder);
} }
bool isNonValuesChangable() const { bool isNonValuesChangable() const {
@ -356,7 +357,7 @@ class LayerInfo {
auto cropLayer = dynamic_cast<InferenceEngine::CropLayer *> (layer); auto cropLayer = dynamic_cast<InferenceEngine::CropLayer *> (layer);
if (cropLayer != nullptr && !cropLayer->offset.empty()) { if (cropLayer != nullptr && !cropLayer->offset.empty()) {
const auto crop_params = GetCropParams(cropLayer); const auto crop_params = GetCropParams(cropLayer);
return GNAPluginNS::GNALimitations::isCropAffinedOffset(crop_params.start_offset); return limitations::isCropAffinedOffset(crop_params.start_offset);
} }
return false; return false;
} }
@ -425,4 +426,5 @@ inline std::ostream & operator <<(std::ostream &os, const LayerInfo & info) {
return os; return os;
} }
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -8,10 +8,16 @@
#include "gna_layer_type.hpp" #include "gna_layer_type.hpp"
#include "gna_layer_info.hpp" #include "gna_layer_info.hpp"
GNAPluginNS::LayerType GNAPluginNS::LayerTypeFromStr(const std::string &str) { namespace ov {
namespace intel_gna {
LayerType LayerTypeFromStr(const std::string& str) {
auto it = LayerNameToType.find(str); auto it = LayerNameToType.find(str);
if (it != LayerNameToType.end()) if (it != LayerNameToType.end())
return it->second; return it->second;
else else
return LayerType::NO_TYPE; return LayerType::NO_TYPE;
} }
} // namespace intel_gna
} // namespace ov

View File

@ -9,9 +9,11 @@
#include <caseless.hpp> #include <caseless.hpp>
#include "backend/dnn_types.h" #include "backend/dnn_types.hpp"
namespace ov {
namespace intel_gna {
namespace GNAPluginNS {
enum class LayerType { enum class LayerType {
Input, Input,
Convolution, Convolution,
@ -54,7 +56,7 @@ enum class LayerType {
NO_TYPE NO_TYPE
}; };
static const InferenceEngine::details::caseless_map<std::string, GNAPluginNS::LayerType> LayerNameToType = { static const InferenceEngine::details::caseless_map<std::string, LayerType> LayerNameToType = {
{ "Input" , LayerType::Input }, { "Input" , LayerType::Input },
{ "Convolution" , LayerType::Convolution }, { "Convolution" , LayerType::Convolution },
{ "ReLU" , LayerType::ReLU }, { "ReLU" , LayerType::ReLU },
@ -94,5 +96,7 @@ static const InferenceEngine::details::caseless_map<std::string, GNAPluginNS::La
{"Gemm", LayerType::Gemm}, {"Gemm", LayerType::Gemm},
}; };
GNAPluginNS::LayerType LayerTypeFromStr(const std::string &str); LayerType LayerTypeFromStr(const std::string &str);
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -7,7 +7,9 @@
#include "legacy/ie_layers.h" #include "legacy/ie_layers.h"
#include "debug.h" #include "debug.h"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
/** /**
* maps type of connection to input and output layers also stores gna_pointer for alloc request * maps type of connection to input and output layers also stores gna_pointer for alloc request
*/ */
@ -63,4 +65,6 @@ public:
*/ */
float scale_factor = 1.0f; float scale_factor = 1.0f;
}; };
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -9,7 +9,10 @@
#include "ie_common.h" #include "ie_common.h"
#include "log/debug.hpp" #include "log/debug.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
namespace permute {
template <class T> template <class T>
class PermuteSequence { class PermuteSequence {
public: public:
@ -20,14 +23,14 @@ private:
cnt_type permutes; cnt_type permutes;
public: public:
explicit PermuteSequence(std::vector<T> && orderVecIn) : orderVec(std::move(orderVecIn)) { explicit PermuteSequence(std::vector<T>&& orderVecIn) : orderVec(std::move(orderVecIn)) {
std::vector<bool> counter(orderVec.size()); std::vector<bool> counter(orderVec.size());
for (auto && x : this->orderVec) { for (auto&& x : this->orderVec) {
if (x < 0) { if (x < 0) {
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be >= 0"; THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be >= 0";
} }
if (x >= counter.size()) { if (x >= counter.size()) {
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be < "<< counter.size(); THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be < " << counter.size();
} }
if (counter[x]) { if (counter[x]) {
THROW_GNA_EXCEPTION << "invalid order: element " << x << " present more than once"; THROW_GNA_EXCEPTION << "invalid order: element " << x << " present more than once";
@ -65,13 +68,13 @@ public:
i++; i++;
} }
for (auto && cycle : permuteCycles) { for (auto&& cycle : permuteCycles) {
for (int i = 0; i + 1 < cycle.size(); i++) { for (int i = 0; i + 1 < cycle.size(); i++) {
permutes.push_back(cycle[i]); permutes.push_back(cycle[i]);
} }
} }
} }
const cnt_type & cnt() const noexcept { const cnt_type& cnt() const noexcept {
return permutes; return permutes;
} }
}; };
@ -83,9 +86,9 @@ public:
*/ */
template <class Iterator> template <class Iterator>
inline typename PermuteSequence<typename std::iterator_traits<Iterator>::value_type>::cnt_type genPermutations( inline typename PermuteSequence<typename std::iterator_traits<Iterator>::value_type>::cnt_type genPermutations(
Iterator beg, Iterator en) { Iterator beg,
static_assert( Iterator en) {
std::is_same<std::random_access_iterator_tag, static_assert(std::is_same<std::random_access_iterator_tag,
typename std::iterator_traits<Iterator>::iterator_category>::value, typename std::iterator_traits<Iterator>::iterator_category>::value,
"The genPermutations() function only accepts random access iterators or raw pointers to an array.\n"); "The genPermutations() function only accepts random access iterators or raw pointers to an array.\n");
using value_type = typename std::iterator_traits<Iterator>::value_type; using value_type = typename std::iterator_traits<Iterator>::value_type;
@ -93,12 +96,12 @@ inline typename PermuteSequence<typename std::iterator_traits<Iterator>::value_t
for (; beg != en; beg++) { for (; beg != en; beg++) {
v.push_back(*beg); v.push_back(*beg);
} }
auto permute = PermuteSequence<value_type> (std::move(v)); auto permute = PermuteSequence<value_type>(std::move(v));
return permute.cnt(); return permute.cnt();
} }
template <class T> template <class T>
inline typename PermuteSequence<T>::cnt_type genPermutations(const std::initializer_list<T> & lst) { inline typename PermuteSequence<T>::cnt_type genPermutations(const std::initializer_list<T>& lst) {
return genPermutations(lst.begin(), lst.end()); return genPermutations(lst.begin(), lst.end());
} }
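PermuteSequence above decomposes an order vector into element swaps by walking its cycles; the following standalone sketch shows the same idea with illustrative names. Applying the emitted swaps left to right to an in-place buffer realizes out[i] = in[order[i]].

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

std::vector<std::pair<std::size_t, std::size_t>> permutation_to_swaps(const std::vector<std::size_t>& order) {
    std::vector<std::pair<std::size_t, std::size_t>> swaps;
    std::vector<bool> visited(order.size(), false);
    for (std::size_t start = 0; start < order.size(); ++start) {
        if (visited[start])
            continue;
        // Walk one cycle of the permutation and emit a swap per step.
        std::vector<std::size_t> cycle;
        for (std::size_t i = start; !visited[i]; i = order[i]) {
            visited[i] = true;
            cycle.push_back(i);
        }
        for (std::size_t i = 0; i + 1 < cycle.size(); ++i)
            swaps.emplace_back(cycle[i], cycle[i + 1]);
    }
    return swaps;
}

int main() {
    for (auto& s : permutation_to_swaps({1, 2, 0}))            // a single 3-cycle
        std::cout << s.first << " <-> " << s.second << "\n";   // prints 0 <-> 1, then 1 <-> 2
}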
@ -121,14 +124,12 @@ inline bool isTrivialPermute(const std::vector<int64_t> order, const std::vector
// cases when all permutations happened either between 1 and X shape where no other dims in between // cases when all permutations happened either between 1 and X shape where no other dims in between
auto transpose_seq = genPermutations(order.begin(), order.end()); auto transpose_seq = genPermutations(order.begin(), order.end());
auto input_order_transformed = input_shape; auto input_order_transformed = input_shape;
for (auto && transp : transpose_seq) { for (auto&& transp : transpose_seq) {
// check dims of transposed // check dims of transposed
if (input_order_transformed[transp.first] == 1 && if (input_order_transformed[transp.first] == 1 && input_order_transformed[transp.second] == 1) {
input_order_transformed[transp.second] == 1) {
return true; return true;
} }
if (input_order_transformed[transp.first] != 1 && if (input_order_transformed[transp.first] != 1 && input_order_transformed[transp.second] != 1) {
input_order_transformed[transp.second] != 1) {
return false; return false;
} }
// check dims in between // check dims in between
@ -143,4 +144,6 @@ inline bool isTrivialPermute(const std::vector<int64_t> order, const std::vector
return true; return true;
} }
} // namespace GNAPluginNS } // namespace permute
} // namespace intel_gna
} // namespace ov
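A tiny illustration of why a permute across a singleton dimension is "trivial" in the sense isTrivialPermute checks above: with a row-major {1, 4} buffer, the transposed {4, 1} view addresses exactly the same flat indices, so no data movement is needed. Purely illustrative code.

#include <array>
#include <cassert>

int main() {
    std::array<int, 4> buf{10, 20, 30, 40};
    // Element (0, j) of the 1x4 view and element (j, 0) of the 4x1 view share flat index j.
    for (int j = 0; j < 4; ++j)
        assert(buf[0 * 4 + j] == buf[j * 1 + 0]);
    return 0;
}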

View File

@ -9,7 +9,9 @@
#include <legacy/ie_layers.h> #include <legacy/ie_layers.h>
#include "backend/gna_limitations.hpp" #include "backend/gna_limitations.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
// Split, Slice // Split, Slice
class GNASplitLayer { class GNASplitLayer {
InferenceEngine::CNNLayerPtr splitLayer; InferenceEngine::CNNLayerPtr splitLayer;
@ -48,7 +50,7 @@ public:
}; };
// @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size // @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size
static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = GNALimitations::inputByteAlignment) { static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = limitations::inputByteAlignment) {
std::vector<uint32_t> splitSizes; std::vector<uint32_t> splitSizes;
uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment); uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment);
uint32_t usedSize = 0; uint32_t usedSize = 0;
@ -68,7 +70,7 @@ static std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
IE_ASSERT(firstValuableDim != std::end(dims)); IE_ASSERT(firstValuableDim != std::end(dims));
auto splittedElementsSize = *firstValuableDim; auto splittedElementsSize = *firstValuableDim;
auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim); auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim);
auto alignment = GNALimitations::inputByteAlignment; auto alignment = limitations::inputByteAlignment;
// Split output size should be multiple by 64 to avoid align filters insertion, // Split output size should be multiple by 64 to avoid align filters insertion,
// but we need to check if our input size to split exceeds 64; if not we can always // but we need to check if our input size to split exceeds 64; if not we can always
@ -81,8 +83,9 @@ static std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
} }
} }
splitSizes = GetAlignedSplitSizes(splittedElementsSize, splitSizes = GetAlignedSplitSizes(splittedElementsSize,
GNALimitations::bufferMaxSize * splittedElementsSize / totalElementsSize, alignment); limitations::bufferMaxSize * splittedElementsSize / totalElementsSize, alignment);
return {splittedDimIx, splitSizes}; return {splittedDimIx, splitSizes};
} }
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov
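A minimal sketch of the aligned-split idea behind GetAlignedSplitSizes above, assuming the loop simply carves off chunks no larger than the alignment-rounded maximum; names and the exact loop are illustrative, not copied from the plugin.

#include <algorithm>
#include <cstdint>
#include <vector>

std::vector<uint32_t> aligned_split_sizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = 64) {
    std::vector<uint32_t> sizes;
    const uint32_t maxAligned = std::max(maxSplitSize - maxSplitSize % alignment, alignment);
    for (uint32_t used = 0; used < totalSize;) {
        const uint32_t chunk = std::min(totalSize - used, maxAligned);
        sizes.push_back(chunk);
        used += chunk;
    }
    return sizes;
}

For example, aligned_split_sizes(150, 70) yields {64, 64, 22}: the cap of 70 is rounded down to the 64-element alignment and the remainder forms the final chunk.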

View File

@ -11,7 +11,9 @@
#include <caseless.hpp> #include <caseless.hpp>
#include "gna_graph_compiler.hpp" #include "gna_graph_compiler.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
class LayersBuilder { class LayersBuilder {
using CreatorFnc = std::function<void(GNAGraphCompiler*, InferenceEngine::CNNLayerPtr)>; using CreatorFnc = std::function<void(GNAGraphCompiler*, InferenceEngine::CNNLayerPtr)>;
@ -26,4 +28,6 @@ public:
return LayerBuilder; return LayerBuilder;
} }
}; };
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov

View File

@ -12,7 +12,8 @@
#include "gna_device.hpp" #include "gna_device.hpp"
#include "memory/gna_mem_requests.hpp" #include "memory/gna_mem_requests.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
namespace memory { namespace memory {
/** /**
* wrap GNA interface into c++ allocator friendly one * wrap GNA interface into c++ allocator friendly one
@ -35,9 +36,11 @@ class GNAAllocator {
void deallocate(uint8_t *p, std::size_t n) { void deallocate(uint8_t *p, std::size_t n) {
_device->free(p); _device->free(p);
} }
void setTag(void* memPtr, GNAPluginNS::memory::rRegion tagValue) { void setTag(void* memPtr, memory::rRegion tagValue) {
_device->tagMemoryRegion(memPtr, tagValue); _device->tagMemoryRegion(memPtr, tagValue);
} }
}; };
} // namespace memory } // namespace memory
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -9,7 +9,8 @@
#include "log/debug.hpp" #include "log/debug.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
namespace memory { namespace memory {
/** /**
@ -47,4 +48,5 @@ inline std::string rRegionToStr(const rRegion region) {
} }
} // namespace memory } // namespace memory
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -10,7 +10,8 @@
#include "gna_mem_regions.hpp" #include "gna_mem_regions.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
namespace memory { namespace memory {
enum rType : uint8_t { enum rType : uint8_t {
@ -126,5 +127,7 @@ struct MemRequest {
_initializer(initializer) { _initializer(initializer) {
} }
}; };
} // namespace memory } // namespace memory
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -19,7 +19,8 @@
using namespace ov::intel_gna; using namespace ov::intel_gna;
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
namespace memory { namespace memory {
/** /**
@ -200,7 +201,7 @@ public:
} }
template<class T> template<class T>
void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) { void iterate_binded(memory::MemRequest & reference, const T & visitor) {
for (auto &re : _mem_requests) { for (auto &re : _mem_requests) {
if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) { if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) {
log::trace() << " [binded=" << re._type << ", ptr=" << re._ptr_out <<"]\n"; log::trace() << " [binded=" << re._type << ", ptr=" << re._ptr_out <<"]\n";
@ -284,4 +285,5 @@ public:
}; };
} // namespace memory } // namespace memory
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -26,12 +26,13 @@
#include <iomanip> #include <iomanip>
#endif #endif
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
namespace memory { namespace memory {
class GNAFloatAllocator : public std::allocator < uint8_t > { class GNAFloatAllocator : public std::allocator < uint8_t > {
public: public:
void setTag(void*, GNAPluginNS::memory::rRegion) { void setTag(void*, memory::rRegion) {
} }
}; };
@ -154,7 +155,7 @@ protected:
} }
template<class T> template<class T>
void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) { void iterate_binded(memory::MemRequest & reference, const T & visitor) {
for (auto &re : getQueue(REGION_AUTO)->_mem_requests) { for (auto &re : getQueue(REGION_AUTO)->_mem_requests) {
if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) { if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) {
// log::trace() << " [binded=" << rTypeToStr(re._type) << ", ptr=" << re._ptr_out <<"]\n"; // log::trace() << " [binded=" << rTypeToStr(re._type) << ", ptr=" << re._ptr_out <<"]\n";
@ -291,4 +292,5 @@ protected:
}; };
} // namespace memory } // namespace memory
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -8,15 +8,15 @@
#include "ie_layouts.h" #include "ie_layouts.h"
#include "gna_graph_tools.hpp" #include "gna_graph_tools.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
namespace memory { namespace memory {
void GNAVariableState::Reset() { void GNAVariableState::Reset() {
state->Reset(); state->Reset();
} }
InferenceEngine::Precision GNAVariableState::getPrecision() const { InferenceEngine::Precision GNAVariableState::getPrecision() const {
InferenceEngine::Precision state_precision; InferenceEngine::Precision state_precision;
if (state->getInput()) { if (state->getInput()) {
@ -31,24 +31,25 @@ namespace memory {
state_precision = InferenceEngine::Precision::I16; state_precision = InferenceEngine::Precision::I16;
break; break;
default: default:
THROW_GNA_EXCEPTION << "Incorrect state element size " << element_size << THROW_GNA_EXCEPTION << "Incorrect state element size " << element_size
" to determine precision for VariableState " << name; << " to determine precision for VariableState " << name;
} }
} }
return state_precision; return state_precision;
} }
void GNAVariableState::SetState(const InferenceEngine::Blob::Ptr& newState) { void GNAVariableState::SetState(const InferenceEngine::Blob::Ptr& newState) {
IE_ASSERT(newState != nullptr); IE_ASSERT(newState != nullptr);
auto data_ptr = newState->cbuffer().as<void*>(); auto data_ptr = newState->cbuffer().as<void*>();
IE_ASSERT(data_ptr != nullptr); IE_ASSERT(data_ptr != nullptr);
auto data_size = newState->byteSize(); auto data_size = newState->byteSize();
auto data_elements = data_size / newState->element_size(); auto data_elements = data_size / newState->element_size();
if (ALIGN64(state->reserved_size) != ALIGN64((data_size / (newState->element_size() / state->elementSizeBytes())))) { if (ALIGN64(state->reserved_size) !=
THROW_GNA_EXCEPTION << "Failed to SetState. Sizes of new and old states do not match. (" ALIGN64((data_size / (newState->element_size() / state->elementSizeBytes())))) {
<< state->reserved_size << " != " << (newState->element_size() / state->elementSizeBytes()) << ")"; THROW_GNA_EXCEPTION << "Failed to SetState. Sizes of new and old states do not match. (" << state->reserved_size
<< " != " << (newState->element_size() / state->elementSizeBytes()) << ")";
} }
InferenceEngine::Precision state_precision = getPrecision(); InferenceEngine::Precision state_precision = getPrecision();
@ -69,26 +70,26 @@ namespace memory {
auto quantized = auto quantized =
InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput()); InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput());
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor; auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor;
GNAPluginNS::ConvertToInt16(static_cast<int16_t*>(state->gna_ptr), ConvertToInt16(static_cast<int16_t*>(state->gna_ptr),
newState->buffer().as<float*>(), newState->buffer().as<float*>(),
1, 1,
data_elements, data_elements,
scale_factor); scale_factor);
} else { } else {
THROW_GNA_EXCEPTION << "Failed to SetState for VariableState " << name THROW_GNA_EXCEPTION
<< "Failed to SetState for VariableState " << name
<< ". If old state precision is I16 only I16 and FP32 are allowed as new state precisions." << ". If old state precision is I16 only I16 and FP32 are allowed as new state precisions."
<< " Old state: " << state_precision << " New state: " << new_state_precision; << " Old state: " << state_precision << " New state: " << new_state_precision;
} }
break; break;
} }
default: default:
THROW_GNA_EXCEPTION << "Failed to SetState for VariableState " << name THROW_GNA_EXCEPTION << "Failed to SetState for VariableState " << name << ". Incorrect new/old precision pair"
<< ". Incorrect new/old precision pair"
<< " Old state: " << state_precision << " New state: " << new_state_precision; << " Old state: " << state_precision << " New state: " << new_state_precision;
} }
} }
InferenceEngine::Blob::CPtr GNAVariableState::GetState() const { InferenceEngine::Blob::CPtr GNAVariableState::GetState() const {
auto elements = state->reserved_size / state->elementSizeBytes(); auto elements = state->reserved_size / state->elementSizeBytes();
InferenceEngine::Precision state_precision = getPrecision(); InferenceEngine::Precision state_precision = getPrecision();
@ -97,8 +98,9 @@ namespace memory {
InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput()); InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput());
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor; auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor;
auto result_blob = make_blob_with_precision(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32, auto result_blob =
InferenceEngine::SizeVector({ 1, elements }), make_blob_with_precision(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32,
InferenceEngine::SizeVector({1, elements}),
InferenceEngine::NC)); InferenceEngine::NC));
result_blob->allocate(); result_blob->allocate();
@ -111,20 +113,22 @@ namespace memory {
return result_blob; return result_blob;
} else { } else {
auto result_blob = make_blob_with_precision(InferenceEngine::TensorDesc(state_precision, auto result_blob =
InferenceEngine::SizeVector({ 1, elements }), make_blob_with_precision(InferenceEngine::TensorDesc(state_precision,
InferenceEngine::SizeVector({1, elements}),
InferenceEngine::NC)); InferenceEngine::NC));
result_blob->allocate(); result_blob->allocate();
std::memcpy(result_blob->buffer(), state->gna_ptr, state->reserved_size); std::memcpy(result_blob->buffer(), state->gna_ptr, state->reserved_size);
return result_blob; return result_blob;
} }
} }
float GNAVariableState::GetScaleFactor() const { float GNAVariableState::GetScaleFactor() const {
auto quantized = auto quantized = InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput());
InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput());
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor; auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor;
return scale_factor; return scale_factor;
} }
} // namespace memory } // namespace memory
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov
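SetState above converts an FP32 state blob into I16 through a scale factor; a minimal standalone sketch of that conversion follows. The rounding mode and saturation are assumptions of this sketch, not necessarily what ConvertToInt16 does.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<int16_t> convert_to_int16(const std::vector<float>& src, float scale_factor) {
    std::vector<int16_t> dst(src.size());
    for (std::size_t i = 0; i < src.size(); ++i) {
        const float scaled = std::round(src[i] * scale_factor);
        // Saturate to the int16 range before narrowing.
        dst[i] = static_cast<int16_t>(std::min(std::max(scaled, -32768.0f), 32767.0f));
    }
    return dst;
}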

View File

@ -9,8 +9,10 @@
#include <cpp_interfaces/interface/ie_ivariable_state_internal.hpp> #include <cpp_interfaces/interface/ie_ivariable_state_internal.hpp>
#include "gna_plugin.hpp" #include "gna_plugin.hpp"
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
namespace memory { namespace memory {
class GNAVariableState : public InferenceEngine::IVariableStateInternal { class GNAVariableState : public InferenceEngine::IVariableStateInternal {
public: public:
GNAVariableState(std::string name, std::shared_ptr<GNAMemoryLayer> state) GNAVariableState(std::string name, std::shared_ptr<GNAMemoryLayer> state)
@ -33,5 +35,7 @@ private:
*/ */
InferenceEngine::Precision getPrecision() const; InferenceEngine::Precision getPrecision() const;
}; };
} // namespace memory } // namespace memory
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -7,7 +7,11 @@
#include <cstdint> #include <cstdint>
#include "log/debug.hpp" #include "log/debug.hpp"
int32_t GNAPluginNS::memory::MemoryOffset(void *ptr_target, void *ptr_base) { namespace ov {
namespace intel_gna {
namespace memory {
int32_t MemoryOffset(void* ptr_target, void* ptr_base) {
auto target = reinterpret_cast<uintptr_t>(ptr_target); auto target = reinterpret_cast<uintptr_t>(ptr_target);
auto base = reinterpret_cast<uintptr_t>(ptr_base); auto base = reinterpret_cast<uintptr_t>(ptr_base);
if (target == 0) { // handle NULL pointers separately if (target == 0) { // handle NULL pointers separately
@ -23,3 +27,6 @@ int32_t GNAPluginNS::memory::MemoryOffset(void *ptr_target, void *ptr_base) {
} }
} }
} // namespace memory
} // namespace intel_gna
} // namespace ov
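MemoryOffset above reduces two raw pointers to a 32-bit byte offset; a hedged standalone sketch of that arithmetic follows. The NULL handling and overflow checks here are assumptions of the sketch; only the uintptr_t subtraction mirrors the hunk.

#include <cstdint>
#include <stdexcept>

int32_t memory_offset(const void* ptr_target, const void* ptr_base) {
    const auto target = reinterpret_cast<uintptr_t>(ptr_target);
    const auto base = reinterpret_cast<uintptr_t>(ptr_base);
    if (target == 0)  // NULL target: this sketch simply reports offset 0
        return 0;
    if (target < base)
        throw std::runtime_error("target precedes base");
    const uintptr_t diff = target - base;
    if (diff > static_cast<uintptr_t>(INT32_MAX))
        throw std::runtime_error("offset does not fit into int32_t");
    return static_cast<int32_t>(diff);
}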

View File

@ -6,10 +6,12 @@
#include <cstdint> #include <cstdint>
namespace GNAPluginNS { namespace ov {
namespace intel_gna {
namespace memory { namespace memory {
int32_t MemoryOffset(void *ptr_target, void *ptr_base); int32_t MemoryOffset(void *ptr_target, void *ptr_base);
} // namespace memory } // namespace memory
} // namespace GNAPluginNS } // namespace intel_gna
} // namespace ov

View File

@ -84,7 +84,7 @@ static bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
if (std::dynamic_pointer_cast<ngraph::opset8::Split>(input_op) || std::dynamic_pointer_cast<ngraph::opset8::VariadicSplit>(input_op)) { if (std::dynamic_pointer_cast<ngraph::opset8::Split>(input_op) || std::dynamic_pointer_cast<ngraph::opset8::VariadicSplit>(input_op)) {
for (size_t index = 0; index < input_op_out_index; index++) { for (size_t index = 0; index < input_op_out_index; index++) {
size_t outputSize = ngraph::shape_size(input_op->get_output_shape(index)); size_t outputSize = ngraph::shape_size(input_op->get_output_shape(index));
offset += outputSize * GNAPluginNS::GNALimitations::bytesPerSplitElement; offset += outputSize * limitations::bytesPerSplitElement;
} }
} }
return (offset == ALIGN64(offset)); return (offset == ALIGN64(offset));
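The bytesPerSplitElement accumulation above feeds a simple 64-byte alignment test; here is a sketch of that test, assuming the usual round-up definition of ALIGN64.

#include <cstdint>

constexpr uint32_t align64(uint32_t x) { return ((x + 63) / 64) * 64; }
constexpr bool is_64_aligned(uint32_t offset) { return offset == align64(offset); }

static_assert(is_64_aligned(128), "two 64-byte outputs keep the next split aligned");
static_assert(!is_64_aligned(96), "a 96-byte preceding output breaks 64-byte alignment");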
@ -93,7 +93,7 @@ static bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
static bool is_crop_affined(std::shared_ptr<ngraph::Node> node) { static bool is_crop_affined(std::shared_ptr<ngraph::Node> node) {
auto crop = std::dynamic_pointer_cast<ngraph::op::CropIE>(node); auto crop = std::dynamic_pointer_cast<ngraph::op::CropIE>(node);
if (crop != nullptr && !crop->offset.empty()) { if (crop != nullptr && !crop->offset.empty()) {
return GNAPluginNS::GNALimitations::isCropAffinedOffset(crop->offset.back()); return limitations::isCropAffinedOffset(crop->offset.back());
} }
return false; return false;
} }
@ -117,7 +117,7 @@ static bool is_trivial_transpose(std::shared_ptr<ngraph::Node> node) {
auto input = transpose->input(0).get_source_output().get_node_shared_ptr(); auto input = transpose->input(0).get_source_output().get_node_shared_ptr();
auto input_order = transpose->get_input_shape(0); auto input_order = transpose->get_input_shape(0);
return GNAPluginNS::isTrivialPermute(node_order, input_order); return permute::isTrivialPermute(node_order, input_order);
} }
inline std::shared_ptr<ov::Node> get_prev_node_skipping_certain(const std::shared_ptr<ngraph::Node>& node, inline std::shared_ptr<ov::Node> get_prev_node_skipping_certain(const std::shared_ptr<ngraph::Node>& node,

View File

@ -24,7 +24,7 @@
#include <legacy/net_pass.h> #include <legacy/net_pass.h>
#include <layers/gna_copy_layer.hpp> #include <layers/gna_copy_layer.hpp>
#include "backend/dnn_types.h" #include "backend/dnn_types.hpp"
#include "log/debug.hpp" #include "log/debug.hpp"
#include "log/log.hpp" #include "log/log.hpp"
#include "frontend/quantization.hpp" #include "frontend/quantization.hpp"
@ -46,10 +46,12 @@
using namespace InferenceEngine; using namespace InferenceEngine;
using namespace InferenceEngine::details; using namespace InferenceEngine::details;
using namespace GNAPluginNS;
using namespace ov::intel_gna::frontend; using namespace ov::intel_gna::frontend;
using namespace ov::intel_gna::common; using namespace ov::intel_gna::common;
namespace ov {
namespace intel_gna {
#define pass_trace() log::debug() << "[" << getName() << "] " #define pass_trace() log::debug() << "[" << getName() << "] "
std::shared_ptr<IPassManager> BasePass::getPassManager() { std::shared_ptr<IPassManager> BasePass::getPassManager() {
@ -98,14 +100,14 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
}); });
IE_ASSERT(inputLayer != nullptr); IE_ASSERT(inputLayer != nullptr);
size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() : size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() :
Get2DReshapedData(nextLayer->outData[0], GNALimitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)->getDims()[1]; Get2DReshapedData(nextLayer->outData[0], limitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)->getDims()[1];
std::vector<float> weightsValues(weightsSize, fillValue); std::vector<float> weightsValues(weightsSize, fillValue);
IE_ASSERT(diagLayer != nullptr); IE_ASSERT(diagLayer != nullptr);
diagLayer->_weights = make_shared_blob<float>( diagLayer->_weights = make_shared_blob<float>(
TensorDesc( TensorDesc(
nextLayer->outData[0]->getTensorDesc().getPrecision(), nextLayer->outData[0]->getTensorDesc().getPrecision(),
SizeVector({weightsValues.size()}), SizeVector({weightsValues.size()}),
Layout::C)); InferenceEngine::Layout::C));
diagLayer->_weights->allocate(); diagLayer->_weights->allocate();
CopyVectorToBlob(diagLayer->_weights, weightsValues); CopyVectorToBlob(diagLayer->_weights, weightsValues);
auto dataPtr = std::make_shared<Data>(diagName, nextLayer->outData[0]->getTensorDesc()); auto dataPtr = std::make_shared<Data>(diagName, nextLayer->outData[0]->getTensorDesc());
@ -666,7 +668,7 @@ void RemovePermutationsNHWCToNCHWPass::run() {
} }
// HWC layout enum is used here as the only available in CNNNetwork for 3D vectors, // HWC layout enum is used here as the only available in CNNNetwork for 3D vectors,
// but the real layout is NCW and it's the one used in order vector later // but the real layout is NCW and it's the one used in order vector later
return dims_size == 4 ? Layout::NHWC : Layout::HWC; return dims_size == 4 ? InferenceEngine::Layout::NHWC : InferenceEngine::Layout::HWC;
}; };
auto setTransposedOrder = [getTransposedLayout](InferenceEngine::DataPtr data) { auto setTransposedOrder = [getTransposedLayout](InferenceEngine::DataPtr data) {
@ -677,13 +679,17 @@ void RemovePermutationsNHWCToNCHWPass::run() {
if (LayerInfo(current_layer).isConcat()) { if (LayerInfo(current_layer).isConcat()) {
auto concat_layer = dynamic_cast<InferenceEngine::ConcatLayer*> (current_layer.get()); auto concat_layer = dynamic_cast<InferenceEngine::ConcatLayer*> (current_layer.get());
auto dims_size = data->getDims().size(); auto dims_size = data->getDims().size();
concat_layer->_axis = (dims_size == 4 ? GetPermuteOrder(Layout::NHWC, Layout::NCHW) : concat_layer->_axis = (dims_size == 4 ? permute::GetPermuteOrder(InferenceEngine::Layout::NHWC,
InferenceEngine::Layout::NCHW)
:
std::vector<int32_t>{0, 2, 1})[concat_layer->_axis]; std::vector<int32_t>{0, 2, 1})[concat_layer->_axis];
} }
// NWC->NCW layouts are used here for order vector, see comments a few lines above // NWC->NCW layouts are used here for order vector, see comments a few lines above
auto dims = data->getDims(); auto dims = data->getDims();
auto order = dims.size() == 4 ? GetPermuteOrder(Layout::NCHW, Layout::NHWC) : auto order = dims.size() == 4
? permute::GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)
:
std::vector<int32_t>{0, 2, 1}; std::vector<int32_t>{0, 2, 1};
InferenceEngine::SizeVector new_dims; InferenceEngine::SizeVector new_dims;
for (int i = 0; i < dims.size(); ++i) { for (int i = 0; i < dims.size(); ++i) {
@ -1074,7 +1080,7 @@ void FlattenTrivialConcatPass::run() {
auto concatInput = getLayerByIndex(input_idx, concatLayer); auto concatInput = getLayerByIndex(input_idx, concatLayer);
auto tensor = InferenceEngine::TensorDesc(concatInput->getTensorDesc()); auto tensor = InferenceEngine::TensorDesc(concatInput->getTensorDesc());
tensor.reshape(SizeVector({1, total_sizes[input_idx]}), Layout::NC); tensor.reshape(SizeVector({1, total_sizes[input_idx]}), InferenceEngine::Layout::NC);
auto reshapeName = l->name + "_input_"+ std::to_string(input_idx) +"_reshape"; auto reshapeName = l->name + "_input_"+ std::to_string(input_idx) +"_reshape";
auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized); auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
@ -1091,7 +1097,7 @@ void FlattenTrivialConcatPass::run() {
auto total_size = std::accumulate(dims.begin(), dims.end(), size_t(1), std::multiplies<size_t>()); auto total_size = std::accumulate(dims.begin(), dims.end(), size_t(1), std::multiplies<size_t>());
auto new_tensor = output->getTensorDesc(); auto new_tensor = output->getTensorDesc();
new_tensor.reshape(SizeVector({1, total_size}), Layout::NC); new_tensor.reshape(SizeVector({1, total_size}), InferenceEngine::Layout::NC);
auto new_output = CNNReplaceDataWithChangedTensorDescription(output, new_tensor); auto new_output = CNNReplaceDataWithChangedTensorDescription(output, new_tensor);
log::debug() << "\tChanged " << output->getName() << " dims to 2D" << std::endl; log::debug() << "\tChanged " << output->getName() << " dims to 2D" << std::endl;
@ -1197,7 +1203,7 @@ void InsertConcatAligningFilterPass::run() {
TensorDesc( TensorDesc(
concatInput->getTensorDesc().getPrecision(), concatInput->getTensorDesc().getPrecision(),
SizeVector({filterWeights.size()}), SizeVector({filterWeights.size()}),
Layout::C)); InferenceEngine::Layout::C));
concatAligningFilter->_weights->allocate(); concatAligningFilter->_weights->allocate();
if (!concatAligningFilter->_weights->buffer().as<float*>()) { if (!concatAligningFilter->_weights->buffer().as<float*>()) {
THROW_GNA_EXCEPTION << "Failed to allocate weights of size " << filterWeights.size() << " for " << filterName; THROW_GNA_EXCEPTION << "Failed to allocate weights of size " << filterWeights.size() << " for " << filterName;
@ -1208,10 +1214,10 @@ void InsertConcatAligningFilterPass::run() {
// modifying output rows to be used - to avoid modification to original concat we are store num of elements in params // modifying output rows to be used - to avoid modification to original concat we are store num of elements in params
dims[1] = num_rows_out; dims[1] = num_rows_out;
if ((concatInput->getLayout() == Layout::NC && dims[0] > 8) || if ((concatInput->getLayout() == InferenceEngine::Layout::NC && dims[0] > 8) ||
(concatInput->getLayout() == Layout::CN && dims[1] > 8)) { (concatInput->getLayout() == InferenceEngine::Layout::CN && dims[1] > 8)) {
THROW_GNA_EXCEPTION << "unsupported batch number '" << THROW_GNA_EXCEPTION << "unsupported batch number '" << (concatInput->getLayout() == InferenceEngine::Layout::NC ? dims[0] : dims[1])
(concatInput->getLayout() == Layout::NC ? dims[0] : dims[1]) << <<
"' in layer '" << concatLayer->name << "'"; "' in layer '" << concatLayer->name << "'";
} }
@ -1312,8 +1318,7 @@ void ReorderConcatInputsPass::run() {
auto linkOutData = std::make_shared<Data>(linkName, auto linkOutData = std::make_shared<Data>(linkName,
TensorDesc(Precision::FP32, TensorDesc(Precision::FP32,
SizeVector({ 1 }), SizeVector({ 1 }), InferenceEngine::Layout::C));
Layout::C));
getCreatorLayer(linkOutData) = link; getCreatorLayer(linkOutData) = link;
link->outData.push_back(linkOutData); link->outData.push_back(linkOutData);
@ -1340,7 +1345,7 @@ void InsertSplitAligningFilterPass::run() {
} }
auto outFunctionalLayers = CNNNetGetAllNextLayersSkipCertain(l, -1, [](CNNLayerPtr next_layer) { auto outFunctionalLayers = CNNNetGetAllNextLayersSkipCertain(l, -1, [](CNNLayerPtr next_layer) {
return GNAPluginNS::LayerInfo(next_layer).isNonFunctional(); return LayerInfo(next_layer).isNonFunctional();
}); });
size_t padding = 0; size_t padding = 0;
for (auto &&outFunctionalLayer : outFunctionalLayers) { for (auto &&outFunctionalLayer : outFunctionalLayers) {
@ -1387,16 +1392,16 @@ void InsertSplitAligningFilterPass::run() {
IE_ASSERT(filterLayer != nullptr); IE_ASSERT(filterLayer != nullptr);
// encodes offset to beginning of split layer input // encodes offset to beginning of split layer input
filterLayer->params["offset"] = std::to_string(aligned64_offset / GNALimitations::bytesPerSplitElement); filterLayer->params["offset"] = std::to_string(aligned64_offset / limitations::bytesPerSplitElement);
auto dims = splitOutput->getTensorDesc().getDims(); auto dims = splitOutput->getTensorDesc().getDims();
if (dims.size() > 3) { if (dims.size() > 3) {
THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size(); THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
} }
const auto offsetOfUnalignment = (currentOffset - aligned64_offset) / GNALimitations::bytesPerSplitElement; const auto offsetOfUnalignment = (currentOffset - aligned64_offset) / limitations::bytesPerSplitElement;
// TODO consider to use a different number of filters do decrese the number of trailing zeros (additionalPaddingOfFilter) // TODO consider to use a different number of filters do decrese the number of trailing zeros (additionalPaddingOfFilter)
const auto numberOfFilters = GNALimitations::convMinFiltersNum; const auto numberOfFilters = limitations::convMinFiltersNum;
-const auto filterSize = ALIGN(offsetOfUnalignment + numberOfFilters, GNALimitations::convFilterSizeDivider);
+const auto filterSize = ALIGN(offsetOfUnalignment + numberOfFilters, limitations::convFilterSizeDivider);
// filterWeights: numberOfFilters X (offsetOfUnalignment + additionalPaddingOfFilter + numberOfFilters)
// offsetOfUnalignment - the leading zeros in the filter
@ -1424,7 +1429,7 @@ void InsertSplitAligningFilterPass::run() {
filterLayer->_weights = make_shared_blob<float>(TensorDesc(
inputData->getTensorDesc().getPrecision(),
SizeVector({filterWeights.size()}),
-Layout::C));
+InferenceEngine::Layout::C));
filterLayer->_weights->allocate();
CopyVectorToBlob(filterLayer->_weights, filterWeights);
@ -1433,7 +1438,7 @@ void InsertSplitAligningFilterPass::run() {
filterLayer->_biases = make_shared_blob<float>(TensorDesc(
inputData->getTensorDesc().getPrecision(),
SizeVector({ biasWeights.size() }),
-Layout::C));
+InferenceEngine::Layout::C));
filterLayer->_biases->allocate();
CopyVectorToBlob(filterLayer->_biases, biasWeights);
@ -1452,7 +1457,7 @@ void InsertSplitAligningFilterPass::run() {
}
// search data that starts from unaligned location
-currentOffset += outputSize * GNALimitations::bytesPerSplitElement;
+currentOffset += outputSize * limitations::bytesPerSplitElement;
splitOutIndex++;
}
}
@ -1490,7 +1495,7 @@ void EltwiseSplitOverChannelsPass::run() {
auto oData = l->outData.front();
auto oDims = oData->getDims();
auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims));
-if (totalElementsSize <= GNALimitations::bufferMaxSize) {
+if (totalElementsSize <= limitations::bufferMaxSize) {
continue;
}
auto splitSizesPerAxis = AlignedSplitSizesPerAxis(oDims);
@ -1602,7 +1607,7 @@ void SubstituteScaleShiftBroadCastPass::run() {
dataDims = reshaped_data[insData->getName()];
} else {
dataDims = HasTo2DReshapeData(l) ?
-Get2DReshapedData(insData, GNALimitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() :
+Get2DReshapedData(insData, limitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() :
insData->getDims();
}
@ -1634,7 +1639,7 @@ void SubstituteScaleShiftBroadCastPass::run() {
}
auto tensor = InferenceEngine::TensorDesc(insData->getTensorDesc());
-tensor.reshape(SizeVector{ batchSize, nElements }, Layout::NC);
+tensor.reshape(SizeVector{batchSize, nElements}, InferenceEngine::Layout::NC);
auto reshapeName = scaleShift->name + "_input_" + std::to_string(0) + "_reshape";
auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
auto layer_before_scale_shift = getCreatorLayer(insData);
@ -1949,7 +1954,7 @@ void FuseFQIntoWeightsPass::run() {
<< LAYER_NAME(weightableLayer) << "\n";
auto biases = weightableLayer->insData.size() == 3 ?
-LayerUtils::getParamFromInputAsBlob(weightableLayer, biasesIdx) : nullptr;
+layer_utils::getParamFromInputAsBlob(weightableLayer, biasesIdx) : nullptr;
auto quantizedWeights = gnaFakeQuantizeLayer.getConstInputData();
// 1. broke existing connections - by detaching fq subgraph from rest of graph
@ -2032,7 +2037,8 @@ void FuseFQIntoWeightsPass::run() {
transform->func_id = gnaFakeQuantizeLayer.parseAsActivation();
auto quantizedWeightsData = quantizedWeights->buffer();
-auto dequantizedWeights = make_shared_blob<float>(TensorDesc(Precision::FP32, { outputSize }, Layout::C));
+auto dequantizedWeights =
+    make_shared_blob<float>(TensorDesc(Precision::FP32, {outputSize}, InferenceEngine::Layout::C));
dequantizedWeights->allocate();
auto resultBuffer = dequantizedWeights->buffer();
@ -2460,3 +2466,6 @@ int PassManager::run(int index) {
}
return index;
}
+} // namespace intel_gna
+} // namespace ov
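The change repeated across the files below is mechanical: declarations move from the old flat GNAPluginNS namespace into the nested ov::intel_gna namespace (keeping the existing request/backend/helpers sub-namespaces), and call sites drop the GNAPluginNS:: qualifier. A minimal before/after sketch of the wrapping, using a hypothetical free function that is not part of the plugin:

// Before: flat plugin namespace
namespace GNAPluginNS {
int16_t SaturateExample(float v);  // hypothetical declaration, for illustration only
}  // namespace GNAPluginNS

// After: nested OpenVINO namespaces, as introduced by this commit
namespace ov {
namespace intel_gna {
int16_t SaturateExample(float v);  // same hypothetical declaration
}  // namespace intel_gna
}  // namespace ov

// Call sites change accordingly:
// GNAPluginNS::SaturateExample(x)  ->  ov::intel_gna::SaturateExample(x)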

@ -9,7 +9,9 @@
#include <map>
#include <ie_common.h>
-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
/**
 * @brief interface for gna-pass, special transformer that will be run on input network in order to generate GNABlob
 */
@ -244,4 +246,5 @@ public:
int run(int index = 0);
};
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov

@ -9,12 +9,12 @@
#include <gna_graph_tools.hpp>
namespace ov {
-namespace intela_gna {
+namespace intel_gna {
namespace helpers {
void updateModelInputOrientationWithoutConvolution(const InferenceEngine::CNNLayer& inputLayer,
-const GNAPluginNS::backend::DnnComponents& components,
-GNAPluginNS::GnaInputs& inputs) {
+const backend::DnnComponents& components,
+GnaInputs& inputs) {
// does not make sense to go further is there is no input to set
auto input = inputs.find(inputLayer.name);
@ -84,8 +84,8 @@ void updateModelInputOrientationWithoutConvolution(const InferenceEngine::CNNLay
void updateModelOutputOrientation(const std::string& outputName,
const std::string& cnnlayerName,
-const GNAPluginNS::backend::DnnComponents& components,
-GNAPluginNS::GnaOutputs& outputs) {
+const backend::DnnComponents& components,
+GnaOutputs& outputs) {
// if there is no output to set does not make sense to go further
auto output = outputs.find(outputName);
if (output == outputs.end()) {
@ -99,5 +99,5 @@ void updateModelOutputOrientation(const std::string& outputName,
}
}
} // namespace helpers
-} // namespace intela_gna
+} // namespace intel_gna
} // namespace ov

@ -13,7 +13,8 @@
#include "descriptions/gna_desc.hpp"
namespace ov {
-namespace intela_gna {
+namespace intel_gna {
/**
 * @namespace helpers contains helpers tools for gna plugin.
 */
@ -38,8 +39,8 @@ namespace helpers {
 * @throws if orientations of input for multiple layers are different
 */
void updateModelInputOrientationWithoutConvolution(const InferenceEngine::CNNLayer& inputLayer,
-const GNAPluginNS::backend::DnnComponents& components,
-GNAPluginNS::GnaInputs& inputs);
+const backend::DnnComponents& components,
+GnaInputs& inputs);
/**
 * @brief Update expected orientation for model output of given \p outputName. It is needed to recognize if extra
@ -60,9 +61,9 @@ void updateModelInputOrientationWithoutConvolution(const InferenceEngine::CNNLay
 */
void updateModelOutputOrientation(const std::string& outputName,
const std::string& cnnlayerName,
-const GNAPluginNS::backend::DnnComponents& components,
-GNAPluginNS::GnaOutputs& outputs);
+const backend::DnnComponents& components,
+GnaOutputs& outputs);
} // namespace helpers
-} // namespace intela_gna
+} // namespace intel_gna
} // namespace ov

@ -4,7 +4,10 @@
#include "preprocessing.hpp"
-int16_t GNAPluginNS::ConvertFloatToInt16(float src) {
+namespace ov {
+namespace intel_gna {
+int16_t ConvertFloatToInt16(float src) {
float rounding_value = (src > 0) ? 0.5f : -0.5f;
float value = src + rounding_value;
if (value > 32767.0) {
@ -15,7 +18,7 @@ int16_t GNAPluginNS::ConvertFloatToInt16(float src) {
return (int16_t)value;
}
-int8_t GNAPluginNS::ConvertFloatToInt8(float src) {
+int8_t ConvertFloatToInt8(float src) {
float rounding_value = (src > 0) ? 0.5f : -0.5f;
float value = src + rounding_value;
if (value > 127.0) {
@ -26,15 +29,18 @@ int8_t GNAPluginNS::ConvertFloatToInt8(float src) {
return (int8_t)value;
}
-void GNAPluginNS::ConvertToInt16(int16_t *ptr_dst,
-const float *ptr_src,
+void ConvertToInt16(int16_t* ptr_dst,
+const float* ptr_src,
const uint32_t num_rows,
const uint32_t num_columns,
const float scale_factor) {
if (!ptr_dst || !ptr_src) {
return;
}
-for (uint32_t i = 0; i < num_rows*num_columns; i++) {
-ptr_dst[i] = ConvertFloatToInt16(ptr_src[i]*scale_factor);
+for (uint32_t i = 0; i < num_rows * num_columns; i++) {
+ptr_dst[i] = ConvertFloatToInt16(ptr_src[i] * scale_factor);
}
}
+} // namespace intel_gna
+} // namespace ov
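As the hunks above show, ConvertFloatToInt16 rounds half away from zero and saturates values above 32767 (and presumably clamps symmetrically at the negative bound, which is truncated here), while ConvertToInt16 applies a scale factor element-wise over a row-major buffer. A minimal usage sketch, assuming the include path and that the clamp value is INT16_MAX; these functions are now reached through ov::intel_gna instead of GNAPluginNS:

#include <cstdint>
#include <vector>

#include "preprocessing.hpp"  // assumed include path, matching the .cpp above

int main() {
    // 2 rows x 3 columns of float samples, scaled into int16 storage.
    const std::vector<float> src = {0.1f, -0.5f, 1.0f, 0.25f, -1.0f, 2.0f};
    std::vector<int16_t> dst(src.size());

    // scale_factor chosen for illustration only; real factors come from quantization.
    const float scale_factor = 16384.0f;
    ov::intel_gna::ConvertToInt16(dst.data(), src.data(), 2, 3, scale_factor);

    // 2.0f * 16384.0f = 32768.0f exceeds INT16_MAX, so the last element saturates to 32767.
    return dst.back() == 32767 ? 0 : 1;
}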

@ -6,7 +6,8 @@
#include <cstdint>
-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
void ConvertToInt16(int16_t *ptr_dst,
const float *ptr_src,
@ -32,4 +33,5 @@ inline void UnscaleAndCast(T2 *ptr_dst, T1 *ptr_src, const uint32_t num_rows, co
}
}
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov

@ -6,7 +6,8 @@
#include "gna2_model_helper.hpp"
-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
namespace request {
ModelWrapper::ModelWrapper(ConstructionPassKey) {
@ -33,4 +34,5 @@ const Gna2Model& ModelWrapper::object() const {
}
} // namespace request
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov

@ -6,8 +6,10 @@
#include <gna2-model-api.h>
-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
namespace request {
class ModelWrapperFactory;
/**
@ -58,4 +60,5 @@ private:
};
} // namespace request
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov

@ -9,7 +9,8 @@
#include "backend/am_intel_dnn.hpp"
#include "gna2_model_helper.hpp"
-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
namespace request {
std::shared_ptr<ModelWrapper> ModelWrapperFactory::createTrivial() {
@ -49,4 +50,5 @@ std::shared_ptr<ModelWrapper> ModelWrapperFactory::createInitialized(ModelInitia
}
} // namespace request
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov

@ -9,7 +9,8 @@
#include "model_wrapper.hpp"
-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
namespace request {
class ModelWrapperFactory {
@ -22,4 +23,5 @@ public:
};
} // namespace request
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov

@ -9,7 +9,8 @@
#include "request_status.hpp"
-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
namespace request {
/**
@ -27,7 +28,7 @@ public:
 * @brief Callback invoked by wait operation.
 * @param requestID id of request to be used for wait
 * @param timeoutMilliseconds timeout of wait in milliseconds
- * @return Status of subrequest @see GNAPluginNS::RequestStatus
+ * @return Status of subrequest @see RequestStatus
 *
 */
using WaitHandler = std::function<RequestStatus(uint32_t requestID, int64_t timeoutMilliseconds)>;
@ -37,7 +38,7 @@ public:
/**
 * @brief Wait until subrequest will be finished for given timeout.
 * @param timeoutMilliseconds timeout in milliseconds
- * @return status of execution of subrequest @see GNAPluginNS::RequestStatus
+ * @return status of execution of subrequest @see RequestStatus
 */
virtual RequestStatus wait(int64_t timeoutMilliseconds) = 0;
@ -69,4 +70,5 @@ public:
};
} // namespace request
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov
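The subrequest interface keeps the same callback-based design; only the namespace qualifier in the @see references changes. A sketch of supplying a callback matching the WaitHandler alias shown above, std::function<RequestStatus(uint32_t, int64_t)>; the header path, the placement of RequestStatus directly in ov::intel_gna, and the queryDevice helper are assumptions for illustration, not part of this commit:

#include <cstdint>
#include <functional>

#include "request/subrequest.hpp"  // assumed header path for the interface shown above

// Hypothetical device query standing in for whatever the plugin actually wires in;
// it only has to return a RequestStatus, as the WaitHandler alias requires.
ov::intel_gna::RequestStatus queryDevice(uint32_t requestID, int64_t timeoutMilliseconds);

std::function<ov::intel_gna::RequestStatus(uint32_t, int64_t)> makeWaitHandler() {
    return [](uint32_t requestID, int64_t timeoutMilliseconds) {
        // Forward the wait to the (hypothetical) device query.
        return queryDevice(requestID, timeoutMilliseconds);
    };
}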

@ -9,7 +9,8 @@
#include "log/debug.hpp"
#include "log/log.hpp"
-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
namespace request {
SubrequestImpl::SubrequestImpl(EnqueueHandler enqueueHandler, WaitHandler waitHandler)
@ -64,4 +65,5 @@ bool SubrequestImpl::isCompleted() const {
}
} // namespace request
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov

@ -6,7 +6,8 @@
#include "subrequest.hpp"
-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
namespace request {
/**
@ -34,7 +35,7 @@ public:
/**
 * @brief Wait until subrequest will be finished for given timeout.
 * @param timeoutMilliseconds timeout in milliseconds
- * @return status of execution of subrequest @see GNAPluginNS::RequestStatus
+ * @return status of execution of subrequest @see RequestStatus
 */
RequestStatus wait(int64_t timeoutMilliseconds) override;
@ -72,4 +73,5 @@ private:
};
} // namespace request
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov

@ -12,7 +12,8 @@
#include "request_status.hpp"
-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
namespace request {
class ModelWrapper;
@ -46,7 +47,7 @@ public:
/**
 * @brief Wait untril request will be not finished for give timeout.
 * @param timeoutMilliseconds timeout in milliseconds
- * @return status of execution of ongoing request. @see GNAPluginNS::RequestStatus
+ * @return status of execution of ongoing request. @see RequestStatus
 */
virtual RequestStatus wait(int64_t timeoutMilliseconds) = 0;
@ -85,4 +86,5 @@ public:
};
} // namespace request
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov

Some files were not shown because too many files have changed in this diff.