Gna namespace (#14877)

* [GNA] Cleanup intel_dnn
* [GNA] Replace GNAPluginNS
* [GNA] Rename headers

Parent: f2d93f4a79
Commit: c683a72400
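In effect, this change moves the GNA plugin internals out of the legacy GNAPluginNS namespace and nests them under ov::intel_gna (with inner backend, memory, limitations, and similar namespaces), and renames the remaining C-style headers such as dnn_types.h and gna_types.h to .hpp. A minimal before/after sketch of the pattern, using AMIntelDNN as an example (illustrative only; not part of the diff below):

    // Before this commit: legacy namespace and C-style header names
    #include "backend/am_intel_dnn.hpp"
    #include "backend/dnn_types.h"
    using GNAPluginNS::backend::AMIntelDNN;    // plugin-wide legacy namespace

    // After this commit: everything nested under ov::intel_gna, headers renamed to .hpp
    #include "backend/am_intel_dnn.hpp"
    #include "backend/dnn_types.hpp"
    using ov::intel_gna::backend::AMIntelDNN;  // new namespace layout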
@@ -164,7 +164,7 @@ inline std::istream& operator>>(std::istream& is, HWGeneration& hw_generation) {
 static constexpr Property<ExecutionMode> execution_mode{"GNA_DEVICE_MODE"};

 /**
- * @brief The option to override the GNA HW execution target. May be one of GNA_2_0, GNA_3_0.
+ * @brief The option to override the GNA HW execution target. May be one of GNA_2_0, GNA_3_0, GNA_3_5.
 * By default (in case of no value set) the behavior depends on GNA HW availability:
 * If GNA HW is present, use the option corresponding to this HW.
 * If HW is not present, use the option corresponding to the latest fully supported GNA HW generation.
@@ -175,7 +175,7 @@ static constexpr Property<ExecutionMode> execution_mode{"GNA_DEVICE_MODE"};
 static constexpr Property<HWGeneration> execution_target{"GNA_HW_EXECUTION_TARGET"};

 /**
- * @brief The option to override the GNA HW compile target. May be one of GNA_2_0, GNA_3_0.
+ * @brief The option to override the GNA HW compile target. May be one of GNA_2_0, GNA_3_0, GNA_3_5.
 * By default the same as execution_target.
 * @ingroup ov_runtime_gna_prop_cpp_api
 */
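The two hunks above only extend the documentation: GNA_3_5 joins GNA_2_0 and GNA_3_0 as an accepted value for the execution and compile targets. A hedged usage sketch, assuming the OpenVINO 2.0 property API and that HWGeneration::GNA_3_5 is available in this build (the model path is a placeholder):

    #include "openvino/runtime/core.hpp"
    #include "openvino/runtime/intel_gna/properties.hpp"

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");  // placeholder model
        // Ask the plugin to execute and compile for the GNA 3.5 hardware generation.
        auto compiled = core.compile_model(model,
                                           "GNA",
                                           ov::intel_gna::execution_target(ov::intel_gna::HWGeneration::GNA_3_5),
                                           ov::intel_gna::compile_target(ov::intel_gna::HWGeneration::GNA_3_5));
        return 0;
    }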
@@ -22,11 +22,10 @@
 #include "memory/gna_memory_util.hpp"
 #include "log/log.hpp"
 #include "log/dump.hpp"
-#include "backend/dnn.hpp"
 #include "backend/am_intel_dnn.hpp"
-#include "backend/dnn_types.h"
+#include "backend/dnn_types.hpp"
 #include "gna/gna_config.hpp"
-#include "backend/gna_types.h"
+#include "backend/gna_types.hpp"
 #include "backend/gna_limitations.hpp"
 #include "layers/gna_convolution_layer.hpp"
 #include "memory/gna_memory.hpp"
@@ -46,20 +45,18 @@
 */
 #define LIGHT_DUMP

-using namespace GNAPluginNS::backend;
-using namespace ov::intel_gna;
-using GNAPluginNS::GNAConvolutionLayer::outputFromConv;
-using GNAPluginNS::GNAConvolutionLayer::outputFromPooling;
-using GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy;
-using GNAPluginNS::memory::GNAMemoryInterface;
+using gna_convolution_layer::outputFromConv;
+using gna_convolution_layer::outputFromPooling;
+
+namespace ov {
+namespace intel_gna {
+namespace backend {

-void GNAPluginNS::backend::AMIntelDNN::BeginNewWrite(uint32_t index) {
+void backend::AMIntelDNN::BeginNewWrite(uint32_t index) {
 dump_write_index = index;
 }

-void GNAPluginNS::backend::AMIntelDNN::Init(GNAMemoryInterface* memoryInterface,
+void backend::AMIntelDNN::Init(memory::GNAMemoryInterface* memoryInterface,
 intel_dnn_number_type_t compute_precision,
 float scale_factor) {
 memory = memoryInterface;
@ -68,25 +65,13 @@ void GNAPluginNS::backend::AMIntelDNN::Init(GNAMemoryInterface* memoryInterface,
|
||||
|
||||
ptr_active_outputs_ = nullptr;
|
||||
num_active_outputs_ = 0;
|
||||
num_left_context = 0;
|
||||
num_right_context = 0;
|
||||
softmax_type = kSoftmaxNone;
|
||||
ptr_sumgroup_sizes = nullptr;
|
||||
num_sumgroup_sizes = 0;
|
||||
ptr_priors = nullptr;
|
||||
}
|
||||
|
||||
GNAPluginNS::backend::AMIntelDNN::~AMIntelDNN() {
|
||||
backend::AMIntelDNN::~AMIntelDNN() {
|
||||
component.clear();
|
||||
if (ptr_sumgroup_sizes != NULL) {
|
||||
_mm_free(ptr_sumgroup_sizes);
|
||||
}
|
||||
if (ptr_priors != NULL) {
|
||||
_mm_free(ptr_priors);
|
||||
}
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitActiveList(uint32_t *ptr_active_list) {
|
||||
void backend::AMIntelDNN::InitActiveList(uint32_t *ptr_active_list) {
|
||||
ptr_active_outputs_ = ptr_active_list;
|
||||
if (ptr_active_list == nullptr) {
|
||||
if (component[component.size() - 1].orientation_out == kDnnInterleavedOrientation) {
|
||||
@ -100,7 +85,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitActiveList(uint32_t *ptr_active_list)
|
||||
}
|
||||
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitAffineComponentPrivate(intel_dnn_component_t &comp,
|
||||
void backend::AMIntelDNN::InitAffineComponentPrivate(intel_dnn_component_t &comp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns,
|
||||
uint32_t num_rows_out,
|
||||
@ -123,7 +108,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitAffineComponentPrivate(intel_dnn_comp
|
||||
comp.num_bytes_per_input = num_bytes_per_input;
|
||||
comp.num_bytes_per_output = num_bytes_per_output;
|
||||
comp.operation = isDiag ? kDnnDiagonalOp : kDnnAffineOp;
|
||||
comp.macro_operation = kDnnMacroOpNone;
|
||||
comp.orientation_in = kDnnInterleavedOrientation;
|
||||
comp.orientation_out = kDnnInterleavedOrientation;
|
||||
comp.op.affine.num_bytes_per_weight = num_bytes_per_weight;
|
||||
@ -145,23 +129,23 @@ void GNAPluginNS::backend::AMIntelDNN::InitAffineComponentPrivate(intel_dnn_comp
|
||||
}
|
||||
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel_dnn_component_t &comp,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_columns_out,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
uint32_t num_bytes_per_weight,
|
||||
uint32_t num_bytes_per_bias,
|
||||
uint32_t num_filters,
|
||||
uint32_t num_filter_coefficients,
|
||||
const uint32_t convStride,
|
||||
float weight_scale_factor,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
void *&ptr_filters,
|
||||
void *&ptr_biases,
|
||||
bool postInitMem) {
|
||||
void backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel_dnn_component_t &comp,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_columns_out,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
uint32_t num_bytes_per_weight,
|
||||
uint32_t num_bytes_per_bias,
|
||||
uint32_t num_filters,
|
||||
uint32_t num_filter_coefficients,
|
||||
const uint32_t convStride,
|
||||
float weight_scale_factor,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
void *&ptr_filters,
|
||||
void *&ptr_biases,
|
||||
bool postInitMem) {
|
||||
comp.num_rows_in = 1;
|
||||
comp.num_columns_in = num_columns_in;
|
||||
comp.num_rows_out = 1;
|
||||
@ -169,7 +153,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
|
||||
comp.num_bytes_per_input = num_bytes_per_input;
|
||||
comp.num_bytes_per_output = num_bytes_per_output;
|
||||
comp.operation = kDnnConvolutional1dOp;
|
||||
comp.macro_operation = kDnnMacroOpNone;
|
||||
comp.orientation_in = kDnnNonInterleavedOrientation;
|
||||
comp.orientation_out = kDnnNonInterleavedOrientation;
|
||||
comp.ptr_inputs = ptr_inputs;
|
||||
@ -199,9 +182,9 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
|
||||
THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << num_columns_in <<
|
||||
") is not a multiply by 8";
|
||||
}
|
||||
if (num_filters < GNALimitations::convMinFiltersNum ||
|
||||
num_filters > GNALimitations::convMaxFiltersNum ||
|
||||
num_filters % GNALimitations::convFiltersNumDivider != 0) {
|
||||
if (num_filters < limitations::convMinFiltersNum ||
|
||||
num_filters > limitations::convMaxFiltersNum ||
|
||||
num_filters % limitations::convFiltersNumDivider != 0) {
|
||||
THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << num_filters;
|
||||
}
|
||||
auto max_number_of_out_elements = outputFromConv(num_columns_in, num_filter_coefficients, convStride);
|
||||
@ -210,26 +193,25 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
|
||||
}
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitConvolutional2DComponentPrivate(intel_dnn_component_t& comp,
|
||||
OvGnaTensor inputTensor,
|
||||
OvGnaTensor outputTensor,
|
||||
OvGnaTensor filterTensor,
|
||||
OvGnaTensor biasTensor,
|
||||
std::array<uint32_t, 2> convStride,
|
||||
std::array<uint32_t, 2> zeroPadding,
|
||||
float weight_scale_factor,
|
||||
float output_scale_factor,
|
||||
void*& ptr_inputs,
|
||||
void*& ptr_outputs,
|
||||
void*& ptr_filters,
|
||||
void*& ptr_biases) {
|
||||
void backend::AMIntelDNN::InitConvolutional2DComponentPrivate(intel_dnn_component_t& comp,
|
||||
OvGnaTensor inputTensor,
|
||||
OvGnaTensor outputTensor,
|
||||
OvGnaTensor filterTensor,
|
||||
OvGnaTensor biasTensor,
|
||||
std::array<uint32_t, 2> convStride,
|
||||
std::array<uint32_t, 2> zeroPadding,
|
||||
float weight_scale_factor,
|
||||
float output_scale_factor,
|
||||
void*& ptr_inputs,
|
||||
void*& ptr_outputs,
|
||||
void*& ptr_filters,
|
||||
void*& ptr_biases) {
|
||||
comp.tensors.clear();
|
||||
comp.tensors.push_back(inputTensor);
|
||||
comp.tensors.push_back(outputTensor);
|
||||
comp.tensors.push_back(filterTensor);
|
||||
comp.tensors.push_back(biasTensor);
|
||||
comp.operation = kDnnConvolutional2dOp;
|
||||
comp.macro_operation = kDnnMacroOpNone;
|
||||
comp.orientation_in = kDnnNonInterleavedOrientation;
|
||||
comp.orientation_out = kDnnNonInterleavedOrientation;
|
||||
comp.ptr_inputs = ptr_inputs;
|
||||
@ -246,7 +228,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional2DComponentPrivate(intel
|
||||
ptr_outputs = &comp.ptr_outputs;
|
||||
}
|
||||
|
||||
bool GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(const Gna2Operation& op) {
|
||||
bool backend::AMIntelDNN::isOperationCnnLegacySpecific(const Gna2Operation& op) {
|
||||
// GNA compile target GNA_TARGET_3_0 does not support pooling window < pooling stride
|
||||
return op.Type == Gna2OperationTypeConvolution &&
|
||||
op.NumberOfParameters > std::max(PoolStrideParamIdx, PoolWinParamIdx) &&
|
||||
@ -256,7 +238,7 @@ bool GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(const Gna2Op
|
||||
static_cast<Gna2Shape*>(op.Parameters[PoolStrideParamIdx])->Dimensions[0] > static_cast<Gna2Shape*>(op.Parameters[PoolWinParamIdx])->Dimensions[0];
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(Gna2Model& gnaModel, bool useLegacyFormula) {
|
||||
void backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(Gna2Model& gnaModel, bool useLegacyFormula) {
|
||||
IE_ASSERT(gnaModel.Operations != nullptr || gnaModel.NumberOfOperations == 0);
|
||||
for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) {
|
||||
auto& gnaOp = gnaModel.Operations[i];
|
||||
@ -277,10 +259,10 @@ void GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(Gna
|
||||
const auto fltStride = fltStrideShape.Dimensions[0];
|
||||
const auto inVecCnt = inputShape.Dimensions[1];
|
||||
const auto nFltSize = gnaOp.Operands[FilterOpIdx]->Shape.Dimensions[1];
|
||||
const auto outFromConv = GNAPluginNS::GNAConvolutionLayer::outputFromConv(inVecCnt, nFltSize, fltStride);
|
||||
const auto outFromConv = gna_convolution_layer::outputFromConv(inVecCnt, nFltSize, fltStride);
|
||||
const auto& poolWindow = *static_cast<Gna2Shape*>(gnaOp.Parameters[PoolWinParamIdx]);
|
||||
const auto& poolStride = *static_cast<Gna2Shape*>(gnaOp.Parameters[PoolStrideParamIdx]);
|
||||
const auto numberOfOutputs = GNAPluginNS::GNAConvolutionLayer::outputFromPooling(
|
||||
const auto numberOfOutputs = gna_convolution_layer::outputFromPooling(
|
||||
outFromConv, poolWindow.Dimensions[0], poolStride.Dimensions[0],
|
||||
useLegacyFormula || isOperationCnnLegacySpecific(gnaOp));
|
||||
auto& outputTensor = *gnaOp.Operands[OutOpIdx];
|
||||
@ -289,21 +271,20 @@ void GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(Gna
|
||||
}
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_component_t &comp,
|
||||
std::array<uint32_t, 3> inCHW,
|
||||
std::array<uint32_t, 3> outCHW,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
std::array<uint32_t, 2> poolingWindowXY,
|
||||
std::array<uint32_t, 2> poolingStrideXY,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
bool postInitMem) {
|
||||
void backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_component_t &comp,
|
||||
std::array<uint32_t, 3> inCHW,
|
||||
std::array<uint32_t, 3> outCHW,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
std::array<uint32_t, 2> poolingWindowXY,
|
||||
std::array<uint32_t, 2> poolingStrideXY,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
bool postInitMem) {
|
||||
comp.num_bytes_per_input = num_bytes_per_input;
|
||||
comp.num_bytes_per_output = num_bytes_per_output;
|
||||
comp.operation = kDnnMaxPoolOp;
|
||||
comp.macro_operation = kDnnMacroOpNone;
|
||||
comp.orientation_in = kDnnNonInterleavedOrientation;
|
||||
comp.orientation_out = kDnnNonInterleavedOrientation;
|
||||
comp.op.maxpool.inCHW = inCHW;
|
||||
@ -321,20 +302,20 @@ void GNAPluginNS::backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_com
|
||||
}
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitCopyComponentPrivate(intel_dnn_component_t &comp,
|
||||
intel_dnn_orientation_t orientation,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_rows_out,
|
||||
uint32_t num_columns_out,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
float output_scale_factor,
|
||||
uint32_t num_copy_rows,
|
||||
uint32_t num_copy_columns,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
bool postInitMem) {
|
||||
void backend::AMIntelDNN::InitCopyComponentPrivate(intel_dnn_component_t &comp,
|
||||
intel_dnn_orientation_t orientation,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_rows_out,
|
||||
uint32_t num_columns_out,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
float output_scale_factor,
|
||||
uint32_t num_copy_rows,
|
||||
uint32_t num_copy_columns,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
bool postInitMem) {
|
||||
comp.num_rows_in = num_rows_in;
|
||||
comp.num_columns_in = num_columns_in;
|
||||
comp.num_rows_out = num_rows_out;
|
||||
@ -342,7 +323,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitCopyComponentPrivate(intel_dnn_compon
|
||||
comp.num_bytes_per_input = num_bytes_per_input;
|
||||
comp.num_bytes_per_output = num_bytes_per_output;
|
||||
comp.operation = kDnnCopyOp;
|
||||
comp.macro_operation = kDnnMacroOpNone;
|
||||
comp.orientation_in = orientation;
|
||||
comp.orientation_out = orientation;
|
||||
comp.ptr_inputs = ptr_inputs;
|
||||
@ -361,20 +341,20 @@ void GNAPluginNS::backend::AMIntelDNN::InitCopyComponentPrivate(intel_dnn_compon
|
||||
}
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel_dnn_component_t &comp,
|
||||
const DnnActivation& function_id,
|
||||
intel_dnn_orientation_t orientation,
|
||||
uint32_t num_rows,
|
||||
uint32_t num_columns,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
uint32_t num_segments,
|
||||
float output_scale_factor,
|
||||
float input_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
gna_pwl_segment_t *ptr_segments,
|
||||
bool postInitMem) {
|
||||
void backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel_dnn_component_t &comp,
|
||||
const DnnActivation& function_id,
|
||||
intel_dnn_orientation_t orientation,
|
||||
uint32_t num_rows,
|
||||
uint32_t num_columns,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
uint32_t num_segments,
|
||||
float output_scale_factor,
|
||||
float input_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
gna_pwl_segment_t *ptr_segments,
|
||||
bool postInitMem) {
|
||||
comp.num_rows_in = num_rows;
|
||||
comp.num_columns_in = num_columns;
|
||||
comp.num_rows_out = num_rows;
|
||||
@ -382,7 +362,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel
|
||||
comp.num_bytes_per_input = num_bytes_per_input;
|
||||
comp.num_bytes_per_output = num_bytes_per_output;
|
||||
comp.operation = kDnnPiecewiselinearOp;
|
||||
comp.macro_operation = kDnnMacroOpNone;
|
||||
comp.orientation_in = orientation;
|
||||
comp.orientation_out = orientation;
|
||||
comp.op.pwl.func_id = function_id;
|
||||
@ -404,15 +383,15 @@ void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel
|
||||
}
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_component_t &comp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
bool postInitMem) {
|
||||
void backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_component_t &comp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
bool postInitMem) {
|
||||
comp.num_rows_in = num_rows_in;
|
||||
comp.num_columns_in = num_columns_in;
|
||||
comp.num_rows_out = num_columns_in;
|
||||
@ -420,7 +399,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_
|
||||
comp.num_bytes_per_input = num_bytes_per_input;
|
||||
comp.num_bytes_per_output = num_bytes_per_output;
|
||||
comp.operation = kDnnInterleaveOp;
|
||||
comp.macro_operation = kDnnMacroOpNone;
|
||||
comp.orientation_in = kDnnNonInterleavedOrientation;
|
||||
comp.orientation_out = kDnnInterleavedOrientation;
|
||||
comp.output_scale_factor = output_scale_factor;
|
||||
@ -434,15 +412,15 @@ void GNAPluginNS::backend::AMIntelDNN::InitInterleaveComponentPrivate(intel_dnn_
|
||||
}
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dnn_component_t &comp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
bool postInitMem) {
|
||||
void backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dnn_component_t &comp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
void *&ptr_outputs,
|
||||
bool postInitMem) {
|
||||
comp.num_rows_in = num_rows_in;
|
||||
comp.num_columns_in = num_columns_in;
|
||||
comp.num_rows_out = num_columns_in;
|
||||
@ -450,7 +428,6 @@ void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dn
|
||||
comp.num_bytes_per_input = num_bytes_per_input;
|
||||
comp.num_bytes_per_output = num_bytes_per_output;
|
||||
comp.operation = kDnnDeinterleaveOp;
|
||||
comp.macro_operation = kDnnMacroOpNone;
|
||||
comp.orientation_in = kDnnInterleavedOrientation;
|
||||
comp.orientation_out = kDnnInterleavedOrientation;
|
||||
comp.output_scale_factor = output_scale_factor;
|
||||
@ -464,7 +441,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitDeinterleaveComponentPrivate(intel_dn
|
||||
}
|
||||
}
|
||||
|
||||
float GNAPluginNS::backend::AMIntelDNN::OutputScaleFactor(intel_dnn_component_t &comp) {
|
||||
float backend::AMIntelDNN::OutputScaleFactor(intel_dnn_component_t &comp) {
|
||||
return comp.output_scale_factor;
|
||||
}
|
||||
|
||||
@ -476,7 +453,7 @@ struct InputEndPoint {
|
||||
InputEndPoint(int nidx, size_t sz, size_t esize) : idx(nidx), size(sz), num_bytes_per_output(esize) {}
|
||||
};
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::WriteGraphWizModel(const char *filename) {
|
||||
void backend::AMIntelDNN::WriteGraphWizModel(const char *filename) {
|
||||
auto & components = component;
|
||||
|
||||
#define IS_AFFINE(k)\
|
||||
@ -743,12 +720,12 @@ void PrintTensors(std::ofstream& out, T tensors) {
|
||||
}
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::PrintOffset(std::ofstream& out, const std::string& type, void* ptr) {
|
||||
void backend::AMIntelDNN::PrintOffset(std::ofstream& out, const std::string& type, void* ptr) {
|
||||
const auto queue = memory->getQueue(ptr);
|
||||
std::string typeOfRegion = "UNKNOWN_QUEUE";
|
||||
auto offset = std::numeric_limits<uint32_t>::max();
|
||||
if (queue != nullptr) {
|
||||
typeOfRegion = GNAPluginNS::memory::rRegionToStr(queue->regionType());
|
||||
typeOfRegion = memory::rRegionToStr(queue->regionType());
|
||||
offset = queue->getOffset(ptr).second;
|
||||
}
|
||||
out << "<memory_region_type> " << typeOfRegion << "\n";
|
||||
@ -756,9 +733,9 @@ void GNAPluginNS::backend::AMIntelDNN::PrintOffset(std::ofstream& out, const std
|
||||
<< "0x" << std::setfill('0') << std::setw(8) << std::hex << offset << "\n";
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision) {
|
||||
void backend::AMIntelDNN::WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision) {
|
||||
if ((compute_precision_ == kDnnFloat) && (logging_precision == kDnnInt)) {
|
||||
fprintf(stderr, "Error trying to write floating point DNN as integer in GNAPluginNS::backend::AMIntelDNN::WriteDnnText().\n");
|
||||
fprintf(stderr, "Error trying to write floating point DNN as integer in backend::AMIntelDNN::WriteDnnText().\n");
|
||||
fprintf(stderr, " Please convert to integer first.\n");
|
||||
throw -1;
|
||||
}
|
||||
@ -777,8 +754,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
|
||||
|
||||
out_file << "<intel_dnn_file>\n";
|
||||
out_file << "<number_type> " << intel_dnn_number_type_name[logging_precision] << "\n";
|
||||
out_file << "<softmax_type> " << intel_dnn_softmax_name[softmax_type] << "\n";
|
||||
const auto& regionsMap = GNAPluginNS::memory::GetAllRegionsToStrMap();
|
||||
const auto& regionsMap = memory::GetAllRegionsToStrMap();
|
||||
for (const auto& regionPair : regionsMap) {
|
||||
out_file << "<memory_region_type> " << std::dec << regionPair.second << "\n";
|
||||
out_file << "<num_memory_region_bytes> " << std::dec << memory->getRegionBytes(regionPair.first) << "\n";
|
||||
@ -818,7 +794,6 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
|
||||
layer++;
|
||||
}
|
||||
out_file << "<component_operation> " << intel_dnn_operation_name[component[i].operation] << "\n";
|
||||
out_file << "<macro_operation> " << intel_dnn_macro_operation_name[component[i].macro_operation] << "\n";
|
||||
out_file << "<num_rows_in> " << std::dec << num_rows_in << "\n";
|
||||
out_file << "<num_columns_in> " << std::dec << num_columns_in << "\n";
|
||||
out_file << "<num_rows_out> " << std::dec << num_rows_out << "\n";
|
||||
@ -1383,7 +1358,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t GNAPluginNS::backend::AMIntelDNN::CountLayers() {
|
||||
uint32_t backend::AMIntelDNN::CountLayers() {
|
||||
uint32_t n = 0;
|
||||
for (auto && c : component) {
|
||||
if (c.operation == kDnnAffineOp
|
||||
@ -1401,7 +1376,7 @@ uint32_t GNAPluginNS::backend::AMIntelDNN::CountLayers() {
|
||||
return n;
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const std::string& gnaCompileTarget) {
|
||||
void backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const std::string& gnaCompileTarget) {
|
||||
Gna2Operation * gnaOperation;
|
||||
if (gnaModel == nullptr)
|
||||
THROW_GNA_EXCEPTION << "Invalid input parameter";
|
||||
@ -1409,12 +1384,12 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const
|
||||
THROW_GNA_EXCEPTION << "InitGNAStruct can't work on preallocated layers array";
|
||||
|
||||
if (component.empty())
|
||||
THROW_GNA_EXCEPTION << "empty model in GNAPluginNS::backend::AMIntelDNN::InitGNAStruct()";
|
||||
THROW_GNA_EXCEPTION << "empty model in backend::AMIntelDNN::InitGNAStruct()";
|
||||
|
||||
gnaModel->NumberOfOperations = CountLayers();
|
||||
gnaModel->Operations = reinterpret_cast<Gna2Operation*>(gnaUserAllocator(gnaModel->NumberOfOperations * sizeof(Gna2Operation)));
|
||||
if (gnaModel->Operations == nullptr)
|
||||
THROW_GNA_EXCEPTION << "out of memory in GNAPluginNS::backend::AMIntelDNN::InitGNAStruct()";
|
||||
THROW_GNA_EXCEPTION << "out of memory in backend::AMIntelDNN::InitGNAStruct()";
|
||||
memset(gnaModel->Operations, 0, gnaModel->NumberOfOperations * sizeof(Gna2Operation));
|
||||
gnaOperation = gnaModel->Operations;
|
||||
for (int i = 0; i < component.size(); i++) {
|
||||
@ -1666,7 +1641,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const
|
||||
gnaModel->NumberOfOperations = static_cast<uint32_t>(std::distance(gnaModel->Operations, gnaOperation));
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::DestroyGNAStruct(Gna2Model *gnaModel) {
|
||||
void backend::AMIntelDNN::DestroyGNAStruct(Gna2Model *gnaModel) {
|
||||
if (gnaModel->Operations != nullptr) {
|
||||
for (uint32_t i = 0; i < gnaModel->NumberOfOperations; i++) {
|
||||
switch (gnaModel->Operations[i].Type) {
|
||||
@ -1686,7 +1661,7 @@ void GNAPluginNS::backend::AMIntelDNN::DestroyGNAStruct(Gna2Model *gnaModel) {
|
||||
gnaModel->NumberOfOperations = 0;
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputTextGNA(const Gna2Model & model) {
|
||||
void backend::AMIntelDNN::WriteInputAndOutputTextGNA(const Gna2Model & model) {
|
||||
#ifdef LIGHT_DUMP
|
||||
dump::WriteInputAndOutputTextGNAImpl(
|
||||
model,
|
||||
@ -1695,7 +1670,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputTextGNA(const Gna2Mode
|
||||
#endif
|
||||
}
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputText() {
|
||||
void backend::AMIntelDNN::WriteInputAndOutputText() {
|
||||
#ifdef LIGHT_DUMP
|
||||
for (uint32_t i = 0; i < num_components(); i++) {
|
||||
std::stringstream out_file_name;
|
||||
@ -1791,11 +1766,11 @@ void GNAPluginNS::backend::AMIntelDNN::WriteInputAndOutputText() {
|
||||
#endif
|
||||
}
|
||||
|
||||
uint32_t GNAPluginNS::backend::AMIntelDNN::num_components() {
|
||||
uint32_t backend::AMIntelDNN::num_components() {
|
||||
return static_cast<uint32_t>(component.size());
|
||||
}
|
||||
|
||||
uint32_t GNAPluginNS::backend::AMIntelDNN::num_gna_layers() {
|
||||
uint32_t backend::AMIntelDNN::num_gna_layers() {
|
||||
uint32_t num_layers = 0;
|
||||
std::set<intel_dnn_operation_t> gna_layers({ kDnnAffineOp,
|
||||
kDnnDiagonalOp,
|
||||
@ -1812,27 +1787,27 @@ uint32_t GNAPluginNS::backend::AMIntelDNN::num_gna_layers() {
|
||||
return num_layers;
|
||||
}
|
||||
|
||||
uint32_t GNAPluginNS::backend::AMIntelDNN::num_group_in() {
|
||||
uint32_t backend::AMIntelDNN::num_group_in() {
|
||||
return ((!component.empty()) ? ((component[0].orientation_in == kDnnInterleavedOrientation)
|
||||
? component[0].num_columns_in : component[0].num_rows_in) : 0);
|
||||
}
|
||||
|
||||
uint32_t GNAPluginNS::backend::AMIntelDNN::num_group_out() {
|
||||
uint32_t backend::AMIntelDNN::num_group_out() {
|
||||
return ((!component.empty()) ? ((component[component.size() - 1].orientation_out == kDnnInterleavedOrientation)
|
||||
? component[component.size() - 1].num_columns_out : component[component.size() -
|
||||
1].num_rows_out) : 0);
|
||||
}
|
||||
|
||||
uint32_t GNAPluginNS::backend::AMIntelDNN::num_inputs() {
|
||||
uint32_t backend::AMIntelDNN::num_inputs() {
|
||||
return component.empty() ? 0 : component[0].num_rows_in;
|
||||
}
|
||||
|
||||
uint32_t GNAPluginNS::backend::AMIntelDNN::num_outputs() {
|
||||
uint32_t backend::AMIntelDNN::num_outputs() {
|
||||
return (component[component.size() - 1].orientation_out == kDnnInterleavedOrientation) ? component[
|
||||
component.size() - 1].num_rows_out : component[component.size() - 1].num_columns_out;
|
||||
}
|
||||
|
||||
std::string GNAPluginNS::backend::AMIntelDNN::getDumpFilePrefix(const std::string& folder) {
|
||||
std::string backend::AMIntelDNN::getDumpFilePrefix(const std::string& folder) {
|
||||
const char pathSeparator =
|
||||
#ifdef _WIN32
|
||||
'\\';
|
||||
@@ -1842,14 +1817,18 @@ std::string GNAPluginNS::backend::AMIntelDNN::getDumpFilePrefix(const std::strin
 return std::string(".") + pathSeparator + folder + pathSeparator + std::to_string(dump_write_index) + pathSeparator;
 }

-std::string GNAPluginNS::backend::AMIntelDNN::getDumpFilePrefixGNA() {
+std::string backend::AMIntelDNN::getDumpFilePrefixGNA() {
 return getDumpFilePrefix("gna_layers");
 }

-std::string GNAPluginNS::backend::AMIntelDNN::getDumpFolderName() {
+std::string backend::AMIntelDNN::getDumpFolderName() {
 return getDumpFilePrefix("layers");
 }

-std::string GNAPluginNS::backend::AMIntelDNN::getRefFolderName() {
+std::string backend::AMIntelDNN::getRefFolderName() {
 return getDumpFilePrefix("ref_layers");
 }

+} // namespace backend
+} // namespace intel_gna
+} // namespace ov
@@ -8,8 +8,8 @@
 #include <string>
 #include <vector>

-#include "dnn_types.h"
-#include "gna_types.h"
+#include "dnn_types.hpp"
+#include "gna_types.hpp"
 #include "gna/gna_config.hpp"

 #include "log/debug.hpp"
@@ -17,9 +17,8 @@
 #include "memory/gna_memory.hpp"
 #include <gna2-model-api.h>

-using GNAPluginNS::memory::GNAMemoryInterface;
-
-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
 namespace backend {

 class AMIntelDNN {
@ -28,24 +27,15 @@ public:
|
||||
: ptr_active_outputs_(NULL),
|
||||
num_active_outputs_(0),
|
||||
input_scale_factor_(1.0),
|
||||
num_left_context(0),
|
||||
num_right_context(0),
|
||||
do_rotate_input(false),
|
||||
do_rotate_output(false),
|
||||
num_rotate_rows(0),
|
||||
num_rotate_columns(0),
|
||||
num_rotate_output_rows(0),
|
||||
num_rotate_output_columns(0),
|
||||
softmax_type(kSoftmaxNone),
|
||||
ptr_sumgroup_sizes(NULL),
|
||||
num_sumgroup_sizes(0),
|
||||
ptr_priors(NULL),
|
||||
compute_precision_(kDnnNumNumberType) {
|
||||
}
|
||||
|
||||
~AMIntelDNN();
|
||||
|
||||
void Init(GNAMemoryInterface * memoryInterface,
|
||||
void Init(memory::GNAMemoryInterface * memoryInterface,
|
||||
intel_dnn_number_type_t compute_precision,
|
||||
float scale_factor);
|
||||
|
||||
@ -284,9 +274,31 @@ public:
|
||||
true);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void AdvanceOperationIfAllApplied(const std::vector<intel_dnn_component_t>& cmp, int i, T*& operation) {
|
||||
if (i == cmp.size() - 1 || cmp[i + 1].operation != kDnnPiecewiselinearOp) {
|
||||
++operation;
|
||||
}
|
||||
}
|
||||
|
||||
float OutputScaleFactor(uint32_t component_index) {
|
||||
return OutputScaleFactor(component[component_index]);
|
||||
template <class T>
|
||||
void AdvanceCnnOperationIfAllApplied(const std::vector<intel_dnn_component_t>& cmp, int i, T*& operation) {
|
||||
if (i == cmp.size() - 1 ||
|
||||
((cmp[i + 1].operation != kDnnMaxPoolOp) && (cmp[i + 1].operation != kDnnPiecewiselinearOp))) {
|
||||
operation++;
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void AdvancePwlOperationIfAllApplied(const std::vector<intel_dnn_component_t>& cmp, int i, T*& operation) {
|
||||
if (i == cmp.size() - 1 ||
|
||||
((cmp[i + 1].operation != kDnnMaxPoolOp) && (cmp[i + 1].operation != kDnnPiecewiselinearOp))) {
|
||||
operation++;
|
||||
}
|
||||
}
|
||||
|
||||
float OutputScaleFactor(uint32_t cmp_index) {
|
||||
return OutputScaleFactor(component[cmp_index]);
|
||||
}
|
||||
|
||||
float OutputScaleFactor(intel_dnn_component_t &comp);
|
||||
@ -318,19 +330,10 @@ public:
|
||||
uint32_t num_outputs();
|
||||
|
||||
std::vector<intel_dnn_component_t> component;
|
||||
uint32_t num_left_context;
|
||||
uint32_t num_right_context;
|
||||
uint32_t new_num_conv_columns = 0;
|
||||
bool do_rotate_input;
|
||||
bool do_rotate_output;
|
||||
uint32_t num_rotate_rows = 0;
|
||||
uint32_t num_rotate_columns = 0;
|
||||
uint32_t num_rotate_output_rows = 0;
|
||||
uint32_t num_rotate_output_columns = 0;
|
||||
DnnSoftmaxType softmax_type;
|
||||
uint32_t *ptr_sumgroup_sizes;
|
||||
uint32_t num_sumgroup_sizes;
|
||||
float *ptr_priors;
|
||||
|
||||
void WriteInputAndOutputText();
|
||||
|
||||
@ -339,7 +342,7 @@ public:
|
||||
void BeginNewWrite(uint32_t index);
|
||||
|
||||
private:
|
||||
GNAMemoryInterface* memory = nullptr;
|
||||
memory::GNAMemoryInterface* memory = nullptr;
|
||||
uint32_t *ptr_active_outputs_;
|
||||
uint32_t num_active_outputs_;
|
||||
intel_dnn_number_type_t compute_precision_;
|
||||
@ -442,6 +445,20 @@ private:
|
||||
void*& ptr_filters,
|
||||
void*& ptr_biases);
|
||||
|
||||
static void InitDWSCComponentPrivate(intel_dnn_component_t& comp,
|
||||
OvGnaTensor inputTensor,
|
||||
OvGnaTensor outputTensor,
|
||||
OvGnaTensor filterTensor,
|
||||
OvGnaTensor biasTensor,
|
||||
std::array<uint32_t, 2> convStride,
|
||||
std::array<uint32_t, 2> zeroPadding,
|
||||
float weight_scale_factor,
|
||||
float output_scale_factor,
|
||||
void*& ptr_inputs,
|
||||
void*& ptr_outputs,
|
||||
void*& ptr_filters,
|
||||
void*& ptr_biases);
|
||||
|
||||
static void InitAffineComponentPrivate(intel_dnn_component_t &comp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns,
|
||||
@@ -464,5 +481,7 @@ private:
 std::string getDumpFolderName();
 std::string getRefFolderName();
 };

 } // namespace backend
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov
@@ -1,73 +0,0 @@
-// Copyright (C) 2018-2022 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <cstdio>
-#include <cmath>
-
-#include <gna2-model-api.h>
-#include "gna2_model_helper.hpp"
-#include "log/dump.hpp"
-
-#ifndef _NO_MKL_
-#include <mkl_dnn.h>
-#endif
-
-#include "runtime/floatmath.h"
-#include "dnn.hpp"
-
-#include "runtime/pwl.h"
-#include "runtime/cnn.h"
-
-void GNAPluginNS::backend::ClearScoreError(intel_score_error_t *error) {
-error->num_scores = 0;
-error->num_errors = 0;
-error->max_error = 0.0;
-error->sum_error = 0.0;
-error->sum_squared_error = 0.0;
-error->max_rel_error = 0.0;
-error->sum_rel_error = 0.0;
-error->sum_squared_rel_error = 0.0;
-}
-
-void GNAPluginNS::backend::UpdateScoreError(intel_score_error_t *error, intel_score_error_t *total_error) {
-total_error->num_errors += error->num_errors;
-total_error->num_scores += error->num_scores;
-total_error->sum_error += error->sum_error;
-total_error->sum_squared_error += error->sum_squared_error;
-if (error->max_error > total_error->max_error) {
-total_error->max_error = error->max_error;
-}
-total_error->sum_rel_error += error->sum_rel_error;
-total_error->sum_squared_rel_error += error->sum_squared_rel_error;
-if (error->max_rel_error > total_error->max_rel_error) {
-total_error->max_rel_error = error->max_rel_error;
-}
-}
-
-void GNAPluginNS::backend::SoftmaxGoogle(float *ptr_output, float *ptr_input, const uint32_t num_outputs, const uint32_t num_inputs) {
-// Assumes input vector contains log likelihoods
-// The computes x[i] = x[i] - log(sum_j exp(x[j]))
-// This normalizes the likelihoods by the sum of likelihoods but stores them as log likelihoods
-
-float max_score = ptr_input[0];
-float sum = 0.0;
-float diff;
-// find max score for normalization to [0,1]
-for (uint32_t i = 0; i < num_inputs; i++) {
-if (ptr_input[i] > max_score) {
-max_score = ptr_input[i];
-}
-}
-for (uint32_t i = 0; i < num_inputs; i++) {
-sum += exp(ptr_input[i] - max_score);
-}
-if (sum < 1.0e-20) {
-fprintf(stderr, "Warning: attempt to take log(0) in SoftmaxGoogle()!\n");
-sum = 1.0e-20f;
-}
-diff = max_score + std::log(sum);
-for (uint32_t i = 0; i < num_outputs; i++) {
-ptr_output[i] = ptr_input[i] - diff;
-}
-}
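For reference, the SoftmaxGoogle helper removed above computes a numerically stable log-softmax, exactly as its comments describe; in formula form (the maximum m is subtracted purely for numerical stability):

    x_i \leftarrow x_i - \Bigl(m + \log \sum_j e^{x_j - m}\Bigr), \qquad m = \max_j x_j,

which equals x_i - \log \sum_j e^{x_j}.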
@ -1,66 +0,0 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <memory.h>
|
||||
#include <xmmintrin.h>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <iomanip>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "am_intel_dnn.hpp"
|
||||
#include "dnn_types.h"
|
||||
|
||||
#include <gna2-model-api.h>
|
||||
|
||||
#define DNN_MAX_BATCH_SIZE 8
|
||||
#define DNN_MAX_INPUTS 3072
|
||||
#define DNN_MAX_OUTPUTS 8192
|
||||
#define DNN_MAX_ERROR 1.0e-4f
|
||||
#define DNN_NUM_BYTES_INT_BIAS 4
|
||||
#define DNN_NUM_BYTES_INT_AFFINE_OUT 4
|
||||
#define DNN_RAND_INT8_AMPLITUDE 127.0f
|
||||
#define DNN_RAND_INT16_AMPLITUDE 16384.0f
|
||||
#define DNN_RAND_INT32_AMPLITUDE 1048576.0f
|
||||
#define DNN_RAND_FLOAT32_AMPLITUDE 8.0f
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace backend {
|
||||
|
||||
void PlotFloatIntDnn(GNAPluginNS::backend::AMIntelDNN *dnn, GNAPluginNS::backend::AMIntelDNN *dnn_int);
|
||||
void ClearScoreError(intel_score_error_t *error);
|
||||
void UpdateScoreError(intel_score_error_t *error, intel_score_error_t *total_error);
|
||||
void SoftmaxGoogle(float *ptr_output, float *ptr_input, const uint32_t num_outputs, const uint32_t num_inputs);
|
||||
|
||||
template <class T>
|
||||
void AdvanceOperationIfAllApplied(const std::vector<intel_dnn_component_t>& component, int i, T*& operation) {
|
||||
if (i == component.size() - 1 || component[i + 1].operation != kDnnPiecewiselinearOp) {
|
||||
++operation;
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void AdvanceCnnOperationIfAllApplied(const std::vector<intel_dnn_component_t>& component, int i, T*& operation) {
|
||||
if (i == component.size() - 1 || ((component[i + 1].operation != kDnnMaxPoolOp)
|
||||
&& (component[i + 1].operation != kDnnPiecewiselinearOp))) {
|
||||
operation++;
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void AdvancePwlOperationIfAllApplied(const std::vector<intel_dnn_component_t>& component, int i, T*& operation) {
|
||||
if (i == component.size() - 1 || ((component[i + 1].operation != kDnnMaxPoolOp)
|
||||
&& (component[i + 1].operation != kDnnPiecewiselinearOp))) {
|
||||
operation++;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace backend
|
||||
} // namespace GNAPluginNS
|
@@ -14,17 +14,18 @@
 #include "dnn_components.hpp"
 #include "log/log.hpp"

-using namespace ov::intel_gna;
-using namespace GNAPluginNS;
-using namespace GNAPluginNS::backend;
+namespace ov {
+namespace intel_gna {
+namespace backend {

-intel_dnn_component_t & DnnComponents::addComponent(const std::string layerName, const std::string layerMetaType) {
+intel_dnn_component_t& DnnComponents::addComponent(const std::string layerName, const std::string layerMetaType) {
 auto isDelayed = InferenceEngine::details::CaselessEq<std::string>()(layerMetaType, DelayedCopyLayerName);
 delayedOperations += isDelayed ? 1 : 0;
 components.emplace_back(DnnComponentExtra{layerName, {}, isDelayed});
-auto &currentComponent = components.back().dnnComponent;
+auto& currentComponent = components.back().dnnComponent;

-log::trace() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << components.size() - 1 << std::endl;
+log::trace() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_"
+<< components.size() - 1 << std::endl;

 currentComponent.original_layer_name = components.back().name.c_str();
 int execOrder = 0;
@@ -32,10 +33,11 @@ intel_dnn_component_t & DnnComponents::addComponent(const std::string layerName,
 execOrder = static_cast<int>(components.size() - 1 - delayedOperations);
 } else {
 // todo: not perfect - propose to create mapping table that will be printed out by extra request
-execOrder = - static_cast<int>(delayedOperations);
+execOrder = -static_cast<int>(delayedOperations);
 }

-log::debug() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << execOrder << std::endl;
+log::debug() << "IR layer : " << std::left << std::setw(20) << layerName << " " << layerMetaType << "_" << execOrder
+<< std::endl;
 return currentComponent;
 }

@@ -47,7 +49,7 @@ intel_dnn_component_t* DnnComponents::findComponent(InferenceEngine::CNNLayerPtr
 return nullptr;
 }

-intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(const std::string& layerName) {
+intel_dnn_component_t* DnnComponents::findComponent(const std::string& layerName) {
 auto component = std::find_if(begin(components), end(components), [&](const storage_type ::value_type& comp) {
 return comp.name == layerName;
 });
@@ -57,8 +59,7 @@ intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(const
 return nullptr;
 }

-const intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(
-const InferenceEngine::CNNLayerPtr layer) const {
+const intel_dnn_component_t* DnnComponents::findComponent(const InferenceEngine::CNNLayerPtr layer) const {
 if (layer) {
 return findComponent(layer->name);
 }
@@ -66,7 +67,7 @@ const intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(
 return nullptr;
 }

-const intel_dnn_component_t* GNAPluginNS::backend::DnnComponents::findComponent(const std::string& layerName) const {
+const intel_dnn_component_t* DnnComponents::findComponent(const std::string& layerName) const {
 auto component = std::find_if(begin(components), end(components), [&](const storage_type ::value_type& comp) {
 return comp.name == layerName;
 });
@@ -82,10 +83,14 @@ std::vector<intel_dnn_component_t> DnnComponents::getExecutionOrder() {
 uint32_t direct_id = 0;
 uint32_t delayed_id = static_cast<uint32_t>(components.size() - delayedOperations);

-for (auto &&c : components) {
-uint32_t &id = c.isDelayed ? delayed_id : direct_id;
+for (auto&& c : components) {
+uint32_t& id = c.isDelayed ? delayed_id : direct_id;
 result[id] = c.dnnComponent;
 id++;
 }
 return result;
 }

+} // namespace backend
+} // namespace intel_gna
+} // namespace ov
@@ -11,11 +11,13 @@

 #include <ie_common.h>
 #include <legacy/ie_layers.h>
-#include "dnn_types.h"
+#include "dnn_types.hpp"


-namespace GNAPluginNS {
+namespace ov {
+namespace intel_gna {
 namespace backend {

 struct DnnComponentExtra {
 std::string name;
 intel_dnn_component_t dnnComponent;
@@ -71,5 +73,7 @@ struct DnnComponents {
 private:
 uint32_t delayedOperations = 0;
 };

 } // namespace backend
-} // namespace GNAPluginNS
+} // namespace intel_gna
+} // namespace ov
@ -1,92 +0,0 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
// dnn_traits.hpp : c++ trait approach to define dnn objects
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "dnn_types.h"
|
||||
|
||||
template<intel_dnn_operation_t layer>
|
||||
struct DnnTrait {};
|
||||
|
||||
template<>
|
||||
struct DnnTrait<kDnnDiagonalOp> {
|
||||
using Type = intel_affine_t;
|
||||
static Type *getLayer(intel_dnn_component_t &component) {
|
||||
return &component.op.affine;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct DnnTrait<kDnnPiecewiselinearOp> {
|
||||
using Type = intel_piecewiselinear_t;
|
||||
static Type *getLayer(intel_dnn_component_t &component) {
|
||||
return &component.op.pwl;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct DnnTrait<kDnnAffineOp> {
|
||||
using Type = intel_affine_t;
|
||||
static Type *getLayer(intel_dnn_component_t &component) {
|
||||
return &component.op.affine;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct DnnTrait<kDnnConvolutional1dOp> {
|
||||
using Type = intel_convolutionalD_t;
|
||||
static Type *getLayer(intel_dnn_component_t &component) {
|
||||
return &component.op.conv1D;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct DnnTrait<kDnnMaxPoolOp> {
|
||||
using Type = intel_maxpool_t;
|
||||
static Type *getLayer(intel_dnn_component_t &component) {
|
||||
return &component.op.maxpool;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct DnnTrait<kDnnRecurrentOp> {
|
||||
using Type = intel_recurrent_t;
|
||||
static Type *getLayer(intel_dnn_component_t &component) {
|
||||
return &component.op.recurrent;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct DnnTrait<kDnnInterleaveOp> {
|
||||
using Type = intel_interleave_t;
|
||||
static Type *getLayer(intel_dnn_component_t &component) {
|
||||
return &component.op.interleave;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct DnnTrait<kDnnDeinterleaveOp> {
|
||||
using Type = intel_deinterleave_t;
|
||||
static Type *getLayer(intel_dnn_component_t &component) {
|
||||
return &component.op.deinterleave;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct DnnTrait<kDnnCopyOp> {
|
||||
using Type = intel_copy_t;
|
||||
static Type *getLayer(intel_dnn_component_t &component) {
|
||||
return &component.op.copy;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct DnnTrait<kDnnNullOp> {
|
||||
using Type = void;
|
||||
static Type *getLayer(intel_dnn_component_t &component) {
|
||||
return nullptr;
|
||||
}
|
||||
};
|
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-#include "dnn_types.h"
+#include "dnn_types.hpp"

 const char *intel_dnn_activation_name[kActNumType] = {
 "kActNone",
@ -25,13 +25,6 @@ const char *intel_dnn_activation_name[kActNumType] = {
|
||||
"kActPwl"
|
||||
};
|
||||
|
||||
const char *intel_dnn_softmax_name[kSoftmaxNumType] = {
|
||||
"kSoftmaxNone",
|
||||
"kSoftmaxKaldiSumGroup",
|
||||
"kSoftmaxKaldiApplyLog",
|
||||
"kSoftmaxGoogle"
|
||||
};
|
||||
|
||||
const char* intel_dnn_operation_name[kDnnNumOp] = {
|
||||
"kDnnNullOp",
|
||||
"kDnnAffineOp",
|
||||
@ -46,12 +39,6 @@ const char* intel_dnn_operation_name[kDnnNumOp] = {
|
||||
"kDnnCopyOp"
|
||||
};
|
||||
|
||||
const char *intel_dnn_macro_operation_name[kDnnNumMacroOp] = {
|
||||
"kDnnMacroOpNone",
|
||||
"kDnnMacroOpLstm",
|
||||
"kDnnMacroOpBiLstm"
|
||||
};
|
||||
|
||||
const char *intel_dnn_number_type_name[kDnnNumNumberType] = {
|
||||
"kDnnFloat",
|
||||
"kDnnInt"
|
||||
|
@@ -10,7 +10,7 @@
 #include <string>
 #include <type_traits>

-#include "gna_types.h"
+#include "gna_types.hpp"
 #include "log/debug.hpp"

 enum DnnActivationType : uint8_t {
@ -83,16 +83,6 @@ static_assert(std::is_trivial<DnnActivation>::value, "DnnActivation is not triva
|
||||
|
||||
extern const char *intel_dnn_activation_name[kActNumType];
|
||||
|
||||
typedef enum DnnSoftmaxType {
|
||||
kSoftmaxNone,
|
||||
kSoftmaxKaldiSumgroup,
|
||||
kSoftmaxEesen,
|
||||
kSoftmaxGoogle,
|
||||
kSoftmaxNumType
|
||||
} intel_dnn_softmax_type_t;
|
||||
|
||||
extern const char *intel_dnn_softmax_name[kSoftmaxNumType];
|
||||
|
||||
typedef enum {
|
||||
kDnnUnknownOrientation = 100,
|
||||
kDnnInterleavedOrientation,
|
||||
@ -117,15 +107,6 @@ typedef enum {
|
||||
|
||||
extern const char* intel_dnn_operation_name[kDnnNumOp];
|
||||
|
||||
typedef enum {
|
||||
kDnnMacroOpNone,
|
||||
kDnnMacroOpLstm,
|
||||
kDnnMacroOpBiLstm,
|
||||
kDnnNumMacroOp
|
||||
} intel_dnn_macro_operation_t;
|
||||
|
||||
extern const char *intel_dnn_macro_operation_name[kDnnNumMacroOp];
|
||||
|
||||
typedef enum {
|
||||
kDnnFloat,
|
||||
kDnnInt,
|
||||
@ -262,7 +243,6 @@ struct intel_dnn_component_t {
|
||||
uint32_t num_bytes_per_input;
|
||||
uint32_t num_bytes_per_output;
|
||||
intel_dnn_operation_t operation;
|
||||
intel_dnn_macro_operation_t macro_operation;
|
||||
intel_dnn_orientation_t orientation_in;
|
||||
intel_dnn_orientation_t orientation_out;
|
||||
union operation_struct_t {
|
@@ -15,11 +15,10 @@
 #include "gna_limitations.hpp"
 #include "gna/gna_config.hpp"

-using namespace ov::intel_gna;
-
-namespace GNAPluginNS {
-namespace GNALimitations {
-namespace Cnn2D {
+namespace ov {
+namespace intel_gna {
+namespace limitations {
+namespace cnn2d {

 bool IsEqualToLimit::isValid(const uint32_t val) const {
 return val == compared_value;
@ -353,7 +352,7 @@ bool AbstractValidator::ValidationSuccesful(const bool throwOnError,
|
||||
return error.empty();
|
||||
}
|
||||
|
||||
} // namespace Cnn2D
|
||||
} // namespace cnn2d
|
||||
|
||||
IE_SUPPRESS_DEPRECATED_START
|
||||
static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) {
|
||||
@ -370,7 +369,7 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st
|
||||
auto isFusableWithConv = [](InferenceEngine::CNNLayerPtr ptr) {
|
||||
return (LayerInfo(ptr).isFusableWithConv() || LayerInfo(ptr).isNonFunctional() ||
|
||||
(LayerInfo(ptr).isPermute() && ((ptr->input()->getLayout() == InferenceEngine::Layout::NCHW &&
|
||||
ptr->GetParamAsInts("order") == GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)) ||
|
||||
ptr->GetParamAsInts("order") == permute::GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)) ||
|
||||
(ptr->input()->getLayout() == InferenceEngine::Layout::CHW &&
|
||||
ptr->GetParamAsInts("order") == std::vector<int32_t>{0, 2, 1} /* NCW to NWC */))));
|
||||
};
|
||||
@ -505,12 +504,12 @@ bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concat_layer) {
|
||||
break;
|
||||
|
||||
// Convert dims to NHWC layout to allow later verification
|
||||
auto new_order = GetPermuteOrder(concat_layout, InferenceEngine::Layout::NHWC);
|
||||
auto new_order = permute::GetPermuteOrder(concat_layout, InferenceEngine::Layout::NHWC);
|
||||
InferenceEngine::SizeVector new_dims;
|
||||
for (size_t i = 0; i < dims_size; ++i) {
|
||||
new_dims.push_back(in_dims[new_order[i]]);
|
||||
}
|
||||
concat_axis = GetPermuteOrder(InferenceEngine::Layout::NHWC, concat_layout)[concat_axis];
|
||||
concat_axis = permute::GetPermuteOrder(InferenceEngine::Layout::NHWC, concat_layout)[concat_axis];
|
||||
|
||||
// Looking for any axis with dimension > 1 before concatentaion axis;
|
||||
// in general such concatenation is unsupported
|
||||
@ -565,7 +564,7 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
|
||||
startLayer,
|
||||
[&](const InferenceEngine::CNNLayerPtr layer) {
|
||||
LayerInfo info(layer);
|
||||
if (GNAPluginNS::LayerTypeFromStr(layer->type) == GNAPluginNS::LayerType::NO_TYPE) {
|
||||
if (LayerTypeFromStr(layer->type) == LayerType::NO_TYPE) {
|
||||
errMessage = "The plugin does not support layer: " + layer->name + ":" + layer->type + "\n";
|
||||
check_result = false;
|
||||
}
|
||||
@@ -591,5 +590,6 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
 }
 IE_SUPPRESS_DEPRECATED_END

-} // namespace GNALimitations
-} // namespace GNAPluginNS
+} // namespace limitations
+} // namespace intel_gna
+} // namespace ov
@@ -4,15 +4,16 @@

 #pragma once

-#include "dnn_types.h"
+#include "dnn_types.hpp"
 #include <cstdint>
 #include <cpp/ie_cnn_network.h>
 #include <ie_algorithm.hpp>
 #include <legacy/ie_layers.h>
 #include "gna_lib_ver_selector.hpp"

-namespace GNAPluginNS {
-namespace GNALimitations {
+namespace ov {
+namespace intel_gna {
+namespace limitations {

 constexpr uint32_t bufferMaxSize = 65528;
@ -65,7 +66,8 @@ inline bool IsTransposeSupported(const std::vector<size_t>& shape) {
|
||||
return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize;
|
||||
}
|
||||
|
||||
namespace Cnn2D {
|
||||
namespace cnn2d {
|
||||
|
||||
struct IsEqualToLimit {
|
||||
uint32_t compared_value;
|
||||
std::string what;
|
||||
@ -118,11 +120,10 @@ struct VectorOrSquareLimit {
|
||||
};
|
||||
|
||||
struct RectLimitByChannels {
|
||||
std::vector<std::pair<uint32_t, RectLimit> > limitPerChannel;
|
||||
std::vector<std::pair<uint32_t, RectLimit>> limitPerChannel;
|
||||
RectLimit GetByChannels(const uint32_t channels) const;
|
||||
bool isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
|
||||
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
const uint32_t channels, std::string what) const;
|
||||
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, const uint32_t channels, std::string what) const;
|
||||
};
|
||||
|
||||
struct RectLimitByChannelsAndPrecision {
|
||||
@ -130,8 +131,11 @@ struct RectLimitByChannelsAndPrecision {
|
||||
RectLimitByChannels limit_for_int16;
|
||||
RectLimitByChannels GetByPrecision(const OvGnaType precision) const;
|
||||
bool isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
|
||||
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w,
|
||||
const OvGnaType precision, const uint32_t channels, std::string what) const;
|
||||
std::string GetErrorOrEmpty(const uint32_t h,
|
||||
const uint32_t w,
|
||||
const OvGnaType precision,
|
||||
const uint32_t channels,
|
||||
std::string what) const;
|
||||
};
|
||||
|
||||
class AbstractValidator {
|
||||
@ -144,29 +148,51 @@ protected:
|
||||
|
||||
public:
|
||||
virtual ~AbstractValidator() = default;
|
||||
virtual bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
|
||||
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
|
||||
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
|
||||
OvGnaType inPrecision, bool exception = true) const = 0;
|
||||
virtual bool ValidateCnn2D(const std::string& name,
|
||||
const uint32_t inHeight,
|
||||
const uint32_t inWidth,
|
||||
const uint32_t inChannels,
|
||||
const uint32_t kH,
|
||||
const uint32_t kW,
|
||||
const uint32_t kN,
|
||||
const uint32_t strideH,
|
||||
const uint32_t strideW,
|
||||
const uint32_t dilationH,
|
||||
const uint32_t dilationW,
|
||||
OvGnaType inPrecision,
|
||||
bool exception = true) const = 0;
|
||||
|
||||
virtual bool ValidatePooling2D(const std::string& name,
|
||||
const uint32_t windowH, const uint32_t windowW,
|
||||
const uint32_t strideH, const uint32_t strideW,
|
||||
bool exception = true) const = 0;
|
||||
const uint32_t windowH,
|
||||
const uint32_t windowW,
|
||||
const uint32_t strideH,
|
||||
const uint32_t strideW,
|
||||
bool exception = true) const = 0;
|
||||
|
||||
virtual bool ValidateInputPadding(const std::string& name,
|
||||
const uint32_t pad_h_begin, const uint32_t pad_h_end,
|
||||
const uint32_t pad_w_begin, const uint32_t pad_w_end,
|
||||
const uint32_t kernel_h,
|
||||
const uint32_t kernel_w,
|
||||
const bool throwOnError = true) const = 0;
|
||||
const uint32_t pad_h_begin,
|
||||
const uint32_t pad_h_end,
|
||||
const uint32_t pad_w_begin,
|
||||
const uint32_t pad_w_end,
|
||||
const uint32_t kernel_h,
|
||||
const uint32_t kernel_w,
|
||||
const bool throwOnError = true) const = 0;
|
||||
|
||||
virtual bool ShouldUseOnlyConv2DGnaIface() const = 0;
|
||||
|
||||
virtual bool ValidateCnn1D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
|
||||
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
|
||||
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
|
||||
OvGnaType inPrecision, bool exception = true) const = 0;
|
||||
virtual bool ValidateCnn1D(const std::string& name,
|
||||
const uint32_t inHeight,
|
||||
const uint32_t inWidth,
|
||||
const uint32_t inChannels,
|
||||
const uint32_t kH,
|
||||
const uint32_t kW,
|
||||
const uint32_t kN,
|
||||
const uint32_t strideH,
|
||||
const uint32_t strideW,
|
||||
const uint32_t dilationH,
|
||||
const uint32_t dilationW,
|
||||
OvGnaType inPrecision,
|
||||
bool exception = true) const = 0;
|
||||
|
||||
static std::unique_ptr<AbstractValidator> Create(const std::string&);
|
||||
};
|
||||
@ -184,29 +210,51 @@ class Validator_30 : public AbstractValidator {
|
||||
public:
|
||||
Validator_30() = default;
|
||||
|
||||
bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
|
||||
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
|
||||
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
|
||||
OvGnaType inPrecision, bool exception = true) const override;
|
||||
bool ValidateCnn2D(const std::string& name,
|
||||
const uint32_t inHeight,
|
||||
const uint32_t inWidth,
|
||||
const uint32_t inChannels,
|
||||
const uint32_t kH,
|
||||
const uint32_t kW,
|
||||
const uint32_t kN,
|
||||
const uint32_t strideH,
|
||||
const uint32_t strideW,
|
||||
const uint32_t dilationH,
|
||||
const uint32_t dilationW,
|
||||
OvGnaType inPrecision,
|
||||
bool exception = true) const override;
|
||||
|
||||
bool ValidatePooling2D(const std::string& name,
|
||||
const uint32_t windowH, const uint32_t windowW,
|
||||
const uint32_t strideH, const uint32_t strideW,
|
||||
bool exception = true) const override;
|
||||
const uint32_t windowH,
|
||||
const uint32_t windowW,
|
||||
const uint32_t strideH,
|
||||
const uint32_t strideW,
|
||||
bool exception = true) const override;
|
||||
|
||||
bool ValidateInputPadding(const std::string& name,
|
||||
const uint32_t pad_h_begin, const uint32_t pad_h_end,
|
||||
const uint32_t pad_w_begin, const uint32_t pad_w_end,
|
||||
const uint32_t kernel_h,
|
||||
const uint32_t kernel_w,
|
||||
const bool throwOnError = true) const override;
|
||||
const uint32_t pad_h_begin,
|
||||
const uint32_t pad_h_end,
|
||||
const uint32_t pad_w_begin,
|
||||
const uint32_t pad_w_end,
|
||||
const uint32_t kernel_h,
|
||||
const uint32_t kernel_w,
|
||||
const bool throwOnError = true) const override;
|
||||
|
||||
bool ShouldUseOnlyConv2DGnaIface() const override;
|
||||
|
||||
bool ValidateCnn1D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
|
||||
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
|
||||
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
|
||||
OvGnaType inPrecision, bool exception = true) const override;
|
||||
bool ValidateCnn1D(const std::string& name,
|
||||
const uint32_t inHeight,
|
||||
const uint32_t inWidth,
|
||||
const uint32_t inChannels,
|
||||
const uint32_t kH,
|
||||
const uint32_t kW,
|
||||
const uint32_t kN,
|
||||
const uint32_t strideH,
|
||||
const uint32_t strideW,
|
||||
const uint32_t dilationH,
|
||||
const uint32_t dilationW,
|
||||
OvGnaType inPrecision,
|
||||
bool exception = true) const override;
|
||||
};
|
||||
|
||||
class Validator_35 : public AbstractValidator {
|
||||
@ -228,7 +276,30 @@ class Validator_35 : public AbstractValidator {
|
||||
static const CnnLimits kCnn1DLimits;
|
||||
|
||||
std::string ValidateCnn(const CnnLimits& limits,
|
||||
const std::string& name,
|
||||
const std::string& name,
|
||||
const uint32_t inHeight,
|
||||
const uint32_t inWidth,
|
||||
const uint32_t inChannels,
|
||||
const uint32_t kH,
|
||||
const uint32_t kW,
|
||||
const uint32_t kN,
|
||||
const uint32_t strideH,
|
||||
const uint32_t strideW,
|
||||
const uint32_t dilationH,
|
||||
const uint32_t dilationW,
|
||||
OvGnaType inPrecision) const;
|
||||
|
||||
std::string ValidatePooling(const CnnLimits& limits,
|
||||
const std::string& name,
|
||||
const uint32_t windowH,
|
||||
const uint32_t windowW,
|
||||
const uint32_t strideH,
|
||||
const uint32_t strideW) const;
|
||||
|
||||
public:
|
||||
Validator_35() = default;
|
||||
|
||||
bool ValidateCnn2D(const std::string& name,
|
||||
const uint32_t inHeight,
|
||||
const uint32_t inWidth,
|
||||
const uint32_t inChannels,
|
||||
@ -239,43 +310,43 @@ class Validator_35 : public AbstractValidator {
|
||||
const uint32_t strideW,
|
||||
const uint32_t dilationH,
|
||||
const uint32_t dilationW,
|
||||
OvGnaType inPrecision) const;
|
||||
OvGnaType inPrecision,
|
||||
bool exception = true) const override;
|
||||
|
||||
std::string ValidatePooling(const CnnLimits& limits,
|
||||
const std::string& name,
|
||||
bool ValidatePooling2D(const std::string& name,
|
||||
const uint32_t windowH,
|
||||
const uint32_t windowW,
|
||||
const uint32_t strideH,
|
||||
const uint32_t strideW) const;
|
||||
|
||||
public:
|
||||
Validator_35() = default;
|
||||
|
||||
bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
|
||||
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
|
||||
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
|
||||
OvGnaType inPrecision, bool exception = true) const override;
|
||||
|
||||
bool ValidatePooling2D(const std::string& name,
|
||||
const uint32_t windowH, const uint32_t windowW,
|
||||
const uint32_t strideH, const uint32_t strideW,
|
||||
bool exception = true) const override;
|
||||
const uint32_t strideW,
|
||||
bool exception = true) const override;
|
||||
|
||||
bool ValidateInputPadding(const std::string& name,
|
||||
const uint32_t pad_h_begin, const uint32_t pad_h_end,
|
||||
const uint32_t pad_w_begin, const uint32_t pad_w_end,
|
||||
const uint32_t kernel_h,
|
||||
const uint32_t kernel_w,
|
||||
const bool throwOnError = true) const override;
|
||||
const uint32_t pad_h_begin,
|
||||
const uint32_t pad_h_end,
|
||||
const uint32_t pad_w_begin,
|
||||
const uint32_t pad_w_end,
|
||||
const uint32_t kernel_h,
|
||||
const uint32_t kernel_w,
|
||||
const bool throwOnError = true) const override;
|
||||
|
||||
bool ShouldUseOnlyConv2DGnaIface() const override;
|
||||
|
||||
bool ValidateCnn1D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
|
||||
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
|
||||
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
|
||||
OvGnaType inPrecision, bool exception = true) const override;
|
||||
bool ValidateCnn1D(const std::string& name,
|
||||
const uint32_t inHeight,
|
||||
const uint32_t inWidth,
|
||||
const uint32_t inChannels,
|
||||
const uint32_t kH,
|
||||
const uint32_t kW,
|
||||
const uint32_t kN,
|
||||
const uint32_t strideH,
|
||||
const uint32_t strideW,
|
||||
const uint32_t dilationH,
|
||||
const uint32_t dilationW,
|
||||
OvGnaType inPrecision,
|
||||
bool exception = true) const override;
|
||||
};
|
||||
} // namespace Cnn2D
|
||||
|
||||
} // namespace cnn2d
|
||||
|
||||
bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
|
||||
|
||||
@ -293,5 +364,6 @@ IE_SUPPRESS_DEPRECATED_START
|
||||
bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concatLayer);
|
||||
IE_SUPPRESS_DEPRECATED_END
|
||||
|
||||
} // namespace GNALimitations
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace limitations
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -10,9 +10,9 @@
|
||||
|
||||
#include "runtime/pwl.h"
|
||||
#include "make_pwl.hpp"
|
||||
#include "gna_slope_scale.h"
|
||||
#include "dnn_types.h"
|
||||
#include "backend/gna_types.h"
|
||||
#include "gna_slope_scale.hpp"
|
||||
#include "dnn_types.hpp"
|
||||
#include "backend/gna_types.hpp"
|
||||
#include "common/numerical_utils.hpp"
|
||||
#include "pwl_input_params.hpp"
|
||||
#include "pwl_segments_creator_factory.hpp"
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "backend/dnn_types.h"
|
||||
#include "backend/dnn_types.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "backend/gna_types.h"
|
||||
#include "backend/gna_types.hpp"
|
||||
#include "pwl_border_values_counter.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <functional>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "backend/dnn_types.h"
|
||||
#include "backend/dnn_types.hpp"
|
||||
#include "pwl_border_values_counter_identity.hpp"
|
||||
#include "pwl_segments_creator_identity.hpp"
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#include "log/debug.hpp"
|
||||
#include "log/log.hpp"
|
||||
#include "gna_slope_scale.h"
|
||||
#include "gna_slope_scale.hpp"
|
||||
#include "pwl_input_params.hpp"
|
||||
#include "pwl_tools.hpp"
|
||||
#include "runtime/pwl.h"
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
#include "pwl_tools.hpp"
|
||||
|
||||
#include "gna_slope_scale.h"
|
||||
#include "gna_slope_scale.hpp"
|
||||
#include "common/numerical_utils.hpp"
|
||||
#include "runtime/pwl.h"
|
||||
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "backend/gna_types.h"
|
||||
#include "backend/gna_types.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
@ -4,13 +4,17 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace common {
|
||||
|
||||
static constexpr const char* kGnaTargetUnspecified = "";
|
||||
static constexpr const char* kGnaTarget2_0 = "GNA_TARGET_2_0";
|
||||
static constexpr const char* kGnaTarget3_0 = "GNA_TARGET_3_0";
|
||||
static constexpr const char* kGnaTarget3_1 = "GNA_TARGET_3_1";
|
||||
static constexpr const char* kGnaTarget3_5 = "GNA_TARGET_3_5";
|
||||
static constexpr const char* kGnaDefaultTarget = kGnaTarget3_0;
|
||||
} // namespace common
|
||||
} // namespace GNAPluginNS
|
||||
|
||||
} // namespace common
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -6,7 +6,9 @@
|
||||
|
||||
#include <legacy/ie_layers.h>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
struct ConnectionDetails {
|
||||
InferenceEngine::CNNLayerPtr input;
|
||||
bool needTransposeWeights = false;
|
||||
@ -19,4 +21,6 @@ struct ConnectionDetails {
|
||||
, permute(permute) {
|
||||
}
|
||||
};
|
||||
} // namespace GNAPluginNS
|
||||
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -13,10 +13,11 @@
|
||||
#include "ie_input_info.hpp"
|
||||
#include "ie_algorithm.hpp"
|
||||
|
||||
#include "backend/dnn_types.h"
|
||||
#include "backend/dnn_types.hpp"
|
||||
#include "gna_plugin_config.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
/*
|
||||
* This base structure accumulates all required information for network inputs and outputs
|
||||
@ -27,15 +28,15 @@ struct GnaDesc {
|
||||
std::unordered_set<std::string> tensor_names = {};
|
||||
InferenceEngine::Layout model_layout = InferenceEngine::Layout::ANY;
|
||||
InferenceEngine::SizeVector dims = {};
|
||||
InferenceEngine::Precision model_precision = InferenceEngine::Precision::UNSPECIFIED;
|
||||
InferenceEngine::Precision model_precision = InferenceEngine::Precision::UNSPECIFIED;
|
||||
InferenceEngine::Precision tensor_precision = InferenceEngine::Precision::UNSPECIFIED;
|
||||
|
||||
// gna specific properties
|
||||
double scale_factor = GNAPluginNS::kScaleFactorDefault;
|
||||
double scale_factor = kScaleFactorDefault;
|
||||
intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
|
||||
uint32_t num_elements = 0;
|
||||
uint32_t allocated_size = 0;
|
||||
std::vector<void *> ptrs = {}; // ptr per each infer request
|
||||
std::vector<void*> ptrs = {}; // ptr per each infer request
|
||||
|
||||
// help methods
|
||||
uint32_t get_required_size() const {
|
||||
@ -53,25 +54,27 @@ struct GnaDesc {
|
||||
// helps to get the precision for gna layers, because they use num_bytes instead of precision values
|
||||
void set_precision(uint32_t num_bytes) {
|
||||
switch (num_bytes) {
|
||||
case sizeof(int8_t) : {
|
||||
set_precision(InferenceEngine::Precision::I8);
|
||||
break;
|
||||
}
|
||||
case sizeof(int16_t) : {
|
||||
set_precision(InferenceEngine::Precision::I16);
|
||||
break;
|
||||
}
|
||||
case sizeof(int32_t) : {
|
||||
set_precision(InferenceEngine::Precision::I32);
|
||||
break;
|
||||
}
|
||||
default :
|
||||
set_precision(InferenceEngine::Precision::UNSPECIFIED);
|
||||
case sizeof(int8_t): {
|
||||
set_precision(InferenceEngine::Precision::I8);
|
||||
break;
|
||||
}
|
||||
case sizeof(int16_t): {
|
||||
set_precision(InferenceEngine::Precision::I16);
|
||||
break;
|
||||
}
|
||||
case sizeof(int32_t): {
|
||||
set_precision(InferenceEngine::Precision::I32);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
set_precision(InferenceEngine::Precision::UNSPECIFIED);
|
||||
}
|
||||
}
|
||||
|
||||
InferenceEngine::DataPtr to_ie_data() {
|
||||
return std::make_shared<InferenceEngine::Data>(name, InferenceEngine::TensorDesc(model_precision, dims, model_layout));
|
||||
return std::make_shared<InferenceEngine::Data>(
|
||||
name,
|
||||
InferenceEngine::TensorDesc(model_precision, dims, model_layout));
|
||||
}
|
||||
};
|
||||
|
||||
@ -79,7 +82,9 @@ struct GnaDesc {
|
||||
* This structure accumulates all required information for one the network input
|
||||
*/
|
||||
struct InputDesc : GnaDesc {
|
||||
InputDesc(const std::string &name) { this->name = name; }
|
||||
InputDesc(const std::string& name) {
|
||||
this->name = name;
|
||||
}
|
||||
|
||||
void Update(const InferenceEngine::InputInfo::Ptr inputInfo) {
|
||||
this->model_precision = inputInfo->getPrecision();
|
||||
@ -101,7 +106,9 @@ struct InputDesc : GnaDesc {
|
||||
* This structure accumulates all required information for one network output
|
||||
*/
|
||||
struct OutputDesc : GnaDesc {
|
||||
OutputDesc(const std::string &name) { this->name = name; }
|
||||
OutputDesc(const std::string& name) {
|
||||
this->name = name;
|
||||
}
|
||||
|
||||
void Update(const InferenceEngine::DataPtr outputData) {
|
||||
this->model_precision = outputData->getPrecision();
|
||||
@ -123,9 +130,9 @@ private:
|
||||
std::vector<T> infos_;
|
||||
|
||||
public:
|
||||
GnaNetworkInfo(): infos_({}) { }
|
||||
GnaNetworkInfo() : infos_({}) {}
|
||||
|
||||
const T& at(const std::string &key) const {
|
||||
const T& at(const std::string& key) const {
|
||||
if (key.empty()) {
|
||||
throw std::invalid_argument("The key cannot be empty");
|
||||
}
|
||||
@ -136,8 +143,8 @@ public:
|
||||
return *desc_it;
|
||||
}
|
||||
|
||||
T& at(const std::string &key) {
|
||||
return const_cast<T&>( static_cast<const GnaNetworkInfo&>(*this).at(key) );
|
||||
T& at(const std::string& key) {
|
||||
return const_cast<T&>(static_cast<const GnaNetworkInfo&>(*this).at(key));
|
||||
}
|
||||
|
||||
typename std::vector<T>::iterator end() {
|
||||
@ -156,11 +163,13 @@ public:
|
||||
});
|
||||
}
|
||||
|
||||
T& operator[](const std::string &key) {
|
||||
T& operator[](const std::string& key) {
|
||||
if (key.empty()) {
|
||||
throw std::invalid_argument("The key cannot be empty");
|
||||
}
|
||||
auto desc_it = std::find_if(infos_.begin(), infos_.end(), [&key](const T& desc){return desc.name == key;});
|
||||
auto desc_it = std::find_if(infos_.begin(), infos_.end(), [&key](const T& desc) {
|
||||
return desc.name == key;
|
||||
});
|
||||
if (desc_it == infos_.end()) {
|
||||
infos_.push_back(T(key));
|
||||
return infos_.back();
|
||||
@ -168,16 +177,25 @@ public:
|
||||
return *desc_it;
|
||||
}
|
||||
|
||||
size_t size() const { return infos_.size(); }
|
||||
size_t size() const {
|
||||
return infos_.size();
|
||||
}
|
||||
|
||||
bool empty() const { return infos_.empty(); }
|
||||
bool empty() const {
|
||||
return infos_.empty();
|
||||
}
|
||||
|
||||
const std::vector<T>& Get() const { return infos_; }
|
||||
const std::vector<T>& Get() const {
|
||||
return infos_;
|
||||
}
|
||||
|
||||
std::vector<T>& Get() { return infos_; }
|
||||
std::vector<T>& Get() {
|
||||
return infos_;
|
||||
}
|
||||
};
|
||||
|
||||
typedef GnaNetworkInfo<InputDesc> GnaInputs;
|
||||
typedef GnaNetworkInfo<OutputDesc> GnaOutputs;
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -8,7 +8,9 @@
|
||||
#include "openvino/runtime/intel_gna/properties.hpp"
|
||||
#include "gna/gna_config.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
struct GNAFlags {
|
||||
uint8_t num_requests = 1;
|
||||
bool compact_mode = true;
|
||||
@ -22,4 +24,6 @@ struct GNAFlags {
|
||||
bool input_low_precision = false;
|
||||
ov::log::Level log_level = ov::log::Level::NO;
|
||||
};
|
||||
} // namespace GNAPluginNS
|
||||
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -4,12 +4,10 @@
|
||||
|
||||
#include "layer_quantizer.hpp"
|
||||
#include "weights_converter.hpp"
|
||||
#include "backend/gna_types.h"
|
||||
#include "backend/gna_types.hpp"
|
||||
#include "common/gna_target.hpp"
|
||||
#include "gna_graph_tools.hpp"
|
||||
|
||||
using namespace GNAPluginNS;
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace frontend {
|
||||
@ -252,7 +250,7 @@ void LayerQuantizer::QuantizeWeightsBiases(InferenceEngine::WeightableLayer& wl)
|
||||
QuantizationData common_data{
|
||||
num_rows,
|
||||
num_columns,
|
||||
GNAPluginNS::kScaleFactorDefault,
|
||||
kScaleFactorDefault,
|
||||
quant_layer_params->_weights_quant
|
||||
};
|
||||
|
||||
|
@ -13,8 +13,6 @@ namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace frontend {
|
||||
|
||||
using namespace GNAPluginNS;
|
||||
|
||||
/**
|
||||
* @brief Returns layer's target input precision
|
||||
* @return layer's target input precision
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include "log/debug.hpp"
|
||||
#include "log/log.hpp"
|
||||
#include "layers/gna_fake_quantize_layer.hpp"
|
||||
#include "backend/gna_types.h"
|
||||
#include "backend/gna_types.hpp"
|
||||
#include "quantization.hpp"
|
||||
|
||||
namespace ov {
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
#include "quantized_layer_params.hpp"
|
||||
#include "backend/gna_types.h"
|
||||
#include "backend/gna_types.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
@ -3,7 +3,7 @@
|
||||
//
|
||||
|
||||
#include "scale_factor_calc.hpp"
|
||||
#include "gna_slope_scale.h"
|
||||
#include "gna_slope_scale.hpp"
|
||||
#include "common/numerical_utils.hpp"
|
||||
#include "layer_quantizer.hpp"
|
||||
#include "gna_upstream_iterator.hpp"
|
||||
@ -323,7 +323,7 @@ bool ScaleFactorCalculator::requantizeInput(InferenceEngine::CNNLayerPtr input,
|
||||
*/
|
||||
float ScaleFactorCalculator::adjustScaleFactor(float sf,
|
||||
InferenceEngine::CNNLayer const* cnnLayer,
|
||||
GNAPluginNS::LayerInfo const& layer,
|
||||
LayerInfo const& layer,
|
||||
QuantizedLayerParams* quantizedParams) const {
|
||||
auto get_rank = [](uint32_t value) {
|
||||
uint8_t rank = 0;
|
||||
@ -365,7 +365,7 @@ float ScaleFactorCalculator::adjustScaleFactor(float sf,
|
||||
}
|
||||
|
||||
float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const* cnnLayer,
|
||||
GNAPluginNS::LayerInfo const& layer,
|
||||
LayerInfo const& layer,
|
||||
int inputsSize,
|
||||
const bool fake_quantized) const {
|
||||
auto quantizedParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(*cnnLayer);
|
||||
@ -420,9 +420,9 @@ float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const*
|
||||
double offset = 0;
|
||||
auto powerLayer = dynamic_cast<InferenceEngine::PowerLayer const*>(cnnLayer);
|
||||
if (!powerLayer) {
|
||||
std::shared_ptr<ov::intel_gna::op::Pwl> pwl_node;
|
||||
std::shared_ptr<op::Pwl> pwl_node;
|
||||
if (!cnnLayer->getNode() ||
|
||||
!(pwl_node = std::dynamic_pointer_cast<ov::intel_gna::op::Pwl>(cnnLayer->getNode()))) {
|
||||
!(pwl_node = std::dynamic_pointer_cast<op::Pwl>(cnnLayer->getNode()))) {
|
||||
IE_THROW() << "Incorrect Power Layer pointer \n";
|
||||
} else {
|
||||
auto powerIE = std::dynamic_pointer_cast<ngraph::op::PowerIE>(pwl_node->get_base_node());
|
||||
@ -587,7 +587,7 @@ float ScaleFactorCalculator::getActivationScale(InferenceEngine::CNNLayer const*
|
||||
bool ScaleFactorCalculator::ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cnnLayer,
|
||||
ScaleFactorUpdateResult& result,
|
||||
int infiniteLoopCount,
|
||||
const GNAPluginNS::Config& gna_config) const {
|
||||
const Config& gna_config) const {
|
||||
if ( !cnnLayer ) {
|
||||
IE_THROW() << "Incorrect Layer pointer \n";
|
||||
}
|
||||
@ -1234,7 +1234,7 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerWeightable(InferenceEngine::Weigh
|
||||
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
|
||||
if (conv && !LayerInfo(conv).isConvolutionFilter()) {
|
||||
const auto inDepth = GetDataDimByName(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
|
||||
weights_reducer = GNAConvolutionLayer::getWeightsReducer(*conv);
|
||||
weights_reducer = gna_convolution_layer::getWeightsReducer(*conv);
|
||||
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
|
||||
weights_reducer = std::max(1.0, weights_reducer);
|
||||
}
|
||||
|
@ -49,13 +49,13 @@ struct ScaleFactorUpdateResult {
|
||||
class ScaleFactorCalculator {
|
||||
using Cnt = std::vector<InferenceEngine::CNNLayerPtr>;
|
||||
Cnt net;
|
||||
const GNAPluginNS::Config& gna_config;
|
||||
const Config& gna_config;
|
||||
const bool fake_quantized;
|
||||
mutable Cnt::const_iterator idx;
|
||||
mutable bool needRestart = false;
|
||||
int infiniteLoopCount = 0;
|
||||
|
||||
std::vector<double> getPWLSlopes(const GNAPluginNS::LayerInfo& info) const;
|
||||
std::vector<double> getPWLSlopes(const LayerInfo& info) const;
|
||||
static float selectBestOutputScaleFactors(float inScale,
|
||||
std::vector<float> outScales,
|
||||
const std::vector<double>& slopes);
|
||||
@ -71,35 +71,35 @@ class ScaleFactorCalculator {
|
||||
int infiniteLoopCount);
|
||||
float adjustScaleFactor(float sf,
|
||||
InferenceEngine::CNNLayer const* cnnLayer,
|
||||
GNAPluginNS::LayerInfo const& layer,
|
||||
LayerInfo const& layer,
|
||||
QuantizedLayerParams* quantizedParams) const;
|
||||
float getActivationScale(InferenceEngine::CNNLayer const* cnnLayer,
|
||||
GNAPluginNS::LayerInfo const& layer,
|
||||
LayerInfo const& layer,
|
||||
int inputsSize,
|
||||
const bool fakeQuantize) const;
|
||||
bool ScaleFactorPerLayerCNN(InferenceEngine::CNNLayer* cnnLayer,
|
||||
ScaleFactorUpdateResult& result,
|
||||
int infiniteLoopCount,
|
||||
const GNAPluginNS::Config& gna_config) const;
|
||||
const Config& gna_config) const;
|
||||
bool ScaleFactorPerLayerConcat(InferenceEngine::ConcatLayer* concatLayer,
|
||||
ScaleFactorUpdateResult& result,
|
||||
int infiniteLoopCount,
|
||||
const GNAPluginNS::Config& gna_config) const;
|
||||
const Config& gna_config) const;
|
||||
bool ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseLayer* eltwiseLayer,
|
||||
ScaleFactorUpdateResult& result,
|
||||
int infiniteLoopCount,
|
||||
const GNAPluginNS::Config& gna_config) const;
|
||||
const Config& gna_config) const;
|
||||
bool ScaleFactorPerLayerGemm(InferenceEngine::GemmLayer* gemmLayer,
|
||||
ScaleFactorUpdateResult& result,
|
||||
int infiniteLoopCount,
|
||||
const GNAPluginNS::Config& gna_config) const;
|
||||
const Config& gna_config) const;
|
||||
bool ScaleFactorPerLayerWeightable(InferenceEngine::WeightableLayer* wl,
|
||||
ScaleFactorUpdateResult& result,
|
||||
int infiniteLoopCount,
|
||||
const GNAPluginNS::Config& gna_config) const;
|
||||
const Config& gna_config) const;
|
||||
|
||||
public:
|
||||
ScaleFactorCalculator(Cnt& net, const GNAPluginNS::Config& gna_config, const bool fake_quantized)
|
||||
ScaleFactorCalculator(Cnt& net, const Config& gna_config, const bool fake_quantized)
|
||||
: net(net),
|
||||
gna_config(gna_config),
|
||||
fake_quantized(fake_quantized) {
|
||||
@ -120,7 +120,7 @@ class ScaleFactorCalculator {
|
||||
bool CalculateScaleFactor(InferenceEngine::CNNLayerPtr layer_ptr) const {
|
||||
ScaleFactorUpdateResult result;
|
||||
needRestart = false;
|
||||
auto layer_info = GNAPluginNS::LayerInfo(layer_ptr);
|
||||
auto layer_info = LayerInfo(layer_ptr);
|
||||
|
||||
if (layer_info.isConcat()) {
|
||||
if (!ScaleFactorPerLayerConcat(dynamic_cast<InferenceEngine::ConcatLayer*>(layer_ptr.get()),
|
||||
|
@ -42,7 +42,7 @@ InferenceEngine::Blob::Ptr make_fp32_blob(InferenceEngine::Blob::Ptr fp16_blob)
|
||||
}
|
||||
|
||||
void convert_blobs_precision(InferenceEngine::CNNLayer& layer) {
|
||||
auto layer_info = GNAPluginNS::LayerInfo(layer);
|
||||
auto layer_info = LayerInfo(layer);
|
||||
|
||||
if (layer_info.isWeightable()) {
|
||||
InferenceEngine::WeightableLayer& wl = dynamic_cast<InferenceEngine::WeightableLayer&>(layer);
|
||||
|
@ -91,8 +91,8 @@ std::vector<char> GetStringAsTlv(Gna2TlvType type, const std::string& s) {
|
||||
|
||||
Gna2DeviceVersion getEmbeddedTargetFromCompileTarget(const std::string compileTarget) {
|
||||
static const std::map<std::string, Gna2DeviceVersion> targetMap = {
|
||||
{GNAPluginNS::common::kGnaTarget3_1, Gna2DeviceVersionEmbedded3_1},
|
||||
{GNAPluginNS::common::kGnaTarget3_5, Gna2DeviceVersionEmbedded3_5},
|
||||
{common::kGnaTarget3_1, Gna2DeviceVersionEmbedded3_1},
|
||||
{common::kGnaTarget3_5, Gna2DeviceVersionEmbedded3_5},
|
||||
};
|
||||
auto found = targetMap.find(compileTarget);
|
||||
if (found == targetMap.end()) {
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
#include <gna2-common-api.h>
|
||||
#include <gna2-model-api.h>
|
||||
#include "backend/dnn_types.h"
|
||||
#include "backend/dnn_types.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
|
@ -29,10 +29,12 @@ static inline bool FoundPartToTranspose(const std::vector<TranspositionInfo> &tr
|
||||
return partToTranspose != std::end(transpositionInfo);
|
||||
}
|
||||
|
||||
namespace GNAPluginNS {
|
||||
using gna_memory_type = GNAPluginNS::memory::GNAMemoryInterface;
|
||||
using gna_memory_float = GNAPluginNS::memory::GNAMemory<memory::GNAFloatAllocator>;
|
||||
using gna_memory_device = GNAPluginNS::memory::GNAMemory<>;
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
using gna_memory_type = memory::GNAMemoryInterface;
|
||||
using gna_memory_float = memory::GNAMemory<memory::GNAFloatAllocator>;
|
||||
using gna_memory_device = memory::GNAMemory<>;
|
||||
|
||||
using DnnComponentsForLayer = std::list<std::pair<std::string, intel_dnn_component_t>>;
|
||||
using MemoryConnection = std::list<std::pair<std::string, GNAMemoryLayer>>;
|
||||
@ -40,4 +42,6 @@ namespace GNAPluginNS {
|
||||
using SplitConnection = std::unordered_map<std::string, GNASplitLayer>;
|
||||
using CropConnection = std::unordered_map<std::string, GNACropLayer>;
|
||||
using ConstConnections = std::unordered_map<std::string, void*>;
|
||||
} // namespace GNAPluginNS
|
||||
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -80,9 +80,9 @@ uint8_t* GNADeviceHelper::alloc(uint32_t size_requested, uint32_t *size_granted)
|
||||
return static_cast<uint8_t *>(memPtr);
|
||||
}
|
||||
|
||||
void GNADeviceHelper::tagMemoryRegion(void* memPtr, const GNAPluginNS::memory::rRegion tag) {
|
||||
void GNADeviceHelper::tagMemoryRegion(void* memPtr, const memory::rRegion tag) {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
using GNAPluginNS::memory::rRegion;
|
||||
using memory::rRegion;
|
||||
static const std::map<rRegion, Gna2MemoryTag> tagMap {
|
||||
{rRegion::REGION_INPUTS, Gna2MemoryTagInput},
|
||||
{rRegion::REGION_OUTPUTS, Gna2MemoryTagOutput},
|
||||
@ -192,7 +192,7 @@ void GNADeviceHelper::enforceLegacyCnns(Gna2Model& gnaModel) {
|
||||
void GNADeviceHelper::enforceLegacyCnnsWhenNeeded(Gna2Model& gnaModel) {
|
||||
for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) {
|
||||
auto& op = gnaModel.Operations[i];
|
||||
if (GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(op)) {
|
||||
if (backend::AMIntelDNN::isOperationCnnLegacySpecific(op)) {
|
||||
enforceLegacyCnn(op);
|
||||
}
|
||||
}
|
||||
@ -207,7 +207,7 @@ uint32_t GNADeviceHelper::createModel(Gna2Model& gnaModel) const {
|
||||
}
|
||||
enforceLegacyCnnsWhenNeeded(gnaModel);
|
||||
|
||||
GNAPluginNS::backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(gnaModel, legacyExecTarget);
|
||||
backend::AMIntelDNN::updateNumberOfOutputsIfPoolingEnabled(gnaModel, legacyExecTarget);
|
||||
|
||||
if (per_model_diagnostics) {
|
||||
std::string path =
|
||||
@ -240,10 +240,10 @@ bool GNADeviceHelper::enforceLegacyCnnNeeded() const {
|
||||
|
||||
Gna2DeviceVersion GNADeviceHelper::parseTarget(const std::string& target) {
|
||||
static const std::map<std::string, Gna2DeviceVersion> targetMap {
|
||||
{GNAPluginNS::common::kGnaTarget2_0, Gna2DeviceVersion2_0},
|
||||
{GNAPluginNS::common::kGnaTarget3_0, Gna2DeviceVersion3_0},
|
||||
{GNAPluginNS::common::kGnaTarget3_5, Gna2DeviceVersion3_5},
|
||||
{GNAPluginNS::common::kGnaTargetUnspecified, Gna2DeviceVersionSoftwareEmulation},
|
||||
{common::kGnaTarget2_0, Gna2DeviceVersion2_0},
|
||||
{common::kGnaTarget3_0, Gna2DeviceVersion3_0},
|
||||
{common::kGnaTarget3_5, Gna2DeviceVersion3_5},
|
||||
{common::kGnaTargetUnspecified, Gna2DeviceVersionSoftwareEmulation},
|
||||
};
|
||||
const auto f = targetMap.find(target);
|
||||
if (f != targetMap.end()) {
|
||||
@ -254,13 +254,13 @@ Gna2DeviceVersion GNADeviceHelper::parseTarget(const std::string& target) {
|
||||
|
||||
Gna2DeviceVersion GNADeviceHelper::getDefaultTarget() const {
|
||||
if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation)
|
||||
return parseTarget(GNAPluginNS::common::kGnaDefaultTarget);
|
||||
return parseTarget(common::kGnaDefaultTarget);
|
||||
return detectedGnaDevVersion;
|
||||
}
|
||||
|
||||
Gna2DeviceVersion GNADeviceHelper::getTargetDevice(const bool execTarget) const {
|
||||
const auto declared = execTarget ? executionTarget : compileTarget;
|
||||
if (declared == GNAPluginNS::common::kGnaTargetUnspecified) {
|
||||
if (declared == common::kGnaTargetUnspecified) {
|
||||
return execTarget ? getDefaultTarget() : getTargetDevice(true);
|
||||
}
|
||||
return parseTarget(declared);
|
||||
@ -465,15 +465,15 @@ const std::map <const std::pair<Gna2OperationType, int32_t>, const std::string>
|
||||
{{Gna2OperationTypeThreshold, 1}, "Output"}
|
||||
};
|
||||
|
||||
GNAPluginNS::RequestStatus GNADeviceHelper::waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds) {
|
||||
RequestStatus GNADeviceHelper::waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds) {
|
||||
std::unique_lock<std::mutex> lockGnaCalls{ acrossPluginsSync };
|
||||
const auto status = Gna2RequestWait(requestID, static_cast<uint32_t>(timeoutMilliseconds));
|
||||
if (status == Gna2StatusWarningDeviceBusy) {
|
||||
return GNAPluginNS::RequestStatus::kPending;
|
||||
return RequestStatus::kPending;
|
||||
}
|
||||
unwaitedRequestIds.erase(requestID);
|
||||
if (status == Gna2StatusDriverQoSTimeoutExceeded) {
|
||||
return GNAPluginNS::RequestStatus::kAborted;
|
||||
return RequestStatus::kAborted;
|
||||
}
|
||||
|
||||
if (per_request_diagnostics) {
|
||||
@ -485,7 +485,7 @@ GNAPluginNS::RequestStatus GNADeviceHelper::waitForRequest(uint32_t requestID, i
|
||||
// handle error case after updating statistics data.
|
||||
checkGna2Status(status, "Gna2RequestWait");
|
||||
|
||||
return GNAPluginNS::RequestStatus::kCompleted;
|
||||
return RequestStatus::kCompleted;
|
||||
}
|
||||
|
||||
GNADeviceHelper::DumpResult GNADeviceHelper::dumpXnn(const uint32_t modelId) {
|
||||
@ -559,7 +559,7 @@ void GNADeviceHelper::close() {
|
||||
|
||||
for (auto requestId : requestsToClose)
|
||||
try {
|
||||
if (waitForRequest(requestId) == GNAPluginNS::RequestStatus::kPending)
|
||||
if (waitForRequest(requestId) == RequestStatus::kPending)
|
||||
log::warning() << "Request with Id " << requestId << " is still pending";
|
||||
} catch (...) {
|
||||
log::warning() << "Request with Id " << requestId << " was not awaited successfully";
|
||||
@ -598,10 +598,10 @@ void GNADeviceHelper::getGnaPerfCounters(std::map<std::string, InferenceEngine::
|
||||
|
||||
std::string GNADeviceHelper::GetCompileTarget() const {
|
||||
static const std::map<Gna2DeviceVersion, std::string> targetMap = {
|
||||
{Gna2DeviceVersion2_0, GNAPluginNS::common::kGnaTarget2_0},
|
||||
{Gna2DeviceVersion3_0, GNAPluginNS::common::kGnaTarget3_0},
|
||||
{Gna2DeviceVersion3_5, GNAPluginNS::common::kGnaTarget3_5},
|
||||
{Gna2DeviceVersionEmbedded3_5, GNAPluginNS::common::kGnaTarget3_5},
|
||||
{Gna2DeviceVersion2_0, common::kGnaTarget2_0},
|
||||
{Gna2DeviceVersion3_0, common::kGnaTarget3_0},
|
||||
{Gna2DeviceVersion3_5, common::kGnaTarget3_5},
|
||||
{Gna2DeviceVersionEmbedded3_5, common::kGnaTarget3_5},
|
||||
};
|
||||
const auto target = getTargetDevice(false);
|
||||
auto found = targetMap.find(target);
|
||||
@ -616,7 +616,7 @@ uint32_t GNADeviceHelper::maxLayersCount() const {
|
||||
}
|
||||
|
||||
uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
|
||||
using namespace GNAPluginNS::GNALimitations;
|
||||
using namespace limitations;
|
||||
|
||||
switch (getTargetDevice(true)) {
|
||||
case Gna2DeviceVersion1_0:
|
||||
|
@ -34,7 +34,7 @@
|
||||
/**
|
||||
* holds gna - style handle in RAII way
|
||||
*/
|
||||
class GNADeviceHelper : public GNAPluginNS::GNADevice {
|
||||
class GNADeviceHelper : public ov::intel_gna::GNADevice {
|
||||
using UnwaitedRequestIds = std::set<uint32_t>;
|
||||
static std::mutex acrossPluginsSync;
|
||||
static std::string decoratedGnaLibVersion() {
|
||||
@ -92,7 +92,7 @@ public:
|
||||
void dumpAllAllocations(uint64_t idx, const std::string& infix) const;
|
||||
|
||||
uint8_t *alloc(uint32_t size_requested, uint32_t *size_granted);
|
||||
void tagMemoryRegion(void* memPtr, const GNAPluginNS::memory::rRegion memoryTag);
|
||||
void tagMemoryRegion(void* memPtr, const ov::intel_gna::memory::rRegion memoryTag);
|
||||
|
||||
void releaseModel(const uint32_t model_id);
|
||||
static uint32_t getNumberOfGnaDevices();
|
||||
@ -155,7 +155,7 @@ public:
|
||||
/**
|
||||
* @see GNADevice::waitForRequest()
|
||||
*/
|
||||
GNAPluginNS::RequestStatus waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds = MAX_TIMEOUT) override;
|
||||
ov::intel_gna::RequestStatus waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds = MAX_TIMEOUT) override;
|
||||
|
||||
/**
|
||||
* @see GNADevice::maxLayersCount()
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include "memory/gna_mem_regions.hpp"
|
||||
#include "gna_lib_ver_selector.hpp"
|
||||
|
||||
using GNAPluginNS::memory::rRegion;
|
||||
using ov::intel_gna::memory::rRegion;
|
||||
|
||||
struct GnaAllocation {
|
||||
void* ptr = nullptr;
|
||||
|
@ -13,10 +13,12 @@
|
||||
enum Gna2AccelerationMode;
|
||||
class Gna2Model;
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
// Interface name is different to the file naem due the lagacy reason.
|
||||
// 1. Implementation file names should be changed in next PR.
|
||||
// 2. Implementation of interface should be moved to GNAPluginNS namespace
|
||||
// 2. Implementation of interface should be moved to ov::intel_gna namespace
|
||||
|
||||
/**
|
||||
* @interface Interface for invoking operation on GNA device.
|
||||
@ -57,10 +59,10 @@ public:
|
||||
* @brief Wait for request to be finished.
|
||||
* @param requestID id of request enqueued on device
|
||||
* @param timeoutMilliseconds maximum timeout to be used for waiting
|
||||
* @return status of request given to the methoid. @see GNAPluginNS::RequestStatus.
|
||||
* @return status of request given to the methoid. @see RequestStatus.
|
||||
* @throw Exception in case of error
|
||||
*/
|
||||
virtual GNAPluginNS::RequestStatus waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds) = 0;
|
||||
virtual RequestStatus waitForRequest(uint32_t requestID, int64_t timeoutMilliseconds) = 0;
|
||||
|
||||
/**
|
||||
* @brief Return maximum number of layers supported by device.
|
||||
@ -74,4 +76,5 @@ public:
|
||||
virtual void close() {}
|
||||
};
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -15,7 +15,8 @@
|
||||
#include <cpp_interfaces/interface/ie_iexecutable_network_internal.hpp>
|
||||
#include <ie_icore.hpp>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
class GNAExecutableNetwork : public InferenceEngine::IExecutableNetworkInternal {
|
||||
std::shared_ptr<GNAPlugin> plg;
|
||||
@ -135,4 +136,5 @@ class GNAExecutableNetwork : public InferenceEngine::IExecutableNetworkInternal
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -10,7 +10,9 @@
|
||||
#include "gna_graph_tools.hpp"
|
||||
#include "layers/gna_layer_helpers.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
/**
|
||||
* @brief Modify child layers walking order to maintain strict ordering required for gna_fuse logic
|
||||
*/
|
||||
@ -99,4 +101,5 @@ inline FuzedLayersContainer make_fuzed_order(InferenceEngine::CNNLayer* origin)
|
||||
return fusedCnt;
|
||||
}
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -43,12 +43,11 @@
|
||||
using namespace InferenceEngine;
|
||||
using namespace std;
|
||||
using namespace ov::intel_gna;
|
||||
using namespace GNAPluginNS;
|
||||
using namespace ov::intel_gna::frontend;
|
||||
using namespace ov::intel_gna::common;
|
||||
using namespace memory;
|
||||
|
||||
static bool CheckIFLastComponentIsPrecededByConv2D(const GNAPluginNS::backend::DnnComponents::storage_type& components,
|
||||
static bool CheckIFLastComponentIsPrecededByConv2D(const backend::DnnComponents::storage_type& components,
|
||||
bool verify_with_pooling = true) {
|
||||
bool proceded_by_conv2D = false;
|
||||
auto last_element = components.rbegin();
|
||||
@ -71,15 +70,15 @@ static bool CheckIFLastComponentIsPrecededByConv2D(const GNAPluginNS::backend::D
|
||||
|
||||
GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config) : gna_config(gna_config) {}
|
||||
|
||||
void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<GNAPluginNS::gna_memory_type> gnaMemPtr) {
|
||||
void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr) {
|
||||
this->gnamem = std::move(gnaMemPtr);
|
||||
}
|
||||
|
||||
void GNAGraphCompiler::setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr) {
|
||||
void GNAGraphCompiler::setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr) {
|
||||
this->dnn = std::move(dnnPtr);
|
||||
}
|
||||
|
||||
void GNAGraphCompiler::setInputsPtr(std::shared_ptr<GNAPluginNS::GnaInputs> inputsPtr) {
|
||||
void GNAGraphCompiler::setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr) {
|
||||
this->inputs_ptr_ = std::move(inputsPtr);
|
||||
}
|
||||
|
||||
@ -110,7 +109,7 @@ void GNAGraphCompiler::fillMemoryConnections(std::unordered_map<std::string,
|
||||
|
||||
void GNAGraphCompiler::fillConcatConnections(InferenceEngine::CNNLayerPtr layer) {
|
||||
// creating connection for each layer outputs as form of extramap
|
||||
GNAPluginNS::GNAConcatLayer layerInfoItem(layer);
|
||||
GNAConcatLayer layerInfoItem(layer);
|
||||
size_t concat_size = 0;
|
||||
std::string& id = layer->name;
|
||||
|
||||
@ -148,7 +147,7 @@ void GNAGraphCompiler::fillConcatConnections(InferenceEngine::CNNLayerPtr layer)
|
||||
|
||||
void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) {
|
||||
// creating connection for each layer inputs as form of extramap
|
||||
GNAPluginNS::GNASplitLayer layerInfoItem(layer);
|
||||
GNASplitLayer layerInfoItem(layer);
|
||||
size_t split_size = 0;
|
||||
std::string& id = layer->name;
|
||||
IE_ASSERT(!layer->insData.empty());
|
||||
@ -214,16 +213,16 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
|
||||
split_connection.emplace(id, layerInfoItem);
|
||||
}
|
||||
|
||||
void GNAPluginNS::GNAGraphCompiler::SetValidatorTarget(const std::string& target) {
|
||||
auto temp = GNALimitations::Cnn2D::AbstractValidator::Create(target);
|
||||
void GNAGraphCompiler::SetValidatorTarget(const std::string& target) {
|
||||
auto temp = limitations::cnn2d::AbstractValidator::Create(target);
|
||||
cnn2dValidator.reset(temp.release());
|
||||
}
|
||||
|
||||
bool GNAPluginNS::GNAGraphCompiler::ShouldUseOnlyConv2DGnaIface() const {
|
||||
bool GNAGraphCompiler::ShouldUseOnlyConv2DGnaIface() const {
|
||||
return cnn2dValidator && cnn2dValidator->ShouldUseOnlyConv2DGnaIface();
|
||||
}
|
||||
|
||||
void GNAPluginNS::GNAGraphCompiler::ValidateCnn2D(const std::string& name,
|
||||
void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
|
||||
const uint32_t inHeight,
|
||||
const uint32_t inWidth,
|
||||
const uint32_t inChannels,
|
||||
@ -245,7 +244,7 @@ void GNAPluginNS::GNAGraphCompiler::ValidateCnn2D(const std::string& name,
|
||||
}
|
||||
}
|
||||
|
||||
void GNAPluginNS::GNAGraphCompiler::ValidatePooling2D(const std::string& name,
|
||||
void GNAGraphCompiler::ValidatePooling2D(const std::string& name,
|
||||
const uint32_t windowH,
|
||||
const uint32_t windowW,
|
||||
const uint32_t strideH,
|
||||
@ -280,9 +279,8 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer)
|
||||
}
|
||||
|
||||
void GNAGraphCompiler::assertConvolutionLayoutProper(const InferenceEngine::DataPtr& data) {
|
||||
if (data->getLayout() != Layout::NHWC &&
|
||||
data->getLayout() != Layout::NCHW &&
|
||||
data->getLayout() != Layout::NC) {
|
||||
if (data->getLayout() != InferenceEngine::Layout::NHWC && data->getLayout() != InferenceEngine::Layout::NCHW &&
|
||||
data->getLayout() != InferenceEngine::Layout::NC) {
|
||||
THROW_GNA_EXCEPTION << "layer: \"Convolution\" with layout " << data->getLayout() << " isn't currently supported on GNA";
|
||||
}
|
||||
}
|
||||
@ -338,10 +336,10 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
|
||||
|
||||
// Map 2d convolution to 1d if it's possible.
|
||||
if (!ShouldUseOnlyConv2DGnaIface() &&
|
||||
GNAConvolutionLayer::isMappableFrom2DTo1D(in_height, in_width, in_channels,
|
||||
gna_convolution_layer::isMappableFrom2DTo1D(in_height, in_width, in_channels,
|
||||
convolution._kernel_y, convolution._kernel_x,
|
||||
convolution._stride_y, convolution._stride_x)) {
|
||||
transpose_h_w = GNAConvolutionLayer::should_transpose_h_w(in_height,
|
||||
transpose_h_w = gna_convolution_layer::should_transpose_h_w(in_height,
|
||||
convolution._kernel_y,
|
||||
in_channels,
|
||||
convolution._stride_y);
|
||||
@ -382,7 +380,7 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
|
||||
}
|
||||
|
||||
if (ShouldUseOnlyConv2DGnaIface() ||
|
||||
GNAConvolutionLayer::is3DInputOr2DKernel(in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x) ||
|
||||
gna_convolution_layer::is3DInputOr2DKernel(in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x) ||
|
||||
in_height != 1) {
|
||||
// TensorFlow default layout is NHWC
|
||||
// OpenVino Default layout is NCHW
|
||||
@ -518,7 +516,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
|
||||
// Keep both variants of kaldi models working:
|
||||
// Old one has layout which is different from NHWC
|
||||
// New one has layout NHWC, but it is mapped from 2d by H
|
||||
if (inputs->getLayout() == Layout::NHWC && !transpose_h_w) {
|
||||
if (inputs->getLayout() == InferenceEngine::Layout::NHWC && !transpose_h_w) {
|
||||
currentComponent.orientation_in = kDnnInterleavedOrientation;
|
||||
currentComponent.orientation_out = kDnnInterleavedOrientation;
|
||||
}
|
||||
@ -536,7 +534,8 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
|
||||
|
||||
// TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know how kaldi will handle that
|
||||
if (!dnn->do_rotate_input) {
|
||||
if ((inputs->getLayout() != Layout::NHWC || transpose_h_w) && LayerInfo(connectedInputLayer).isInput()) {
|
||||
if ((inputs->getLayout() != InferenceEngine::Layout::NHWC || transpose_h_w) &&
|
||||
LayerInfo(connectedInputLayer).isInput()) {
|
||||
// Kaldi features are opposite orientation
|
||||
dnn->do_rotate_input = true;
|
||||
dnn->num_rotate_rows = effectiveStride;
|
||||
@ -699,7 +698,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
|
||||
currentComponent.num_bytes_per_input = inputs->getPrecision().size();
|
||||
currentComponent.num_bytes_per_output = outputs->getPrecision().size();
|
||||
|
||||
if (inputs->getLayout() == Layout::NHWC) {
|
||||
if (inputs->getLayout() == InferenceEngine::Layout::NHWC) {
|
||||
currentComponent.orientation_in = kDnnInterleavedOrientation;
|
||||
currentComponent.orientation_out = kDnnInterleavedOrientation;
|
||||
}
|
||||
@ -713,7 +712,8 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
|
||||
auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
|
||||
|
||||
// TODO: convolution might be not the first layer in sorted order but connected via split for example - dont know how kaldi will handle that
|
||||
if (!dnn->do_rotate_input && inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
|
||||
if (!dnn->do_rotate_input && inputs->getLayout() != InferenceEngine::Layout::NHWC &&
|
||||
LayerInfo(connectedInputLayer).isInput()) {
|
||||
// Kaldi features are opposite orientation
|
||||
dnn->do_rotate_input = true;
|
||||
dnn->num_rotate_rows = in_channels;
|
||||
@ -766,9 +766,9 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
auto input = layer->insData[0].lock();
|
||||
|
||||
auto outputs = *layer->outData.begin();
|
||||
auto reshaped_dims = Get2DReshapedData(input, GNALimitations::GetMinBatchToFitInBuffer(input), 8)->getDims();
|
||||
auto reshaped_dims = Get2DReshapedData(input, limitations::GetMinBatchToFitInBuffer(input), 8)->getDims();
|
||||
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
|
||||
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
|
||||
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
|
||||
uint32_t num_rows_in = reshaped_dims[1];
|
||||
uint32_t num_columns_in = reshaped_dims[0];
|
||||
uint32_t num_rows_out = num_rows_in;
|
||||
@ -914,10 +914,10 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
const uint32_t c_dim_out = InferenceEngine::GetDataDimByName(outputs, InferenceEngine::DataDimName::C);
|
||||
|
||||
if (w_dim_in == 1) { // swap dimensions if needed to support swapped 1D case
|
||||
swap(h_dim_in, w_dim_in);
|
||||
swap(h_dim_out, w_dim_out);
|
||||
swap(pooling._kernel[X_AXIS], pooling._kernel[Y_AXIS]);
|
||||
swap(pooling._stride[X_AXIS], pooling._stride[Y_AXIS]);
|
||||
std::swap(h_dim_in, w_dim_in);
|
||||
std::swap(h_dim_out, w_dim_out);
|
||||
std::swap(pooling._kernel[X_AXIS], pooling._kernel[Y_AXIS]);
|
||||
std::swap(pooling._stride[X_AXIS], pooling._stride[Y_AXIS]);
|
||||
}
|
||||
|
||||
void* ptr_inputs = nullptr;
|
||||
@ -968,9 +968,9 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
// but it does not use any specific new GNA features it should be correct to import and run using previous GNA HW
|
||||
if (!is2DPooling) {
|
||||
const auto hLegacy =
|
||||
GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy(h_dim_in, pooling._stride[X_AXIS]);
|
||||
gna_convolution_layer::outputFromPoolingLegacy(h_dim_in, pooling._stride[X_AXIS]);
|
||||
const auto wLegacy =
|
||||
GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy(w_dim_in, pooling._stride[Y_AXIS]);
|
||||
gna_convolution_layer::outputFromPoolingLegacy(w_dim_in, pooling._stride[Y_AXIS]);
|
||||
if (num_data_bytes_out < hLegacy * wLegacy * c_dim_out) {
|
||||
num_data_bytes_out = hLegacy * wLegacy * c_dim_out;
|
||||
}
|
||||
@ -1007,7 +1007,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
auto inputs = layer->insData.begin()->lock();
|
||||
auto outputs = *layer->outData.begin();
|
||||
|
||||
auto reshaped_dims = Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims();
|
||||
auto reshaped_dims = Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims();
|
||||
uint32_t num_rows_in = reshaped_dims[1];
|
||||
uint32_t num_columns_in = reshaped_dims[0];
|
||||
uint32_t num_rows_out = num_rows_in;
|
||||
@ -1068,7 +1068,7 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
}
|
||||
|
||||
// Concat axis validation
|
||||
if (!GNALimitations::ValidateConvConcatAxis(concatLayer)) {
|
||||
if (!limitations::ValidateConvConcatAxis(concatLayer)) {
|
||||
std::ostringstream in_dims_oss;
|
||||
auto in_dims = concatLayer->insData[0].lock()->getDims();
|
||||
std::copy(in_dims.begin(), in_dims.end(), std::ostream_iterator<size_t>(in_dims_oss, ","));
|
||||
@ -1147,7 +1147,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
|
||||
if (!LayerInfo(cropLayer).isCropAffined()) {
|
||||
// leave crop as it is
|
||||
GNAPluginNS::GNACropLayer cropLayerInfoItem(layer);
|
||||
GNACropLayer cropLayerInfoItem(layer);
|
||||
std::string& id = layer->name;
|
||||
crop_connection.emplace(id, cropLayerInfoItem);
|
||||
auto cropLayerInfo = crop_connection.find(cropLayer->name);
|
||||
@ -1178,7 +1178,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
|
||||
uint32_t num_rows_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()));
|
||||
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
|
||||
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
|
||||
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
|
||||
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
|
||||
|
||||
void* ptr_inputs = nullptr;
|
||||
@ -1234,7 +1234,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
auto& eltwise = dynamic_cast<EltwiseLayer&>(*layer.get());
|
||||
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
|
||||
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
|
||||
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
|
||||
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
|
||||
|
||||
// for eltwise sum/sub in 16-bit precision one input should be 4 bytes and one 2 bytes - detecting that below
|
||||
// the names of variables are left for clarity although not always reflecting the real precision/size
|
||||
@ -1414,7 +1414,7 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
auto input_2 = layer->insData[1].lock(); // the second input corresponds to ptr_weights in component
|
||||
auto outputs = *layer->outData.begin();
|
||||
auto inputPrecision = quantized ? Precision(Precision::I16) : input_1->getPrecision();
|
||||
uint32_t noOfInputsDivisor = GNALimitations::noOfInputsDivisor;
|
||||
uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;
|
||||
|
||||
auto in_dims = input_1->getDims();
|
||||
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
|
||||
@ -1478,7 +1478,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
|
||||
auto outputs = *layer->outData.begin();
|
||||
const auto out_dims = outputs->getDims();
|
||||
Precision inputPrecision;
|
||||
uint32_t noOfInputsDivisor = GNALimitations::noOfInputsDivisor;
|
||||
uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;
|
||||
|
||||
if (!quantized) {
|
||||
inputPrecision = inputs->getPrecision();
|
||||
@ -1486,11 +1486,11 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
|
||||
inputPrecision = Precision(Precision::I16);
|
||||
} else {
|
||||
inputPrecision = Precision(Precision::I8);
|
||||
noOfInputsDivisor = GNALimitations::noOfInputsLowPrecDivisor;
|
||||
noOfInputsDivisor = limitations::noOfInputsLowPrecDivisor;
|
||||
}
|
||||
|
||||
auto input_data = HasTo2DReshapeData(layer) ?
|
||||
Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8) : inputs;
|
||||
Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8) : inputs;
|
||||
auto in_dims = input_data->getDims();
|
||||
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
|
||||
uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size;
|
||||
@ -1690,7 +1690,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
|
||||
auto inputs = layer->insData.begin()->lock();
|
||||
|
||||
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
|
||||
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
|
||||
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
|
||||
uint32_t num_columns_in = GetDimFromBack(inputs->getDims(), 2);
|
||||
uint32_t num_rows_out = GetDimFromBack(outputs->getDims(), 1);
|
||||
uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
|
||||
@ -1826,7 +1826,7 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
|
||||
auto inputs = layer->insData.begin()->lock();
|
||||
|
||||
const auto noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
|
||||
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
|
||||
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
|
||||
const uint32_t orginalInputSize =
|
||||
InferenceEngine::details::product(std::next(inputs->getDims().begin()), inputs->getDims().end());
|
||||
const uint32_t orginalOutputSize =
|
||||
@ -1842,7 +1842,7 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
|
||||
const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth;
|
||||
const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, noOfInputsDivisor);
|
||||
|
||||
auto numOutputs = GNAConvolutionLayer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride);
|
||||
auto numOutputs = gna_convolution_layer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride);
|
||||
numOutputs *= numberOfFilters;
|
||||
const auto& biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
|
||||
auto& currentComponent = dnnComponents.addComponent(layer->name, "affine");
|
||||
@ -2154,7 +2154,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
}
|
||||
|
||||
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
|
||||
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
|
||||
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
|
||||
|
||||
// now this can be run on GNA
|
||||
if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case
|
||||
@ -2359,7 +2359,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
|
||||
// find this input in vector sum all outputs in primitive
|
||||
auto it = std::find_if(concatLayerInfoItem.concatInputLayers.begin(),
|
||||
concatLayerInfoItem.concatInputLayers.end(),
|
||||
[&name](GNAPluginNS::GNAConcatLayer::ConcatConnectedLayerInfo &item) {
|
||||
[&name](GNAConcatLayer::ConcatConnectedLayerInfo &item) {
|
||||
return item.name == name;
|
||||
});
|
||||
if (it != concatLayerInfoItem.concatInputLayers.end()) {
|
||||
@ -2371,11 +2371,11 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
|
||||
std::find_if(concat_connection.begin(),
|
||||
concat_connection.end(),
|
||||
[&concatLayerInfo]
|
||||
(const std::pair<std::string, GNAPluginNS::GNAConcatLayer> &concatItem) -> bool {
|
||||
(const std::pair<std::string, GNAConcatLayer> &concatItem) -> bool {
|
||||
auto it = std::find_if(concatItem.second.concatInputLayers.begin(),
|
||||
concatItem.second.concatInputLayers.end(),
|
||||
[&concatLayerInfo]
|
||||
(const GNAPluginNS::GNAConcatLayer::ConcatConnectedLayerInfo &item) -> bool {
|
||||
(const GNAConcatLayer::ConcatConnectedLayerInfo &item) -> bool {
|
||||
return item.name == concatLayerInfo->first;
|
||||
});
|
||||
return it != concatItem.second.concatInputLayers.end();
|
||||
@ -2384,9 +2384,9 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
|
||||
auto outputSize = std::max(concatLayerInfoItem.reserved_size, num_data_bytes_out * 2);
|
||||
gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(outputSize), 64);
|
||||
|
||||
std::function<void(GNAConcatLayer, GNAPluginNS::GnaInputs&, ConcatConnection&)> allocate_input_recursively =
|
||||
std::function<void(GNAConcatLayer, GnaInputs&, ConcatConnection&)> allocate_input_recursively =
|
||||
[&allocate_input_recursively](GNAConcatLayer clayer,
|
||||
GNAPluginNS::GnaInputs &inputs,
|
||||
GnaInputs &inputs,
|
||||
ConcatConnection& concat_connection) {
|
||||
size_t concatInputIdx = 0;
|
||||
for (auto &&inputLayer : clayer.concatInputLayers) {
|
||||
@ -2437,7 +2437,7 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer,
|
||||
gnamem->getQueue(mem_region)->reserve_ptr(layer, ptr, ALIGN64(num_data_bytes_out), 64);
|
||||
}
|
||||
|
||||
GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
||||
ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
||||
void *ptr,
|
||||
size_t num_data_bytes_in,
|
||||
int32_t offset,
|
||||
@ -2465,7 +2465,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
||||
auto minInput = inputs_ptr_->at(prevLayer->name).get_required_size();
|
||||
if (num_data_bytes_in < minInput) {
|
||||
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ?
|
||||
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
|
||||
limitations::noOfInputsLowPrecDivisor : limitations::noOfInputsDivisor;
|
||||
log::debug() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" << ALIGN(minInput, noOfInputsDivisor);
|
||||
num_data_bytes_in = ALIGN(minInput, noOfInputsDivisor);
|
||||
}
|
||||
@ -2528,7 +2528,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
||||
// find this input in vector sum all outputs in primitive
|
||||
auto it = std::find_if(splitLayerInfoItem.splitOutputLayers.begin(),
|
||||
splitLayerInfoItem.splitOutputLayers.end(),
|
||||
[&idx, &layer](GNAPluginNS::GNASplitLayer::SplitConnectedLayerInfo &item) {
|
||||
[&idx, &layer](GNASplitLayer::SplitConnectedLayerInfo &item) {
|
||||
return item.connectedTo == layer && item.insDataIdx == idx;
|
||||
});
|
||||
|
||||
|
@ -15,7 +15,6 @@
|
||||
#include "descriptions/gna_desc.hpp"
|
||||
#include "descriptions/gna_flags.hpp"
|
||||
#include "connection_details.hpp"
|
||||
#include "backend/dnn.hpp"
|
||||
#include "memory/gna_memory.hpp"
|
||||
#include "layers/gna_memory_layer.hpp"
|
||||
#include "layers/gna_concat_layer.hpp"
|
||||
@ -27,12 +26,14 @@
|
||||
#include "gna_device.hpp"
|
||||
#include "gna_data_types.hpp"
|
||||
|
||||
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
class GNAGraphCompiler {
private:
std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnn;
std::shared_ptr<GNAPluginNS::gna_memory_type> gnamem;
std::shared_ptr<GNAPluginNS::GnaInputs> inputs_ptr_;
std::shared_ptr<backend::AMIntelDNN> dnn;
std::shared_ptr<gna_memory_type> gnamem;
std::shared_ptr<GnaInputs> inputs_ptr_;
// layers with extra storage for connections and additional
// non trivial processing
@ -49,20 +50,20 @@ private:
|
||||
static void assertConvolutionLayoutProper(const InferenceEngine::DataPtr&);
|
||||
std::vector<uint8_t> static transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols);
|
||||
|
||||
std::unique_ptr<const GNALimitations::Cnn2D::AbstractValidator> cnn2dValidator;
|
||||
std::unique_ptr<const limitations::cnn2d::AbstractValidator> cnn2dValidator;
|
||||
|
||||
bool ShouldUseOnlyConv2DGnaIface() const;
|
||||
|
||||
public:
|
||||
GNAPluginNS::backend::DnnComponents dnnComponents;
|
||||
backend::DnnComponents dnnComponents;
|
||||
MemoryConnection memory_connection;
|
||||
ConcatConnection concat_connection;
|
||||
ConstConnections const_connections;
|
||||
|
||||
GNAGraphCompiler(const Config& gna_config);
|
||||
void setGNAMemoryPtr(std::shared_ptr<GNAPluginNS::gna_memory_type> gnaMemPtr);
|
||||
void setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr);
|
||||
void setInputsPtr(std::shared_ptr<GNAPluginNS::GnaInputs> inputsPtr);
|
||||
void setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr);
|
||||
void setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr);
|
||||
void setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr);
|
||||
|
||||
void fillMemoryConnections(std::unordered_map<std::string,
|
||||
std::vector<InferenceEngine::CNNLayerPtr>> &memoryPairs);
|
||||
@ -102,7 +103,7 @@ public:
|
||||
* in case when we would like to use zero offset and connect from pointer set this to negative
|
||||
* @return layer used as input
|
||||
*/
|
||||
GNAPluginNS::ConnectionDetails connectInput(InferenceEngine::CNNLayerPtr layer,
|
||||
ConnectionDetails connectInput(InferenceEngine::CNNLayerPtr layer,
|
||||
void *pVoid,
|
||||
size_t num_data_bytes_in,
|
||||
int32_t offset = 0,
|
||||
@ -149,4 +150,6 @@ public:
void Reset();
};
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
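The hunks above and below all apply the same renaming: the flat GNAPluginNS namespace becomes the nested ov::intel_gna namespace, and qualified uses inside the plugin drop the old prefix. A minimal before/after sketch of the pattern (FooCompiler is an illustrative name, not a class from this commit):

// before
namespace GNAPluginNS {
class FooCompiler {
    std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnn;
};
}  // namespace GNAPluginNS

// after
namespace ov {
namespace intel_gna {
class FooCompiler {
    std::shared_ptr<backend::AMIntelDNN> dnn;  // resolved relative to ov::intel_gna
};
}  // namespace intel_gna
}  // namespace ov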
@ -11,7 +11,8 @@
|
||||
#include "layers/gna_layer_info.hpp"
|
||||
#include "ops/util/util.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
/**
|
||||
* @brief checks if it's a reshape from 4d to 3d tensor
|
||||
@ -104,7 +105,7 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
|
||||
const auto layout = next->outData[0]->getLayout();
|
||||
const auto order = next->GetParamAsInts("order");
|
||||
if (layout != InferenceEngine::Layout::NCHW && layout != InferenceEngine::Layout::CHW ||
|
||||
order != GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC) &&
|
||||
order != permute::GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC) &&
|
||||
order != std::vector<int32_t>{0, 2, 1} /* NCW to NWC */) {
|
||||
return std::make_pair(nullptr, nullptr);
|
||||
}
|
||||
@ -155,7 +156,7 @@ inline std::pair<InferenceEngine::CNNLayerPtr, InferenceEngine::CNNLayerPtr> Fin
|
||||
const auto layout = prev->outData[0]->getLayout();
|
||||
const auto order = prev->GetParamAsInts("order");
|
||||
if (layout != InferenceEngine::Layout::NCHW && layout != InferenceEngine::Layout::CHW ||
|
||||
order != GetPermuteOrder(InferenceEngine::Layout::NHWC, InferenceEngine::Layout::NCHW) &&
|
||||
order != permute::GetPermuteOrder(InferenceEngine::Layout::NHWC, InferenceEngine::Layout::NCHW) &&
|
||||
order != std::vector<int32_t>{0, 2, 1} /* NWC to NCW */) {
|
||||
return std::make_pair(nullptr, nullptr);
|
||||
}
|
||||
@ -427,4 +428,5 @@ inline std::vector<TranspositionInfo> FindTranspositionInfoFromNextLayers(Infere
|
||||
return findTranspositionInfoRecursive(layer);
|
||||
}
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -8,7 +8,9 @@
|
||||
#include "gna_graph_tools.hpp"
|
||||
#include "layers/gna_layer_info.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
/**
|
||||
* @brief returns a pointer to 2D reshaped data to satisfy maximum size of zero dimension
|
||||
* @param input a pointer to data to be reshaped
|
||||
@ -47,14 +49,15 @@ inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input
* @param layer
*/
inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
if (GNAPluginNS::LayerInfo(layer).isPower() || GNAPluginNS::LayerInfo(layer).isCopy())
if (LayerInfo(layer).isPower() || LayerInfo(layer).isCopy())
return true;
if (!GNAPluginNS::LayerInfo(layer).isSyntheticScaleShift())
if (!LayerInfo(layer).isSyntheticScaleShift())
return false;
// Don't reshape diagonallayers with bias connection
return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
return !LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
}
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
@ -6,7 +6,9 @@
|
||||
|
||||
#include "gna_plugin.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
|
||||
GNAInferRequest::GNAInferRequest(const std::shared_ptr<GNAPlugin>& plg,
|
||||
const std::vector<std::shared_ptr<const ov::Node>>& inputs,
|
||||
@ -170,4 +172,5 @@ void GNAInferRequest::CreateInferRequest() {
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -9,7 +9,9 @@
|
||||
#include "cpp_interfaces/interface/ie_iinfer_request_internal.hpp"
|
||||
#include "request_status.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
class GNAPlugin;
|
||||
|
||||
class GNAInferRequest : public InferenceEngine::IInferRequestInternal {
|
||||
@ -60,4 +62,6 @@ private:
|
||||
uint32_t _infer_request_idx = kRequestIndexInvalid;
|
||||
std::shared_ptr<GNAPlugin> plg;
|
||||
};
|
||||
} // namespace GNAPluginNS
|
||||
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -11,11 +11,15 @@
#include <openvino/itt.hpp>
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace itt {
namespace domains {
OV_ITT_DOMAIN(GNAPlugin);
OV_ITT_DOMAIN(GNA_LT);
}
}
}
} // namespace domains
} // namespace itt
} // namespace intel_gna
} // namespace ov
@ -28,12 +28,15 @@
*/
#define ALIGN64(number) ALIGN(number, 64)
namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace tools {
template <typename T, typename... Args>
std::unique_ptr<T> make_unique(Args&&... args) {
return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}
} // namespace tools
} // namespace GNAPluginNS
} // namespace tools
} // namespace intel_gna
} // namespace ov
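The helper above is essentially a pre-C++14 stand-in for std::make_unique, now living in ov::intel_gna::tools, while ALIGN64 rounds a byte count up to a multiple of 64 (assuming ALIGN rounds up, as its uses elsewhere in the diff suggest). A small usage sketch; the Worker type is illustrative:

#include <cstdint>
#include <memory>
#include <utility>

// same shape as the helper in the hunk above
template <typename T, typename... Args>
std::unique_ptr<T> make_unique(Args&&... args) {
    return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}

struct Worker { int id; explicit Worker(int i) : id(i) {} };

int main() {
    auto w = make_unique<Worker>(42);                // unique ownership without a raw new at the call site
    const uint32_t padded = ((100 + 63) / 64) * 64;  // what ALIGN64(100) would evaluate to: 128
    return (w->id == 42 && padded == 128) ? 0 : 1;
}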
@ -29,7 +29,7 @@
#include "serial/headers/latest/gna_model_header.hpp"
#include "common/versioning.hpp"
using namespace GNAPluginNS;
using namespace ov::intel_gna;
inline void writeNBytes(const void *ptr, uint32_t size, std::ostream & os) {
os.write(static_cast<const char*>(ptr), size);
@ -108,7 +108,7 @@ std::string GNAVersionSerializer::Import(std::istream& is) const {
const int gna_header_magic = is_little_endian() ? 0x4d414e47 : 0x474e414d;
GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
header_latest::ModelHeader GNAModelSerial::ReadHeader(std::istream &is) {
is.exceptions(std::istream::failbit);
auto startPos = is.tellg();
if (startPos == -1) {
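The gna_header_magic constant above is the four ASCII bytes "GNAM" packed into a 32-bit integer, so the same on-disk byte sequence is matched regardless of host endianness: on a little-endian machine 0x4d414e47 is stored as 47 4e 41 4d ("GNAM"), and the big-endian value is chosen otherwise. A tiny check of that claim:

#include <cstdint>
#include <cstring>

int main() {
    const uint32_t little = 0x4d414e47;  // the value picked when is_little_endian() is true
    char bytes[5] = {};
    std::memcpy(bytes, &little, 4);      // on a little-endian host: 'G' 'N' 'A' 'M'
    return std::strcmp(bytes, "GNAM") == 0 ? 0 : 1;  // returns 0 on little-endian machines
}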
@ -122,11 +122,11 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
|
||||
stream_len -= startPos;
|
||||
is.seekg(startPos, is.beg);
|
||||
|
||||
HeaderLatest::ModelHeader header;
|
||||
header_latest::ModelHeader header;
|
||||
header.version.major = 0u;
|
||||
header.version.minor = 0u;
|
||||
auto size_of_headers_header = sizeof(HeaderLatest::ModelHeader::gnam) + sizeof(HeaderLatest::ModelHeader::headerSize)
|
||||
+ sizeof(HeaderLatest::ModelHeader::Version);
|
||||
auto size_of_headers_header = sizeof(header_latest::ModelHeader::gnam) + sizeof(header_latest::ModelHeader::headerSize)
|
||||
+ sizeof(header_latest::ModelHeader::Version);
|
||||
if (stream_len > size_of_headers_header) {
|
||||
readNBytes(&header, static_cast<uint32_t>(size_of_headers_header), is);
|
||||
} else {
|
||||
@ -142,34 +142,34 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
|
||||
}
|
||||
|
||||
is.seekg(startPos, is.beg);
|
||||
Header2dot1::ModelHeader tempHeader2dot1;
|
||||
header_2_dot_1::ModelHeader tempheader_2_dot_1;
|
||||
switch (header.version.major) {
|
||||
case 2:
|
||||
switch (header.version.minor) {
|
||||
case 1:
|
||||
readBits(tempHeader2dot1, is);
|
||||
header = HeaderLatest::ModelHeader(tempHeader2dot1);
|
||||
readBits(tempheader_2_dot_1, is);
|
||||
header = header_latest::ModelHeader(tempheader_2_dot_1);
|
||||
break;
|
||||
case 2:
|
||||
case 3:
|
||||
{
|
||||
Header2dot3::ModelHeader tempHeader2dot3;
|
||||
readBits(tempHeader2dot3, is);
|
||||
header = HeaderLatest::ModelHeader(tempHeader2dot3);
|
||||
header_2_dot_3::ModelHeader tempheader_2_dot_3;
|
||||
readBits(tempheader_2_dot_3, is);
|
||||
header = header_latest::ModelHeader(tempheader_2_dot_3);
|
||||
break;
|
||||
}
|
||||
case 4:
|
||||
{
|
||||
Header2dot4::ModelHeader tempHeader2dot4;
|
||||
readBits(tempHeader2dot4, is);
|
||||
header = HeaderLatest::ModelHeader(tempHeader2dot4);
|
||||
header_2_dot_4::ModelHeader tempheader_2_dot_4;
|
||||
readBits(tempheader_2_dot_4, is);
|
||||
header = header_latest::ModelHeader(tempheader_2_dot_4);
|
||||
break;
|
||||
}
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
case 8:
|
||||
readNBytes(&header, sizeof(HeaderLatest::ModelHeader), is);
|
||||
readNBytes(&header, sizeof(header_latest::ModelHeader), is);
|
||||
break;
|
||||
default:
|
||||
THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 8 and is: " << header.version.minor;
|
||||
@ -190,10 +190,10 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
|
||||
return header;
|
||||
}
|
||||
|
||||
GNAPluginNS::HeaderLatest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::istream &is) {
|
||||
header_latest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::istream &is) {
|
||||
is.exceptions(std::istream::failbit);
|
||||
|
||||
HeaderLatest::RuntimeEndPoint endPoint;
|
||||
header_latest::RuntimeEndPoint endPoint;
|
||||
switch (model_header_.version.major) {
|
||||
case 2:
|
||||
switch (model_header_.version.minor) {
|
||||
@ -204,20 +204,20 @@ GNAPluginNS::HeaderLatest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::ist
|
||||
case 5:
|
||||
case 6:
|
||||
{
|
||||
Header2dot6::RuntimeEndPoint tempEndPoint2dot6;
|
||||
header_2_dot_6::RuntimeEndPoint tempEndPoint2dot6;
|
||||
readBits(tempEndPoint2dot6, is);
|
||||
endPoint = HeaderLatest::RuntimeEndPoint(tempEndPoint2dot6, model_header_.nGroup);
|
||||
endPoint = header_latest::RuntimeEndPoint(tempEndPoint2dot6, model_header_.nGroup);
|
||||
break;
|
||||
}
|
||||
case 7:
|
||||
{
|
||||
Header2dot7::RuntimeEndPoint tempEndPoint2dot7;
|
||||
header_2_dot_7::RuntimeEndPoint tempEndPoint2dot7;
|
||||
readBits(tempEndPoint2dot7, is);
|
||||
endPoint = HeaderLatest::RuntimeEndPoint(tempEndPoint2dot7);
|
||||
endPoint = header_latest::RuntimeEndPoint(tempEndPoint2dot7);
|
||||
break;
|
||||
}
|
||||
case 8:
|
||||
readNBytes(&endPoint, sizeof(HeaderLatest::RuntimeEndPoint), is);
|
||||
readNBytes(&endPoint, sizeof(header_latest::RuntimeEndPoint), is);
|
||||
break;
|
||||
default:
|
||||
THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 8 and is: "
|
||||
@ -259,8 +259,8 @@ static const std::map<Gna2OperationType, std::vector<uint32_t>> GnaParamSize{
|
||||
void GNAModelSerial::Import(void *basePointer,
|
||||
size_t gnaGraphSize,
|
||||
std::istream &is,
|
||||
GNAPluginNS::GnaInputs &inputs,
|
||||
GNAPluginNS::GnaOutputs &outputs,
|
||||
GnaInputs &inputs,
|
||||
GnaOutputs &outputs,
|
||||
TranspositionInfoMap &inputsTranspositionInfo,
|
||||
TranspositionInfoMap &outputsTranspositionInfo,
|
||||
std::string & libVersionFromFile) {
|
||||
@ -269,7 +269,7 @@ void GNAModelSerial::Import(void *basePointer,
|
||||
if (model_header_.version.major == 2) {
|
||||
for (auto inputIndex = 0; inputIndex < model_header_.nInputs; inputIndex++) {
|
||||
std::string name = (model_header_.version.minor >= 3) ? readString(is) : std::string("input" + std::to_string(inputIndex));
|
||||
inputs[name] = GNAPluginNS::InputDesc(name);
|
||||
inputs[name] = InputDesc(name);
|
||||
}
|
||||
if (model_header_.version.minor >= 5) {
|
||||
// 3. Read transposition input info
|
||||
@ -294,7 +294,7 @@ void GNAModelSerial::Import(void *basePointer,
|
||||
if (model_header_.version.major == 2) {
|
||||
for (auto outputIndex = 0; outputIndex < model_header_.nOutputs; outputIndex++) {
|
||||
std::string name = (model_header_.version.minor >= 3) ? readString(is) : std::string("output" + std::to_string(outputIndex));
|
||||
outputs[name] = GNAPluginNS::OutputDesc(name);
|
||||
outputs[name] = OutputDesc(name);
|
||||
}
|
||||
}
|
||||
// 7. Read outputs
|
||||
@ -416,8 +416,8 @@ void GNAModelSerial::Export(const GnaAllocations& allocations, std::ostream& os)
|
||||
return out;
|
||||
};
|
||||
|
||||
auto convert_to_serial = [&allocationsOrdered](const GNAPluginNS::GnaDesc& desc) {
|
||||
HeaderLatest::RuntimeEndPoint ep;
|
||||
auto convert_to_serial = [&allocationsOrdered](const GnaDesc& desc) {
|
||||
header_latest::RuntimeEndPoint ep;
|
||||
ep.elements_count = desc.num_elements;
|
||||
ep.scaleFactor = desc.scale_factor;
|
||||
ep.element_size = desc.tensor_precision.size();
|
||||
@ -441,12 +441,12 @@ void GNAModelSerial::Export(const GnaAllocations& allocations, std::ostream& os)
|
||||
/**
|
||||
* writing header
|
||||
*/
|
||||
HeaderLatest::ModelHeader header;
|
||||
header_latest::ModelHeader header;
|
||||
header.gnam[0] = 'G';
|
||||
header.gnam[1] = 'N';
|
||||
header.gnam[2] = 'A';
|
||||
header.gnam[3] = 'M';
|
||||
header.headerSize = sizeof(HeaderLatest::ModelHeader);
|
||||
header.headerSize = sizeof(header_latest::ModelHeader);
|
||||
header.gnaMemSize = gnaGraphSize;
|
||||
header.layersCount = layers.size();
|
||||
header.nGroup = 1; // just to support the old models
|
||||
@ -561,9 +561,9 @@ void GNAModelSerial::Export(const GnaAllocations& allocations, std::ostream& os)
|
||||
version_.Export(os);
|
||||
}
|
||||
|
||||
void GNAModelSerial::ImportInputs(std::istream &is, void* basePtr, GNAPluginNS::GnaInputs &inputs) {
|
||||
void GNAModelSerial::ImportInputs(std::istream &is, void* basePtr, GnaInputs &inputs) {
|
||||
for (auto &input : inputs.Get()) {
|
||||
HeaderLatest::RuntimeEndPoint ep = ReadEndPoint(is);
|
||||
header_latest::RuntimeEndPoint ep = ReadEndPoint(is);
|
||||
|
||||
input.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + ep.descriptor_offset));
|
||||
input.orientation = ep.orientation;
|
||||
@ -589,9 +589,9 @@ void GNAModelSerial::ImportInputs(std::istream &is, void* basePtr, GNAPluginNS::
|
||||
}
|
||||
}
|
||||
|
||||
void GNAModelSerial::ImportOutputs(std::istream &is, void* basePtr, GNAPluginNS::GnaOutputs &outputs) {
|
||||
void GNAModelSerial::ImportOutputs(std::istream &is, void* basePtr, GnaOutputs &outputs) {
|
||||
for (auto &output : outputs.Get()) {
|
||||
HeaderLatest::RuntimeEndPoint ep = ReadEndPoint(is);
|
||||
header_latest::RuntimeEndPoint ep = ReadEndPoint(is);
|
||||
|
||||
output.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + ep.descriptor_offset));
|
||||
output.orientation = ep.orientation;
|
||||
@ -648,9 +648,9 @@ void GNAModelSerial::ExportTranspositionInfo(std::ostream &os,
|
||||
}
|
||||
|
||||
void GNAModelSerial::AppendTensorNameIfNeeded(GnaDesc& nodeDesc) const {
|
||||
static constexpr Header2dot8::ModelHeader::Version kHasTensorNamesVersion;
|
||||
static constexpr header_2_dot_8::ModelHeader::Version kHasTensorNamesVersion;
|
||||
|
||||
if (HeaderLatest::IsFirstVersionLower(model_header_.version, kHasTensorNamesVersion) &&
|
||||
if (header_latest::IsFirstVersionLower(model_header_.version, kHasTensorNamesVersion) &&
|
||||
nodeDesc.tensor_names.empty()) {
|
||||
nodeDesc.tensor_names.insert(nodeDesc.name);
|
||||
}
|
||||
|
@ -34,16 +34,16 @@ public:
|
||||
private:
|
||||
Gna2Model * gna2model_;
|
||||
MemoryType states, *pstates_ = nullptr;
|
||||
GNAPluginNS::GnaInputs inputs_;
|
||||
GNAPluginNS::GnaOutputs outputs_;
|
||||
ov::intel_gna::GnaInputs inputs_;
|
||||
ov::intel_gna::GnaOutputs outputs_;
|
||||
TranspositionInfoMap inputs_transpose_info_;
|
||||
TranspositionInfoMap outputs_transpose_info_;
|
||||
GNAPluginNS::HeaderLatest::ModelHeader model_header_;
|
||||
ov::intel_gna::header_latest::ModelHeader model_header_;
|
||||
GNAVersionSerializer version_;
|
||||
|
||||
void ImportInputs(std::istream &is, void* basePtr, GNAPluginNS::GnaInputs &inputs);
|
||||
void ImportInputs(std::istream &is, void* basePtr, ov::intel_gna::GnaInputs &inputs);
|
||||
|
||||
void ImportOutputs(std::istream &is, void* basePtr, GNAPluginNS::GnaOutputs &outputs);
|
||||
void ImportOutputs(std::istream &is, void* basePtr, ov::intel_gna::GnaOutputs &outputs);
|
||||
|
||||
void ImportTranspositionInfo(std::istream &is, std::string &name, std::vector<TranspositionInfo> &transpositionInfo);
|
||||
|
||||
@ -53,7 +53,7 @@ private:
|
||||
* @brief Update input or output description to support importing of < 2.8 format where tensor_names were not present
|
||||
* @param nodeDesc input or output description to be appended
|
||||
*/
|
||||
void AppendTensorNameIfNeeded(GNAPluginNS::GnaDesc& nodeDesc) const;
|
||||
void AppendTensorNameIfNeeded(ov::intel_gna::GnaDesc& nodeDesc) const;
|
||||
|
||||
public:
|
||||
GNAModelSerial(Gna2Model* model, MemoryType& states_holder)
|
||||
@ -62,14 +62,14 @@ private:
|
||||
}
|
||||
|
||||
GNAModelSerial(Gna2Model* model,
|
||||
GNAPluginNS::GnaInputs& inputs,
|
||||
GNAPluginNS::GnaOutputs& outputs)
|
||||
ov::intel_gna::GnaInputs& inputs,
|
||||
ov::intel_gna::GnaOutputs& outputs)
|
||||
: gna2model_(model),
|
||||
inputs_(inputs),
|
||||
outputs_(outputs) {
|
||||
}
|
||||
|
||||
void setHeader(GNAPluginNS::HeaderLatest::ModelHeader header) {
|
||||
void setHeader(ov::intel_gna::header_latest::ModelHeader header) {
|
||||
model_header_ = header;
|
||||
}
|
||||
|
||||
@ -100,9 +100,9 @@ private:
|
||||
* @param is - opened input stream
|
||||
* @return
|
||||
*/
|
||||
static GNAPluginNS::HeaderLatest::ModelHeader ReadHeader(std::istream &is);
|
||||
static ov::intel_gna::header_latest::ModelHeader ReadHeader(std::istream &is);
|
||||
|
||||
GNAPluginNS::HeaderLatest::RuntimeEndPoint ReadEndPoint(std::istream &is);
|
||||
ov::intel_gna::header_latest::RuntimeEndPoint ReadEndPoint(std::istream &is);
|
||||
|
||||
/**
|
||||
* @brief Import model from FS into preallocated buffer,
|
||||
@ -114,8 +114,8 @@ private:
|
||||
void Import(void *basePointer,
|
||||
size_t gnaGraphSize,
|
||||
std::istream &is,
|
||||
GNAPluginNS::GnaInputs &inputs,
|
||||
GNAPluginNS::GnaOutputs &outputs,
|
||||
ov::intel_gna::GnaInputs &inputs,
|
||||
ov::intel_gna::GnaOutputs &outputs,
|
||||
TranspositionInfoMap& inputstranspositionInfo,
|
||||
TranspositionInfoMap& outputstranspositionInfo,
|
||||
std::string& modelLibVersion);
|
||||
|
@ -125,8 +125,8 @@ inline uint32_t ToByteSize(const Gna2DataType type) {
|
||||
using namespace std;
|
||||
using namespace InferenceEngine;
|
||||
using namespace InferenceEngine::details;
|
||||
using namespace GNAPluginNS;
|
||||
using namespace GNAPluginNS::memory;
|
||||
|
||||
using namespace ov::intel_gna::memory;
|
||||
using namespace ov::intel_gna::frontend;
|
||||
|
||||
namespace InferenceEngine {
|
||||
@ -355,9 +355,9 @@ GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) :
void GNAPlugin::Init() {
OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "Init");
dnn = std::make_shared<backend::AMIntelDNN>(backend::AMIntelDNN());
gnaFlags = std::make_shared<GNAPluginNS::GNAFlags>(GNAPluginNS::GNAFlags());
inputs_ptr_ = std::make_shared<GNAPluginNS::GnaInputs>(GNAPluginNS::GnaInputs());
outputs_ = GNAPluginNS::GnaOutputs();
gnaFlags = std::make_shared<GNAFlags>(GNAFlags());
inputs_ptr_ = std::make_shared<GnaInputs>(GnaInputs());
outputs_ = GnaOutputs();
graphCompiler.setDNNPtr(dnn);
graphCompiler.setInputsPtr(inputs_ptr_);
@ -508,7 +508,7 @@ bool GNAPlugin::TryToInitOutput(const std::string &portName, InferenceEngine::CN
|
||||
outputs_.at(portName).ptrs.resize(gnaFlags->num_requests);
|
||||
outputs_.at(portName).orientation = orientation;
|
||||
outputs_.at(portName).set_precision(numBytesPerElem);
|
||||
outputs_.at(portName).scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : GNAPluginNS::kScaleFactorDefault;
|
||||
outputs_.at(portName).scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : kScaleFactorDefault;
|
||||
outputs_.at(portName).num_elements = numElem;
|
||||
|
||||
// binding ptr for first infer request - then others will be setup during relocation
|
||||
@ -787,7 +787,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
|
||||
|
||||
// Check the network
|
||||
std::string error;
|
||||
if (!GNAPluginNS::GNALimitations::AreLayersSupported(network, error)) {
|
||||
if (!limitations::AreLayersSupported(network, error)) {
|
||||
THROW_GNA_EXCEPTION << error.c_str();
|
||||
}
|
||||
|
||||
@ -1082,7 +1082,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
|
||||
// update orientation of model intput layer
|
||||
for (auto& inputLayer : inputLayers) {
|
||||
if (LayerInfo(inputLayer).isInput()) {
|
||||
ov::intela_gna::helpers::updateModelInputOrientationWithoutConvolution(*inputLayer,
|
||||
ov::intel_gna::helpers::updateModelInputOrientationWithoutConvolution(*inputLayer,
|
||||
graphCompiler.dnnComponents,
|
||||
*inputs_ptr_);
|
||||
}
|
||||
@ -1092,7 +1092,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
|
||||
for (auto&& outPort : outputs_data_map_) {
|
||||
auto outLayer = getCreatorLayer(outPort.second).lock();
|
||||
if (outLayer && LayerInfo(outLayer).isOutput()) {
|
||||
ov::intela_gna::helpers::updateModelOutputOrientation(outPort.first,
|
||||
ov::intel_gna::helpers::updateModelOutputOrientation(outPort.first,
|
||||
outLayer->name,
|
||||
graphCompiler.dnnComponents,
|
||||
outputs_);
|
||||
@ -1113,11 +1113,11 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
|
||||
#endif
|
||||
}
|
||||
|
||||
bool GNAPluginNS::GNAPlugin::isFP32ModeActive() const {
|
||||
bool GNAPlugin::isFP32ModeActive() const {
|
||||
return gnaFlags->sw_fp32 || !gnadevice;
|
||||
}
|
||||
|
||||
std::string GNAPluginNS::GNAPlugin::effectiveGnaCompileTarget() const {
|
||||
std::string GNAPlugin::effectiveGnaCompileTarget() const {
|
||||
if (gnadevice) {
|
||||
return gnadevice->GetCompileTarget();
|
||||
} else if (!config.gnaCompileTarget.empty()) {
|
||||
@ -1161,7 +1161,7 @@ std::shared_ptr<request::ModelWrapper> GNAPlugin::createModelWrapperForLoadNetwo
|
||||
THROW_GNA_EXCEPTION << "dnn is nullptr cannot load network";
|
||||
}
|
||||
|
||||
std::weak_ptr<GNAPluginNS::backend::AMIntelDNN> weakDnn = dnn;
|
||||
std::weak_ptr<backend::AMIntelDNN> weakDnn = dnn;
|
||||
auto compileTarget = effectiveGnaCompileTarget();
|
||||
auto initializer = [weakDnn, compileTarget](Gna2Model* model) {
|
||||
if (auto dnn = weakDnn.lock()) {
|
||||
@ -1174,7 +1174,7 @@ std::shared_ptr<request::ModelWrapper> GNAPlugin::createModelWrapperForLoadNetwo
|
||||
return request::ModelWrapperFactory::createInitialized(std::move(initializer));
|
||||
}
|
||||
|
||||
std::shared_ptr<request::ModelWrapper> GNAPluginNS::GNAPlugin::createModelWrapperForImportNetwork(
|
||||
std::shared_ptr<request::ModelWrapper> GNAPlugin::createModelWrapperForImportNetwork(
|
||||
uint32_t numberOfOperations) {
|
||||
return request::ModelWrapperFactory::createWithNumberOfEmptyOperations(numberOfOperations);
|
||||
}
|
||||
@ -1238,20 +1238,21 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap& inputs, Infer
|
||||
int inputNum = 0;
|
||||
for (auto& input : inputs) {
|
||||
auto inputLayout = input.second->getTensorDesc().getLayout();
|
||||
if (inputLayout != Layout::C && inputLayout != Layout::NC && inputLayout != Layout::CN &&
|
||||
inputLayout != Layout::CHW && inputLayout != Layout::NCHW) {
|
||||
if (inputLayout != InferenceEngine::Layout::C && inputLayout != InferenceEngine::Layout::NC &&
|
||||
inputLayout != InferenceEngine::Layout::CN && inputLayout != InferenceEngine::Layout::CHW &&
|
||||
inputLayout != InferenceEngine::Layout::NCHW) {
|
||||
THROW_GNA_EXCEPTION << "Expected input blob to have Layout::C, Layout::NC, Layout::CN, Layout::NCHW or "
|
||||
"Layout::CHW. But was: "
|
||||
<< input.second->getTensorDesc().getLayout();
|
||||
}
|
||||
|
||||
if (inputLayout == Layout::NCHW || inputLayout == Layout::CHW) {
|
||||
if (inputLayout == InferenceEngine::Layout::NCHW || inputLayout == InferenceEngine::Layout::CHW) {
|
||||
// specific case that can be squeezed to 2d
|
||||
inputLayout = Layout::NC;
|
||||
inputLayout = InferenceEngine::Layout::NC;
|
||||
}
|
||||
|
||||
auto is1D = input.second->getTensorDesc().getLayout() == Layout::C;
|
||||
auto is3D = input.second->getTensorDesc().getLayout() == Layout::CHW;
|
||||
auto is1D = input.second->getTensorDesc().getLayout() == InferenceEngine::Layout::C;
|
||||
auto is3D = input.second->getTensorDesc().getLayout() == InferenceEngine::Layout::CHW;
|
||||
|
||||
if (inputs_ptr_->at(input.first).ptrs.empty()) {
|
||||
// should not happen in user code however might happen if there any non executable network based integration
|
||||
@ -1297,7 +1298,7 @@ uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap& inputs, Infer
|
||||
ImportFrames(inputs_ptr_->at(input.first).ptrs[index],
|
||||
input.second->cbuffer().as<float*>(),
|
||||
input.second->getTensorDesc().getPrecision(),
|
||||
gnaFlags->sw_fp32 ? GNAPluginNS::kScaleFactorDefault : inputs_ptr_->at(input.first).scale_factor,
|
||||
gnaFlags->sw_fp32 ? kScaleFactorDefault : inputs_ptr_->at(input.first).scale_factor,
|
||||
inputOrientation,
|
||||
importedFrames,
|
||||
targetGroups,
|
||||
@ -1394,21 +1395,21 @@ RequestStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
|
||||
for (auto&& outputBlobIt : requestResult) {
|
||||
auto& outputBlob = outputBlobIt.second;
|
||||
auto& outputDesc = outputs_.at(outputBlobIt.first);
|
||||
if (outputBlob->getTensorDesc().getLayout() != Layout::C &&
|
||||
outputBlob->getTensorDesc().getLayout() != Layout::NC &&
|
||||
outputBlob->getTensorDesc().getLayout() != Layout::CN &&
|
||||
outputBlob->getTensorDesc().getLayout() != Layout::NCHW &&
|
||||
outputBlob->getTensorDesc().getLayout() != Layout::CHW &&
|
||||
outputBlob->getTensorDesc().getLayout() != Layout::SCALAR) {
|
||||
if (outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::C &&
|
||||
outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::NC &&
|
||||
outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::CN &&
|
||||
outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::NCHW &&
|
||||
outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::CHW &&
|
||||
outputBlob->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR) {
|
||||
THROW_GNA_EXCEPTION << "Expected output blob to have Layout::C, Layout::NC, Layout::CN, Layout::NCHW or "
|
||||
"Layout::CHW. But was "
|
||||
<< outputBlob->getTensorDesc().getLayout();
|
||||
}
|
||||
|
||||
auto dims = outputBlob->getTensorDesc().getDims();
|
||||
auto is1D = outputBlob->getTensorDesc().getLayout() == Layout::C;
|
||||
auto isScalar = outputBlob->getTensorDesc().getLayout() == Layout::SCALAR;
|
||||
auto is3D = outputBlob->getTensorDesc().getLayout() == Layout::CHW;
|
||||
auto is1D = outputBlob->getTensorDesc().getLayout() == InferenceEngine::Layout::C;
|
||||
auto isScalar = outputBlob->getTensorDesc().getLayout() == InferenceEngine::Layout::SCALAR;
|
||||
auto is3D = outputBlob->getTensorDesc().getLayout() == InferenceEngine::Layout::CHW;
|
||||
auto batchSize = (is1D || isScalar || is3D) ? 1 : dims[0];
|
||||
auto elementsPerBatch =
|
||||
isScalar ? 1
|
||||
@ -1635,7 +1636,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
|
||||
SetNetworkInputs();
|
||||
SetNetworkOutputs();
|
||||
|
||||
ov::intela_gna::helpers::ApplyInputScaleFactors(config, header, *inputs_ptr_);
|
||||
ov::intel_gna::helpers::ApplyInputScaleFactors(config, header, *inputs_ptr_);
|
||||
|
||||
auto getOrientation = [](Gna2Operation& gnaOperation) {
|
||||
return gnaOperation.Type == Gna2OperationTypeConvolution ? kDnnNonInterleavedOrientation
|
||||
|
@ -26,8 +26,10 @@
|
||||
#include <legacy/ie_util_internal.hpp>
|
||||
#include <gna2-model-api.h>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace request {
|
||||
|
||||
class ModelWrapper;
|
||||
class WorkerPool;
|
||||
class Worker;
|
||||
@ -38,13 +40,13 @@ protected:
|
||||
std::string _pluginName = "GNA";
|
||||
|
||||
Config config {};
|
||||
std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnn;
|
||||
std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlags;
|
||||
std::shared_ptr<GNAPluginNS::gna_memory_type> gnamem;
|
||||
std::shared_ptr<GNAPluginNS::GnaInputs> inputs_ptr_;
|
||||
GNAPluginNS::GnaOutputs outputs_;
|
||||
std::shared_ptr<backend::AMIntelDNN> dnn;
|
||||
std::shared_ptr<GNAFlags> gnaFlags;
|
||||
std::shared_ptr<gna_memory_type> gnamem;
|
||||
std::shared_ptr<GnaInputs> inputs_ptr_;
|
||||
GnaOutputs outputs_;
|
||||
|
||||
GNAPluginNS::GNAGraphCompiler graphCompiler;
|
||||
GNAGraphCompiler graphCompiler;
|
||||
|
||||
uint32_t activeLayerIndex = 0xffffffff;
|
||||
TranspositionInfoMap transpose_inputs_info;
|
||||
@ -237,4 +239,5 @@ protected:
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -19,10 +19,11 @@
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace InferenceEngine::details;
|
||||
using namespace ov::intel_gna;
|
||||
using namespace ov::intel_gna::common;
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
const uint8_t Config::max_num_requests;
|
||||
|
||||
OPENVINO_SUPPRESS_DEPRECATED_START
|
||||
@ -127,7 +128,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& config) {
check_scale_factor(scale_factor);
// missing scale factors are set to be 1.0f
if (inputScaleFactors.size() <= input_index) {
inputScaleFactors.resize(input_index + 1, GNAPluginNS::kScaleFactorDefault);
inputScaleFactors.resize(input_index + 1, kScaleFactorDefault);
}
inputScaleFactors[input_index] = InferenceEngine::CNNLayer::ie_parse_float(value);
} else if (key == GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE) || key == ov::intel_gna::firmware_model_image_path) {
@ -414,4 +415,6 @@ std::vector<std::string> Config::GetSupportedKeys() const {
}
return result;
}
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
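The resize above guarantees that any input whose scale factor is not set explicitly ends up with kScaleFactorDefault (1.0f), even when factors arrive for higher indices first. A small sketch of that behaviour outside the plugin's Config class; the 8192.f value is illustrative:

#include <cstddef>
#include <vector>

int main() {
    const float kScaleFactorDefault = 1.f;
    std::vector<float> inputScaleFactors;

    // a scale factor arrives for input index 2 before any other
    std::size_t input_index = 2;
    if (inputScaleFactors.size() <= input_index)
        inputScaleFactors.resize(input_index + 1, kScaleFactorDefault);
    inputScaleFactors[input_index] = 8192.f;

    // inputs 0 and 1 keep the 1.0f default
    return (inputScaleFactors[0] == 1.f && inputScaleFactors[1] == 1.f && inputScaleFactors[2] == 8192.f) ? 0 : 1;
}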
@ -14,7 +14,8 @@
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
static const float kScaleFactorDefault = 1.f;
|
||||
|
||||
@ -76,4 +77,5 @@ struct Config {
|
||||
static const uint8_t max_num_requests = 127;
|
||||
};
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -7,7 +7,6 @@
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace std;
|
||||
using namespace GNAPluginNS;
|
||||
|
||||
static const Version gnaPluginDescription = {
|
||||
{2, 1},
|
||||
|
@ -13,7 +13,8 @@
|
||||
#include "gna_plugin_config.hpp"
|
||||
#include <legacy/ie_util_internal.hpp>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
class GNAPluginInternal : public InferenceEngine::IInferencePlugin {
|
||||
private:
|
||||
@ -104,4 +105,5 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include <unordered_map>
|
||||
#include <memory>
|
||||
|
||||
using namespace GNAPluginNS;
|
||||
using namespace InferenceEngine;
|
||||
using namespace InferenceEngine::PluginConfigParams;
|
||||
|
||||
|
@ -5,7 +5,7 @@
#include <cstdint>
#include <limits>
#include "gna_slope_scale.h"
#include "gna_slope_scale.hpp"
pwl_gna_slope_scale_t gna_slope(const double slope,
const double in_scale,
@ -5,7 +5,8 @@
|
||||
#include <ie_memcpy.h>
|
||||
#include "gna_data_types.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
/**
|
||||
* @brief convert a tensor or its parts from NCHW to NHWC order on the base of transposition information.
|
||||
@ -79,4 +80,5 @@ inline void ConvertTensorFromNCHWToNHWC(size_t precision, size_t rows, size_t co
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
@ -9,7 +9,9 @@
|
||||
#include <string>
|
||||
#include "gna_graph_tools.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
/**
|
||||
* @brief implements upstream search for BFS routine
|
||||
*/
|
||||
@ -113,5 +115,5 @@ inline UpstreamLayersContainer make_upstream_order(InferenceEngine::CNNLayer* or
|
||||
return fusedCnt;
|
||||
}
|
||||
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -9,7 +9,9 @@
|
||||
|
||||
#include <legacy/ie_layers.h>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
class GNAConcatLayer {
|
||||
InferenceEngine::CNNLayerPtr concatLayer;
|
||||
|
||||
@ -46,4 +48,6 @@ public:
|
||||
|
||||
std::vector<ConcatConnectedLayerInfo> concatInputLayers;
|
||||
};
|
||||
} // namespace GNAPluginNS
|
||||
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -14,8 +14,10 @@
|
||||
#include "gna_graph_tools.hpp"
|
||||
#include "log/debug.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace GNAConvolutionLayer {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace gna_convolution_layer {
|
||||
|
||||
bool should_transpose_h_w(const uint32_t in_height,
|
||||
const uint32_t kernel_height,
|
||||
const uint32_t in_channels,
|
||||
@ -23,9 +25,13 @@ bool should_transpose_h_w(const uint32_t in_height,
|
||||
return in_height == kernel_height && in_channels == 1 && stride_height == 1;
|
||||
}
|
||||
|
||||
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t in_channels,
|
||||
const uint32_t kernelHeight, const uint32_t kernelWidth,
|
||||
const uint32_t strideHeight, const uint32_t strideWidth) {
|
||||
bool isMappableFrom2DTo1D(const uint32_t inHeight,
|
||||
const uint32_t inWidth,
|
||||
const uint32_t in_channels,
|
||||
const uint32_t kernelHeight,
|
||||
const uint32_t kernelWidth,
|
||||
const uint32_t strideHeight,
|
||||
const uint32_t strideWidth) {
|
||||
if (inHeight <= 1 || inWidth <= 1) {
|
||||
// Mapping not needed since input is already 1D
|
||||
return false;
|
||||
@ -34,8 +40,11 @@ bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const
|
||||
should_transpose_h_w(inHeight, kernelHeight, in_channels, strideHeight);
|
||||
}
|
||||
|
||||
bool is3DInputOr2DKernel(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
|
||||
const uint32_t kernelHeight, const uint32_t kernelWidth) {
|
||||
bool is3DInputOr2DKernel(const uint32_t inHeight,
|
||||
const uint32_t inWidth,
|
||||
const uint32_t inDepth,
|
||||
const uint32_t kernelHeight,
|
||||
const uint32_t kernelWidth) {
|
||||
return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1);
|
||||
}
|
||||
|
||||
@ -46,18 +55,27 @@ double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
|
||||
// for kernelSize >= 14 -> 1.7
|
||||
// for kernelSize >= 9 -> 1.3
|
||||
// for kernelSize in {7, 8} -> 1.2
|
||||
const std::vector< KRT > reducers{ {49, 3.0}, {36, 2.6}, {21, 2.3}, {14, 1.7}, {9, 1.3}, {7, 1.2} };
|
||||
const std::vector<KRT> reducers{{49, 3.0}, {36, 2.6}, {21, 2.3}, {14, 1.7}, {9, 1.3}, {7, 1.2}};
|
||||
auto reducer = 1.0;
|
||||
const auto inDepth = InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
|
||||
const auto inDepth =
|
||||
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
|
||||
const auto inHeight =
|
||||
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
|
||||
const auto inWidth =
|
||||
InferenceEngine::GetDataDimByName(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
|
||||
if (is3DInputOr2DKernel(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
|
||||
!isMappableFrom2DTo1D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x, conv._stride_y, conv._stride_x)) {
|
||||
!isMappableFrom2DTo1D(inHeight,
|
||||
inWidth,
|
||||
inDepth,
|
||||
conv._kernel_y,
|
||||
conv._kernel_x,
|
||||
conv._stride_y,
|
||||
conv._stride_x)) {
|
||||
const auto kernelSize = conv._kernel_x * conv._kernel_y;
|
||||
auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
|
||||
[](const KRT& l, const KRT::first_type& r) {return l.first > r; });
|
||||
auto r =
|
||||
std::lower_bound(reducers.begin(), reducers.end(), kernelSize, [](const KRT& l, const KRT::first_type& r) {
|
||||
return l.first > r;
|
||||
});
|
||||
if (r != reducers.end())
|
||||
reducer = r->second;
|
||||
}
|
||||
@ -80,7 +98,8 @@ uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint3
if (window > in || window == 0 || stride == 0) {
THROW_GNA_EXCEPTION << "Invalid (input, window, stride) = (" << in << "," << window << "," << stride << ")";
}
if (window == in) return 1;
if (window == in)
return 1;
return (in - window - 1) / stride + 2;
}
@ -94,5 +113,6 @@ uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride) {
return (in - 1) / stride + 1;
}
} // namespace GNAConvolutionLayer
} // namespace GNAPluginNS
} // namespace gna_convolution_layer
} // namespace intel_gna
} // namespace ov
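For window < in, the integer expression (in - window - 1) / stride + 2 equals the usual ceil((in - window) / stride) + 1 pooling output size, whereas outputFromPoolingLegacy ignores the window entirely. A quick numeric check of both formulas:

#include <cstdint>

static uint32_t outputFromPooling(uint32_t in, uint32_t window, uint32_t stride) {
    if (window == in) return 1;
    return (in - window - 1) / stride + 2;  // ceil((in - window) / stride) + 1 for window < in
}

static uint32_t outputFromPoolingLegacy(uint32_t in, uint32_t stride) {
    return (in - 1) / stride + 1;           // window is not taken into account
}

int main() {
    // in = 10, window = 4, stride = 3: ceil(6 / 3) + 1 = 3; the legacy formula gives 4
    return (outputFromPooling(10, 4, 3) == 3 && outputFromPoolingLegacy(10, 3) == 4) ? 0 : 1;
}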
@ -8,20 +8,28 @@
|
||||
|
||||
#include <legacy/ie_layers.h>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace GNAConvolutionLayer {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace gna_convolution_layer {
|
||||
|
||||
bool should_transpose_h_w(const uint32_t in_height,
|
||||
const uint32_t kernel_height,
|
||||
const uint32_t in_channels,
|
||||
const uint32_t stride_height);
|
||||
|
||||
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inChannels,
|
||||
const uint32_t kernelHeight, const uint32_t kernelWidth,
|
||||
const uint32_t strideHeight, const uint32_t strideWidth);
|
||||
bool isMappableFrom2DTo1D(const uint32_t inHeight,
|
||||
const uint32_t inWidth,
|
||||
const uint32_t inChannels,
|
||||
const uint32_t kernelHeight,
|
||||
const uint32_t kernelWidth,
|
||||
const uint32_t strideHeight,
|
||||
const uint32_t strideWidth);
|
||||
|
||||
bool is3DInputOr2DKernel(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
|
||||
const uint32_t kernelHeight, const uint32_t kernelWidth);
|
||||
bool is3DInputOr2DKernel(const uint32_t inHeight,
|
||||
const uint32_t inWidth,
|
||||
const uint32_t inDepth,
|
||||
const uint32_t kernelHeight,
|
||||
const uint32_t kernelWidth);
|
||||
|
||||
double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv);
|
||||
|
||||
@ -31,5 +39,6 @@ uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint3
|
||||
|
||||
uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride);
|
||||
|
||||
} // namespace GNAConvolutionLayer
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace gna_convolution_layer
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -4,7 +4,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
/**
|
||||
* GNA primitive created in sorting order for this copy layer
|
||||
*/
|
||||
@ -14,4 +16,5 @@ static constexpr auto CopyLayerName = "Copy";
|
||||
*/
|
||||
static constexpr auto DelayedCopyLayerName = "DelayedCopy";
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -7,9 +7,8 @@
|
||||
#include "log/log.hpp"
|
||||
#include "log/debug.hpp"
|
||||
|
||||
using namespace ov::intel_gna;
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
SimpleCrop get_crop_params(const std::vector<int32_t>& axis_in,
|
||||
const std::vector<int32_t>& offset_in,
|
||||
@ -57,4 +56,5 @@ SimpleCrop GetCropParams(InferenceEngine::CropLayer* cropLayer) {
|
||||
return out_val;
|
||||
}
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -8,7 +8,9 @@
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
class GNACropLayer {
|
||||
InferenceEngine::CNNLayerPtr cropLayer;
|
||||
|
||||
@ -41,4 +43,5 @@ SimpleCrop get_crop_params(const std::vector<int32_t>& axis_in,
|
||||
|
||||
SimpleCrop GetCropParams(InferenceEngine::CropLayer* cropLayer);
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -12,12 +12,14 @@
|
||||
|
||||
using ov::intel_gna::frontend::make_fp32_blob;
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
class GNAFakeQuantizeLayer {
|
||||
InferenceEngine::CNNLayerPtr fqLayer;
|
||||
public :
|
||||
GNAFakeQuantizeLayer(InferenceEngine::CNNLayerPtr fqLayer)
|
||||
: fqLayer(fqLayer) {
|
||||
|
||||
public:
|
||||
GNAFakeQuantizeLayer(InferenceEngine::CNNLayerPtr fqLayer) : fqLayer(fqLayer) {
|
||||
if (!LayerInfo(fqLayer).isFakeQuantize()) {
|
||||
THROW_GNA_LAYER_EXCEPTION(fqLayer) << "cannot parse as fake quantize";
|
||||
}
|
||||
@ -30,7 +32,7 @@ class GNAFakeQuantizeLayer {
|
||||
DnnActivation fqActivation{};
|
||||
|
||||
fqActivation.fqParams.levels = fqLayer->GetParamAsSizeT("levels");
|
||||
auto inputShape = getShapeForRange(fqLayer, 1);
|
||||
auto inputShape = getShapeForRange(fqLayer, 1);
|
||||
auto outputShape = getShapeForRange(fqLayer, 3);
|
||||
|
||||
// TODO: check shapes broadcasting to shape of input at 0
|
||||
@ -40,26 +42,27 @@ class GNAFakeQuantizeLayer {
|
||||
fqActivation.fqParams.set = true;
|
||||
|
||||
fqActivation.fqParams.inputPerChannel = inputRangeSize != 1;
|
||||
fqActivation.fqParams.input_low = getParamFromInputAsFloats(fqLayer, 1);
|
||||
fqActivation.fqParams.input_high = getParamFromInputAsFloats(fqLayer, 2);
|
||||
fqActivation.fqParams.input_low = getParamFromInputAsFloats(fqLayer, 1);
|
||||
fqActivation.fqParams.input_high = getParamFromInputAsFloats(fqLayer, 2);
|
||||
|
||||
fqActivation.fqParams.outputPerChannel = outputRangeSize != 1;
|
||||
fqActivation.fqParams.output_low = getParamFromInputAsFloats(fqLayer, 3);
|
||||
fqActivation.fqParams.output_low = getParamFromInputAsFloats(fqLayer, 3);
|
||||
fqActivation.fqParams.output_high = getParamFromInputAsFloats(fqLayer, 4);
|
||||
fqActivation.type = kActFakeQuantize;
|
||||
|
||||
return fqActivation;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Retrieve input blob for FQ layer that connected to const layer
|
||||
*/
|
||||
InferenceEngine::Blob::Ptr getConstInputData() const {
|
||||
return LayerUtils::getParamFromInputAsBlob(fqLayer, 0);
|
||||
return layer_utils::getParamFromInputAsBlob(fqLayer, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Fake quantize has 5 input layers, while 4 of them always constant layer, and 1 might be a tensor - connection
|
||||
* @brief Fake quantize has 5 input layers, while 4 of them always constant layer, and 1 might be a tensor -
|
||||
* connection
|
||||
*/
|
||||
InferenceEngine::CNNLayerPtr getInputLayer() const {
|
||||
return getInputLayerAt(fqLayer, 0);
|
||||
@ -77,24 +80,24 @@ class GNAFakeQuantizeLayer {
|
||||
return getRange(fqLayer, 3);
|
||||
}
|
||||
|
||||
operator InferenceEngine::CNNLayerPtr () const {
|
||||
operator InferenceEngine::CNNLayerPtr() const {
|
||||
return fqLayer;
|
||||
}
|
||||
|
||||
InferenceEngine::CNNLayerPtr operator -> () const {
|
||||
InferenceEngine::CNNLayerPtr operator->() const {
|
||||
return fqLayer;
|
||||
}
|
||||
InferenceEngine::CNNLayerPtr operator * () const {
|
||||
InferenceEngine::CNNLayerPtr operator*() const {
|
||||
return fqLayer;
|
||||
}
|
||||
protected :
|
||||
|
||||
protected:
|
||||
static std::pair<std::vector<float>, std::vector<float>> getRange(InferenceEngine::CNNLayerPtr input, size_t idx) {
|
||||
auto shape = getShapeForRange(input, idx);
|
||||
auto shape = getShapeForRange(input, idx);
|
||||
auto rangeSize = InferenceEngine::details::product(shape.begin(), shape.end());
|
||||
|
||||
auto dataMin = LayerUtils::getParamFromInputAsBlob(input, idx);
|
||||
auto dataMax = LayerUtils::getParamFromInputAsBlob(input, idx + 1);
|
||||
auto dataMin = layer_utils::getParamFromInputAsBlob(input, idx);
|
||||
auto dataMax = layer_utils::getParamFromInputAsBlob(input, idx + 1);
|
||||
std::vector<float> minValues(rangeSize), maxValues(rangeSize);
|
||||
switch (dataMin->getTensorDesc().getPrecision()) {
|
||||
case InferenceEngine::Precision::FP32: {
|
||||
@ -112,46 +115,46 @@ class GNAFakeQuantizeLayer {
|
||||
}
|
||||
default:
|
||||
THROW_GNA_LAYER_EXCEPTION(input) << "cannot cast custom blob to type FP32, since it is of type: "
|
||||
<< dataMin->getTensorDesc().getPrecision();
|
||||
<< dataMin->getTensorDesc().getPrecision();
|
||||
break;
|
||||
}
|
||||
|
||||
return {minValues, maxValues};
|
||||
}
|
||||
|
||||
static float* getParamFromInputAsFloats(InferenceEngine::CNNLayerPtr input, size_t idx) {
|
||||
auto data = LayerUtils::getParamFromInputAsBlob(input, idx);
|
||||
static float* getParamFromInputAsFloats(InferenceEngine::CNNLayerPtr input, size_t idx) {
|
||||
auto data = layer_utils::getParamFromInputAsBlob(input, idx);
|
||||
if (data->getTensorDesc().getPrecision() != InferenceEngine::Precision::FP32) {
|
||||
THROW_GNA_LAYER_EXCEPTION(input) << "cannot cast custom blob to type FP32, since it is of type: "
|
||||
<< data->getTensorDesc().getPrecision();
|
||||
<< data->getTensorDesc().getPrecision();
|
||||
}
|
||||
return data->buffer().as<float*>();
|
||||
}
|
||||
|
||||
static InferenceEngine::SizeVector getShapeFromInput(InferenceEngine::CNNLayerPtr input, size_t idx) {
|
||||
auto data = LayerUtils::getParamFromInputAsBlob(input, idx);
|
||||
static InferenceEngine::SizeVector getShapeFromInput(InferenceEngine::CNNLayerPtr input, size_t idx) {
|
||||
auto data = layer_utils::getParamFromInputAsBlob(input, idx);
|
||||
return data->getTensorDesc().getDims();
|
||||
}
|
||||
|
||||
static InferenceEngine::CNNLayerPtr getInputLayerAt(InferenceEngine::CNNLayerPtr input, size_t idx) {
|
||||
static InferenceEngine::CNNLayerPtr getInputLayerAt(InferenceEngine::CNNLayerPtr input, size_t idx) {
|
||||
if (input->insData.size() <= idx) {
|
||||
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx << "input";
|
||||
}
|
||||
auto iLayerData = input->insData[idx].lock();
|
||||
if (!iLayerData) {
|
||||
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx
|
||||
<< ", input: cannot dereference data weak-pointer";
|
||||
THROW_GNA_LAYER_EXCEPTION(input)
|
||||
<< "cannot get data from " << idx << ", input: cannot dereference data weak-pointer";
|
||||
}
|
||||
auto iLayer = getCreatorLayer(iLayerData).lock();
|
||||
if (!iLayer) {
|
||||
THROW_GNA_LAYER_EXCEPTION(input) << "cannot get data from " << idx
|
||||
<< ", input: cannot dereference creator layer weak-pointer";
|
||||
THROW_GNA_LAYER_EXCEPTION(input)
|
||||
<< "cannot get data from " << idx << ", input: cannot dereference creator layer weak-pointer";
|
||||
}
|
||||
return iLayer;
|
||||
}
|
||||
|
||||
static InferenceEngine::SizeVector getShapeForRange(InferenceEngine::CNNLayerPtr input, size_t idx) {
|
||||
auto lowShape = getShapeFromInput(input, idx);
|
||||
auto lowShape = getShapeFromInput(input, idx);
|
||||
auto highShape = getShapeFromInput(input, idx + 1);
|
||||
if (lowShape.size() != highShape.size()) {
|
||||
THROW_GNA_LAYER_EXCEPTION(input) << "shapes mismatch for " << idx << " and " << idx + 1 << " inputs";
|
||||
@ -162,6 +165,8 @@ class GNAFakeQuantizeLayer {
|
||||
}
|
||||
}
|
||||
return lowShape;
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace GNAPluginNS
|
||||
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -6,8 +6,10 @@
|
||||
|
||||
#include "gna_layer_info.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace LayerUtils {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace layer_utils {
|
||||
|
||||
/**
|
||||
* @brief retrievs blob from const layer connected to certain layer
|
||||
* @param input
|
||||
@ -38,5 +40,7 @@ inline InferenceEngine::Blob::Ptr getParamFromInputAsBlob(InferenceEngine::CNNLa
|
||||
|
||||
return iLayer->blobs["custom"];
|
||||
}
|
||||
} // namespace LayerUtils
|
||||
} // namespace GNAPluginNS
|
||||
|
||||
} // namespace layer_utils
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include <legacy/ie_layers.h>
|
||||
#include "caseless.hpp"
|
||||
#include "ie_algorithm.hpp"
|
||||
#include "backend/gna_types.h"
|
||||
#include "backend/gna_types.hpp"
|
||||
#include "gna_permute.hpp"
|
||||
#include "gna_lib_ver_selector.hpp"
|
||||
#include "gna_copy_layer.hpp"
|
||||
@ -21,7 +21,8 @@
|
||||
#include "backend/gna_limitations.hpp"
|
||||
#include "transformations/rt_info/gna_transpose_fusable.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
/**
|
||||
* @brief detecting of const pointer for dynamic cast operations
|
||||
@ -321,7 +322,7 @@ class LayerInfo {
|
||||
auto inputs = layer->insData.begin()->lock();
|
||||
auto inputsOrder = inputs->getTensorDesc().getDims();
|
||||
|
||||
return GNAPluginNS::isTrivialPermute(std::vector<int64_t>{begin(layerOrder), end(layerOrder)},
|
||||
return permute::isTrivialPermute(std::vector<int64_t>{begin(layerOrder), end(layerOrder)},
|
||||
inputsOrder);
|
||||
}
|
||||
bool isNonValuesChangable() const {
|
||||
@ -356,7 +357,7 @@ class LayerInfo {
|
||||
auto cropLayer = dynamic_cast<InferenceEngine::CropLayer *> (layer);
|
||||
if (cropLayer != nullptr && !cropLayer->offset.empty()) {
|
||||
const auto crop_params = GetCropParams(cropLayer);
|
||||
return GNAPluginNS::GNALimitations::isCropAffinedOffset(crop_params.start_offset);
|
||||
return limitations::isCropAffinedOffset(crop_params.start_offset);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -425,4 +426,5 @@ inline std::ostream & operator <<(std::ostream &os, const LayerInfo & info) {
|
||||
return os;
|
||||
}
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -8,10 +8,16 @@
#include "gna_layer_type.hpp"
#include "gna_layer_info.hpp"

GNAPluginNS::LayerType GNAPluginNS::LayerTypeFromStr(const std::string &str) {
namespace ov {
namespace intel_gna {

LayerType LayerTypeFromStr(const std::string& str) {
auto it = LayerNameToType.find(str);
if (it != LayerNameToType.end())
return it->second;
else
return LayerType::NO_TYPE;
}

} // namespace intel_gna
} // namespace ov
@ -9,9 +9,11 @@

#include <caseless.hpp>

#include "backend/dnn_types.h"
#include "backend/dnn_types.hpp"

namespace ov {
namespace intel_gna {

namespace GNAPluginNS {
enum class LayerType {
Input,
Convolution,
@ -54,7 +56,7 @@ enum class LayerType {
NO_TYPE
};

static const InferenceEngine::details::caseless_map<std::string, GNAPluginNS::LayerType> LayerNameToType = {
static const InferenceEngine::details::caseless_map<std::string, LayerType> LayerNameToType = {
{ "Input" , LayerType::Input },
{ "Convolution" , LayerType::Convolution },
{ "ReLU" , LayerType::ReLU },
@ -94,5 +96,7 @@ static const InferenceEngine::details::caseless_map<std::string, GNAPluginNS::La
{"Gemm", LayerType::Gemm},
};

GNAPluginNS::LayerType LayerTypeFromStr(const std::string &str);
} // namespace GNAPluginNS
LayerType LayerTypeFromStr(const std::string &str);

} // namespace intel_gna
} // namespace ov
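For reference, a minimal standalone sketch of the case-insensitive lookup that LayerTypeFromStr performs. The real header relies on InferenceEngine::details::caseless_map; the to_lower helper and the shortened table below are illustrative stand-ins, not part of the plugin.

#include <algorithm>
#include <cctype>
#include <iostream>
#include <map>
#include <string>

enum class LayerType { Input, Convolution, ReLU, NO_TYPE };

// Illustrative replacement for the caseless_map: compare keys after lowercasing.
static std::string to_lower(std::string s) {
    std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) {
        return static_cast<char>(std::tolower(c));
    });
    return s;
}

static const std::map<std::string, LayerType> kLayerNameToType = {
    {"input", LayerType::Input},
    {"convolution", LayerType::Convolution},
    {"relu", LayerType::ReLU},
};

LayerType LayerTypeFromStr(const std::string& str) {
    auto it = kLayerNameToType.find(to_lower(str));
    return it != kLayerNameToType.end() ? it->second : LayerType::NO_TYPE;
}

int main() {
    std::cout << (LayerTypeFromStr("ReLU") == LayerType::ReLU) << "\n";       // 1
    std::cout << (LayerTypeFromStr("Unknown") == LayerType::NO_TYPE) << "\n"; // 1
}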
@ -7,7 +7,9 @@
#include "legacy/ie_layers.h"
#include "debug.h"

namespace GNAPluginNS {
namespace ov {
namespace intel_gna {

/**
* maps type of connection to input and output layers also stores gna_pointer for alloc request
*/
@ -63,4 +65,6 @@ public:
*/
float scale_factor = 1.0f;
};
} // namespace GNAPluginNS

} // namespace intel_gna
} // namespace ov
@ -9,7 +9,10 @@
|
||||
#include "ie_common.h"
|
||||
#include "log/debug.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace permute {
|
||||
|
||||
template <class T>
|
||||
class PermuteSequence {
|
||||
public:
|
||||
@ -20,14 +23,14 @@ private:
|
||||
cnt_type permutes;
|
||||
|
||||
public:
|
||||
explicit PermuteSequence(std::vector<T> && orderVecIn) : orderVec(std::move(orderVecIn)) {
|
||||
explicit PermuteSequence(std::vector<T>&& orderVecIn) : orderVec(std::move(orderVecIn)) {
|
||||
std::vector<bool> counter(orderVec.size());
|
||||
for (auto && x : this->orderVec) {
|
||||
for (auto&& x : this->orderVec) {
|
||||
if (x < 0) {
|
||||
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be >= 0";
|
||||
}
|
||||
if (x >= counter.size()) {
|
||||
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be < "<< counter.size();
|
||||
THROW_GNA_EXCEPTION << "invalid order: element " << x << " should be < " << counter.size();
|
||||
}
|
||||
if (counter[x]) {
|
||||
THROW_GNA_EXCEPTION << "invalid order: element " << x << " present more than once";
|
||||
@ -65,13 +68,13 @@ public:
|
||||
i++;
|
||||
}
|
||||
|
||||
for (auto && cycle : permuteCycles) {
|
||||
for (auto&& cycle : permuteCycles) {
|
||||
for (int i = 0; i + 1 < cycle.size(); i++) {
|
||||
permutes.push_back(cycle[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
const cnt_type & cnt() const noexcept {
|
||||
const cnt_type& cnt() const noexcept {
|
||||
return permutes;
|
||||
}
|
||||
};
|
||||
@ -83,22 +86,22 @@ public:
|
||||
*/
|
||||
template <class Iterator>
|
||||
inline typename PermuteSequence<typename std::iterator_traits<Iterator>::value_type>::cnt_type genPermutations(
|
||||
Iterator beg, Iterator en) {
|
||||
static_assert(
|
||||
std::is_same<std::random_access_iterator_tag,
|
||||
typename std::iterator_traits<Iterator>::iterator_category>::value,
|
||||
"The genPermutations() function only accepts random access iterators or raw pointers to an array.\n");
|
||||
Iterator beg,
|
||||
Iterator en) {
|
||||
static_assert(std::is_same<std::random_access_iterator_tag,
|
||||
typename std::iterator_traits<Iterator>::iterator_category>::value,
|
||||
"The genPermutations() function only accepts random access iterators or raw pointers to an array.\n");
|
||||
using value_type = typename std::iterator_traits<Iterator>::value_type;
|
||||
std::vector<value_type> v;
|
||||
for (; beg != en; beg++) {
|
||||
v.push_back(*beg);
|
||||
}
|
||||
auto permute = PermuteSequence<value_type> (std::move(v));
|
||||
auto permute = PermuteSequence<value_type>(std::move(v));
|
||||
return permute.cnt();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
inline typename PermuteSequence<T>::cnt_type genPermutations(const std::initializer_list<T> & lst) {
|
||||
inline typename PermuteSequence<T>::cnt_type genPermutations(const std::initializer_list<T>& lst) {
|
||||
return genPermutations(lst.begin(), lst.end());
|
||||
}
|
||||
|
||||
@ -121,14 +124,12 @@ inline bool isTrivialPermute(const std::vector<int64_t> order, const std::vector
|
||||
// cases when all permutations happened either between 1 and X shape where no other dims in between
|
||||
auto transpose_seq = genPermutations(order.begin(), order.end());
|
||||
auto input_order_transformed = input_shape;
|
||||
for (auto && transp : transpose_seq) {
|
||||
for (auto&& transp : transpose_seq) {
|
||||
// check dims of transposed
|
||||
if (input_order_transformed[transp.first] == 1 &&
|
||||
input_order_transformed[transp.second] == 1) {
|
||||
if (input_order_transformed[transp.first] == 1 && input_order_transformed[transp.second] == 1) {
|
||||
return true;
|
||||
}
|
||||
if (input_order_transformed[transp.first] != 1 &&
|
||||
input_order_transformed[transp.second] != 1) {
|
||||
if (input_order_transformed[transp.first] != 1 && input_order_transformed[transp.second] != 1) {
|
||||
return false;
|
||||
}
|
||||
// check dims in between
|
||||
@ -143,4 +144,6 @@ inline bool isTrivialPermute(const std::vector<int64_t> order, const std::vector
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace permute
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -9,7 +9,9 @@
#include <legacy/ie_layers.h>
#include "backend/gna_limitations.hpp"

namespace GNAPluginNS {
namespace ov {
namespace intel_gna {

// Split, Slice
class GNASplitLayer {
InferenceEngine::CNNLayerPtr splitLayer;
@ -48,7 +50,7 @@ public:
};

// @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size
static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = GNALimitations::inputByteAlignment) {
static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = limitations::inputByteAlignment) {
std::vector<uint32_t> splitSizes;
uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment);
uint32_t usedSize = 0;
@ -68,7 +70,7 @@ static std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
IE_ASSERT(firstValuableDim != std::end(dims));
auto splittedElementsSize = *firstValuableDim;
auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim);
auto alignment = GNALimitations::inputByteAlignment;
auto alignment = limitations::inputByteAlignment;

// Split output size should be multiple by 64 to avoid align filters insertion,
// but we need to check if our input size to split exceeds 64; if not we can always
@ -81,8 +83,9 @@ static std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
}
}
splitSizes = GetAlignedSplitSizes(splittedElementsSize,
GNALimitations::bufferMaxSize * splittedElementsSize / totalElementsSize, alignment);
limitations::bufferMaxSize * splittedElementsSize / totalElementsSize, alignment);
return {splittedDimIx, splitSizes};
}

} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
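A self-contained sketch of the aligned-split computation started in the hunk above. Only the first lines of GetAlignedSplitSizes are visible in the diff, so the loop body here is an assumption (greedily take aligned chunks, then append the remainder), and kInputByteAlignment merely stands in for limitations::inputByteAlignment (assumed to be 64, matching the "multiple by 64" comment).

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-in for limitations::inputByteAlignment.
constexpr uint32_t kInputByteAlignment = 64;

// Sketch: chunks no larger than maxSplitSize, each a multiple of `alignment`,
// plus whatever remains at the end. The loop is filled in by assumption.
std::vector<uint32_t> GetAlignedSplitSizesSketch(uint32_t totalSize,
                                                 uint32_t maxSplitSize,
                                                 uint32_t alignment = kInputByteAlignment) {
    std::vector<uint32_t> splitSizes;
    uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment);
    uint32_t usedSize = 0;
    while (usedSize + maxAlignedSplitSize < totalSize) {
        splitSizes.push_back(maxAlignedSplitSize);
        usedSize += maxAlignedSplitSize;
    }
    splitSizes.push_back(totalSize - usedSize);
    return splitSizes;
}

int main() {
    for (auto s : GetAlignedSplitSizesSketch(300, 130))  // with 64-byte alignment -> 128 128 44
        std::cout << s << " ";
    std::cout << "\n";
}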
@ -11,7 +11,9 @@
#include <caseless.hpp>
#include "gna_graph_compiler.hpp"

namespace GNAPluginNS {
namespace ov {
namespace intel_gna {

class LayersBuilder {
using CreatorFnc = std::function<void(GNAGraphCompiler*, InferenceEngine::CNNLayerPtr)>;

@ -26,4 +28,6 @@ public:
return LayerBuilder;
}
};
} // namespace GNAPluginNS

} // namespace intel_gna
} // namespace ov
@ -12,7 +12,8 @@
#include "gna_device.hpp"
#include "memory/gna_mem_requests.hpp"

namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace memory {
/**
* wrap GNA interface into c++ allocator friendly one
@ -35,9 +36,11 @@ class GNAAllocator {
void deallocate(uint8_t *p, std::size_t n) {
_device->free(p);
}
void setTag(void* memPtr, GNAPluginNS::memory::rRegion tagValue) {
void setTag(void* memPtr, memory::rRegion tagValue) {
_device->tagMemoryRegion(memPtr, tagValue);
}
};

} // namespace memory
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
@ -9,7 +9,8 @@

#include "log/debug.hpp"

namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace memory {

/**
@ -47,4 +48,5 @@ inline std::string rRegionToStr(const rRegion region) {
}

} // namespace memory
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
@ -10,7 +10,8 @@

#include "gna_mem_regions.hpp"

namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace memory {

enum rType : uint8_t {
@ -126,5 +127,7 @@ struct MemRequest {
_initializer(initializer) {
}
};

} // namespace memory
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
@ -19,7 +19,8 @@
|
||||
|
||||
using namespace ov::intel_gna;
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace memory {
|
||||
|
||||
/**
|
||||
@ -200,7 +201,7 @@ public:
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) {
|
||||
void iterate_binded(memory::MemRequest & reference, const T & visitor) {
|
||||
for (auto &re : _mem_requests) {
|
||||
if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) {
|
||||
log::trace() << " [binded=" << re._type << ", ptr=" << re._ptr_out <<"]\n";
|
||||
@ -284,4 +285,5 @@ public:
|
||||
};
|
||||
|
||||
} // namespace memory
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -26,12 +26,13 @@
|
||||
#include <iomanip>
|
||||
#endif
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace memory {
|
||||
|
||||
class GNAFloatAllocator : public std::allocator < uint8_t > {
|
||||
public:
|
||||
void setTag(void*, GNAPluginNS::memory::rRegion) {
|
||||
void setTag(void*, memory::rRegion) {
|
||||
}
|
||||
};
|
||||
|
||||
@ -154,7 +155,7 @@ protected:
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) {
|
||||
void iterate_binded(memory::MemRequest & reference, const T & visitor) {
|
||||
for (auto &re : getQueue(REGION_AUTO)->_mem_requests) {
|
||||
if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) {
|
||||
// log::trace() << " [binded=" << rTypeToStr(re._type) << ", ptr=" << re._ptr_out <<"]\n";
|
||||
@ -291,4 +292,5 @@ protected:
|
||||
};
|
||||
|
||||
} // namespace memory
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -8,123 +8,127 @@
|
||||
#include "ie_layouts.h"
|
||||
#include "gna_graph_tools.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace memory {
|
||||
|
||||
void GNAVariableState::Reset() {
|
||||
state->Reset();
|
||||
}
|
||||
void GNAVariableState::Reset() {
|
||||
state->Reset();
|
||||
}
|
||||
|
||||
InferenceEngine::Precision GNAVariableState::getPrecision() const {
|
||||
InferenceEngine::Precision state_precision;
|
||||
InferenceEngine::Precision GNAVariableState::getPrecision() const {
|
||||
InferenceEngine::Precision state_precision;
|
||||
|
||||
if (state->getInput()) {
|
||||
state_precision = state->getInput()->precision;
|
||||
} else {
|
||||
auto element_size = state->elementSizeBytes();
|
||||
switch (element_size) {
|
||||
case 4:
|
||||
state_precision = InferenceEngine::Precision::FP32;
|
||||
break;
|
||||
case 2:
|
||||
state_precision = InferenceEngine::Precision::I16;
|
||||
break;
|
||||
default:
|
||||
THROW_GNA_EXCEPTION << "Incorrect state element size " << element_size <<
|
||||
" to determine precision for VariableState " << name;
|
||||
}
|
||||
}
|
||||
|
||||
return state_precision;
|
||||
}
|
||||
|
||||
void GNAVariableState::SetState(const InferenceEngine::Blob::Ptr& newState) {
|
||||
IE_ASSERT(newState != nullptr);
|
||||
|
||||
auto data_ptr = newState->cbuffer().as<void*>();
|
||||
IE_ASSERT(data_ptr != nullptr);
|
||||
auto data_size = newState->byteSize();
|
||||
auto data_elements = data_size / newState->element_size();
|
||||
if (ALIGN64(state->reserved_size) != ALIGN64((data_size / (newState->element_size() / state->elementSizeBytes())))) {
|
||||
THROW_GNA_EXCEPTION << "Failed to SetState. Sizes of new and old states do not match. ("
|
||||
<< state->reserved_size << " != " << (newState->element_size() / state->elementSizeBytes()) << ")";
|
||||
}
|
||||
|
||||
InferenceEngine::Precision state_precision = getPrecision();
|
||||
auto new_state_precision = newState->getTensorDesc().getPrecision();
|
||||
|
||||
if (state->gna_ptr == data_ptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (new_state_precision == state_precision) {
|
||||
std::memcpy(state->gna_ptr, data_ptr, data_size);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (state_precision) {
|
||||
case InferenceEngine::Precision::I16: {
|
||||
if (new_state_precision == InferenceEngine::Precision::FP32) {
|
||||
auto quantized =
|
||||
InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput());
|
||||
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor;
|
||||
GNAPluginNS::ConvertToInt16(static_cast<int16_t*>(state->gna_ptr),
|
||||
newState->buffer().as<float*>(),
|
||||
1,
|
||||
data_elements,
|
||||
scale_factor);
|
||||
} else {
|
||||
THROW_GNA_EXCEPTION << "Failed to SetState for VariableState " << name
|
||||
<< ". If old state precision is I16 only I16 and FP32 are allowed as new state precisions."
|
||||
<< " Old state: " << state_precision << " New state: " << new_state_precision;
|
||||
}
|
||||
if (state->getInput()) {
|
||||
state_precision = state->getInput()->precision;
|
||||
} else {
|
||||
auto element_size = state->elementSizeBytes();
|
||||
switch (element_size) {
|
||||
case 4:
|
||||
state_precision = InferenceEngine::Precision::FP32;
|
||||
break;
|
||||
case 2:
|
||||
state_precision = InferenceEngine::Precision::I16;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
THROW_GNA_EXCEPTION << "Failed to SetState for VariableState " << name
|
||||
<< ". Incorrect new/old precision pair"
|
||||
<< " Old state: " << state_precision << " New state: " << new_state_precision;
|
||||
THROW_GNA_EXCEPTION << "Incorrect state element size " << element_size
|
||||
<< " to determine precision for VariableState " << name;
|
||||
}
|
||||
}
|
||||
|
||||
InferenceEngine::Blob::CPtr GNAVariableState::GetState() const {
|
||||
auto elements = state->reserved_size / state->elementSizeBytes();
|
||||
InferenceEngine::Precision state_precision = getPrecision();
|
||||
return state_precision;
|
||||
}
|
||||
|
||||
if (state->getInput() && state_precision == InferenceEngine::Precision::I16) {
|
||||
void GNAVariableState::SetState(const InferenceEngine::Blob::Ptr& newState) {
|
||||
IE_ASSERT(newState != nullptr);
|
||||
|
||||
auto data_ptr = newState->cbuffer().as<void*>();
|
||||
IE_ASSERT(data_ptr != nullptr);
|
||||
auto data_size = newState->byteSize();
|
||||
auto data_elements = data_size / newState->element_size();
|
||||
if (ALIGN64(state->reserved_size) !=
|
||||
ALIGN64((data_size / (newState->element_size() / state->elementSizeBytes())))) {
|
||||
THROW_GNA_EXCEPTION << "Failed to SetState. Sizes of new and old states do not match. (" << state->reserved_size
|
||||
<< " != " << (newState->element_size() / state->elementSizeBytes()) << ")";
|
||||
}
|
||||
|
||||
InferenceEngine::Precision state_precision = getPrecision();
|
||||
auto new_state_precision = newState->getTensorDesc().getPrecision();
|
||||
|
||||
if (state->gna_ptr == data_ptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (new_state_precision == state_precision) {
|
||||
std::memcpy(state->gna_ptr, data_ptr, data_size);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (state_precision) {
|
||||
case InferenceEngine::Precision::I16: {
|
||||
if (new_state_precision == InferenceEngine::Precision::FP32) {
|
||||
auto quantized =
|
||||
InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput());
|
||||
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor;
|
||||
|
||||
auto result_blob = make_blob_with_precision(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32,
|
||||
InferenceEngine::SizeVector({ 1, elements }),
|
||||
InferenceEngine::NC));
|
||||
|
||||
result_blob->allocate();
|
||||
auto buffer = result_blob->buffer().as<float*>();
|
||||
auto new_gna_ptr = static_cast<int16_t*>(state->gna_ptr);
|
||||
|
||||
for (int i = 0; i < elements; i++) {
|
||||
buffer[i] = new_gna_ptr[i] / scale_factor;
|
||||
}
|
||||
|
||||
return result_blob;
|
||||
ConvertToInt16(static_cast<int16_t*>(state->gna_ptr),
|
||||
newState->buffer().as<float*>(),
|
||||
1,
|
||||
data_elements,
|
||||
scale_factor);
|
||||
} else {
|
||||
auto result_blob = make_blob_with_precision(InferenceEngine::TensorDesc(state_precision,
|
||||
InferenceEngine::SizeVector({ 1, elements }),
|
||||
InferenceEngine::NC));
|
||||
result_blob->allocate();
|
||||
std::memcpy(result_blob->buffer(), state->gna_ptr, state->reserved_size);
|
||||
return result_blob;
|
||||
THROW_GNA_EXCEPTION
|
||||
<< "Failed to SetState for VariableState " << name
|
||||
<< ". If old state precision is I16 only I16 and FP32 are allowed as new state precisions."
|
||||
<< " Old state: " << state_precision << " New state: " << new_state_precision;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
THROW_GNA_EXCEPTION << "Failed to SetState for VariableState " << name << ". Incorrect new/old precision pair"
|
||||
<< " Old state: " << state_precision << " New state: " << new_state_precision;
|
||||
}
|
||||
}
|
||||
|
||||
float GNAVariableState::GetScaleFactor() const {
|
||||
InferenceEngine::Blob::CPtr GNAVariableState::GetState() const {
|
||||
auto elements = state->reserved_size / state->elementSizeBytes();
|
||||
InferenceEngine::Precision state_precision = getPrecision();
|
||||
|
||||
if (state->getInput() && state_precision == InferenceEngine::Precision::I16) {
|
||||
auto quantized =
|
||||
InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput());
|
||||
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor;
|
||||
return scale_factor;
|
||||
|
||||
auto result_blob =
|
||||
make_blob_with_precision(InferenceEngine::TensorDesc(InferenceEngine::Precision::FP32,
|
||||
InferenceEngine::SizeVector({1, elements}),
|
||||
InferenceEngine::NC));
|
||||
|
||||
result_blob->allocate();
|
||||
auto buffer = result_blob->buffer().as<float*>();
|
||||
auto new_gna_ptr = static_cast<int16_t*>(state->gna_ptr);
|
||||
|
||||
for (int i = 0; i < elements; i++) {
|
||||
buffer[i] = new_gna_ptr[i] / scale_factor;
|
||||
}
|
||||
|
||||
return result_blob;
|
||||
} else {
|
||||
auto result_blob =
|
||||
make_blob_with_precision(InferenceEngine::TensorDesc(state_precision,
|
||||
InferenceEngine::SizeVector({1, elements}),
|
||||
InferenceEngine::NC));
|
||||
result_blob->allocate();
|
||||
std::memcpy(result_blob->buffer(), state->gna_ptr, state->reserved_size);
|
||||
return result_blob;
|
||||
}
|
||||
}
|
||||
|
||||
float GNAVariableState::GetScaleFactor() const {
|
||||
auto quantized = InferenceEngine::getInjectedData<ov::intel_gna::frontend::QuantizedLayerParams>(state->getInput());
|
||||
auto scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : state->scale_factor;
|
||||
return scale_factor;
|
||||
}
|
||||
|
||||
} // namespace memory
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -9,8 +9,10 @@
|
||||
#include <cpp_interfaces/interface/ie_ivariable_state_internal.hpp>
|
||||
#include "gna_plugin.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace memory {
|
||||
|
||||
class GNAVariableState : public InferenceEngine::IVariableStateInternal {
|
||||
public:
|
||||
GNAVariableState(std::string name, std::shared_ptr<GNAMemoryLayer> state)
|
||||
@ -33,5 +35,7 @@ private:
|
||||
*/
|
||||
InferenceEngine::Precision getPrecision() const;
|
||||
};
|
||||
|
||||
} // namespace memory
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -7,13 +7,17 @@
#include <cstdint>
#include "log/debug.hpp"

int32_t GNAPluginNS::memory::MemoryOffset(void *ptr_target, void *ptr_base) {
namespace ov {
namespace intel_gna {
namespace memory {

int32_t MemoryOffset(void* ptr_target, void* ptr_base) {
auto target = reinterpret_cast<uintptr_t>(ptr_target);
auto base = reinterpret_cast<uintptr_t>(ptr_base);
if (target == 0) { // handle NULL pointers separately
return (-1);
} else if (target < base) {
THROW_GNA_EXCEPTION << "Target address value " << target << " is less than base address " << base;
} else {
uint64_t diff = target - base;
if (diff > 0x7fffffff) {
@ -23,3 +27,6 @@ int32_t GNAPluginNS::memory::MemoryOffset(void *ptr_target, void *ptr_base) {
}
}

} // namespace memory
} // namespace intel_gna
} // namespace ov
@ -6,10 +6,12 @@

#include <cstdint>

namespace GNAPluginNS {
namespace ov {
namespace intel_gna {
namespace memory {

int32_t MemoryOffset(void *ptr_target, void *ptr_base);

} // namespace memory
} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
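A standalone illustration of the MemoryOffset contract shown in the hunks above: the byte offset of a pointer within a region, -1 for a null target. The two error branches below return a sentinel instead of throwing THROW_GNA_EXCEPTION, and the in-range return value is assumed, since that part of the hunk is truncated.

#include <cstdint>
#include <iostream>

// Sketch only: error paths simplified to a sentinel; the plugin throws instead.
int32_t MemoryOffsetSketch(void* ptr_target, void* ptr_base) {
    auto target = reinterpret_cast<uintptr_t>(ptr_target);
    auto base = reinterpret_cast<uintptr_t>(ptr_base);
    if (target == 0) {  // handle NULL pointers separately
        return -1;
    }
    if (target < base) {
        return -2;  // plugin: "Target address value ... is less than base address"
    }
    uint64_t diff = target - base;
    if (diff > 0x7fffffff) {
        return -2;  // plugin: offset must fit into int32_t
    }
    return static_cast<int32_t>(diff);  // assumed in-range result (hunk is truncated)
}

int main() {
    char buffer[256];
    std::cout << MemoryOffsetSketch(buffer + 64, buffer) << "\n";  // 64
    std::cout << MemoryOffsetSketch(nullptr, buffer) << "\n";      // -1
}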
@ -84,7 +84,7 @@ static bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
if (std::dynamic_pointer_cast<ngraph::opset8::Split>(input_op) || std::dynamic_pointer_cast<ngraph::opset8::VariadicSplit>(input_op)) {
for (size_t index = 0; index < input_op_out_index; index++) {
size_t outputSize = ngraph::shape_size(input_op->get_output_shape(index));
offset += outputSize * GNAPluginNS::GNALimitations::bytesPerSplitElement;
offset += outputSize * limitations::bytesPerSplitElement;
}
}
return (offset == ALIGN64(offset));
@ -93,7 +93,7 @@ static bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
static bool is_crop_affined(std::shared_ptr<ngraph::Node> node) {
auto crop = std::dynamic_pointer_cast<ngraph::op::CropIE>(node);
if (crop != nullptr && !crop->offset.empty()) {
return GNAPluginNS::GNALimitations::isCropAffinedOffset(crop->offset.back());
return limitations::isCropAffinedOffset(crop->offset.back());
}
return false;
}
@ -117,7 +117,7 @@ static bool is_trivial_transpose(std::shared_ptr<ngraph::Node> node) {
auto input = transpose->input(0).get_source_output().get_node_shared_ptr();
auto input_order = transpose->get_input_shape(0);

return GNAPluginNS::isTrivialPermute(node_order, input_order);
return permute::isTrivialPermute(node_order, input_order);
}

inline std::shared_ptr<ov::Node> get_prev_node_skipping_certain(const std::shared_ptr<ngraph::Node>& node,
@ -24,7 +24,7 @@
|
||||
#include <legacy/net_pass.h>
|
||||
#include <layers/gna_copy_layer.hpp>
|
||||
|
||||
#include "backend/dnn_types.h"
|
||||
#include "backend/dnn_types.hpp"
|
||||
#include "log/debug.hpp"
|
||||
#include "log/log.hpp"
|
||||
#include "frontend/quantization.hpp"
|
||||
@ -46,10 +46,12 @@
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace InferenceEngine::details;
|
||||
using namespace GNAPluginNS;
|
||||
using namespace ov::intel_gna::frontend;
|
||||
using namespace ov::intel_gna::common;
|
||||
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
#define pass_trace() log::debug() << "[" << getName() << "] "
|
||||
|
||||
std::shared_ptr<IPassManager> BasePass::getPassManager() {
|
||||
@ -98,14 +100,14 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
|
||||
});
|
||||
IE_ASSERT(inputLayer != nullptr);
|
||||
size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() :
|
||||
Get2DReshapedData(nextLayer->outData[0], GNALimitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)->getDims()[1];
|
||||
Get2DReshapedData(nextLayer->outData[0], limitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)->getDims()[1];
|
||||
std::vector<float> weightsValues(weightsSize, fillValue);
|
||||
IE_ASSERT(diagLayer != nullptr);
|
||||
diagLayer->_weights = make_shared_blob<float>(
|
||||
TensorDesc(
|
||||
nextLayer->outData[0]->getTensorDesc().getPrecision(),
|
||||
SizeVector({weightsValues.size()}),
|
||||
Layout::C));
|
||||
InferenceEngine::Layout::C));
|
||||
diagLayer->_weights->allocate();
|
||||
CopyVectorToBlob(diagLayer->_weights, weightsValues);
|
||||
auto dataPtr = std::make_shared<Data>(diagName, nextLayer->outData[0]->getTensorDesc());
|
||||
@ -666,7 +668,7 @@ void RemovePermutationsNHWCToNCHWPass::run() {
|
||||
}
|
||||
// HWC layout enum is used here as the only available in CNNNetwork for 3D vectors,
|
||||
// but the real layout is NCW and it's the one used in order vector later
|
||||
return dims_size == 4 ? Layout::NHWC : Layout::HWC;
|
||||
return dims_size == 4 ? InferenceEngine::Layout::NHWC : InferenceEngine::Layout::HWC;
|
||||
};
|
||||
|
||||
auto setTransposedOrder = [getTransposedLayout](InferenceEngine::DataPtr data) {
|
||||
@ -677,13 +679,17 @@ void RemovePermutationsNHWCToNCHWPass::run() {
|
||||
if (LayerInfo(current_layer).isConcat()) {
|
||||
auto concat_layer = dynamic_cast<InferenceEngine::ConcatLayer*> (current_layer.get());
|
||||
auto dims_size = data->getDims().size();
|
||||
concat_layer->_axis = (dims_size == 4 ? GetPermuteOrder(Layout::NHWC, Layout::NCHW) :
|
||||
concat_layer->_axis = (dims_size == 4 ? permute::GetPermuteOrder(InferenceEngine::Layout::NHWC,
|
||||
InferenceEngine::Layout::NCHW)
|
||||
:
|
||||
std::vector<int32_t>{0, 2, 1})[concat_layer->_axis];
|
||||
}
|
||||
|
||||
// NWC->NCW layouts are used here for order vector, see comments a few lines above
|
||||
auto dims = data->getDims();
|
||||
auto order = dims.size() == 4 ? GetPermuteOrder(Layout::NCHW, Layout::NHWC) :
|
||||
auto order = dims.size() == 4
|
||||
? permute::GetPermuteOrder(InferenceEngine::Layout::NCHW, InferenceEngine::Layout::NHWC)
|
||||
:
|
||||
std::vector<int32_t>{0, 2, 1};
|
||||
InferenceEngine::SizeVector new_dims;
|
||||
for (int i = 0; i < dims.size(); ++i) {
|
||||
@ -1074,7 +1080,7 @@ void FlattenTrivialConcatPass::run() {
|
||||
auto concatInput = getLayerByIndex(input_idx, concatLayer);
|
||||
|
||||
auto tensor = InferenceEngine::TensorDesc(concatInput->getTensorDesc());
|
||||
tensor.reshape(SizeVector({1, total_sizes[input_idx]}), Layout::NC);
|
||||
tensor.reshape(SizeVector({1, total_sizes[input_idx]}), InferenceEngine::Layout::NC);
|
||||
auto reshapeName = l->name + "_input_"+ std::to_string(input_idx) +"_reshape";
|
||||
auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
|
||||
|
||||
@ -1091,7 +1097,7 @@ void FlattenTrivialConcatPass::run() {
|
||||
auto total_size = std::accumulate(dims.begin(), dims.end(), size_t(1), std::multiplies<size_t>());
|
||||
|
||||
auto new_tensor = output->getTensorDesc();
|
||||
new_tensor.reshape(SizeVector({1, total_size}), Layout::NC);
|
||||
new_tensor.reshape(SizeVector({1, total_size}), InferenceEngine::Layout::NC);
|
||||
|
||||
auto new_output = CNNReplaceDataWithChangedTensorDescription(output, new_tensor);
|
||||
log::debug() << "\tChanged " << output->getName() << " dims to 2D" << std::endl;
|
||||
@ -1197,7 +1203,7 @@ void InsertConcatAligningFilterPass::run() {
|
||||
TensorDesc(
|
||||
concatInput->getTensorDesc().getPrecision(),
|
||||
SizeVector({filterWeights.size()}),
|
||||
Layout::C));
|
||||
InferenceEngine::Layout::C));
|
||||
concatAligningFilter->_weights->allocate();
|
||||
if (!concatAligningFilter->_weights->buffer().as<float*>()) {
|
||||
THROW_GNA_EXCEPTION << "Failed to allocate weights of size " << filterWeights.size() << " for " << filterName;
|
||||
@ -1208,10 +1214,10 @@ void InsertConcatAligningFilterPass::run() {
|
||||
// modifying output rows to be used - to avoid modification to original concat we are store num of elements in params
|
||||
dims[1] = num_rows_out;
|
||||
|
||||
if ((concatInput->getLayout() == Layout::NC && dims[0] > 8) ||
|
||||
(concatInput->getLayout() == Layout::CN && dims[1] > 8)) {
|
||||
THROW_GNA_EXCEPTION << "unsupported batch number '" <<
|
||||
(concatInput->getLayout() == Layout::NC ? dims[0] : dims[1]) <<
|
||||
if ((concatInput->getLayout() == InferenceEngine::Layout::NC && dims[0] > 8) ||
|
||||
(concatInput->getLayout() == InferenceEngine::Layout::CN && dims[1] > 8)) {
|
||||
THROW_GNA_EXCEPTION << "unsupported batch number '" << (concatInput->getLayout() == InferenceEngine::Layout::NC ? dims[0] : dims[1])
|
||||
<<
|
||||
"' in layer '" << concatLayer->name << "'";
|
||||
}
|
||||
|
||||
@ -1312,8 +1318,7 @@ void ReorderConcatInputsPass::run() {
|
||||
|
||||
auto linkOutData = std::make_shared<Data>(linkName,
|
||||
TensorDesc(Precision::FP32,
|
||||
SizeVector({ 1 }),
|
||||
Layout::C));
|
||||
SizeVector({ 1 }), InferenceEngine::Layout::C));
|
||||
getCreatorLayer(linkOutData) = link;
|
||||
|
||||
link->outData.push_back(linkOutData);
|
||||
@ -1340,7 +1345,7 @@ void InsertSplitAligningFilterPass::run() {
|
||||
}
|
||||
|
||||
auto outFunctionalLayers = CNNNetGetAllNextLayersSkipCertain(l, -1, [](CNNLayerPtr next_layer) {
|
||||
return GNAPluginNS::LayerInfo(next_layer).isNonFunctional();
|
||||
return LayerInfo(next_layer).isNonFunctional();
|
||||
});
|
||||
size_t padding = 0;
|
||||
for (auto &&outFunctionalLayer : outFunctionalLayers) {
|
||||
@ -1387,16 +1392,16 @@ void InsertSplitAligningFilterPass::run() {
|
||||
IE_ASSERT(filterLayer != nullptr);
|
||||
|
||||
// encodes offset to beginning of split layer input
|
||||
filterLayer->params["offset"] = std::to_string(aligned64_offset / GNALimitations::bytesPerSplitElement);
|
||||
filterLayer->params["offset"] = std::to_string(aligned64_offset / limitations::bytesPerSplitElement);
|
||||
auto dims = splitOutput->getTensorDesc().getDims();
|
||||
if (dims.size() > 3) {
|
||||
THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
|
||||
}
|
||||
|
||||
const auto offsetOfUnalignment = (currentOffset - aligned64_offset) / GNALimitations::bytesPerSplitElement;
|
||||
const auto offsetOfUnalignment = (currentOffset - aligned64_offset) / limitations::bytesPerSplitElement;
|
||||
// TODO consider to use a different number of filters do decrese the number of trailing zeros (additionalPaddingOfFilter)
|
||||
const auto numberOfFilters = GNALimitations::convMinFiltersNum;
|
||||
const auto filterSize = ALIGN(offsetOfUnalignment + numberOfFilters, GNALimitations::convFilterSizeDivider);
|
||||
const auto numberOfFilters = limitations::convMinFiltersNum;
|
||||
const auto filterSize = ALIGN(offsetOfUnalignment + numberOfFilters, limitations::convFilterSizeDivider);
|
||||
|
||||
// filterWeights: numberOfFilters X (offsetOfUnalignment + additionalPaddingOfFilter + numberOfFilters)
|
||||
// offsetOfUnalignment - the leading zeros in the filter
|
||||
@ -1424,7 +1429,7 @@ void InsertSplitAligningFilterPass::run() {
|
||||
filterLayer->_weights = make_shared_blob<float>(TensorDesc(
|
||||
inputData->getTensorDesc().getPrecision(),
|
||||
SizeVector({filterWeights.size()}),
|
||||
Layout::C));
|
||||
InferenceEngine::Layout::C));
|
||||
filterLayer->_weights->allocate();
|
||||
CopyVectorToBlob(filterLayer->_weights, filterWeights);
|
||||
|
||||
@ -1433,7 +1438,7 @@ void InsertSplitAligningFilterPass::run() {
|
||||
filterLayer->_biases = make_shared_blob<float>(TensorDesc(
|
||||
inputData->getTensorDesc().getPrecision(),
|
||||
SizeVector({ biasWeights.size() }),
|
||||
Layout::C));
|
||||
InferenceEngine::Layout::C));
|
||||
filterLayer->_biases->allocate();
|
||||
CopyVectorToBlob(filterLayer->_biases, biasWeights);
|
||||
|
||||
@ -1452,7 +1457,7 @@ void InsertSplitAligningFilterPass::run() {
|
||||
}
|
||||
|
||||
// search data that starts from unaligned location
|
||||
currentOffset += outputSize * GNALimitations::bytesPerSplitElement;
|
||||
currentOffset += outputSize * limitations::bytesPerSplitElement;
|
||||
splitOutIndex++;
|
||||
}
|
||||
}
|
||||
@ -1490,7 +1495,7 @@ void EltwiseSplitOverChannelsPass::run() {
|
||||
auto oData = l->outData.front();
|
||||
auto oDims = oData->getDims();
|
||||
auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims));
|
||||
if (totalElementsSize <= GNALimitations::bufferMaxSize) {
|
||||
if (totalElementsSize <= limitations::bufferMaxSize) {
|
||||
continue;
|
||||
}
|
||||
auto splitSizesPerAxis = AlignedSplitSizesPerAxis(oDims);
|
||||
@ -1602,7 +1607,7 @@ void SubstituteScaleShiftBroadCastPass::run() {
|
||||
dataDims = reshaped_data[insData->getName()];
|
||||
} else {
|
||||
dataDims = HasTo2DReshapeData(l) ?
|
||||
Get2DReshapedData(insData, GNALimitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() :
|
||||
Get2DReshapedData(insData, limitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() :
|
||||
insData->getDims();
|
||||
}
|
||||
|
||||
@ -1634,7 +1639,7 @@ void SubstituteScaleShiftBroadCastPass::run() {
|
||||
}
|
||||
|
||||
auto tensor = InferenceEngine::TensorDesc(insData->getTensorDesc());
|
||||
tensor.reshape(SizeVector{ batchSize, nElements }, Layout::NC);
|
||||
tensor.reshape(SizeVector{batchSize, nElements}, InferenceEngine::Layout::NC);
|
||||
auto reshapeName = scaleShift->name + "_input_" + std::to_string(0) + "_reshape";
|
||||
auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
|
||||
auto layer_before_scale_shift = getCreatorLayer(insData);
|
||||
@ -1949,7 +1954,7 @@ void FuseFQIntoWeightsPass::run() {
|
||||
<< LAYER_NAME(weightableLayer) << "\n";
|
||||
|
||||
auto biases = weightableLayer->insData.size() == 3 ?
|
||||
LayerUtils::getParamFromInputAsBlob(weightableLayer, biasesIdx) : nullptr;
|
||||
layer_utils::getParamFromInputAsBlob(weightableLayer, biasesIdx) : nullptr;
|
||||
auto quantizedWeights = gnaFakeQuantizeLayer.getConstInputData();
|
||||
|
||||
// 1. broke existing connections - by detaching fq subgraph from rest of graph
|
||||
@ -2032,7 +2037,8 @@ void FuseFQIntoWeightsPass::run() {
|
||||
transform->func_id = gnaFakeQuantizeLayer.parseAsActivation();
|
||||
|
||||
auto quantizedWeightsData = quantizedWeights->buffer();
|
||||
auto dequantizedWeights = make_shared_blob<float>(TensorDesc(Precision::FP32, { outputSize }, Layout::C));
|
||||
auto dequantizedWeights =
|
||||
make_shared_blob<float>(TensorDesc(Precision::FP32, {outputSize}, InferenceEngine::Layout::C));
|
||||
dequantizedWeights->allocate();
|
||||
|
||||
auto resultBuffer = dequantizedWeights->buffer();
|
||||
@ -2460,3 +2466,6 @@ int PassManager::run(int index) {
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
@ -9,7 +9,9 @@
|
||||
#include <map>
|
||||
#include <ie_common.h>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
|
||||
/**
|
||||
* @brief interface for gna-pass, special transformer that will be run on input network in order to generate GNABlob
|
||||
*/
|
||||
@ -244,4 +246,5 @@ public:
|
||||
int run(int index = 0);
|
||||
};
|
||||
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -9,12 +9,12 @@
#include <gna_graph_tools.hpp>

namespace ov {
namespace intela_gna {
namespace intel_gna {
namespace helpers {

void updateModelInputOrientationWithoutConvolution(const InferenceEngine::CNNLayer& inputLayer,
const GNAPluginNS::backend::DnnComponents& components,
GNAPluginNS::GnaInputs& inputs) {
const backend::DnnComponents& components,
GnaInputs& inputs) {
// does not make sense to go further is there is no input to set
auto input = inputs.find(inputLayer.name);

@ -84,8 +84,8 @@ void updateModelInputOrientationWithoutConvolution(const InferenceEngine::CNNLay

void updateModelOutputOrientation(const std::string& outputName,
const std::string& cnnlayerName,
const GNAPluginNS::backend::DnnComponents& components,
GNAPluginNS::GnaOutputs& outputs) {
const backend::DnnComponents& components,
GnaOutputs& outputs) {
// if there is no output to set does not make sense to go further
auto output = outputs.find(outputName);
if (output == outputs.end()) {
@ -99,5 +99,5 @@ void updateModelOutputOrientation(const std::string& outputName,
}
}
} // namespace helpers
} // namespace intela_gna
} // namespace intel_gna
} // namespace ov
@ -13,7 +13,8 @@
#include "descriptions/gna_desc.hpp"

namespace ov {
namespace intela_gna {
namespace intel_gna {

/**
* @namespace helpers contains helpers tools for gna plugin.
*/
@ -38,8 +39,8 @@ namespace helpers {
* @throws if orientations of input for multiple layers are different
*/
void updateModelInputOrientationWithoutConvolution(const InferenceEngine::CNNLayer& inputLayer,
const GNAPluginNS::backend::DnnComponents& components,
GNAPluginNS::GnaInputs& inputs);
const backend::DnnComponents& components,
GnaInputs& inputs);

/**
* @brief Update expected orientation for model output of given \p outputName. It is needed to recognize if extra
@ -60,9 +61,9 @@ void updateModelInputOrientationWithoutConvolution(const InferenceEngine::CNNLay
*/
void updateModelOutputOrientation(const std::string& outputName,
const std::string& cnnlayerName,
const GNAPluginNS::backend::DnnComponents& components,
GNAPluginNS::GnaOutputs& outputs);
const backend::DnnComponents& components,
GnaOutputs& outputs);

} // namespace helpers
} // namespace intela_gna
} // namespace intel_gna
} // namespace ov
@ -4,7 +4,10 @@

#include "preprocessing.hpp"

int16_t GNAPluginNS::ConvertFloatToInt16(float src) {
namespace ov {
namespace intel_gna {

int16_t ConvertFloatToInt16(float src) {
float rounding_value = (src > 0) ? 0.5f : -0.5f;
float value = src + rounding_value;
if (value > 32767.0) {
@ -15,7 +18,7 @@ int16_t GNAPluginNS::ConvertFloatToInt16(float src) {
return (int16_t)value;
}

int8_t GNAPluginNS::ConvertFloatToInt8(float src) {
int8_t ConvertFloatToInt8(float src) {
float rounding_value = (src > 0) ? 0.5f : -0.5f;
float value = src + rounding_value;
if (value > 127.0) {
@ -26,15 +29,18 @@ int8_t GNAPluginNS::ConvertFloatToInt8(float src) {
return (int8_t)value;
}

void GNAPluginNS::ConvertToInt16(int16_t *ptr_dst,
const float *ptr_src,
const uint32_t num_rows,
const uint32_t num_columns,
const float scale_factor) {
void ConvertToInt16(int16_t* ptr_dst,
const float* ptr_src,
const uint32_t num_rows,
const uint32_t num_columns,
const float scale_factor) {
if (!ptr_dst || !ptr_src) {
return;
}
for (uint32_t i = 0; i < num_rows*num_columns; i++) {
ptr_dst[i] = ConvertFloatToInt16(ptr_src[i]*scale_factor);
for (uint32_t i = 0; i < num_rows * num_columns; i++) {
ptr_dst[i] = ConvertFloatToInt16(ptr_src[i] * scale_factor);
}
}

} // namespace intel_gna
} // namespace ov
@ -6,7 +6,8 @@

#include <cstdint>

namespace GNAPluginNS {
namespace ov {
namespace intel_gna {

void ConvertToInt16(int16_t *ptr_dst,
const float *ptr_src,
@ -32,4 +33,5 @@ inline void UnscaleAndCast(T2 *ptr_dst, T1 *ptr_src, const uint32_t num_rows, co
}
}

} // namespace GNAPluginNS
} // namespace intel_gna
} // namespace ov
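The conversion above rounds half away from zero and then saturates to the int16 range. Below is a standalone copy for quick experiments, stripped of the plugin namespaces; the negative saturation branch is filled in symmetrically (it falls outside the visible hunk), and the scale factor used in main is only an example value.

#include <cstdint>
#include <iostream>

// Round half away from zero, then clamp to [-32768, 32767].
int16_t ConvertFloatToInt16(float src) {
    float value = src + ((src > 0) ? 0.5f : -0.5f);
    if (value > 32767.0f) return 32767;
    if (value < -32768.0f) return -32768;   // assumed symmetric branch
    return static_cast<int16_t>(value);
}

// Quantize a row-major float buffer with a scale factor.
void ConvertToInt16(int16_t* dst, const float* src, uint32_t rows, uint32_t cols, float scale) {
    if (!dst || !src) return;
    for (uint32_t i = 0; i < rows * cols; i++)
        dst[i] = ConvertFloatToInt16(src[i] * scale);
}

int main() {
    const float input[4] = {0.25f, -0.25f, 2.0f, -2.0f};
    int16_t output[4];
    ConvertToInt16(output, input, 1, 4, 16384.0f);  // example scale factor
    for (auto v : output) std::cout << v << " ";    // 4096 -4096 32767 -32768
    std::cout << "\n";
}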
@ -6,7 +6,8 @@
|
||||
|
||||
#include "gna2_model_helper.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace request {
|
||||
|
||||
ModelWrapper::ModelWrapper(ConstructionPassKey) {
|
||||
@ -33,4 +34,5 @@ const Gna2Model& ModelWrapper::object() const {
|
||||
}
|
||||
|
||||
} // namespace request
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -6,8 +6,10 @@
|
||||
|
||||
#include <gna2-model-api.h>
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace request {
|
||||
|
||||
class ModelWrapperFactory;
|
||||
|
||||
/**
|
||||
@ -58,4 +60,5 @@ private:
|
||||
};
|
||||
|
||||
} // namespace request
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -9,7 +9,8 @@
|
||||
#include "backend/am_intel_dnn.hpp"
|
||||
#include "gna2_model_helper.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace request {
|
||||
|
||||
std::shared_ptr<ModelWrapper> ModelWrapperFactory::createTrivial() {
|
||||
@ -49,4 +50,5 @@ std::shared_ptr<ModelWrapper> ModelWrapperFactory::createInitialized(ModelInitia
|
||||
}
|
||||
|
||||
} // namespace request
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -9,7 +9,8 @@
|
||||
|
||||
#include "model_wrapper.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace request {
|
||||
|
||||
class ModelWrapperFactory {
|
||||
@ -22,4 +23,5 @@ public:
|
||||
};
|
||||
|
||||
} // namespace request
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -9,7 +9,8 @@
|
||||
|
||||
#include "request_status.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace request {
|
||||
|
||||
/**
|
||||
@ -27,7 +28,7 @@ public:
|
||||
* @brief Callback invoked by wait operation.
|
||||
* @param requestID id of request to be used for wait
|
||||
* @param timeoutMilliseconds timeout of wait in milliseconds
|
||||
* @return Status of subrequest @see GNAPluginNS::RequestStatus
|
||||
* @return Status of subrequest @see RequestStatus
|
||||
*
|
||||
*/
|
||||
using WaitHandler = std::function<RequestStatus(uint32_t requestID, int64_t timeoutMilliseconds)>;
|
||||
@ -37,7 +38,7 @@ public:
|
||||
/**
|
||||
* @brief Wait until subrequest will be finished for given timeout.
|
||||
* @param timeoutMilliseconds timeout in milliseconds
|
||||
* @return status of execution of subrequest @see GNAPluginNS::RequestStatus
|
||||
* @return status of execution of subrequest @see RequestStatus
|
||||
*/
|
||||
virtual RequestStatus wait(int64_t timeoutMilliseconds) = 0;
|
||||
|
||||
@ -69,4 +70,5 @@ public:
|
||||
};
|
||||
|
||||
} // namespace request
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -9,7 +9,8 @@
|
||||
#include "log/debug.hpp"
|
||||
#include "log/log.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace request {
|
||||
|
||||
SubrequestImpl::SubrequestImpl(EnqueueHandler enqueueHandler, WaitHandler waitHandler)
|
||||
@ -64,4 +65,5 @@ bool SubrequestImpl::isCompleted() const {
|
||||
}
|
||||
|
||||
} // namespace request
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -6,7 +6,8 @@
|
||||
|
||||
#include "subrequest.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace request {
|
||||
|
||||
/**
|
||||
@ -34,7 +35,7 @@ public:
|
||||
/**
|
||||
* @brief Wait until subrequest will be finished for given timeout.
|
||||
* @param timeoutMilliseconds timeout in milliseconds
|
||||
* @return status of execution of subrequest @see GNAPluginNS::RequestStatus
|
||||
* @return status of execution of subrequest @see RequestStatus
|
||||
*/
|
||||
RequestStatus wait(int64_t timeoutMilliseconds) override;
|
||||
|
||||
@ -72,4 +73,5 @@ private:
|
||||
};
|
||||
|
||||
} // namespace request
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|
@ -12,7 +12,8 @@
|
||||
|
||||
#include "request_status.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace ov {
|
||||
namespace intel_gna {
|
||||
namespace request {
|
||||
|
||||
class ModelWrapper;
|
||||
@ -46,7 +47,7 @@ public:
|
||||
/**
|
||||
* @brief Wait untril request will be not finished for give timeout.
|
||||
* @param timeoutMilliseconds timeout in milliseconds
|
||||
* @return status of execution of ongoing request. @see GNAPluginNS::RequestStatus
|
||||
* @return status of execution of ongoing request. @see RequestStatus
|
||||
*/
|
||||
virtual RequestStatus wait(int64_t timeoutMilliseconds) = 0;
|
||||
|
||||
@ -85,4 +86,5 @@ public:
|
||||
};
|
||||
|
||||
} // namespace request
|
||||
} // namespace GNAPluginNS
|
||||
} // namespace intel_gna
|
||||
} // namespace ov
|
||||
|