* [GNA] Use stride instead of window for pooling (#5946) * Use pool stride instead of window size where applicable * Add test for pooling stride not equal to wnd * Add more tests and cleanup * Fix SW_FP32 legacy cnn * [WIP] Refactor CNN1D * Remove unused (commented out) code * Add tests * Gna split align convert to conv filter (#6347) * Make unaligned split based on Conv instead of Affine * Dump Gna2Tensor.Data pointer for debugging * Apply suggestions from code review * Reuse conv helpers * Cleanup CNN fields * Disable weights reducer on ConvolutionFilter # Conflicts: # inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp # inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp Co-authored-by: Krzysztof Bruniecki <krzysztof.bruniecki@intel.com>
This commit is contained in:
@@ -25,6 +25,7 @@
|
||||
#include "dnn_types.h"
|
||||
#include "gna_types.h"
|
||||
#include "gna_limitations.hpp"
|
||||
#include "layers/gna_convolution_layer.hpp"
|
||||
|
||||
#if GNA_LIB_VER == 2
|
||||
#include <gna2-model-api.h>
|
||||
@@ -50,6 +51,9 @@
|
||||
|
||||
using namespace GNAPluginNS::backend;
|
||||
|
||||
using GNAPluginNS::GNAConvolutionLayer::outputFromConv;
|
||||
using GNAPluginNS::GNAConvolutionLayer::outputFromPooling;
|
||||
using GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy;
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::BeginNewWrite(uint32_t index) {
|
||||
dump_write_index = index;
|
||||
@@ -144,20 +148,15 @@ void GNAPluginNS::backend::AMIntelDNN::InitAffineComponentPrivate(intel_dnn_comp
|
||||
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel_dnn_component_t &comp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_rows_out,
|
||||
uint32_t num_columns_out,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
uint32_t num_bytes_per_weight,
|
||||
uint32_t num_bytes_per_bias,
|
||||
uint32_t num_filters,
|
||||
uint32_t num_filter_rows,
|
||||
uint32_t num_filter_coefficients,
|
||||
uint32_t num_feature_maps,
|
||||
uint32_t num_feature_map_rows,
|
||||
uint32_t num_feature_map_columns,
|
||||
const uint32_t convStride,
|
||||
float weight_scale_factor,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
@@ -165,9 +164,9 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
|
||||
void *&ptr_filters,
|
||||
void *&ptr_biases,
|
||||
bool postInitMem) {
|
||||
comp.num_rows_in = num_rows_in;
|
||||
comp.num_rows_in = 1;
|
||||
comp.num_columns_in = num_columns_in;
|
||||
comp.num_rows_out = num_rows_out;
|
||||
comp.num_rows_out = 1;
|
||||
comp.num_columns_out = num_columns_out;
|
||||
comp.num_bytes_per_input = num_bytes_per_input;
|
||||
comp.num_bytes_per_output = num_bytes_per_output;
|
||||
@@ -180,11 +179,8 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
|
||||
comp.op.conv1D.num_bytes_per_weight = num_bytes_per_weight;
|
||||
comp.op.conv1D.num_bytes_per_bias = num_bytes_per_bias;
|
||||
comp.op.conv1D.num_filters = num_filters;
|
||||
comp.op.conv1D.num_filter_rows = num_filter_rows;
|
||||
comp.op.conv1D.num_filter_coefficients = num_filter_coefficients;
|
||||
comp.op.conv1D.num_feature_maps = num_feature_maps;
|
||||
comp.op.conv1D.num_feature_map_rows = num_feature_map_rows;
|
||||
comp.op.conv1D.num_feature_map_columns = num_feature_map_columns;
|
||||
comp.op.conv1D.convStride = convStride;
|
||||
comp.op.conv1D.weight_scale_factor = weight_scale_factor;
|
||||
comp.output_scale_factor = output_scale_factor;
|
||||
comp.input_scale_factor = output_scale_factor / weight_scale_factor;
|
||||
@@ -201,18 +197,17 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional1DComponentPrivate(intel
|
||||
ptr_outputs = &comp.ptr_outputs;
|
||||
}
|
||||
|
||||
if (comp.num_rows_in * comp.num_columns_in % 8 != 0) {
|
||||
THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << comp.num_rows_in * comp.num_columns_in <<
|
||||
if (num_columns_in % 8 != 0) {
|
||||
THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << num_columns_in <<
|
||||
") is not a multiply by 8";
|
||||
}
|
||||
if (comp.op.conv1D.num_filters < GNALimitations::convMinFiltersNum ||
|
||||
comp.op.conv1D.num_filters > GNALimitations::convMaxFiltersNum ||
|
||||
comp.op.conv1D.num_filters % GNALimitations::convFiltersNumDivider != 0) {
|
||||
THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << comp.op.conv1D.num_filters;
|
||||
if (num_filters < GNALimitations::convMinFiltersNum ||
|
||||
num_filters > GNALimitations::convMaxFiltersNum ||
|
||||
num_filters % GNALimitations::convFiltersNumDivider != 0) {
|
||||
THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << num_filters;
|
||||
}
|
||||
auto filter_stride_size = comp.op.conv1D.num_feature_maps * comp.op.conv1D.num_feature_map_columns;
|
||||
auto max_number_of_out_elements = (comp.num_columns_in - comp.op.conv1D.num_filter_coefficients) / filter_stride_size + 1;
|
||||
if (comp.num_columns_out / max_number_of_out_elements != comp.op.conv1D.num_filters) {
|
||||
auto max_number_of_out_elements = outputFromConv(num_columns_in, num_filter_coefficients, convStride);
|
||||
if (num_columns_out / max_number_of_out_elements != num_filters) {
|
||||
THROW_GNA_EXCEPTION << "Number of outputs or feature map config is incorrect in Convolutional1DComponent";
|
||||
}
|
||||
}
|
||||
@@ -543,11 +538,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteGraphWizModel(const char *filename)
|
||||
if (IS_CONV_1D(k)) {
|
||||
auto &conv = components[k].op.conv1D;
|
||||
graph << " <TR><TD> num_filters</TD><TD>" << conv.num_filters<< "</TD></TR>\n";
|
||||
graph << " <TR><TD> num_filter_rows</TD><TD>" << conv.num_filter_rows<< "</TD></TR>\n";
|
||||
graph << " <TR><TD> num_filter_coefficients</TD><TD>" << conv.num_filter_coefficients<< "</TD></TR>\n";
|
||||
graph << " <TR><TD> num_feature_maps</TD><TD>" << conv.num_feature_maps<< "</TD></TR>\n";
|
||||
graph << " <TR><TD> num_feature_map_rows</TD><TD>" << conv.num_feature_map_rows<< "</TD></TR>\n";
|
||||
graph << " <TR><TD> num_feature_map_columns</TD><TD>" << conv.num_feature_map_columns<< "</TD></TR>\n";
|
||||
graph << " <TR><TD> conv_stride</TD><TD>" << conv.convStride<< "</TD></TR>\n";
|
||||
graph << " <TR><TD> wscale</TD><TD>" << conv.weight_scale_factor<< "</TD></TR>\n";
|
||||
graph << " <TR><TD> wbit</TD><TD>" << conv.num_bytes_per_weight<< "</TD></TR>\n";
|
||||
graph << " <TR><TD> bbit</TD><TD>" << conv.num_bytes_per_bias<< "</TD></TR>\n";
|
||||
@@ -943,21 +935,15 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
|
||||
break;
|
||||
case kDnnConvolutional1dOp: {
|
||||
uint32_t num_filters = component[i].op.conv1D.num_filters;
|
||||
uint32_t num_filter_rows = component[i].op.conv1D.num_filter_rows;
|
||||
uint32_t num_filter_coefficients = component[i].op.conv1D.num_filter_coefficients;
|
||||
uint32_t num_feature_maps = component[i].op.conv1D.num_feature_maps;
|
||||
uint32_t num_feature_map_rows = component[i].op.conv1D.num_feature_map_rows;
|
||||
uint32_t num_feature_map_columns = component[i].op.conv1D.num_feature_map_columns;
|
||||
const auto convStride = component[i].op.conv1D.convStride;
|
||||
uint32_t num_bytes_per_weight = component[i].op.conv1D.num_bytes_per_weight;
|
||||
uint32_t num_bytes_per_bias = component[i].op.conv1D.num_bytes_per_bias;
|
||||
float weight_scale_factor = component[i].op.conv1D.weight_scale_factor;
|
||||
float output_scale_factor = component[i].output_scale_factor;
|
||||
out_file << "<num_filters> " << std::dec << num_filters << "\n";
|
||||
out_file << "<num_filter_coefficients> " << std::dec << num_filter_coefficients << "\n";
|
||||
out_file << "<num_filter_rows> " << std::dec << num_filter_rows << "\n";
|
||||
out_file << "<num_feature_maps> " << std::dec << num_feature_maps << "\n";
|
||||
out_file << "<num_feature_map_rows> " << std::dec << num_feature_map_rows << "\n";
|
||||
out_file << "<num_feature_map_columns> " << std::dec << num_feature_map_columns << "\n";
|
||||
out_file << "<conv_stride> " << std::dec << convStride << "\n";
|
||||
if ((compute_precision_ == kDnnInt) && (logging_precision == kDnnFloat)) {
|
||||
out_file << "<num_bytes_per_weight> " << std::dec << 4 << "\n";
|
||||
out_file << "<num_bytes_per_bias> " << std::dec << 4 << "\n";
|
||||
@@ -1374,26 +1360,6 @@ uint32_t GNAPluginNS::backend::AMIntelDNN::CountLayers() {
|
||||
return n;
|
||||
}
|
||||
|
||||
namespace {
|
||||
uint32_t outputFromConv(const uint32_t in, const uint32_t flt, const uint32_t stride) {
|
||||
// floor[(in - flt)/stride] + 1, GNA Spec 1.24
|
||||
if (flt > in || flt == 0 || stride == 0) {
|
||||
THROW_GNA_EXCEPTION << "Invalid (input, filter, stride) = (" << in << "," << flt << "," << stride << ")";
|
||||
}
|
||||
return (in - flt) / stride + 1;
|
||||
}
|
||||
|
||||
uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint32_t stride) {
|
||||
// ceil[(in - window)/stride] + 1, GNA Spec 1.24
|
||||
if (window > in || window == 0 || stride == 0) {
|
||||
THROW_GNA_EXCEPTION << "Invalid (input, window, stride) = (" << in << "," << window << "," << stride << ")";
|
||||
}
|
||||
if (window == in) return 1;
|
||||
|
||||
return (in - window - 1) / stride + 2;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
#if GNA_LIB_VER == 2
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel) {
|
||||
Gna2Operation * gnaOperation;
|
||||
@@ -1431,10 +1397,8 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
|
||||
#endif
|
||||
for (int i = 0; i < component.size(); i++) {
|
||||
// std::cout << "Component + " << i <<"=GNA_" << std::distance(ptr_nnet->pLayers, pLayer) << "\n";
|
||||
#if GNA_LIB_VER == 2
|
||||
auto& comp = component[i];
|
||||
#endif
|
||||
switch (component[i].operation) {
|
||||
switch (comp.operation) {
|
||||
case kDnnAffineOp:
|
||||
#if GNA_LIB_VER == 2
|
||||
HelperGna2OperationInitFullyConnectedAffine(gnaOperation, gnaUserAllocator, gnaUserFree,
|
||||
@@ -1598,7 +1562,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
|
||||
comp.op.conv1D.ptr_biases),
|
||||
nullptr,
|
||||
create_shape1D_parameter(
|
||||
comp.op.conv1D.num_feature_maps * comp.op.conv1D.num_feature_map_columns),
|
||||
comp.op.conv1D.convStride),
|
||||
nullptr,
|
||||
nullptr);
|
||||
|
||||
@@ -1624,11 +1588,11 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
|
||||
pConvolutionalLayer->nBytesBias = component[i].op.conv1D.num_bytes_per_bias;
|
||||
pConvolutionalLayer->nBytesFilterCoefficient = component[i].op.conv1D.num_bytes_per_weight;
|
||||
pConvolutionalLayer->nFilters = component[i].op.conv1D.num_filters;
|
||||
pConvolutionalLayer->nFilterRows = component[i].op.conv1D.num_filter_rows;
|
||||
pConvolutionalLayer->nFilterRows = comp.op.conv1D.num_filter_coefficients / comp.op.conv1D.convStride;
|
||||
pConvolutionalLayer->nFilterCoefficients = component[i].op.conv1D.num_filter_coefficients;
|
||||
pConvolutionalLayer->nFeatureMaps = component[i].op.conv1D.num_feature_maps;
|
||||
pConvolutionalLayer->nFeatureMapRows = component[i].op.conv1D.num_feature_map_rows;
|
||||
pConvolutionalLayer->nFeatureMapColumns = component[i].op.conv1D.num_feature_map_columns;
|
||||
pConvolutionalLayer->nFeatureMaps = 1;
|
||||
pConvolutionalLayer->nFeatureMapColumns = component[i].op.conv1D.convStride;
|
||||
pConvolutionalLayer->nFeatureMapRows = pLayer->nInputColumns / pConvolutionalLayer->nFeatureMapColumns;
|
||||
pConvolutionalLayer->poolType = INTEL_NO_POOLING; // will be overwritten
|
||||
pConvolutionalLayer->nPoolSize = 0; // will be overwritten
|
||||
pConvolutionalLayer->nPoolStride = 0; // will be overwritten
|
||||
@@ -1684,9 +1648,8 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
|
||||
Gna2Shape* poolStride{};
|
||||
|
||||
if (gnaOperation->Operands[InOpIdx]->Shape.NumberOfDimensions == 2) { // kDnnConvolutional1dOp
|
||||
// TODO: issue 50379 find out why looks like CNN1D pooling uses stride == window only
|
||||
poolWindow = create_shape1D_parameter(comp.op.maxpool.poolingWindowXY[0]);
|
||||
poolStride = create_shape1D_parameter(comp.op.maxpool.poolingWindowXY[0]);
|
||||
poolStride = create_shape1D_parameter(comp.op.maxpool.poolingStrideXY[0]);
|
||||
} else {
|
||||
poolWindow = create_shape2D_parameter(comp.op.maxpool.poolingWindowXY[1], comp.op.maxpool.poolingWindowXY[0]);
|
||||
poolStride = create_shape2D_parameter(comp.op.maxpool.poolingStrideXY[1], comp.op.maxpool.poolingStrideXY[0]);
|
||||
@@ -1750,17 +1713,15 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
|
||||
THROW_GNA_EXCEPTION << "Encountered activation component before pooling component at." << i;
|
||||
} else {
|
||||
pConvolutionalLayer->poolType = INTEL_MAX_POOLING;
|
||||
// TODO: issue 50379 find out why looks like CNN1D pooling uses stride == window only
|
||||
pConvolutionalLayer->nPoolSize = component[i].op.maxpool.poolingWindowXY[0];
|
||||
pConvolutionalLayer->nPoolStride = component[i].op.maxpool.poolingWindowXY[0];
|
||||
pConvolutionalLayer->nPoolStride = component[i].op.maxpool.poolingStrideXY[0];
|
||||
|
||||
// number of output columns correction - based on GNA-library expectations
|
||||
auto nFltSize = pConvolutionalLayer->nFilterCoefficients;
|
||||
auto fltStrideSz = pConvolutionalLayer->nFeatureMaps * pConvolutionalLayer->nFeatureMapColumns; // always move 1 "row"
|
||||
auto outFromConv = outputFromConv(pLayer->nInputColumns, nFltSize, fltStrideSz);
|
||||
// FLAT input matrix, pooled outputs per filter
|
||||
// TODO: Issue 50386 check why (outFromConv - 1) and not (outFromConv - nPoolSize)
|
||||
pLayer->nOutputColumns = pConvolutionalLayer->nFilters * ((outFromConv - 1) / pConvolutionalLayer->nPoolStride + 1);
|
||||
pLayer->nOutputColumns = pConvolutionalLayer->nFilters * outputFromPoolingLegacy(outFromConv, pConvolutionalLayer->nPoolStride);
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
|
||||
@@ -89,20 +89,15 @@ public:
|
||||
|
||||
template<class A, class B, class C, class D>
|
||||
static void InitConvolutional1DComponent(intel_dnn_component_t &comp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_rows_out,
|
||||
uint32_t num_columns_out,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
uint32_t num_bytes_per_weight,
|
||||
uint32_t num_bytes_per_bias,
|
||||
uint32_t num_filters,
|
||||
uint32_t num_filter_rows,
|
||||
uint32_t num_filter_coefficients,
|
||||
uint32_t num_feature_maps,
|
||||
uint32_t num_feature_map_rows,
|
||||
uint32_t num_feature_map_columns,
|
||||
uint32_t convStride,
|
||||
float weight_scale_factor,
|
||||
float output_scale_factor,
|
||||
A *&ptr_inputs,
|
||||
@@ -110,20 +105,15 @@ public:
|
||||
C *&ptr_filters,
|
||||
D *&ptr_biases) {
|
||||
InitConvolutional1DComponentPrivate(comp,
|
||||
num_rows_in,
|
||||
num_columns_in,
|
||||
num_rows_out,
|
||||
num_columns_out,
|
||||
num_bytes_per_input,
|
||||
num_bytes_per_output,
|
||||
num_bytes_per_weight,
|
||||
num_bytes_per_bias,
|
||||
num_filters,
|
||||
num_filter_rows,
|
||||
num_filter_coefficients,
|
||||
num_feature_maps,
|
||||
num_feature_map_rows,
|
||||
num_feature_map_columns,
|
||||
convStride,
|
||||
weight_scale_factor,
|
||||
output_scale_factor,
|
||||
(void *&) ptr_inputs,
|
||||
@@ -428,20 +418,15 @@ private:
|
||||
bool postInitMem);
|
||||
|
||||
static void InitConvolutional1DComponentPrivate(intel_dnn_component_t &comp,
|
||||
uint32_t num_rows_in,
|
||||
uint32_t num_columns_in,
|
||||
uint32_t num_rows_out,
|
||||
uint32_t num_columns_out,
|
||||
uint32_t num_bytes_per_input,
|
||||
uint32_t num_bytes_per_output,
|
||||
uint32_t num_bytes_per_weight,
|
||||
uint32_t num_bytes_per_bias,
|
||||
uint32_t num_filters,
|
||||
uint32_t num_filter_rows,
|
||||
uint32_t num_filter_coefficients,
|
||||
uint32_t num_feature_maps,
|
||||
uint32_t num_feature_map_rows,
|
||||
uint32_t num_feature_map_columns,
|
||||
uint32_t convStride,
|
||||
float weight_scale_factor,
|
||||
float output_scale_factor,
|
||||
void *&ptr_inputs,
|
||||
|
||||
@@ -145,15 +145,12 @@ typedef struct {
|
||||
uint32_t num_bytes_per_weight;
|
||||
uint32_t num_bytes_per_bias;
|
||||
uint32_t num_filters;
|
||||
uint32_t num_filter_rows;
|
||||
uint32_t num_filter_coefficients;
|
||||
uint32_t num_feature_maps;
|
||||
uint32_t num_feature_map_rows;
|
||||
uint32_t num_feature_map_columns;
|
||||
uint32_t convStride;
|
||||
float weight_scale_factor;
|
||||
void *ptr_filters; // filters stored one after the other
|
||||
void *ptr_biases;
|
||||
} intel_convolutionalD_t;
|
||||
} intel_convolutional1D_t;
|
||||
|
||||
typedef struct {
|
||||
std::array<uint32_t, 2> convStride;
|
||||
@@ -273,7 +270,7 @@ struct intel_dnn_component_t {
|
||||
intel_dnn_orientation_t orientation_out;
|
||||
union operation_struct_t {
|
||||
intel_affine_t affine;
|
||||
intel_convolutionalD_t conv1D;
|
||||
intel_convolutional1D_t conv1D;
|
||||
intel_convolutional2D_t conv2D;
|
||||
intel_maxpool_t maxpool;
|
||||
intel_piecewiselinear_t pwl;
|
||||
|
||||
@@ -17,6 +17,7 @@ constexpr uint32_t bufferMaxSize = 65528;
|
||||
constexpr uint32_t convMinFiltersNum = 4;
|
||||
constexpr uint32_t convMaxFiltersNum = 65532;
|
||||
constexpr uint32_t convFiltersNumDivider = 4;
|
||||
constexpr uint32_t convFilterSizeDivider = 8;
|
||||
constexpr uint32_t convFilterMaxSize = 768;
|
||||
constexpr uint32_t convEachKernelByteAlignment = 16;
|
||||
constexpr uint32_t noOfInputsDivisor = 8;
|
||||
|
||||
@@ -1134,7 +1134,7 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
|
||||
|
||||
double weights_reducer = 1.0;
|
||||
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
|
||||
if (conv) {
|
||||
if (conv && !LayerInfo(conv).isConvolutionFilter()) {
|
||||
const auto inDepth = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
|
||||
weights_reducer = GNAConvolutionLayer::getWeightsReducer(*conv);
|
||||
weights_reducer *= MAX_VAL_2B_FEAT * scaleRange * inDepth / std::numeric_limits<int32_t>::max();
|
||||
|
||||
@@ -390,6 +390,7 @@ void DumpGna2Model(const Gna2Model& gnaModel, const std::string dumpFolderNameGN
|
||||
dumpFile << "\tOperand " << j << " (" << GetOperandName(operation.Type, j) << ")"
|
||||
<< " type: " << GetOperandType(operand.Type) <<
|
||||
" shape: " << GetSimpleString(operand.Shape) <<
|
||||
" data: " << operand.Data <<
|
||||
" layout: ";
|
||||
|
||||
DumpCharArray(dumpFile, operand.Layout, GNA2_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS);
|
||||
|
||||
@@ -170,7 +170,7 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
|
||||
InferenceEngine::details::product(begin(dataOutput->getDims()),
|
||||
end(dataOutput->getDims())) * dataOutput->getPrecision().size();
|
||||
|
||||
if (LayerInfo(outFunctionalLayer.first).isAffineFilter()) {
|
||||
if (LayerInfo(outFunctionalLayer.first).isConvolutionFilter()) {
|
||||
size_t aligned64_offset = outFunctionalLayer.first->GetParamAsInt("offset");
|
||||
layerInfoItem.splitOutputLayers.emplace_back(
|
||||
outFunctionalLayer.first,
|
||||
@@ -359,36 +359,33 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
|
||||
}
|
||||
|
||||
// have to pad input to let the last kernel meet its corresponding input
|
||||
uint32_t num_inputs = in_width * in_channels;
|
||||
const auto num_inputs = in_width * in_channels;
|
||||
uint32_t num_input_padding = ALIGN(num_inputs, 8) - num_inputs;
|
||||
|
||||
// convert to 2D and set GNA input feature map size
|
||||
uint32_t num_feature_map_columns = in_channels * convolution._stride_x * convolution._stride_y;
|
||||
auto convStride = convolution._stride_x * convolution._stride_y;
|
||||
if (convolution._stride_y != 1) {
|
||||
num_feature_map_columns = in_channels * convolution._stride_x;
|
||||
convStride = convolution._stride_x;
|
||||
} else if (in_width == 1 && convolution._stride_x != 1) {
|
||||
num_feature_map_columns = in_channels * convolution._stride_y;
|
||||
convStride = convolution._stride_y;
|
||||
}
|
||||
uint32_t num_feature_map_rows = (in_channels * in_width) / num_feature_map_columns;
|
||||
const auto effectiveStride = in_channels * convStride;
|
||||
|
||||
uint32_t num_filters = convolution._out_depth;
|
||||
uint32_t num_filter_coefficients = single_conv_kernel_size + num_conv_kernel_padding;
|
||||
uint32_t num_filter_rows = num_filter_coefficients / num_feature_map_columns;
|
||||
uint32_t num_columns_in = num_inputs + num_input_padding;
|
||||
|
||||
uint32_t num_columns_out = (((num_inputs - num_filter_coefficients) / num_feature_map_columns) + 1) * convolution._out_depth;
|
||||
uint32_t num_columns_out_unpadded = (((num_inputs - single_conv_kernel_size) / num_feature_map_columns) + 1) * convolution._out_depth;
|
||||
uint32_t num_columns_out = (((num_inputs - num_filter_coefficients) / effectiveStride) + 1) * convolution._out_depth;
|
||||
uint32_t num_columns_out_unpadded = (((num_inputs - single_conv_kernel_size) / effectiveStride) + 1) * convolution._out_depth;
|
||||
|
||||
uint32_t original_num_feature_map_rows = num_feature_map_rows;
|
||||
uint32_t original_input_padding = num_input_padding;
|
||||
uint32_t additional_padding = 0;
|
||||
|
||||
// if kernel padding to multiple of 8 will cause missed outputs, need to pad further
|
||||
while (num_columns_out < out_batch * out_channels * out_width) {
|
||||
num_input_padding = original_input_padding + additional_padding;
|
||||
num_feature_map_rows = original_num_feature_map_rows + (num_input_padding) / num_feature_map_columns;
|
||||
num_columns_in = num_inputs + num_input_padding;
|
||||
num_columns_out = (((num_inputs + num_input_padding - num_filter_coefficients) / num_feature_map_columns) + 1) * convolution._out_depth;
|
||||
num_columns_out = (((num_inputs + num_input_padding - num_filter_coefficients) / effectiveStride) + 1) * convolution._out_depth;
|
||||
dnn->new_num_conv_columns = num_columns_out;
|
||||
additional_padding += 8;
|
||||
}
|
||||
@@ -424,23 +421,17 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
|
||||
weight_scale_factor = quantized->_weights_quant.GetScale();
|
||||
output_scale_factor = quantized->_dst_quant.GetScale();
|
||||
}
|
||||
|
||||
auto& currentComponent = dnnComponents.addComponent(convolution.name, "convolution");
|
||||
dnn->InitConvolutional1DComponent(currentComponent,
|
||||
1,
|
||||
num_columns_in,
|
||||
1,
|
||||
num_columns_out,
|
||||
num_bytes_per_input,
|
||||
num_bytes_per_output,
|
||||
num_bytes_per_weight,
|
||||
num_bytes_per_bias,
|
||||
num_filters,
|
||||
num_filter_rows,
|
||||
num_filter_coefficients,
|
||||
1,
|
||||
num_feature_map_rows,
|
||||
num_feature_map_columns,
|
||||
effectiveStride,
|
||||
weight_scale_factor,
|
||||
output_scale_factor,
|
||||
ptr_inputs,
|
||||
@@ -469,8 +460,8 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
|
||||
if (inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
|
||||
// Kaldi features are opposite orientation
|
||||
dnn->do_rotate_input = true;
|
||||
dnn->num_rotate_rows = num_feature_map_columns;
|
||||
dnn->num_rotate_columns = original_num_feature_map_rows;
|
||||
dnn->num_rotate_rows = effectiveStride;
|
||||
dnn->num_rotate_columns = num_inputs / effectiveStride;
|
||||
} else {
|
||||
dnn->do_rotate_input = false;
|
||||
}
|
||||
@@ -574,20 +565,10 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
|
||||
const auto outputs = convolution.outData.front();
|
||||
|
||||
// have to pad input to let the last kernel meet its corresponding input
|
||||
uint32_t num_inputs = in_width * in_height * in_channels;
|
||||
const auto num_inputs = in_width * in_height * in_channels;
|
||||
uint32_t num_input_padding = ALIGN(num_inputs, 8) - num_inputs;
|
||||
|
||||
// convert to 2D and set GNA input feature map size
|
||||
uint32_t num_feature_map_columns = in_channels * convolution._stride_x * convolution._stride_y;
|
||||
if (in_height == 1 && convolution._stride_y != 1) {
|
||||
num_feature_map_columns = in_channels * convolution._stride_x;
|
||||
} else if (in_width == 1 && convolution._stride_x != 1) {
|
||||
num_feature_map_columns = in_channels * convolution._stride_y;
|
||||
}
|
||||
uint32_t num_feature_map_rows = (in_channels * in_height * in_width) / num_feature_map_columns;
|
||||
|
||||
const uint32_t filter_n = convolution._out_depth;
|
||||
uint32_t original_num_feature_map_rows = num_feature_map_rows;
|
||||
|
||||
// if kernel padding to multiple of 8 will cause missed outputs, need to pad further
|
||||
if (num_input_padding == 0) {
|
||||
@@ -653,15 +634,17 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
|
||||
auto connectedInputLayer = connectInput(layer, ptr_inputs, num_data_bytes_in).input;
|
||||
|
||||
// TODO: convolution might not be the first layer in sorted order but connected via split, for example - don't know how Kaldi will handle that
|
||||
if (!dnn->do_rotate_input) {
|
||||
if (inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
|
||||
// Kaldi features are opposite orientation
|
||||
dnn->do_rotate_input = true;
|
||||
dnn->num_rotate_rows = num_feature_map_columns;
|
||||
dnn->num_rotate_columns = original_num_feature_map_rows;
|
||||
} else {
|
||||
dnn->do_rotate_input = false;
|
||||
if (!dnn->do_rotate_input && inputs->getLayout() != Layout::NHWC && LayerInfo(connectedInputLayer).isInput()) {
|
||||
// Kaldi features are opposite orientation
|
||||
dnn->do_rotate_input = true;
|
||||
dnn->num_rotate_rows = in_channels;
|
||||
if (in_height != 1) {
|
||||
dnn->num_rotate_rows *= convolution._stride_y;
|
||||
}
|
||||
if (in_width != 1) {
|
||||
dnn->num_rotate_rows *= convolution._stride_x;
|
||||
}
|
||||
dnn->num_rotate_columns = num_inputs / dnn->num_rotate_rows;
|
||||
}
|
||||
|
||||
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
||||
@@ -669,7 +652,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
|
||||
const auto kernelHW = convolution._kernel_y * convolution._kernel_x;
|
||||
|
||||
std::vector<uint8_t> transposedWeights;
|
||||
const auto singleKernelSize = in_channels* kernelHW* convolution.precision.size();
|
||||
const auto singleKernelSize = in_channels* kernelHW * convolution.precision.size();
|
||||
const auto kernelPad = Gna2RoundUp(singleKernelSize, 16) - singleKernelSize;
|
||||
for (uint32_t k = 0; k < convolution._out_depth; k++) {
|
||||
uint8_t* ptr_filt_current
|
||||
@@ -869,6 +852,7 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
swap(h_dim_in, w_dim_in);
|
||||
swap(h_dim_out, w_dim_out);
|
||||
swap(pooling._kernel[X_AXIS], pooling._kernel[Y_AXIS]);
|
||||
swap(pooling._stride[X_AXIS], pooling._stride[Y_AXIS]);
|
||||
}
|
||||
|
||||
void* ptr_inputs = nullptr;
|
||||
@@ -1748,8 +1732,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
|
||||
}
|
||||
}
|
||||
|
||||
void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
auto filterLayer = dynamic_cast<InferenceEngine::WeightableLayer*> (layer.get());
|
||||
void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
auto filterLayer = dynamic_cast<InferenceEngine::ConvolutionLayer*> (layer.get());
|
||||
|
||||
if (filterLayer == nullptr) {
|
||||
return;
|
||||
@@ -1772,62 +1756,57 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer)
|
||||
auto outputs = *layer->outData.begin();
|
||||
auto inputs = layer->insData.begin()->lock();
|
||||
|
||||
const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ?
|
||||
const auto noOfInputsDivisor = gnaFlags->input_low_precision ?
|
||||
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
|
||||
uint32_t num_columns_in = GetDataDimSize(inputs, 2);
|
||||
uint32_t num_rows_out = GetDataDimSize(outputs, 1);
|
||||
uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
|
||||
const uint32_t orginalInputSize = GetDataDimSize(inputs, 1);
|
||||
const uint32_t orginalOutputSize = GetDataDimSize(outputs, 1);
|
||||
if (orginalInputSize != orginalOutputSize) {
|
||||
THROW_GNA_LAYER_EXCEPTION(filterLayer) << "Number in inputs (" << orginalInputSize <<
|
||||
") should be equal to number of outputs (" << orginalOutputSize << ")!";
|
||||
}
|
||||
const auto numberOfFilters = filterLayer->_out_depth;
|
||||
const auto convolutionStride = numberOfFilters;
|
||||
const auto filterWidth = filterLayer->_kernel_x;
|
||||
const auto minOutputsPerFilter = ALIGN(orginalOutputSize, numberOfFilters) / numberOfFilters;
|
||||
const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth;
|
||||
const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, noOfInputsDivisor);
|
||||
|
||||
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
|
||||
auto biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
|
||||
auto numOutputs = GNAConvolutionLayer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride);
|
||||
numOutputs *= numberOfFilters;
|
||||
const auto& biasPrecision = filterLayer->_biases ? filterLayer->_biases->getTensorDesc().getPrecision() : outputs->getPrecision();
|
||||
auto& currentComponent = dnnComponents.addComponent(layer->name, "affine");
|
||||
|
||||
dnn->InitAffineComponent(currentComponent,
|
||||
num_rows_in + num_padding,
|
||||
num_columns_in,
|
||||
num_rows_out,
|
||||
layer->params["num_rows_for_pwl"] = std::to_string(numOutputs);
|
||||
dnn->InitConvolutional1DComponent(currentComponent,
|
||||
numInputsFullyPadedAndAligned,
|
||||
numOutputs,
|
||||
inputs->getPrecision().size(),
|
||||
outputs->getPrecision().size(),
|
||||
filterLayer->_weights->getTensorDesc().getPrecision().size(),
|
||||
biasPrecision.size(),
|
||||
numberOfFilters,
|
||||
filterWidth,
|
||||
convolutionStride,
|
||||
quantized == nullptr ? 1 : quantized->_weights_quant.GetScale(),
|
||||
quantized == nullptr ? 1 : quantized->_dst_quant.GetScale(),
|
||||
ptr_inputs,
|
||||
ptr_outputs,
|
||||
ptr_weights,
|
||||
ptr_biases,
|
||||
false);
|
||||
ptr_biases);
|
||||
|
||||
size_t num_data_bytes_out =
|
||||
InferenceEngine::details::product(
|
||||
begin(outputs->getDims()), end(outputs->getDims())) * 4;
|
||||
|
||||
size_t num_data_bytes_in = num_columns_in *
|
||||
ALIGN(num_rows_in, noOfInputsDivisor) * inputs->getPrecision().size();
|
||||
size_t num_data_bytes_in = numInputsFullyPadedAndAligned * inputs->getPrecision().size();
|
||||
|
||||
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
|
||||
connectOutput(layer, ptr_outputs, num_data_bytes_out);
|
||||
|
||||
if (num_padding == 0) {
|
||||
gnamem->readonly().push_ptr(ptr_weights,
|
||||
filterLayer->_weights->cbuffer().as<const void*>(),
|
||||
filterLayer->_weights->byteSize(),
|
||||
64);
|
||||
} else {
|
||||
auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
|
||||
auto paddedWeights = elementsIn * num_rows_out;
|
||||
auto paddedWeightsSize = paddedWeights * filterLayer->precision.size();
|
||||
|
||||
gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void* data, size_t size) {
|
||||
size_t offset = 0;
|
||||
for (uint32_t i = 0; i < num_rows_out && size >= offset; i++) {
|
||||
ie_memcpy(reinterpret_cast<uint8_t*>(data) + offset, size - offset,
|
||||
filterLayer->_weights->cbuffer().as<const uint8_t*>() + num_rows_in * i * filterLayer->precision.size(),
|
||||
num_rows_in* filterLayer->precision.size());
|
||||
offset += (num_rows_in + num_padding) * filterLayer->precision.size();
|
||||
}
|
||||
}, 64);
|
||||
}
|
||||
gnamem->readonly().push_ptr(ptr_weights,
|
||||
filterLayer->_weights->cbuffer().as<const void*>(),
|
||||
filterLayer->_weights->byteSize(),
|
||||
64);
|
||||
|
||||
if (filterLayer->_biases) {
|
||||
gnamem->readonly().push_ptr(ptr_biases,
|
||||
@@ -1835,7 +1814,7 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer)
|
||||
filterLayer->_biases->byteSize(),
|
||||
64);
|
||||
} else {
|
||||
gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
|
||||
gnamem->readonly().push_value(ptr_biases, 0.0f, numberOfFilters, 64);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1898,13 +1877,18 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
}
|
||||
|
||||
// TODO: solve this by layer level transformations
|
||||
auto concatAlignFilter = CNNNetPrevLayer(layer, 0);
|
||||
if (LayerInfo(concatAlignFilter).isConcatAlignFilter()) {
|
||||
auto rowsCopiedOffset = concatAlignFilter->GetParamAsInt("rows_copied_offset");
|
||||
auto prevLayer = CNNNetPrevLayer(layer, 0);
|
||||
if (LayerInfo(prevLayer).isConcatAlignFilter()) {
|
||||
auto rowsCopiedOffset = prevLayer->GetParamAsInt("rows_copied_offset");
|
||||
if (rowsCopiedOffset != 0) {
|
||||
num_rows -= rowsCopiedOffset / outputs->getPrecision().size();
|
||||
layer->params["output_offset"] = std::to_string(rowsCopiedOffset);
|
||||
}
|
||||
} else if (LayerInfo(prevLayer).isConvolutionFilter()) {
|
||||
const auto num_rows_for_pwl = prevLayer->GetParamAsInt("num_rows_for_pwl", 0);
|
||||
if (num_rows_for_pwl != 0) {
|
||||
num_rows = num_rows_for_pwl;
|
||||
}
|
||||
}
|
||||
size_t num_data_bytes_out = num_columns * num_rows * outputs->getPrecision().size();
|
||||
size_t num_data_bytes_in = num_columns * num_rows * inputs->getPrecision().size();
|
||||
@@ -2155,7 +2139,7 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) {
|
||||
{{"FullyConnected", "InnerProduct"}, CREATE(AffinePrimitive)},
|
||||
{{"Gemm"}, CREATE(GemmPrimitive)},
|
||||
{{"ScaleShift"}, CREATE(DiagonalPrimitive)},
|
||||
{{"AffineFilter"}, CREATE(AffineFilterPrimitive)},
|
||||
{{"ConvolutionFilter"}, CREATE(ConvolutionFilterPrimitive)},
|
||||
{{"ConcatAlignFilter"}, CREATE(ConcatAlignFilterPrimitive)},
|
||||
{{"Const"}, CREATE(ConstPrimitive)},
|
||||
{{"Eltwise"}, CREATE(EltwisePrimitive)}, // same as diagonal while weights are not taken from network, rather than from another output
|
||||
|
||||
@@ -111,7 +111,7 @@ public:
|
||||
void CreateLayerPrimitive(InferenceEngine::CNNLayerPtr);
|
||||
|
||||
void AffinePrimitive(InferenceEngine::CNNLayerPtr, bool isDiag = false);
|
||||
void AffineFilterPrimitive(InferenceEngine::CNNLayerPtr);
|
||||
void ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr);
|
||||
void ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr);
|
||||
void DiagonalPrimitive(InferenceEngine::CNNLayerPtr);
|
||||
void ConstPrimitive(InferenceEngine::CNNLayerPtr);
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "gna_convolution_layer.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <legacy/ie_layers.h>
|
||||
#include "gna_graph_tools.hpp"
|
||||
#include "gna_plugin_log.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
namespace GNAConvolutionLayer {
|
||||
// Returns true only when all of the following hold:
//   - the input spans more than one element in both height and width,
//   - the kernel width equals the full input width,
//   - the horizontal stride is exactly 1.
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
    if (inHeight <= 1 || inWidth <= 1) {
        return false;
    }
    const bool kernelCoversWholeWidth = (kernelWidth == inWidth);
    return kernelCoversWholeWidth && strideWidth == 1;
}
|
||||
|
||||
// A convolution is classified as 2D when either
//   - the kernel is 2D (kernel height and width both exceed 1), or
//   - the input is 3D (height, width and depth all exceed 1).
bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
              const uint32_t kernelHeight, const uint32_t kernelWidth) {
    const bool kernelIs2D = kernelHeight > 1 && kernelWidth > 1;
    if (kernelIs2D) {
        return true;
    }
    const bool inputIs3D = inHeight > 1 && inWidth > 1 && inDepth > 1;
    return inputIs3D;
}
|
||||
|
||||
// Selects the weights reducer for a convolution layer.
//
// The reducers were determined empirically for 2D convolutions:
//   kernelSize >= 9      -> 1.3
//   kernelSize in {7, 8} -> 1.2
// where kernelSize is _kernel_x * _kernel_y. Every other case yields 1.0:
// a layer that is not a 2D convolution, one that is mappable from 2D to 1D,
// or one whose kernel size is below 7.
double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
    using KRT = std::pair<uint32_t, double>;
    // (minimum kernel size, reducer), ordered by descending minimum kernel size
    // so that the first matching entry is the one that applies.
    const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };

    const auto inputData = conv.insData.front().lock();
    const auto inDepth = GetDataDimSize(inputData, InferenceEngine::DataDimName::C);
    const auto inHeight = GetDataDimSize(inputData, InferenceEngine::DataDimName::H);
    const auto inWidth = GetDataDimSize(inputData, InferenceEngine::DataDimName::W);

    const bool is2D = isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x);
    if (!is2D || isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) {
        return 1.0;
    }

    const KRT::first_type kernelSize = conv._kernel_x * conv._kernel_y;
    const auto match = std::find_if(reducers.begin(), reducers.end(),
                                    [kernelSize](const KRT& entry) { return entry.first <= kernelSize; });
    return match == reducers.end() ? 1.0 : match->second;
}
|
||||
|
||||
uint32_t outputFromConv(const uint32_t in, const uint32_t flt, const uint32_t stride) {
|
||||
// floor[(in - flt)/stride] + 1, GNA Spec 1.24
|
||||
if (flt > in || flt == 0 || stride == 0) {
|
||||
THROW_GNA_EXCEPTION << "Invalid (input, filter, stride) = (" << in << "," << flt << "," << stride << ")";
|
||||
}
|
||||
return (in - flt) / stride + 1;
|
||||
}
|
||||
|
||||
uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint32_t stride) {
|
||||
// ceil[(in - window)/stride] + 1, GNA Spec 1.24
|
||||
if (window > in || window == 0 || stride == 0) {
|
||||
THROW_GNA_EXCEPTION << "Invalid (input, window, stride) = (" << in << "," << window << "," << stride << ")";
|
||||
}
|
||||
if (window == in) return 1;
|
||||
|
||||
return (in - window - 1) / stride + 2;
|
||||
}
|
||||
|
||||
uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride) {
|
||||
// floor[(in - 1)/stride] + 1, GNA 1.0/2.0 HW Spec
|
||||
// See issue 50386 for details
|
||||
if (in == 0 || stride == 0) {
|
||||
THROW_GNA_EXCEPTION << "Invalid (input, stride) = (" << in << "," << stride << ")";
|
||||
}
|
||||
return (in - 1) / stride + 1;
|
||||
}
|
||||
|
||||
} // namespace GNAConvolutionLayer
|
||||
} // namespace GNAPluginNS
|
||||
@@ -4,46 +4,25 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
|
||||
#include <legacy/ie_layers.h>
|
||||
#include "../gna_graph_tools.hpp"
|
||||
|
||||
namespace GNAPluginNS {
|
||||
struct GNAConvolutionLayer {
|
||||
static bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth) {
|
||||
return inHeight > 1 && inWidth > 1 && inWidth == kernelWidth && strideWidth == 1;
|
||||
}
|
||||
namespace GNAConvolutionLayer {
|
||||
bool isMappableFrom2DTo1D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t kernelWidth, const uint32_t strideWidth);
|
||||
|
||||
// 3D input or 2D kernel
|
||||
static bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
|
||||
const uint32_t kernelHeight, const uint32_t kernelWidth) {
|
||||
return (kernelHeight > 1 && kernelWidth > 1) || (inHeight > 1 && inWidth > 1 && inDepth > 1);
|
||||
}
|
||||
// 3D input or 2D kernel
|
||||
bool isConv2D(const uint32_t inHeight, const uint32_t inWidth, const uint32_t inDepth,
|
||||
const uint32_t kernelHeight, const uint32_t kernelWidth);
|
||||
|
||||
static double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv) {
|
||||
using KRT = std::pair<uint32_t, double>;
|
||||
// Empirically determined weights reducers for 2D Convolution
|
||||
// i.e.:
|
||||
// for kernelSize >= 9 -> 1.3
|
||||
// for kernelSize in {7, 8} -> 1.2
|
||||
const std::vector< KRT > reducers{ {9, 1.3}, {7, 1.2} };
|
||||
auto reducer = 1.0;
|
||||
const auto inDepth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::C);
|
||||
const auto inHeight = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::H);
|
||||
const auto inWidth = GetDataDimSize(conv.insData.front().lock(), InferenceEngine::DataDimName::W);
|
||||
if (isConv2D(inHeight, inWidth, inDepth, conv._kernel_y, conv._kernel_x) &&
|
||||
!isMappableFrom2DTo1D(inHeight, inWidth, conv._kernel_x, conv._stride_x)) {
|
||||
const auto kernelSize = conv._kernel_x * conv._kernel_y;
|
||||
auto r = std::lower_bound(reducers.begin(), reducers.end(), kernelSize,
|
||||
[](const KRT& l, const KRT::first_type& r) {return l.first > r; });
|
||||
if (r != reducers.end())
|
||||
reducer = r->second;
|
||||
}
|
||||
return reducer;
|
||||
}
|
||||
};
|
||||
} // namespace GNAPluginNS
|
||||
double getWeightsReducer(InferenceEngine::ConvolutionLayer& conv);
|
||||
|
||||
uint32_t outputFromConv(const uint32_t in, const uint32_t flt, const uint32_t stride);
|
||||
|
||||
uint32_t outputFromPooling(const uint32_t in, const uint32_t window, const uint32_t stride);
|
||||
|
||||
uint32_t outputFromPoolingLegacy(const uint32_t in, const uint32_t stride);
|
||||
|
||||
} // namespace GNAConvolutionLayer
|
||||
} // namespace GNAPluginNS
|
||||
|
||||
@@ -70,6 +70,7 @@ class LayerInfo {
|
||||
[this]() { return isFullyConnected(); },
|
||||
[this]() { return isAffineFilter(); },
|
||||
[this]() { return isConcatAlignFilter(); },
|
||||
[this]() { return isConvolutionFilter(); },
|
||||
[this]() { return isEltwise(); },
|
||||
[this]() { return isScaleShift(); },
|
||||
[this]() { return isConvolution(); },
|
||||
@@ -131,6 +132,9 @@ class LayerInfo {
|
||||
bool isAffineFilter() const noexcept {
|
||||
return isOfType("AffineFilter");
|
||||
}
|
||||
bool isConvolutionFilter() const noexcept {
|
||||
return isOfType("ConvolutionFilter");
|
||||
}
|
||||
bool isRelu() const noexcept {
|
||||
return isOfType("relu");
|
||||
}
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
#include "gna_graph_patterns.hpp"
|
||||
#include "gna_data_types.hpp"
|
||||
#include "gna_tensor_tools.hpp"
|
||||
#include "backend/gna_limitations.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace InferenceEngine::details;
|
||||
@@ -1368,35 +1369,49 @@ void InsertSplitAligningFilterPass::run() {
|
||||
gnalog() << std::endl;
|
||||
#endif
|
||||
auto filterLayer =
|
||||
std::make_shared<WeightableLayer>(LayerParams({filterName, "AffineFilter", Precision::FP32}));
|
||||
std::make_shared<ConvolutionLayer>(LayerParams({filterName, "ConvolutionFilter", Precision::FP32}));
|
||||
|
||||
auto inputData = splitOutput;
|
||||
|
||||
size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(currentOffset) - 64));
|
||||
size_t
|
||||
newOutputSize = (currentOffset + ALIGN(outputSize, 8) * bytesPerSplitElement - aligned64_offset)
|
||||
/ bytesPerSplitElement;
|
||||
|
||||
IE_ASSERT(filterLayer != nullptr);
|
||||
|
||||
// encodes offset to beginning of split layer input
|
||||
filterLayer->params["offset"] = std::to_string(aligned64_offset / bytesPerSplitElement);
|
||||
|
||||
auto dims = splitOutput->getTensorDesc().getDims();
|
||||
if (dims.size() > 3) {
|
||||
THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
|
||||
}
|
||||
|
||||
auto num_rows_out = dims[1] * (dims.size() != 2 ? dims[2] : 1);
|
||||
std::vector<float> filterWeights(newOutputSize * num_rows_out, 0.f);
|
||||
const auto offsetOfUnalignment = (currentOffset - aligned64_offset) / bytesPerSplitElement;
|
||||
// TODO: consider using a different number of filters to decrease the number of trailing zeros (additionalPaddingOfFilter)
|
||||
const auto numberOfFilters = GNALimitations::convMinFiltersNum;
|
||||
const auto filterSize = ALIGN(offsetOfUnalignment + numberOfFilters, GNALimitations::convFilterSizeDivider);
|
||||
|
||||
auto offset = (currentOffset - aligned64_offset) / bytesPerSplitElement;
|
||||
|
||||
for (int i = 0; i != outputSize; i++) {
|
||||
filterWeights[offset] = 1.0f;
|
||||
offset += newOutputSize + 1;
|
||||
// filterWeights: numberOfFilters X (offsetOfUnalignment + additionalPaddingOfFilter + numberOfFilters)
|
||||
// offsetOfUnalignment - the leading zeros in the filter
|
||||
// |
|
||||
// | additionalPaddingOfFilter = filterSize - offsetOfUnalignment - numberOfFilters
|
||||
// ____|___ ___|___
|
||||
// | | | |
|
||||
// 0 0 ... 0 1 0 0 0 0 ... 0
|
||||
// 0 0 ... 0 0 1 0 0 0 ... 0
|
||||
// 0 0 ... 0 0 0 1 0 0 ... 0
|
||||
// 0 0 ... 0 0 0 0 1 0 ... 0
|
||||
std::vector<float> filterWeights(filterSize * 4, 0.f);
|
||||
for (auto f = 0u; f < numberOfFilters; f++) {
|
||||
filterWeights[f * filterSize + f + offsetOfUnalignment] = 1;
|
||||
}
|
||||
|
||||
filterLayer->_out_depth = numberOfFilters;
|
||||
filterLayer->_stride_x = numberOfFilters;
|
||||
filterLayer->_stride_y = 1;
|
||||
filterLayer->_kernel_x = filterSize;
|
||||
filterLayer->_kernel_y = 1;
|
||||
filterLayer->_padding_x = 0;
|
||||
filterLayer->_padding_y = 0;
|
||||
|
||||
filterLayer->_weights = make_shared_blob<float>(TensorDesc(
|
||||
inputData->getTensorDesc().getPrecision(),
|
||||
SizeVector({filterWeights.size()}),
|
||||
@@ -1404,6 +1419,15 @@ void InsertSplitAligningFilterPass::run() {
|
||||
filterLayer->_weights->allocate();
|
||||
CopyVectorToBlob(filterLayer->_weights, filterWeights);
|
||||
|
||||
std::vector<float> biasWeights(numberOfFilters, 0.f);
|
||||
|
||||
filterLayer->_biases = make_shared_blob<float>(TensorDesc(
|
||||
inputData->getTensorDesc().getPrecision(),
|
||||
SizeVector({ biasWeights.size() }),
|
||||
Layout::C));
|
||||
filterLayer->_biases->allocate();
|
||||
CopyVectorToBlob(filterLayer->_biases, biasWeights);
|
||||
|
||||
auto outData = std::make_shared<Data>(filterName,
|
||||
TensorDesc(splitOutput->getTensorDesc().getPrecision(),
|
||||
splitOutput->getTensorDesc().getDims(),
|
||||
|
||||
@@ -12,36 +12,38 @@
|
||||
#include "backend/dnn_types.h"
|
||||
#include "backend/gna_limitations.hpp"
|
||||
#include "gna_lib_ver_selector.hpp"
|
||||
#include "layers/gna_convolution_layer.hpp"
|
||||
|
||||
using namespace GNAPluginNS::GNAConvolutionLayer;
|
||||
|
||||
void CNNFilter32(intel_dnn_component_t *component) {
|
||||
float *ptr_filters = reinterpret_cast<float *>(component->op.conv1D.ptr_filters);
|
||||
float *ptr_biases = reinterpret_cast<float *>(component->op.conv1D.ptr_biases);
|
||||
float *ptr_inputs = reinterpret_cast<float *>(component->ptr_inputs);
|
||||
float *ptr_outputs = reinterpret_cast<float *>(component->ptr_outputs);
|
||||
uint32_t num_filter_outputs = component->op.conv1D.num_feature_map_rows - component->op.conv1D.num_filter_rows + 1;
|
||||
uint32_t
|
||||
num_inputs_band_stride = component->op.conv1D.num_feature_maps * component->op.conv1D.num_feature_map_columns;
|
||||
uint32_t num_filter_coefficients = component->op.conv1D.num_filter_coefficients;
|
||||
auto filters = reinterpret_cast<float *>(component->op.conv1D.ptr_filters);
|
||||
auto biases = reinterpret_cast<float *>(component->op.conv1D.ptr_biases);
|
||||
auto input = reinterpret_cast<float *>(component->ptr_inputs);
|
||||
auto output = reinterpret_cast<float *>(component->ptr_outputs);
|
||||
|
||||
const auto convolutionStride = component->op.conv1D.convStride;
|
||||
const auto filterSize = component->op.conv1D.num_filter_coefficients;
|
||||
const auto numberOfInputs = component->num_columns_in;
|
||||
const auto numberOfOutputsPerFilter = outputFromConv(numberOfInputs, filterSize, convolutionStride);
|
||||
const auto numberOfFilters = component->op.conv1D.num_filters;
|
||||
|
||||
std::string layer_name;
|
||||
layer_name = " In layer '" + std::string(component->original_layer_name) + "'";
|
||||
if (component->num_rows_in != 1 || component->num_rows_out != 1) {
|
||||
THROW_GNA_EXCEPTION << "Bad number of rows in CNNFilter32!" << layer_name;
|
||||
}
|
||||
if (component->num_columns_out < num_filter_outputs * component->op.conv1D.num_filters) {
|
||||
if (component->num_columns_out < numberOfOutputsPerFilter * numberOfFilters) {
|
||||
THROW_GNA_EXCEPTION << "Bad num_columns_out in CNNFilter32!" << layer_name;
|
||||
}
|
||||
|
||||
for (uint32_t j = 0; j < num_filter_outputs; j++) {
|
||||
float *ptr_in = ptr_inputs + j * num_inputs_band_stride;
|
||||
for (uint32_t i = 0; i < component->op.conv1D.num_filters; i++) {
|
||||
float *ptr_coef = ptr_filters + i * num_filter_coefficients;
|
||||
float sum = ptr_biases[i];
|
||||
for (uint32_t k = 0; k < num_filter_coefficients; k++) {
|
||||
sum += ptr_in[k] * ptr_coef[k];
|
||||
for (uint32_t j = 0; j < numberOfOutputsPerFilter; j++, input += convolutionStride, output += numberOfFilters) {
|
||||
auto filter = filters;
|
||||
for (uint32_t i = 0; i < numberOfFilters; i++, filter += filterSize) {
|
||||
output[i] = biases[i];
|
||||
for (uint32_t k = 0; k < filterSize; k++) {
|
||||
output[i] += input[k] * filter[k];
|
||||
}
|
||||
ptr_outputs[j * component->op.conv1D.num_filters + i] = sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -49,9 +51,8 @@ void CNNFilter32(intel_dnn_component_t *component) {
|
||||
void CNNMaxPoolLegacy(intel_dnn_component_t *component, intel_dnn_number_type_t number_type, const bool sumPoolingOverRide) {
|
||||
const uint32_t num_inputs = component->op.maxpool.inCHW[0] * component->op.maxpool.inCHW[1] * component->op.maxpool.inCHW[2];
|
||||
const uint32_t in_c = component->op.maxpool.inCHW[0];
|
||||
// TODO: issue 50379 find out why looks like CNN1D pooling uses stride == window only
|
||||
const uint32_t num_pool_size = component->op.maxpool.poolingWindowXY[0];
|
||||
const uint32_t num_pool_step = component->op.maxpool.poolingWindowXY[0];
|
||||
const uint32_t num_pool_step = component->op.maxpool.poolingStrideXY[0];
|
||||
const uint32_t num_rows_in = num_inputs / in_c;
|
||||
|
||||
if (number_type == kDnnInt) {
|
||||
@@ -114,7 +115,7 @@ void CNNMaxPoolLegacy(intel_dnn_component_t *component, intel_dnn_number_type_t
|
||||
}
|
||||
} else {
|
||||
for (uint32_t j = 0; j < num_rows_in; j += num_pool_step) {
|
||||
float max = -1e20f;
|
||||
float max = std::numeric_limits<float>::lowest();
|
||||
uint32_t num_end = (j + num_pool_size > num_rows_in) ? num_rows_in : j + num_pool_size;
|
||||
for (uint32_t k = j; k < num_end; k++) {
|
||||
if (ptr_inputs[k * in_c + i] > max) max = ptr_inputs[k * in_c + i];
|
||||
|
||||
@@ -33,7 +33,6 @@ TEST_P(GnaConvolutionReluSequenceTest, CompareWithRefs) {
|
||||
Run();
|
||||
}
|
||||
|
||||
|
||||
const std::vector<InferenceEngine::Precision> netPrecisions = {
|
||||
InferenceEngine::Precision::FP32,
|
||||
InferenceEngine::Precision::FP16
|
||||
@@ -200,4 +199,149 @@ INSTANTIATE_TEST_CASE_P(smoke_ConvolutionReluSequenceTest, GnaConvolutionReluSeq
|
||||
::testing::ValuesIn(configs)),
|
||||
GnaConvolutionReluSequenceTest::getTestCaseName);
|
||||
|
||||
const InferenceEngine::SizeVector inputShape1Even = {
|
||||
{1, 1, 48, 1},
|
||||
};
|
||||
|
||||
const InferenceEngine::SizeVector inputShape1DOneAbove = {
|
||||
{1, 1, 41, 1},
|
||||
};
|
||||
|
||||
const InferenceEngine::SizeVector inputShape1DOneBelow = {
|
||||
{1, 1, 47, 1},
|
||||
};
|
||||
|
||||
const InferenceEngine::SizeVector inputShape1DMultichannel4 = {
|
||||
{1, 4, 49, 1},
|
||||
};
|
||||
|
||||
const InferenceEngine::SizeVector inputShape1DMultichannel5 = {
|
||||
{1, 5, 49, 1},
|
||||
};
|
||||
|
||||
const InferenceEngine::SizeVector inputShape1DMultichannel6 = {
|
||||
{1, 6, 49, 1},
|
||||
};
|
||||
|
||||
const InferenceEngine::SizeVector inputShape1DMultichannel7 = {
|
||||
{1, 7, 49, 1},
|
||||
};
|
||||
|
||||
const InferenceEngine::SizeVector inputShape1DMultichannel8 = {
|
||||
{1, 8, 49, 1},
|
||||
};
|
||||
|
||||
const InferenceEngine::SizeVector inputShape1DMultichannel9 = {
|
||||
{1, 9, 49, 1},
|
||||
};
|
||||
|
||||
const std::vector<convReluSpecificParams> poolingStrideBelowWindow = {
|
||||
{
|
||||
{3, 1}, // Kernel size
|
||||
{2, 1}, // Stride
|
||||
{0, 0}, // Pad begin
|
||||
{0, 0}, // Pad end
|
||||
4, // Num out channels
|
||||
{4, 1}, //Pooling window
|
||||
{2, 1} //Pooling stride
|
||||
},
|
||||
};
|
||||
|
||||
const std::vector<convReluSpecificParams> poolingStrideAboveWindow = {
|
||||
{
|
||||
{3, 1}, // Kernel size
|
||||
{2, 1}, // Stride
|
||||
{0, 0}, // Pad begin
|
||||
{0, 0}, // Pad end
|
||||
4, // Num out channels
|
||||
{2, 1}, //Pooling window
|
||||
{4, 1} //Pooling stride
|
||||
},
|
||||
};
|
||||
|
||||
const std::vector<convReluSpecificParamsAll> poolingStrideNotEqualWindowAll = {
|
||||
{
|
||||
inputShape1Even,
|
||||
poolingStrideBelowWindow
|
||||
},
|
||||
{
|
||||
inputShape1DOneAbove,
|
||||
poolingStrideBelowWindow
|
||||
},
|
||||
{
|
||||
inputShape1DOneBelow,
|
||||
poolingStrideBelowWindow
|
||||
},
|
||||
{
|
||||
inputShape1Even,
|
||||
poolingStrideAboveWindow
|
||||
},
|
||||
{
|
||||
inputShape1DOneAbove,
|
||||
poolingStrideAboveWindow
|
||||
},
|
||||
{
|
||||
inputShape1DOneBelow,
|
||||
poolingStrideAboveWindow
|
||||
},
|
||||
{
|
||||
inputShape1DMultichannel4,
|
||||
poolingStrideBelowWindow
|
||||
},
|
||||
{
|
||||
inputShape1DMultichannel4,
|
||||
poolingStrideAboveWindow
|
||||
},
|
||||
{
|
||||
inputShape1DMultichannel5,
|
||||
poolingStrideBelowWindow
|
||||
},
|
||||
{
|
||||
inputShape1DMultichannel5,
|
||||
poolingStrideAboveWindow
|
||||
},
|
||||
{
|
||||
inputShape1DMultichannel6,
|
||||
poolingStrideBelowWindow
|
||||
},
|
||||
{
|
||||
inputShape1DMultichannel6,
|
||||
poolingStrideAboveWindow
|
||||
},
|
||||
{
|
||||
inputShape1DMultichannel7,
|
||||
poolingStrideBelowWindow
|
||||
},
|
||||
{
|
||||
inputShape1DMultichannel7,
|
||||
poolingStrideAboveWindow
|
||||
},
|
||||
{
|
||||
inputShape1DMultichannel8,
|
||||
poolingStrideBelowWindow
|
||||
},
|
||||
{
|
||||
inputShape1DMultichannel8,
|
||||
poolingStrideAboveWindow
|
||||
},
|
||||
{
|
||||
inputShape1DMultichannel9,
|
||||
poolingStrideBelowWindow
|
||||
},
|
||||
{
|
||||
inputShape1DMultichannel9,
|
||||
poolingStrideAboveWindow
|
||||
}
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_ConvolutionPoolingStrideNotEqualWindowTest, ConvolutionReluSequenceTest,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(poolingStrideNotEqualWindowAll),
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
::testing::ValuesIn(configs)),
|
||||
|
||||
ConvolutionReluSequenceTest::getTestCaseName);
|
||||
} // namespace
|
||||
|
||||
Reference in New Issue
Block a user