[GNA] Enable convolution in height dimension for NHWC native models (#4864)
parent 8d2f58caad
commit df5e0d68aa
@@ -393,13 +393,12 @@ inline void quantizeWeightsBiasesConv(const QuantDesc & quantDesc,
     << "cannot copy weights for layer :"<< conv->name << " of size" << intWeights->byteSize();
 }
 
-auto getBiasSizeForLayer = [](InferenceEngine::WeightableLayer *wl) {
+auto getBiasSizeForLayer = [](InferenceEngine::WeightableLayer *wl) -> size_t {
 if (wl->_biases) {
 return wl->_biases->size();
 }
-// calculating biases len using outdata dims
-auto & dims = wl->outData.front()->getDims();
-return dims[1];
+// calculating biases len using outdata dims: biases number should be equal to output channels number
+return InferenceEngine::GetDataDimSize(wl->outData.front(), InferenceEngine::DataDimName::C);
 };
 
 using BiasesPrecision = typename QuantDesc::BiasesPrecision;
@@ -17,6 +17,7 @@
 #include "gna_plugin_log.hpp"
 #include "gna_slope_scale.h"
 #include "runtime/pwl.h"
+#include "gna_data_types.hpp"
 
 namespace GNAPluginNS {
 namespace frontend {
@@ -1080,9 +1081,8 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
 double weights_reducer = 1.0;
 auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
 if (conv) {
-auto dims = conv->insData.front().lock()->getDims();
-
-weights_reducer = MAX_VAL_2B_FEAT * scaleRange * dims[1] / std::numeric_limits<int32_t>::max();
+auto channels_num = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
+weights_reducer = MAX_VAL_2B_FEAT * scaleRange * channels_num / std::numeric_limits<int32_t>::max();
 weights_reducer = std::max(1.0, weights_reducer);
 }
 quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer);
@@ -17,9 +17,6 @@
 #include "memory/polymorph_allocator.hpp"
 #include "memory/gna_memory.hpp"
 
-#define FROM_IR_DIM(mem, idx)\
-((mem->getTensorDesc().getDims().size() > (idx) - 1) ? mem->getTensorDesc().getDims()[mem->getTensorDesc().getDims().size() - (idx)] : 1)
-
 struct TranspositionInfo {
 bool transpose;
 size_t num_transpose_rows;
@@ -243,17 +243,15 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
 const auto outputs = layer->outData.front();
 assertConvolutionLayoutProper(inputs);
 
-const auto in_order = getFromIRDimsOrderNCHW(inputs->getLayout());
-const auto in_batch = static_cast<uint32_t>(FROM_IR_DIM(inputs, in_order[0]));
-const auto in_channels = static_cast<uint32_t>(FROM_IR_DIM(inputs, in_order[1]));
-auto in_height = static_cast<uint32_t>(FROM_IR_DIM(inputs, in_order[2]));
-auto in_width = static_cast<uint32_t>(FROM_IR_DIM(inputs, in_order[3]));
+const auto in_batch = GetDataDimSize(inputs, InferenceEngine::DataDimName::N);
+const auto in_channels = GetDataDimSize(inputs, InferenceEngine::DataDimName::C);
+auto in_height = GetDataDimSize(inputs, InferenceEngine::DataDimName::H);
+auto in_width = GetDataDimSize(inputs, InferenceEngine::DataDimName::W);
 
-const auto out_order = getFromIRDimsOrderNCHW(outputs->getLayout());
-const auto out_batch = static_cast<uint32_t>(FROM_IR_DIM(outputs, out_order[0]));
-const auto out_channels = static_cast<uint32_t>(FROM_IR_DIM(outputs, out_order[1]));
-auto out_height = static_cast<uint32_t>(FROM_IR_DIM(outputs, out_order[2]));
-auto out_width = static_cast<uint32_t>(FROM_IR_DIM(outputs, out_order[3]));
+const auto out_batch = GetDataDimSize(outputs, InferenceEngine::DataDimName::N);
+const auto out_channels = GetDataDimSize(outputs, InferenceEngine::DataDimName::C);
+auto out_height = GetDataDimSize(outputs, InferenceEngine::DataDimName::H);
+auto out_width = GetDataDimSize(outputs, InferenceEngine::DataDimName::W);
 
 if (in_height > 1 && in_width == 1) {
 std::swap(in_height, in_width);
@@ -300,25 +298,25 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
 
 // TODO: refine following condition
 if (((in_channels > 1) && (in_height > 1) && (in_width > 1)) || // 3D input
-(convolution._kernel_x != 1 && convolution._kernel_y != 1 && convolution._kernel_y != in_channels) || // 2D kernel
-(inputs->getLayout() != Layout::NHWC && in_height != 1)) {
+(convolution._kernel_x != 1 && convolution._kernel_y != 1) || // 2D kernel
+in_height != 1) {
 // TensorFlow default layout is NHWC
 // OpenVino Default layout is NCHW
 // GNA Convolution input is NHCW
 // When layer layout is in NHWC it means that is was created by PassManager
 #if GNA_LIB_VER == 2
 return finalizeConvolution2DPrimitive(layer, in_batch, in_channels, in_height, in_width,
-out_batch, out_channels, out_height, out_width);
+out_batch, out_channels, out_height, out_width);
 #endif
 THROW_GNA_LAYER_EXCEPTION(layer) << "Convolution 2D is not supported on GNA 1.0 library";
 }
-finalizeConvolution1DPrimitive(layer, in_batch, in_channels, in_height, in_width,
-out_batch, out_channels, out_height, out_width);
+finalizeConvolution1DPrimitive(layer, in_batch, in_channels, in_width,
+out_batch, out_channels, out_width);
 }
 
 void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerPtr layer,
-uint32_t in_batch, uint32_t in_channels, uint32_t in_height, uint32_t in_width,
-uint32_t out_batch, uint32_t out_channels, uint32_t out_height, uint32_t out_width) {
+uint32_t in_batch, uint32_t in_channels, uint32_t in_width,
+uint32_t out_batch, uint32_t out_channels, uint32_t out_width) {
 auto& convolution = dynamic_cast<ConvolutionLayer&>(*layer.get());
 printConvolutionLayer(convolution);
 
@@ -331,18 +329,15 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
 THROW_GNA_LAYER_EXCEPTION(&convolution) << "Padding isn't supported by GNA";
 }
 
-std::size_t calculated_out_width = (in_width * in_height - convolution._kernel_x + 2 * convolution._padding_x) / convolution._stride_x + 1;
-if (out_width * in_height != calculated_out_width) {
+std::size_t calculated_out_width = (in_width - convolution._kernel_x + 2 * convolution._padding_x) / convolution._stride_x + 1;
+if (out_width != calculated_out_width) {
 THROW_GNA_LAYER_EXCEPTION(&convolution) << "Invalid output configuration. "
-<< calculated_out_width << " != " << out_width * in_height;
+<< calculated_out_width << " != " << out_width;
 }
 
-uint32_t total_conv_kernel_size = convolution._kernel_x * convolution._kernel_y * convolution._out_depth;
-uint32_t single_conv_kernel_size = convolution._kernel_x * convolution._kernel_y;
-if (convolution._kernel_y != in_channels) { // work around the strange special case where 1D kernel gets rewritten as 2D kernel
-total_conv_kernel_size *= in_channels;
-single_conv_kernel_size *= in_channels;
-}
+IE_ASSERT(convolution._kernel_y == 1);
+uint32_t total_conv_kernel_size = convolution._kernel_x * convolution._out_depth * in_channels;
+uint32_t single_conv_kernel_size = convolution._kernel_x * in_channels;
 auto actual_kernel_size = details::product(convolution._weights->getTensorDesc().getDims());
 if (total_conv_kernel_size != actual_kernel_size) {
 THROW_GNA_LAYER_EXCEPTION(&convolution) << "Weights size does not equal kernel size "
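A quick worked example of the output-width check above, with assumed illustrative values that are not taken from this commit:

// Assumed: in_width = 32, convolution._kernel_x = 8, convolution._padding_x = 0, convolution._stride_x = 1
// calculated_out_width = (32 - 8 + 2 * 0) / 1 + 1 = 25
// The layer's out_width must then also be 25, otherwise the exception above is thrown.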
@@ -358,17 +353,17 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
 }
 
 // have to pad input to let last kernel meets it's corresponding input
-uint32_t num_inputs = in_width * in_height * in_channels;
+uint32_t num_inputs = in_width * in_channels;
 uint32_t num_input_padding = ALIGN(num_inputs, 8) - num_inputs;
 
 // convert to 2D and set GNA input feature map size
 uint32_t num_feature_map_columns = in_channels * convolution._stride_x * convolution._stride_y;
-if (in_height == 1 && convolution._stride_y != 1) {
+if (convolution._stride_y != 1) {
 num_feature_map_columns = in_channels * convolution._stride_x;
 } else if (in_width == 1 && convolution._stride_x != 1) {
 num_feature_map_columns = in_channels * convolution._stride_y;
 }
-uint32_t num_feature_map_rows = (in_channels * in_height * in_width) / num_feature_map_columns;
+uint32_t num_feature_map_rows = (in_channels * in_width) / num_feature_map_columns;
 
 uint32_t num_filters = convolution._out_depth;
 uint32_t num_filter_coefficients = single_conv_kernel_size + num_conv_kernel_padding;
@@ -383,7 +378,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
 uint32_t additional_padding = 0;
 
 // if kernel padding to multiple of 8 will cause missed outputs, need to pad further
-while (num_columns_out < out_batch * out_channels * out_height * out_width) {
+while (num_columns_out < out_batch * out_channels * out_width) {
 num_input_padding = original_input_padding + additional_padding;
 num_feature_map_rows = original_num_feature_map_rows + (num_input_padding) / num_feature_map_columns;
 num_columns_in = num_inputs + num_input_padding;
@@ -398,9 +393,9 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
 gnalog() << LAYER_NAME(&convolution) << "Inputs padding is " << num_input_padding << "\n";
 }
 
-if (num_columns_out_unpadded != out_batch * out_channels * out_height * out_width) {
+if (num_columns_out_unpadded != out_batch * out_channels * out_width) {
 THROW_GNA_LAYER_EXCEPTION(&convolution) << "Number of output columns does not equal output tensor size "
-<< num_columns_out_unpadded << " vs " << out_batch * out_channels * out_height * out_width;
+<< num_columns_out_unpadded << " vs " << out_batch * out_channels * out_width;
 }
 
 void* ptr_inputs = nullptr;
@@ -833,15 +828,13 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
 auto inputs = layer->insData.begin()->lock();
 auto outputs = *layer->outData.begin();
 
-const auto in_order = getFromIRDimsOrderNCHW(inputs->getLayout());
-uint32_t w_dim_in = FROM_IR_DIM(inputs, in_order[3]);
-uint32_t h_dim_in = FROM_IR_DIM(inputs, in_order[2]);
-const uint32_t c_dim_in = FROM_IR_DIM(inputs, in_order[1]);
+uint32_t w_dim_in = GetDataDimSize(inputs, InferenceEngine::DataDimName::W);
+uint32_t h_dim_in = GetDataDimSize(inputs, InferenceEngine::DataDimName::H);
+const uint32_t c_dim_in = GetDataDimSize(inputs, InferenceEngine::DataDimName::C);
 
-const auto out_order = getFromIRDimsOrderNCHW(outputs->getLayout());
-uint32_t w_dim_out = FROM_IR_DIM(outputs, out_order[3]);
-uint32_t h_dim_out = FROM_IR_DIM(outputs, out_order[2]);
-const uint32_t c_dim_out = FROM_IR_DIM(outputs, out_order[1]);
+uint32_t w_dim_out = GetDataDimSize(outputs, InferenceEngine::DataDimName::W);
+uint32_t h_dim_out = GetDataDimSize(outputs, InferenceEngine::DataDimName::H);
+const uint32_t c_dim_out = GetDataDimSize(outputs, InferenceEngine::DataDimName::C);
 
 if (w_dim_in == 1) { // swap dimensions if needed to support swapped 1D case
 swap(h_dim_in, w_dim_in);
@@ -1029,7 +1022,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
 std::vector<int> axis, dim, offset;
 for (int n = 0; n < cropLayer->axis.size(); n++) {
-uint32_t input_dim = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[n]);
+uint32_t input_dim = GetDataDimSize(inputs, inputs->getDims().size() - cropLayer->axis[n]);
 // Exclude crop layer components that do nothing
 if (cropLayer->offset[n] == 0 && cropLayer->dim[n] == input_dim) {
 continue;
@@ -1088,10 +1081,10 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
 }
 
 // TODO: add unit tests for 4d crops blobs
-uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - axis.front());
+uint32_t num_rows_in = GetDataDimSize(inputs, inputs->getDims().size() - axis.front());
 uint32_t num_columns_in = 1;
 
-uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - axis.front());
+uint32_t num_rows_out = GetDataDimSize(outputs, inputs->getDims().size() - axis.front());
 uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
 
 void* ptr_inputs = nullptr;
@@ -1180,22 +1173,20 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
 
 auto outputs = *layer->outData.begin();
 
-auto in_4b_order = getFromIRDimsOrderNCHW(inputs4Bytes->getLayout());
-auto in_4b_batch = FROM_IR_DIM(inputs4Bytes, in_4b_order[0]);
-auto in_4b_channels = FROM_IR_DIM(inputs4Bytes, in_4b_order[1]);
-auto in_4b_height = FROM_IR_DIM(inputs4Bytes, in_4b_order[2]);
-auto in_4b_width = FROM_IR_DIM(inputs4Bytes, in_4b_order[3]);
+auto in_4b_batch = GetDataDimSize(inputs4Bytes, InferenceEngine::DataDimName::N);
+auto in_4b_channels = GetDataDimSize(inputs4Bytes, InferenceEngine::DataDimName::C);
+auto in_4b_height = GetDataDimSize(inputs4Bytes, InferenceEngine::DataDimName::H);
+auto in_4b_width = GetDataDimSize(inputs4Bytes, InferenceEngine::DataDimName::W);
 auto in_4b_total_size = in_4b_batch * in_4b_channels * in_4b_height * in_4b_width;
 
-auto in_2b_order = getFromIRDimsOrderNCHW(inputs2Bytes->getLayout());
-auto in_2b_batch = FROM_IR_DIM(inputs2Bytes, in_2b_order[0]);
-auto in_2b_channels = FROM_IR_DIM(inputs2Bytes, in_2b_order[1]);
-auto in_2b_height = FROM_IR_DIM(inputs2Bytes, in_2b_order[2]);
-auto in_2b_width = FROM_IR_DIM(inputs2Bytes, in_2b_order[3]);
+auto in_2b_batch = GetDataDimSize(inputs2Bytes, InferenceEngine::DataDimName::N);
+auto in_2b_channels = GetDataDimSize(inputs2Bytes, InferenceEngine::DataDimName::C);
+auto in_2b_height = GetDataDimSize(inputs2Bytes, InferenceEngine::DataDimName::H);
+auto in_2b_width = GetDataDimSize(inputs2Bytes, InferenceEngine::DataDimName::W);
 auto in_2b_total_size = in_2b_batch * in_2b_channels * in_2b_height * in_2b_width;
 
-if ((in_2b_batch > 1) || (in_4b_batch > 1)) {
-THROW_GNA_LAYER_EXCEPTION(layer) << " Inputs with batch size that not equals 1 is not supported";
+if (((in_2b_batch > 1) || (in_4b_batch > 1)) && in_2b_batch != in_4b_batch) {
+THROW_GNA_LAYER_EXCEPTION(layer) << " Inputs with different batch sizes that not equals 1 is not supported";
 }
 
 if (in_4b_total_size != in_2b_total_size) {
@@ -1294,7 +1285,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
 auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
 uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size;
 uint32_t num_columns_in = batch_size;
-uint32_t num_rows_out = isDiag ? num_rows_in : FROM_IR_DIM(outputs, 1);
+uint32_t num_rows_out = isDiag ? num_rows_in : GetDataDimSize(outputs, 1);
 uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
 uint32_t num_padding_out = isDiag ? num_padding : 0;
 
@@ -1481,8 +1472,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
 auto outputs = *layer->outData.begin();
 auto inputs = layer->insData.begin()->lock();
 
-uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
-uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
+uint32_t num_columns_in = GetDataDimSize(inputs, 2);
+uint32_t num_rows_out = GetDataDimSize(outputs, 1);
 uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
 uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
 
@@ -1617,8 +1608,8 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer)
 auto outputs = *layer->outData.begin();
 auto inputs = layer->insData.begin()->lock();
 
-uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
-uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
+uint32_t num_columns_in = GetDataDimSize(inputs, 2);
+uint32_t num_rows_out = GetDataDimSize(outputs, 1);
 uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
 
 uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
@@ -1718,16 +1709,16 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
 auto orientation = kDnnInterleavedOrientation;
 
 if (inputs->getDims().size() == 4) {
-uint32_t w_dim_in = FROM_IR_DIM(inputs, 1);
-uint32_t h_dim_in = FROM_IR_DIM(inputs, 2);
-uint32_t c_dim_in = FROM_IR_DIM(inputs, 3);
-uint32_t b_dim_in = FROM_IR_DIM(inputs, 4);
+uint32_t w_dim_in = GetDataDimSize(inputs, 1);
+uint32_t h_dim_in = GetDataDimSize(inputs, 2);
+uint32_t c_dim_in = GetDataDimSize(inputs, 3);
+uint32_t b_dim_in = GetDataDimSize(inputs, 4);
 
 num_columns = (w_dim_in == 1) ? h_dim_in * c_dim_in * b_dim_in : w_dim_in * c_dim_in * b_dim_in;
 num_rows = (w_dim_in == 1) ? w_dim_in : h_dim_in;
 } else {
-num_columns = FROM_IR_DIM(inputs, 2);
-num_rows = FROM_IR_DIM(inputs, 1);
+num_columns = GetDataDimSize(inputs, 2);
+num_rows = GetDataDimSize(inputs, 1);
 }
 
 if (dnn->new_num_conv_columns) {
@@ -2460,17 +2451,3 @@ GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint
 }
 return temp_buffer;
 }
-
-std::vector<std::size_t> GNAGraphCompiler::getFromIRDimsOrderNCHW(InferenceEngine::Layout layout) {
-std::vector<std::size_t> order;
-switch (layout) {
-case Layout::NHWC:
-order = { 4, 1, 3, 2 };
-break;
-case Layout::NCHW:
-default:
-order = { 4, 3, 2, 1 };
-break;
-}
-return order;
-}
@@ -50,7 +50,6 @@ private:
 static void printPoolingLayer(const InferenceEngine::PoolingLayer& layer);
 static void assertConvolutionLayoutProper(const InferenceEngine::DataPtr&);
 std::vector<uint8_t> static transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols);
-std::vector<std::size_t> static getFromIRDimsOrderNCHW(InferenceEngine::Layout layout);
 
 public:
 GNAPluginNS::backend::DnnComponents dnnComponents;
@@ -127,8 +126,8 @@ public:
 void CopyPrimitive(InferenceEngine::CNNLayerPtr);
 
 void finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerPtr,
-uint32_t in_batch, uint32_t in_channels, uint32_t in_height, uint32_t in_width,
-uint32_t out_batch, uint32_t out_channels, uint32_t out_height, uint32_t out_width);
+uint32_t in_batch, uint32_t in_channels, uint32_t in_width,
+uint32_t out_batch, uint32_t out_channels, uint32_t out_width);
 #if GNA_LIB_VER == 2
 void finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerPtr,
 uint32_t in_batch, uint32_t in_channels, uint32_t in_height, uint32_t in_width,
@@ -141,8 +141,9 @@ inline std::vector<TranspositionInfo> FindTranspositionInfoFromPrevLayers(Infere
 std::function<std::vector<TranspositionInfo>(InferenceEngine::CNNLayerPtr)> findTranspositionInfoRecursive =
 [&findTranspositionInfoRecursive](InferenceEngine::CNNLayerPtr layer) -> std::vector<TranspositionInfo> {
 auto getTransposeInfoFromData = [](InferenceEngine::DataPtr data, bool transpose = true) {
-auto rows = FROM_IR_DIM(data, 3);
-auto columns = FROM_IR_DIM(data, 1) * FROM_IR_DIM(data, 2);
+auto rows = InferenceEngine::GetDataDimSize(data, InferenceEngine::DataDimName::C);
+auto columns = InferenceEngine::GetDataDimSize(data, InferenceEngine::DataDimName::H) *
+InferenceEngine::GetDataDimSize(data, InferenceEngine::DataDimName::W);
 return std::vector<TranspositionInfo>{{transpose, rows, columns}};
 };
 if (LayerInfo(layer).isConvolution() || LayerInfo(layer).isPooling()) {
@@ -236,8 +237,9 @@ inline std::vector<TranspositionInfo> FindTranspositionInfoFromNextLayers(Infere
 std::function<std::vector<TranspositionInfo>(InferenceEngine::CNNLayerPtr)> findTranspositionInfoRecursive =
 [&findTranspositionInfoRecursive](InferenceEngine::CNNLayerPtr layer) -> std::vector<TranspositionInfo> {
 if (LayerInfo(layer).isConvolution()) {
-auto rows = FROM_IR_DIM(layer->input(), 3);
-auto columns = FROM_IR_DIM(layer->input(), 1) * FROM_IR_DIM(layer->input(), 2);
+auto rows = InferenceEngine::GetDataDimSize(layer->input(), InferenceEngine::DataDimName::C);
+auto columns = InferenceEngine::GetDataDimSize(layer->input(), InferenceEngine::DataDimName::H) *
+InferenceEngine::GetDataDimSize(layer->input(), InferenceEngine::DataDimName::W);
 return {{true, rows, columns}};
 }
 
@@ -779,4 +779,53 @@ inline void CNNNetworkReconnectLayer(CNNLayerPtr old_prev_layer, CNNLayerPtr new
 }
 }
 
+/**
+ * @brief returns a size of a specified data dimension depending on its back offset
+ * @param data a pointer to the data
+ * @param backOffset back dimension offset
+ */
+inline uint32_t GetDataDimSize(InferenceEngine::DataPtr data, uint32_t backOffset) {
+auto dims = data->getDims();
+return (dims.size() > backOffset - 1) ? dims[dims.size() - backOffset] : 1;
+}
+
+enum class DataDimName {
+N, C, H, W
+};
+
+/**
+ * @brief returns a size of a specified data dimension depending on the layout
+ * @param data a pointer to the data
+ * @param dimName dimension name
+ */
+inline uint32_t GetDataDimSize(InferenceEngine::DataPtr data, DataDimName dimName) {
+uint32_t dimIxInNCHW = static_cast<uint32_t>(dimName);
+IE_ASSERT(dimIxInNCHW <= 3);
+
+std::vector<uint32_t> backOffsets;
+switch (data->getLayout()) {
+case Layout::C:
+// 1 will be returned for offsets > 1
+backOffsets = std::vector<uint32_t>{1, 2, 3, 4};
+break;
+case Layout::NC:
+// 1 will be returned for offsets > 2
+backOffsets = std::vector<uint32_t>{2, 1, 3, 4};
+break;
+case Layout::HWC:
+// 1 will be returned for offset 4
+case Layout::NHWC:
+backOffsets = std::vector<uint32_t>{4, 1, 3, 2};
+break;
+case Layout::CHW:
+// 1 will be returned for offset 4
+case Layout::NCHW:
+backOffsets = std::vector<uint32_t>{4, 3, 2, 1};
+break;
+default:
+THROW_GNA_EXCEPTION << data->getName() << " Unexpected layout " << data->getLayout();
+}
+return GetDataDimSize(data, backOffsets[dimIxInNCHW]);
+}
+
 } // namespace InferenceEngine
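A brief usage sketch of the GetDataDimSize helpers added above; the DataPtr variable `input` and its shape are assumptions made for illustration, only GetDataDimSize and DataDimName come from this change:

// Assume `input` is an InferenceEngine::DataPtr with Layout::NHWC and dims {1, 160, 1, 8}, i.e. N = 1, H = 160, W = 1, C = 8.
// For NHWC the back offsets are {4, 1, 3, 2} for {N, C, H, W}, so:
uint32_t n = InferenceEngine::GetDataDimSize(input, InferenceEngine::DataDimName::N);  // dims[dims.size() - 4] = 1
uint32_t c = InferenceEngine::GetDataDimSize(input, InferenceEngine::DataDimName::C);  // dims[dims.size() - 1] = 8
uint32_t h = InferenceEngine::GetDataDimSize(input, InferenceEngine::DataDimName::H);  // dims[dims.size() - 3] = 160
uint32_t w = InferenceEngine::GetDataDimSize(input, InferenceEngine::DataDimName::W);  // dims[dims.size() - 2] = 1
// The same tensor described as Layout::NCHW with dims {1, 8, 160, 1} resolves through back offsets {4, 3, 2, 1}
// to identical N/C/H/W values, which is what lets the callers above drop getFromIRDimsOrderNCHW.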
@@ -677,16 +677,6 @@ void RemovePermutationsNHWCToNCHWPass::run() {
 }
 
 nhwc_layout_patterns.push_back({prev, next});
-
-auto* convolution = dynamic_cast<ConvolutionLayer*>(l.get());
-if (!convolution) {
-THROW_GNA_EXCEPTION << "Invalid type of convolution layer";
-}
-if (convolution->_kernel_y != 1) {
-THROW_GNA_LAYER_EXCEPTION(l) << "this case is not implemented yet";
-}
-auto in_channels = convolution->input()->getDims()[1];
-convolution->_kernel_y = in_channels;
 }
 
 for (const auto& layers : nhwc_layout_patterns) {
@@ -2286,8 +2276,8 @@ void TransposeWeightsFromNCHWToNHWCPass::run() {
 
 // Transpose all constant inputs
 for (auto && input : constInputs) {
-auto rows = FROM_IR_DIM(input->outData[0], 3);
-auto columns = FROM_IR_DIM(input->outData[0], 1) * FROM_IR_DIM(input->outData[0], 2);
+auto rows = GetDataDimSize(input->outData[0], DataDimName::C);
+auto columns = GetDataDimSize(input->outData[0], DataDimName::H) * GetDataDimSize(input->outData[0], DataDimName::W);
 auto blob = input->blobs["custom"];
 // A constant should have the same number of channels since concatenation will be in height/weight dimension
 TranspositionInfo concatTranspositionInfo{true, rows, columns};
@@ -74,15 +74,17 @@ class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface<
 ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 }));
 
 size_t num_out_channels = 12;
-size_t kernal_size = 8;
-auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
-ngraph::op::PadType::VALID, num_out_channels);
+size_t kernel_size = 8;
+std::vector<size_t> kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernel_size} : std::vector<size_t>{kernel_size, 1});
+auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
+ngraph::op::PadType::VALID, num_out_channels);
 
 auto permute2 = std::make_shared<ngraph::opset1::Transpose>(conv1,
 ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
 
-size_t out_width = (inputShape[2] - kernal_size) + 1;
-std::vector<size_t> outFormShapes = { 1, out_width * num_out_channels };
+size_t out_width = (inputShape[2] - kernal_shape[1]) + 1;
+size_t out_height = (inputShape[1] - kernal_shape[0]) + 1;
+std::vector<size_t> outFormShapes = { 1, out_width * out_height * num_out_channels };
 auto pattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
 auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(permute2, pattern2, false);
 
@@ -122,7 +124,9 @@ protected:
 auto permute1 = std::make_shared<ngraph::opset1::Transpose>(params[0],
 ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 }));
 
-auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, { 1, 8 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 }, ngraph::op::PadType::VALID, 12);
+size_t kernal_size = 8;
+std::vector<size_t> kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernal_size} : std::vector<size_t>{kernal_size, 1});
+auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 }, ngraph::op::PadType::VALID, 12);
 
 auto permute2 = std::make_shared<ngraph::opset1::Transpose>(conv1,
 ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
@@ -200,20 +204,23 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
 
 size_t num_out_channels = 12;
 size_t kernal_size = 8;
+auto kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernal_size} : std::vector<size_t>{kernal_size, 1});
 std::vector<float> filter_weights = CommonTestUtils::generate_float_numbers(num_out_channels * inputShape[3] * kernal_size,
 -0.2f, 0.2f);
-auto conv1 = ngraph::builder::makeConvolution(relu1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
+auto conv1 = ngraph::builder::makeConvolution(relu1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
 ngraph::op::PadType::VALID, num_out_channels, false, filter_weights);
-auto pool = ngraph::builder::makePooling(conv1, {1, 2}, {0, 0}, {0, 0}, {1, 2}, ngraph::op::RoundingType::FLOOR,
+auto pool_kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, 2} : std::vector<size_t>{2, 1});
+auto pool = ngraph::builder::makePooling(conv1, pool_kernal_shape, {0, 0}, {0, 0}, pool_kernal_shape, ngraph::op::RoundingType::FLOOR,
 ngraph::op::PadType::VALID, false, ngraph::helpers::PoolingTypes::MAX);
 
-size_t out_width = ((inputShape[2] - kernal_size) + 1) / 2;
+size_t out_width = ((inputShape[2] - kernal_shape[1]) + 1) / pool_kernal_shape[1];
+size_t out_height = ((inputShape[1] - kernal_shape[0]) + 1) / pool_kernal_shape[0];
 auto relu2 = std::make_shared<ngraph::opset3::Relu>(pool);
 
 auto permute2 = std::make_shared<ngraph::opset1::Transpose>(relu2,
 ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
 
-std::vector<size_t> outFormShapes = { 1, out_width * num_out_channels };
+std::vector<size_t> outFormShapes = { 1, out_width * out_height * num_out_channels };
 auto pattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
 auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(permute2, pattern2, false);
 
@@ -283,22 +290,25 @@ class RemovePermutationsWithTwoConvTest : public testing::WithParamInterface<rem
 
 size_t num_out_channels = 12;
 size_t kernal_size = 8;
+std::vector<size_t> kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernal_size} : std::vector<size_t>{kernal_size, 1});
 std::vector<float> filter_weights_1 = CommonTestUtils::generate_float_numbers(num_out_channels * inputShape[3] * kernal_size,
 0.0f, 0.5f);
-auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
+auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
 ngraph::op::PadType::VALID, num_out_channels, false, filter_weights_1);
-size_t out_width = ((inputShape[2] - kernal_size) + 1);
+size_t out_width = ((inputShape[2] - kernal_shape[1]) + 1);
+size_t out_height = ((inputShape[1] - kernal_shape[0]) + 1);
 
 std::vector<float> filter_weights_2 = CommonTestUtils::generate_float_numbers(num_out_channels * num_out_channels * kernal_size,
 -0.2f, 0.2f);
-auto conv2 = ngraph::builder::makeConvolution(conv1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
+auto conv2 = ngraph::builder::makeConvolution(conv1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
 ngraph::op::PadType::VALID, num_out_channels, false, filter_weights_2);
-out_width = ((out_width - kernal_size) + 1);
+out_width = ((out_width - kernal_shape[1]) + 1);
+out_height = ((out_height - kernal_shape[0]) + 1);
 
 auto permute2 = std::make_shared<ngraph::opset1::Transpose>(conv2,
 ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
 
-std::vector<size_t> outFormShapes = { 1, out_width * num_out_channels };
+std::vector<size_t> outFormShapes = { 1, out_width * out_height * num_out_channels };
 auto pattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
 auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(permute2, pattern2, false);
 
@@ -363,6 +373,7 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
 auto params = ngraph::builder::makeParams(ngPrc, { {1, 2 * in_total_dims_size} });
 auto split = ngraph::builder::makeSplit(params[0], ngPrc, 2, 1);
 auto in_width = inputShape[2];
+auto in_height = inputShape[1];
 auto in_channels = inputShape[3];
 
 auto pattern1 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, inputShape);
@@ -372,9 +383,10 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
 
 size_t num_out_channels = 12;
 size_t kernal_size = 8;
+std::vector<size_t> kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernal_size} : std::vector<size_t>{kernal_size, 1});
 std::vector<float> filter_weights_1 = CommonTestUtils::generate_float_numbers(num_out_channels * in_channels * kernal_size,
 -0.2f, 0.2f);
-auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
+auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
 ngraph::op::PadType::VALID, num_out_channels, false, filter_weights_1);
 
 auto pattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, inputShape);
@@ -384,7 +396,7 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
 
 std::vector<float> filter_weights_2 = CommonTestUtils::generate_float_numbers(num_out_channels * in_channels * kernal_size,
 -0.2f, 0.2f);
-auto conv2 = ngraph::builder::makeConvolution(permute2, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
+auto conv2 = ngraph::builder::makeConvolution(permute2, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
 ngraph::op::PadType::VALID, num_out_channels, false, filter_weights_2);
 
 auto add = std::make_shared<ngraph::opset1::Add>(conv1, conv2);
@@ -392,8 +404,9 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
 auto permute3 = std::make_shared<ngraph::opset1::Transpose>(add,
 ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
 
-size_t out_width = ((in_width - kernal_size) + 1);
-std::vector<size_t> outFormShapes = { 1, out_width * num_out_channels };
+size_t out_width = ((in_width - kernal_shape[1]) + 1);
+size_t out_height = ((in_height - kernal_shape[0]) + 1);
+std::vector<size_t> outFormShapes = { 1, out_width * out_height * num_out_channels };
 auto pattern3 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
 auto reshape3 = std::make_shared<ngraph::opset1::Reshape>(permute3, pattern3, false);
 
@@ -440,7 +453,13 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
 {1, 1, 168, 8},
 {1, 1, 32, 1},
 {1, 1, 32, 2},
-{1, 1, 32, 8}
+{1, 1, 32, 8},
+{1, 168, 1, 1},
+{1, 168, 1, 2},
+{1, 168, 1, 8},
+{1, 32, 1, 1},
+{1, 32, 1, 2},
+{1, 32, 1, 8}
 };
 
 INSTANTIATE_TEST_CASE_P(smoke_PermutationPass, RemovePermutationsNHWCToNCHWPassTest,