[GNA] Enable convolution in height dimension for NHWC native models (#4864)

Elizaveta Lobanova 2021-04-02 09:35:50 +03:00 committed by GitHub
parent 8d2f58caad
commit df5e0d68aa
9 changed files with 161 additions and 129 deletions


@ -393,13 +393,12 @@ inline void quantizeWeightsBiasesConv(const QuantDesc & quantDesc,
<< "cannot copy weights for layer :"<< conv->name << " of size" << intWeights->byteSize();
}
auto getBiasSizeForLayer = [](InferenceEngine::WeightableLayer *wl) {
auto getBiasSizeForLayer = [](InferenceEngine::WeightableLayer *wl) -> size_t {
if (wl->_biases) {
return wl->_biases->size();
}
// calculating biases len using outdata dims
auto & dims = wl->outData.front()->getDims();
return dims[1];
// calculating biases len using outdata dims: biases number should be equal to output channels number
return InferenceEngine::GetDataDimSize(wl->outData.front(), InferenceEngine::DataDimName::C);
};
using BiasesPrecision = typename QuantDesc::BiasesPrecision;
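Note on the hunk above: the lambda stops hard-coding dims[1] because the channel axis sits at index 1 only for NCHW-shaped outputs; for NHWC data the channels are the innermost dimension. A minimal standalone sketch of why the bias count would otherwise go wrong, using plain std::vector shapes and hypothetical helper names rather than the plugin's API:

#include <cstddef>
#include <vector>

// Hypothetical helpers for illustration only.
std::size_t channels_if_nchw(const std::vector<std::size_t>& dims) { return dims[1]; }      // {N, C, H, W}
std::size_t channels_if_nhwc(const std::vector<std::size_t>& dims) { return dims.back(); }  // {N, H, W, C}

int main() {
    std::vector<std::size_t> nchw{1, 12, 1, 161};  // 12 output channels, NCHW order
    std::vector<std::size_t> nhwc{1, 1, 161, 12};  // the same tensor in NHWC order
    // dims[1] yields 12 for NCHW but 1 for NHWC, so the bias blob would be sized for one
    // channel instead of twelve; resolving C by layout gives 12 in both cases.
    return channels_if_nchw(nchw) == 12 && channels_if_nhwc(nhwc) == 12 ? 0 : 1;
}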


@ -17,6 +17,7 @@
#include "gna_plugin_log.hpp"
#include "gna_slope_scale.h"
#include "runtime/pwl.h"
#include "gna_data_types.hpp"
namespace GNAPluginNS {
namespace frontend {
@ -1080,9 +1081,8 @@ class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
double weights_reducer = 1.0;
auto conv = dynamic_cast<InferenceEngine::ConvolutionLayer *>(wl);
if (conv) {
auto dims = conv->insData.front().lock()->getDims();
weights_reducer = MAX_VAL_2B_FEAT * scaleRange * dims[1] / std::numeric_limits<int32_t>::max();
auto channels_num = GetDataDimSize(conv->insData.front().lock(), InferenceEngine::DataDimName::C);
weights_reducer = MAX_VAL_2B_FEAT * scaleRange * channels_num / std::numeric_limits<int32_t>::max();
weights_reducer = std::max(1.0, weights_reducer);
}
quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weights_reducer);
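The hunk above only changes how the channel count is obtained; the guard itself reduces the weight scale when many input channels could overflow the 32-bit accumulator. A standalone sketch of that arithmetic, with an assumed stand-in value for MAX_VAL_2B_FEAT (the real constant lives in the plugin's quantization headers):

#include <algorithm>
#include <cstdint>
#include <limits>

// Sketch of the saturation guard above; 16384.0 is an assumed stand-in for MAX_VAL_2B_FEAT.
double weights_reducer_for(double scale_range, uint32_t channels_num) {
    const double max_val_2b_feat = 16384.0;
    double reducer = max_val_2b_feat * scale_range * channels_num /
                     std::numeric_limits<int32_t>::max();
    return std::max(1.0, reducer);  // only ever reduce the weight scale, never boost it
}

The point of the change is that channels_num is now resolved through the layout-aware helper instead of a fixed dimension index.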


@ -17,9 +17,6 @@
#include "memory/polymorph_allocator.hpp"
#include "memory/gna_memory.hpp"
#define FROM_IR_DIM(mem, idx)\
((mem->getTensorDesc().getDims().size() > (idx) - 1) ? mem->getTensorDesc().getDims()[mem->getTensorDesc().getDims().size() - (idx)] : 1)
struct TranspositionInfo {
bool transpose;
size_t num_transpose_rows;


@ -243,17 +243,15 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
const auto outputs = layer->outData.front();
assertConvolutionLayoutProper(inputs);
const auto in_order = getFromIRDimsOrderNCHW(inputs->getLayout());
const auto in_batch = static_cast<uint32_t>(FROM_IR_DIM(inputs, in_order[0]));
const auto in_channels = static_cast<uint32_t>(FROM_IR_DIM(inputs, in_order[1]));
auto in_height = static_cast<uint32_t>(FROM_IR_DIM(inputs, in_order[2]));
auto in_width = static_cast<uint32_t>(FROM_IR_DIM(inputs, in_order[3]));
const auto in_batch = GetDataDimSize(inputs, InferenceEngine::DataDimName::N);
const auto in_channels = GetDataDimSize(inputs, InferenceEngine::DataDimName::C);
auto in_height = GetDataDimSize(inputs, InferenceEngine::DataDimName::H);
auto in_width = GetDataDimSize(inputs, InferenceEngine::DataDimName::W);
const auto out_order = getFromIRDimsOrderNCHW(outputs->getLayout());
const auto out_batch = static_cast<uint32_t>(FROM_IR_DIM(outputs, out_order[0]));
const auto out_channels = static_cast<uint32_t>(FROM_IR_DIM(outputs, out_order[1]));
auto out_height = static_cast<uint32_t>(FROM_IR_DIM(outputs, out_order[2]));
auto out_width = static_cast<uint32_t>(FROM_IR_DIM(outputs, out_order[3]));
const auto out_batch = GetDataDimSize(outputs, InferenceEngine::DataDimName::N);
const auto out_channels = GetDataDimSize(outputs, InferenceEngine::DataDimName::C);
auto out_height = GetDataDimSize(outputs, InferenceEngine::DataDimName::H);
auto out_width = GetDataDimSize(outputs, InferenceEngine::DataDimName::W);
if (in_height > 1 && in_width == 1) {
std::swap(in_height, in_width);
@ -300,25 +298,25 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
// TODO: refine following condition
if (((in_channels > 1) && (in_height > 1) && (in_width > 1)) || // 3D input
(convolution._kernel_x != 1 && convolution._kernel_y != 1 && convolution._kernel_y != in_channels) || // 2D kernel
(inputs->getLayout() != Layout::NHWC && in_height != 1)) {
(convolution._kernel_x != 1 && convolution._kernel_y != 1) || // 2D kernel
in_height != 1) {
// TensorFlow default layout is NHWC
// OpenVino Default layout is NCHW
// GNA Convolution input is NHCW
// When layer layout is in NHWC it means that is was created by PassManager
#if GNA_LIB_VER == 2
return finalizeConvolution2DPrimitive(layer, in_batch, in_channels, in_height, in_width,
out_batch, out_channels, out_height, out_width);
out_batch, out_channels, out_height, out_width);
#endif
THROW_GNA_LAYER_EXCEPTION(layer) << "Convolution 2D is not supported on GNA 1.0 library";
}
finalizeConvolution1DPrimitive(layer, in_batch, in_channels, in_height, in_width,
out_batch, out_channels, out_height, out_width);
finalizeConvolution1DPrimitive(layer, in_batch, in_channels, in_width,
out_batch, out_channels, out_width);
}
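After the earlier swap that moves a height-only spatial extent into the width slot, the simplified condition above routes anything that still has a non-trivial height to the 2D path, regardless of the original layout. A compact restatement of the dispatch, with names local to this sketch rather than the compiler's API:

#include <cstdint>

// Restatement of the 2D-vs-1D dispatch above.
bool needs_2d_convolution(uint32_t in_channels, uint32_t in_height, uint32_t in_width,
                          uint32_t kernel_x, uint32_t kernel_y) {
    const bool has_3d_input  = in_channels > 1 && in_height > 1 && in_width > 1;
    const bool has_2d_kernel = kernel_x != 1 && kernel_y != 1;
    return has_3d_input || has_2d_kernel || in_height != 1;
}
// The 1D finalizer below is therefore only reached with in_height == 1, which is why its
// signature no longer carries the in_height/out_height arguments.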
void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerPtr layer,
uint32_t in_batch, uint32_t in_channels, uint32_t in_height, uint32_t in_width,
uint32_t out_batch, uint32_t out_channels, uint32_t out_height, uint32_t out_width) {
uint32_t in_batch, uint32_t in_channels, uint32_t in_width,
uint32_t out_batch, uint32_t out_channels, uint32_t out_width) {
auto& convolution = dynamic_cast<ConvolutionLayer&>(*layer.get());
printConvolutionLayer(convolution);
@ -331,18 +329,15 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
THROW_GNA_LAYER_EXCEPTION(&convolution) << "Padding isn't supported by GNA";
}
std::size_t calculated_out_width = (in_width * in_height - convolution._kernel_x + 2 * convolution._padding_x) / convolution._stride_x + 1;
if (out_width * in_height != calculated_out_width) {
std::size_t calculated_out_width = (in_width - convolution._kernel_x + 2 * convolution._padding_x) / convolution._stride_x + 1;
if (out_width != calculated_out_width) {
THROW_GNA_LAYER_EXCEPTION(&convolution) << "Invalid output configuration. "
<< calculated_out_width << " != " << out_width * in_height;
<< calculated_out_width << " != " << out_width;
}
uint32_t total_conv_kernel_size = convolution._kernel_x * convolution._kernel_y * convolution._out_depth;
uint32_t single_conv_kernel_size = convolution._kernel_x * convolution._kernel_y;
if (convolution._kernel_y != in_channels) { // work around the strange special case where 1D kernel gets rewritten as 2D kernel
total_conv_kernel_size *= in_channels;
single_conv_kernel_size *= in_channels;
}
IE_ASSERT(convolution._kernel_y == 1);
uint32_t total_conv_kernel_size = convolution._kernel_x * convolution._out_depth * in_channels;
uint32_t single_conv_kernel_size = convolution._kernel_x * in_channels;
auto actual_kernel_size = details::product(convolution._weights->getTensorDesc().getDims());
if (total_conv_kernel_size != actual_kernel_size) {
THROW_GNA_LAYER_EXCEPTION(&convolution) << "Weights size does not equal kernel size "
@ -358,17 +353,17 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
}
// have to pad input to let last kernel meets it's corresponding input
uint32_t num_inputs = in_width * in_height * in_channels;
uint32_t num_inputs = in_width * in_channels;
uint32_t num_input_padding = ALIGN(num_inputs, 8) - num_inputs;
// convert to 2D and set GNA input feature map size
uint32_t num_feature_map_columns = in_channels * convolution._stride_x * convolution._stride_y;
if (in_height == 1 && convolution._stride_y != 1) {
if (convolution._stride_y != 1) {
num_feature_map_columns = in_channels * convolution._stride_x;
} else if (in_width == 1 && convolution._stride_x != 1) {
num_feature_map_columns = in_channels * convolution._stride_y;
}
uint32_t num_feature_map_rows = (in_channels * in_height * in_width) / num_feature_map_columns;
uint32_t num_feature_map_rows = (in_channels * in_width) / num_feature_map_columns;
uint32_t num_filters = convolution._out_depth;
uint32_t num_filter_coefficients = single_conv_kernel_size + num_conv_kernel_padding;
@ -383,7 +378,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
uint32_t additional_padding = 0;
// if kernel padding to multiple of 8 will cause missed outputs, need to pad further
while (num_columns_out < out_batch * out_channels * out_height * out_width) {
while (num_columns_out < out_batch * out_channels * out_width) {
num_input_padding = original_input_padding + additional_padding;
num_feature_map_rows = original_num_feature_map_rows + (num_input_padding) / num_feature_map_columns;
num_columns_in = num_inputs + num_input_padding;
@ -398,9 +393,9 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP
gnalog() << LAYER_NAME(&convolution) << "Inputs padding is " << num_input_padding << "\n";
}
if (num_columns_out_unpadded != out_batch * out_channels * out_height * out_width) {
if (num_columns_out_unpadded != out_batch * out_channels * out_width) {
THROW_GNA_LAYER_EXCEPTION(&convolution) << "Number of output columns does not equal output tensor size "
<< num_columns_out_unpadded << " vs " << out_batch * out_channels * out_height * out_width;
<< num_columns_out_unpadded << " vs " << out_batch * out_channels * out_width;
}
void* ptr_inputs = nullptr;
@ -833,15 +828,13 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto inputs = layer->insData.begin()->lock();
auto outputs = *layer->outData.begin();
const auto in_order = getFromIRDimsOrderNCHW(inputs->getLayout());
uint32_t w_dim_in = FROM_IR_DIM(inputs, in_order[3]);
uint32_t h_dim_in = FROM_IR_DIM(inputs, in_order[2]);
const uint32_t c_dim_in = FROM_IR_DIM(inputs, in_order[1]);
uint32_t w_dim_in = GetDataDimSize(inputs, InferenceEngine::DataDimName::W);
uint32_t h_dim_in = GetDataDimSize(inputs, InferenceEngine::DataDimName::H);
const uint32_t c_dim_in = GetDataDimSize(inputs, InferenceEngine::DataDimName::C);
const auto out_order = getFromIRDimsOrderNCHW(outputs->getLayout());
uint32_t w_dim_out = FROM_IR_DIM(outputs, out_order[3]);
uint32_t h_dim_out = FROM_IR_DIM(outputs, out_order[2]);
const uint32_t c_dim_out = FROM_IR_DIM(outputs, out_order[1]);
uint32_t w_dim_out = GetDataDimSize(outputs, InferenceEngine::DataDimName::W);
uint32_t h_dim_out = GetDataDimSize(outputs, InferenceEngine::DataDimName::H);
const uint32_t c_dim_out = GetDataDimSize(outputs, InferenceEngine::DataDimName::C);
if (w_dim_in == 1) { // swap dimensions if needed to support swapped 1D case
swap(h_dim_in, w_dim_in);
@ -1029,7 +1022,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
std::vector<int> axis, dim, offset;
for (int n = 0; n < cropLayer->axis.size(); n++) {
uint32_t input_dim = FROM_IR_DIM(inputs, inputs->getDims().size() - cropLayer->axis[n]);
uint32_t input_dim = GetDataDimSize(inputs, inputs->getDims().size() - cropLayer->axis[n]);
// Exclude crop layer components that do nothing
if (cropLayer->offset[n] == 0 && cropLayer->dim[n] == input_dim) {
continue;
@ -1088,10 +1081,10 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
}
// TODO: add unit tests for 4d crops blobs
uint32_t num_rows_in = FROM_IR_DIM(inputs, inputs->getDims().size() - axis.front());
uint32_t num_rows_in = GetDataDimSize(inputs, inputs->getDims().size() - axis.front());
uint32_t num_columns_in = 1;
uint32_t num_rows_out = FROM_IR_DIM(outputs, inputs->getDims().size() - axis.front());
uint32_t num_rows_out = GetDataDimSize(outputs, inputs->getDims().size() - axis.front());
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
void* ptr_inputs = nullptr;
@ -1180,22 +1173,20 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
auto outputs = *layer->outData.begin();
auto in_4b_order = getFromIRDimsOrderNCHW(inputs4Bytes->getLayout());
auto in_4b_batch = FROM_IR_DIM(inputs4Bytes, in_4b_order[0]);
auto in_4b_channels = FROM_IR_DIM(inputs4Bytes, in_4b_order[1]);
auto in_4b_height = FROM_IR_DIM(inputs4Bytes, in_4b_order[2]);
auto in_4b_width = FROM_IR_DIM(inputs4Bytes, in_4b_order[3]);
auto in_4b_batch = GetDataDimSize(inputs4Bytes, InferenceEngine::DataDimName::N);
auto in_4b_channels = GetDataDimSize(inputs4Bytes, InferenceEngine::DataDimName::C);
auto in_4b_height = GetDataDimSize(inputs4Bytes, InferenceEngine::DataDimName::H);
auto in_4b_width = GetDataDimSize(inputs4Bytes, InferenceEngine::DataDimName::W);
auto in_4b_total_size = in_4b_batch * in_4b_channels * in_4b_height * in_4b_width;
auto in_2b_order = getFromIRDimsOrderNCHW(inputs2Bytes->getLayout());
auto in_2b_batch = FROM_IR_DIM(inputs2Bytes, in_2b_order[0]);
auto in_2b_channels = FROM_IR_DIM(inputs2Bytes, in_2b_order[1]);
auto in_2b_height = FROM_IR_DIM(inputs2Bytes, in_2b_order[2]);
auto in_2b_width = FROM_IR_DIM(inputs2Bytes, in_2b_order[3]);
auto in_2b_batch = GetDataDimSize(inputs2Bytes, InferenceEngine::DataDimName::N);
auto in_2b_channels = GetDataDimSize(inputs2Bytes, InferenceEngine::DataDimName::C);
auto in_2b_height = GetDataDimSize(inputs2Bytes, InferenceEngine::DataDimName::H);
auto in_2b_width = GetDataDimSize(inputs2Bytes, InferenceEngine::DataDimName::W);
auto in_2b_total_size = in_2b_batch * in_2b_channels * in_2b_height * in_2b_width;
if ((in_2b_batch > 1) || (in_4b_batch > 1)) {
THROW_GNA_LAYER_EXCEPTION(layer) << " Inputs with batch size that not equals 1 is not supported";
if (((in_2b_batch > 1) || (in_4b_batch > 1)) && in_2b_batch != in_4b_batch) {
THROW_GNA_LAYER_EXCEPTION(layer) << " Inputs with different batch sizes that not equals 1 is not supported";
}
if (in_4b_total_size != in_2b_total_size) {
@ -1294,7 +1285,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size;
uint32_t num_columns_in = batch_size;
uint32_t num_rows_out = isDiag ? num_rows_in : FROM_IR_DIM(outputs, 1);
uint32_t num_rows_out = isDiag ? num_rows_in : GetDataDimSize(outputs, 1);
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
uint32_t num_padding_out = isDiag ? num_padding : 0;
@ -1481,8 +1472,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
auto outputs = *layer->outData.begin();
auto inputs = layer->insData.begin()->lock();
uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
uint32_t num_columns_in = GetDataDimSize(inputs, 2);
uint32_t num_rows_out = GetDataDimSize(outputs, 1);
uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
@ -1617,8 +1608,8 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer)
auto outputs = *layer->outData.begin();
auto inputs = layer->insData.begin()->lock();
uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
uint32_t num_columns_in = GetDataDimSize(inputs, 2);
uint32_t num_rows_out = GetDataDimSize(outputs, 1);
uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
@ -1718,16 +1709,16 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto orientation = kDnnInterleavedOrientation;
if (inputs->getDims().size() == 4) {
uint32_t w_dim_in = FROM_IR_DIM(inputs, 1);
uint32_t h_dim_in = FROM_IR_DIM(inputs, 2);
uint32_t c_dim_in = FROM_IR_DIM(inputs, 3);
uint32_t b_dim_in = FROM_IR_DIM(inputs, 4);
uint32_t w_dim_in = GetDataDimSize(inputs, 1);
uint32_t h_dim_in = GetDataDimSize(inputs, 2);
uint32_t c_dim_in = GetDataDimSize(inputs, 3);
uint32_t b_dim_in = GetDataDimSize(inputs, 4);
num_columns = (w_dim_in == 1) ? h_dim_in * c_dim_in * b_dim_in : w_dim_in * c_dim_in * b_dim_in;
num_rows = (w_dim_in == 1) ? w_dim_in : h_dim_in;
} else {
num_columns = FROM_IR_DIM(inputs, 2);
num_rows = FROM_IR_DIM(inputs, 1);
num_columns = GetDataDimSize(inputs, 2);
num_rows = GetDataDimSize(inputs, 1);
}
if (dnn->new_num_conv_columns) {
@ -2460,17 +2451,3 @@ GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint
}
return temp_buffer;
}
std::vector<std::size_t> GNAGraphCompiler::getFromIRDimsOrderNCHW(InferenceEngine::Layout layout) {
std::vector<std::size_t> order;
switch (layout) {
case Layout::NHWC:
order = { 4, 1, 3, 2 };
break;
case Layout::NCHW:
default:
order = { 4, 3, 2, 1 };
break;
}
return order;
}
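For the finalizeConvolution1DPrimitive hunks above, a small worked sketch (sizes assumed, _stride_y taken as 1) of the output-width check and the GNA feature-map reshape once the height factor is dropped:

#include <cassert>
#include <cstdint>

int main() {
    // Assumed sizes for illustration.
    const uint32_t in_width = 161, in_channels = 8;
    const uint32_t kernel_x = 8, stride_x = 1, padding_x = 0;

    // Output-width validation performed by finalizeConvolution1DPrimitive.
    const uint32_t out_width = (in_width - kernel_x + 2 * padding_x) / stride_x + 1;  // 154

    // Feature-map reshape: one column block per stride of channels, the rest becomes rows.
    const uint32_t num_feature_map_columns = in_channels * stride_x;                           // 8
    const uint32_t num_feature_map_rows = (in_channels * in_width) / num_feature_map_columns;  // 161

    assert(num_feature_map_columns * num_feature_map_rows == in_channels * in_width);
    return out_width == 154 ? 0 : 1;
}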


@ -50,7 +50,6 @@ private:
static void printPoolingLayer(const InferenceEngine::PoolingLayer& layer);
static void assertConvolutionLayoutProper(const InferenceEngine::DataPtr&);
std::vector<uint8_t> static transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols);
std::vector<std::size_t> static getFromIRDimsOrderNCHW(InferenceEngine::Layout layout);
public:
GNAPluginNS::backend::DnnComponents dnnComponents;
@ -127,8 +126,8 @@ public:
void CopyPrimitive(InferenceEngine::CNNLayerPtr);
void finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerPtr,
uint32_t in_batch, uint32_t in_channels, uint32_t in_height, uint32_t in_width,
uint32_t out_batch, uint32_t out_channels, uint32_t out_height, uint32_t out_width);
uint32_t in_batch, uint32_t in_channels, uint32_t in_width,
uint32_t out_batch, uint32_t out_channels, uint32_t out_width);
#if GNA_LIB_VER == 2
void finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerPtr,
uint32_t in_batch, uint32_t in_channels, uint32_t in_height, uint32_t in_width,


@ -141,8 +141,9 @@ inline std::vector<TranspositionInfo> FindTranspositionInfoFromPrevLayers(Infere
std::function<std::vector<TranspositionInfo>(InferenceEngine::CNNLayerPtr)> findTranspositionInfoRecursive =
[&findTranspositionInfoRecursive](InferenceEngine::CNNLayerPtr layer) -> std::vector<TranspositionInfo> {
auto getTransposeInfoFromData = [](InferenceEngine::DataPtr data, bool transpose = true) {
auto rows = FROM_IR_DIM(data, 3);
auto columns = FROM_IR_DIM(data, 1) * FROM_IR_DIM(data, 2);
auto rows = InferenceEngine::GetDataDimSize(data, InferenceEngine::DataDimName::C);
auto columns = InferenceEngine::GetDataDimSize(data, InferenceEngine::DataDimName::H) *
InferenceEngine::GetDataDimSize(data, InferenceEngine::DataDimName::W);
return std::vector<TranspositionInfo>{{transpose, rows, columns}};
};
if (LayerInfo(layer).isConvolution() || LayerInfo(layer).isPooling()) {
@ -236,8 +237,9 @@ inline std::vector<TranspositionInfo> FindTranspositionInfoFromNextLayers(Infere
std::function<std::vector<TranspositionInfo>(InferenceEngine::CNNLayerPtr)> findTranspositionInfoRecursive =
[&findTranspositionInfoRecursive](InferenceEngine::CNNLayerPtr layer) -> std::vector<TranspositionInfo> {
if (LayerInfo(layer).isConvolution()) {
auto rows = FROM_IR_DIM(layer->input(), 3);
auto columns = FROM_IR_DIM(layer->input(), 1) * FROM_IR_DIM(layer->input(), 2);
auto rows = InferenceEngine::GetDataDimSize(layer->input(), InferenceEngine::DataDimName::C);
auto columns = InferenceEngine::GetDataDimSize(layer->input(), InferenceEngine::DataDimName::H) *
InferenceEngine::GetDataDimSize(layer->input(), InferenceEngine::DataDimName::W);
return {{true, rows, columns}};
}
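The transposition records collected above describe how a flat buffer must be reordered at the NHWC/NCHW boundary: for a convolution the row count is the channel count and the column count is H * W. A minimal sketch of applying one such record, illustrative only and not the plugin's implementation:

#include <cstddef>
#include <vector>

// Reorder a rows x columns row-major block into columns x rows order, i.e. apply one
// {transpose = true, rows, columns} record where rows = C and columns = H * W.
template <typename T>
std::vector<T> apply_transposition(const std::vector<T>& src, std::size_t rows, std::size_t columns) {
    std::vector<T> dst(src.size());
    for (std::size_t r = 0; r < rows; ++r) {
        for (std::size_t c = 0; c < columns; ++c) {
            dst[c * rows + r] = src[r * columns + c];
        }
    }
    return dst;
}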


@ -779,4 +779,53 @@ inline void CNNNetworkReconnectLayer(CNNLayerPtr old_prev_layer, CNNLayerPtr new
}
}
/**
* @brief returns a size of a specified data dimension depending on its back offset
* @param data a pointer to the data
* @param backOffset back dimension offset
*/
inline uint32_t GetDataDimSize(InferenceEngine::DataPtr data, uint32_t backOffset) {
auto dims = data->getDims();
return (dims.size() > backOffset - 1) ? dims[dims.size() - backOffset] : 1;
}
enum class DataDimName {
N, C, H, W
};
/**
* @brief returns a size of a specified data dimension depending on the layout
* @param data a pointer to the data
* @param dimName dimension name
*/
inline uint32_t GetDataDimSize(InferenceEngine::DataPtr data, DataDimName dimName) {
uint32_t dimIxInNCHW = static_cast<uint32_t>(dimName);
IE_ASSERT(dimIxInNCHW <= 3);
std::vector<uint32_t> backOffsets;
switch (data->getLayout()) {
case Layout::C:
// 1 will be returned for offsets > 1
backOffsets = std::vector<uint32_t>{1, 2, 3, 4};
break;
case Layout::NC:
// 1 will be returned for offsets > 2
backOffsets = std::vector<uint32_t>{2, 1, 3, 4};
break;
case Layout::HWC:
// 1 will be returned for offset 4
case Layout::NHWC:
backOffsets = std::vector<uint32_t>{4, 1, 3, 2};
break;
case Layout::CHW:
// 1 will be returned for offset 4
case Layout::NCHW:
backOffsets = std::vector<uint32_t>{4, 3, 2, 1};
break;
default:
THROW_GNA_EXCEPTION << data->getName() << " Unexpected layout " << data->getLayout();
}
return GetDataDimSize(data, backOffsets[dimIxInNCHW]);
}
} // namespace InferenceEngine
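A self-contained mirror of the helper above, using plain shape vectors instead of InferenceEngine::DataPtr, showing how the per-layout back offsets resolve N/C/H/W; for an NHWC tensor {1, 32, 1, 8} the channel count is the last dimension:

#include <cstdint>
#include <vector>

enum class Dim { N, C, H, W };

// Same back-offset lookup as GetDataDimSize, on a plain dims vector.
uint32_t dim_size(const std::vector<uint32_t>& dims, uint32_t back_offset) {
    return dims.size() > back_offset - 1 ? dims[dims.size() - back_offset] : 1;
}

uint32_t dim_size_nhwc(const std::vector<uint32_t>& dims, Dim d) {
    const uint32_t back_offsets[] = {4, 1, 3, 2};  // N, C, H, W offsets for NHWC
    return dim_size(dims, back_offsets[static_cast<int>(d)]);
}

int main() {
    std::vector<uint32_t> nhwc{1, 32, 1, 8};  // N = 1, H = 32, W = 1, C = 8
    return dim_size_nhwc(nhwc, Dim::C) == 8 &&
           dim_size_nhwc(nhwc, Dim::H) == 32 &&
           dim_size_nhwc(nhwc, Dim::W) == 1 ? 0 : 1;
}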


@ -677,16 +677,6 @@ void RemovePermutationsNHWCToNCHWPass::run() {
}
nhwc_layout_patterns.push_back({prev, next});
auto* convolution = dynamic_cast<ConvolutionLayer*>(l.get());
if (!convolution) {
THROW_GNA_EXCEPTION << "Invalid type of convolution layer";
}
if (convolution->_kernel_y != 1) {
THROW_GNA_LAYER_EXCEPTION(l) << "this case is not implemented yet";
}
auto in_channels = convolution->input()->getDims()[1];
convolution->_kernel_y = in_channels;
}
for (const auto& layers : nhwc_layout_patterns) {
@ -2286,8 +2276,8 @@ void TransposeWeightsFromNCHWToNHWCPass::run() {
// Transpose all constant inputs
for (auto && input : constInputs) {
auto rows = FROM_IR_DIM(input->outData[0], 3);
auto columns = FROM_IR_DIM(input->outData[0], 1) * FROM_IR_DIM(input->outData[0], 2);
auto rows = GetDataDimSize(input->outData[0], DataDimName::C);
auto columns = GetDataDimSize(input->outData[0], DataDimName::H) * GetDataDimSize(input->outData[0], DataDimName::W);
auto blob = input->blobs["custom"];
// A constant should have the same number of channels since concatenation will be in height/weight dimension
TranspositionInfo concatTranspositionInfo{true, rows, columns};


@ -74,15 +74,17 @@ class RemovePermutationsNHWCToNCHWPassTest : public testing::WithParamInterface<
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 }));
size_t num_out_channels = 12;
size_t kernal_size = 8;
auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
ngraph::op::PadType::VALID, num_out_channels);
size_t kernel_size = 8;
std::vector<size_t> kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernel_size} : std::vector<size_t>{kernel_size, 1});
auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
ngraph::op::PadType::VALID, num_out_channels);
auto permute2 = std::make_shared<ngraph::opset1::Transpose>(conv1,
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
size_t out_width = (inputShape[2] - kernal_size) + 1;
std::vector<size_t> outFormShapes = { 1, out_width * num_out_channels };
size_t out_width = (inputShape[2] - kernal_shape[1]) + 1;
size_t out_height = (inputShape[1] - kernal_shape[0]) + 1;
std::vector<size_t> outFormShapes = { 1, out_width * out_height * num_out_channels };
auto pattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(permute2, pattern2, false);
@ -122,7 +124,9 @@ protected:
auto permute1 = std::make_shared<ngraph::opset1::Transpose>(params[0],
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 }));
auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, { 1, 8 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 }, ngraph::op::PadType::VALID, 12);
size_t kernal_size = 8;
std::vector<size_t> kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernal_size} : std::vector<size_t>{kernal_size, 1});
auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 }, ngraph::op::PadType::VALID, 12);
auto permute2 = std::make_shared<ngraph::opset1::Transpose>(conv1,
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
@ -200,20 +204,23 @@ class RemovePermutationsWithPoolAndActTest : public testing::WithParamInterface<
size_t num_out_channels = 12;
size_t kernal_size = 8;
auto kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernal_size} : std::vector<size_t>{kernal_size, 1});
std::vector<float> filter_weights = CommonTestUtils::generate_float_numbers(num_out_channels * inputShape[3] * kernal_size,
-0.2f, 0.2f);
auto conv1 = ngraph::builder::makeConvolution(relu1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
auto conv1 = ngraph::builder::makeConvolution(relu1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
ngraph::op::PadType::VALID, num_out_channels, false, filter_weights);
auto pool = ngraph::builder::makePooling(conv1, {1, 2}, {0, 0}, {0, 0}, {1, 2}, ngraph::op::RoundingType::FLOOR,
auto pool_kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, 2} : std::vector<size_t>{2, 1});
auto pool = ngraph::builder::makePooling(conv1, pool_kernal_shape, {0, 0}, {0, 0}, pool_kernal_shape, ngraph::op::RoundingType::FLOOR,
ngraph::op::PadType::VALID, false, ngraph::helpers::PoolingTypes::MAX);
size_t out_width = ((inputShape[2] - kernal_size) + 1) / 2;
size_t out_width = ((inputShape[2] - kernal_shape[1]) + 1) / pool_kernal_shape[1];
size_t out_height = ((inputShape[1] - kernal_shape[0]) + 1) / pool_kernal_shape[0];
auto relu2 = std::make_shared<ngraph::opset3::Relu>(pool);
auto permute2 = std::make_shared<ngraph::opset1::Transpose>(relu2,
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
std::vector<size_t> outFormShapes = { 1, out_width * num_out_channels };
std::vector<size_t> outFormShapes = { 1, out_width * out_height * num_out_channels };
auto pattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(permute2, pattern2, false);
@ -283,22 +290,25 @@ class RemovePermutationsWithTwoConvTest : public testing::WithParamInterface<rem
size_t num_out_channels = 12;
size_t kernal_size = 8;
std::vector<size_t> kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernal_size} : std::vector<size_t>{kernal_size, 1});
std::vector<float> filter_weights_1 = CommonTestUtils::generate_float_numbers(num_out_channels * inputShape[3] * kernal_size,
0.0f, 0.5f);
auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
ngraph::op::PadType::VALID, num_out_channels, false, filter_weights_1);
size_t out_width = ((inputShape[2] - kernal_size) + 1);
size_t out_width = ((inputShape[2] - kernal_shape[1]) + 1);
size_t out_height = ((inputShape[1] - kernal_shape[0]) + 1);
std::vector<float> filter_weights_2 = CommonTestUtils::generate_float_numbers(num_out_channels * num_out_channels * kernal_size,
-0.2f, 0.2f);
auto conv2 = ngraph::builder::makeConvolution(conv1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
auto conv2 = ngraph::builder::makeConvolution(conv1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
ngraph::op::PadType::VALID, num_out_channels, false, filter_weights_2);
out_width = ((out_width - kernal_size) + 1);
out_width = ((out_width - kernal_shape[1]) + 1);
out_height = ((out_height - kernal_shape[0]) + 1);
auto permute2 = std::make_shared<ngraph::opset1::Transpose>(conv2,
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
std::vector<size_t> outFormShapes = { 1, out_width * num_out_channels };
std::vector<size_t> outFormShapes = { 1, out_width * out_height * num_out_channels };
auto pattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
auto reshape2 = std::make_shared<ngraph::opset1::Reshape>(permute2, pattern2, false);
@ -363,6 +373,7 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
auto params = ngraph::builder::makeParams(ngPrc, { {1, 2 * in_total_dims_size} });
auto split = ngraph::builder::makeSplit(params[0], ngPrc, 2, 1);
auto in_width = inputShape[2];
auto in_height = inputShape[1];
auto in_channels = inputShape[3];
auto pattern1 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, inputShape);
@ -372,9 +383,10 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
size_t num_out_channels = 12;
size_t kernal_size = 8;
std::vector<size_t> kernal_shape = (inputShape[1] == 1 ? std::vector<size_t>{1, kernal_size} : std::vector<size_t>{kernal_size, 1});
std::vector<float> filter_weights_1 = CommonTestUtils::generate_float_numbers(num_out_channels * in_channels * kernal_size,
-0.2f, 0.2f);
auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
ngraph::op::PadType::VALID, num_out_channels, false, filter_weights_1);
auto pattern2 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, inputShape);
@ -384,7 +396,7 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
std::vector<float> filter_weights_2 = CommonTestUtils::generate_float_numbers(num_out_channels * in_channels * kernal_size,
-0.2f, 0.2f);
auto conv2 = ngraph::builder::makeConvolution(permute2, ngPrc, { 1, kernal_size }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
auto conv2 = ngraph::builder::makeConvolution(permute2, ngPrc, kernal_shape, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 },
ngraph::op::PadType::VALID, num_out_channels, false, filter_weights_2);
auto add = std::make_shared<ngraph::opset1::Add>(conv1, conv2);
@ -392,8 +404,9 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
auto permute3 = std::make_shared<ngraph::opset1::Transpose>(add,
ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 }));
size_t out_width = ((in_width - kernal_size) + 1);
std::vector<size_t> outFormShapes = { 1, out_width * num_out_channels };
size_t out_width = ((in_width - kernal_shape[1]) + 1);
size_t out_height = ((in_height - kernal_shape[0]) + 1);
std::vector<size_t> outFormShapes = { 1, out_width * out_height * num_out_channels };
auto pattern3 = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes);
auto reshape3 = std::make_shared<ngraph::opset1::Reshape>(permute3, pattern3, false);
@ -440,7 +453,13 @@ class RemovePermutationsWithEltwiseTest : public testing::WithParamInterface<rem
{1, 1, 168, 8},
{1, 1, 32, 1},
{1, 1, 32, 2},
{1, 1, 32, 8}
{1, 1, 32, 8},
{1, 168, 1, 1},
{1, 168, 1, 2},
{1, 168, 1, 8},
{1, 32, 1, 1},
{1, 32, 1, 2},
{1, 32, 1, 8}
};
INSTANTIATE_TEST_CASE_P(smoke_PermutationPass, RemovePermutationsNHWCToNCHWPassTest,
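The new test cases above add inputs whose spatial extent lives in the height slot (e.g. {1, 168, 1, 2}); a standalone sketch of the shape arithmetic those tests rely on:

#include <cstddef>
#include <vector>

int main() {
    const std::vector<std::size_t> inputShape{1, 168, 1, 2};  // NHWC: one of the new H-major cases
    const std::size_t num_out_channels = 12, kernel_size = 8;

    // 1D kernel laid along whichever spatial axis is non-trivial.
    const std::vector<std::size_t> kernel_shape =
        inputShape[1] == 1 ? std::vector<std::size_t>{1, kernel_size}
                           : std::vector<std::size_t>{kernel_size, 1};

    const std::size_t out_width  = inputShape[2] - kernel_shape[1] + 1;  // 1
    const std::size_t out_height = inputShape[1] - kernel_shape[0] + 1;  // 161
    const std::vector<std::size_t> out_form_shapes{1, out_width * out_height * num_out_channels};  // {1, 1932}
    return out_form_shapes.back() == 1932 ? 0 : 1;
}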