[GNA] Fixed Eltwise split and batch size selection during 2d reshape (#7042)

* [GNA] Fixed Eltwise split and batch size selection during 2d reshape

* [GNA] Added exception if memory isn't allocated for concat filter

* Added assert for minZeroDimSize

* [GNA] Added unit test for GetAlignedSplitSizes()
This commit is contained in:
Elizaveta Lobanova 2021-08-16 19:27:48 +03:00 committed by GitHub
parent 05c632e072
commit a9f7c8effa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 102 additions and 53 deletions

View File

@ -7,6 +7,7 @@
#include "dnn_types.h" #include "dnn_types.h"
#include <cstdint> #include <cstdint>
#include <cpp/ie_cnn_network.h> #include <cpp/ie_cnn_network.h>
#include <ie_algorithm.hpp>
namespace GNAPluginNS { namespace GNAPluginNS {
namespace GNALimitations { namespace GNALimitations {
@ -114,5 +115,10 @@ public:
bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage); bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
// @brief Returns the minimum number of batch (zero-dimension) parts needed so that
// each part of the input tensor fits into the GNA buffer of size bufferMaxSize.
// @param input data whose total element count is measured
// @return ceil(total_size / bufferMaxSize); the original `total_size / bufferMaxSize + 1`
// over-required one extra batch whenever total_size was an exact multiple of bufferMaxSize
inline size_t GetMinBatchToFitInBuffer(InferenceEngine::DataPtr input) {
    // Total number of elements across all dimensions of the input tensor.
    auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims()));
    // Ceiling division: smallest batch count with total_size / batch <= bufferMaxSize.
    return (total_size + bufferMaxSize - 1) / bufferMaxSize;
}
} // namespace GNALimitations } // namespace GNALimitations
} // namespace GNAPluginNS } // namespace GNAPluginNS

View File

@ -683,7 +683,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto input = layer->insData[0].lock(); auto input = layer->insData[0].lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(input, 8)->getDims(); auto reshaped_dims = Get2DReshapedData(input, GNALimitations::GetMinBatchToFitInBuffer(input), 8)->getDims();
const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ? const uint32_t noOfInputsDivisor = gnaFlags->input_low_precision ?
GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor; GNALimitations::noOfInputsLowPrecDivisor : GNALimitations::noOfInputsDivisor;
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
@ -908,7 +908,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(inputs, 8)->getDims(); auto reshaped_dims = Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims();
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0]; uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in; uint32_t num_rows_out = num_rows_in;
@ -1410,7 +1410,8 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
noOfInputsDivisor = GNALimitations::noOfInputsLowPrecDivisor; noOfInputsDivisor = GNALimitations::noOfInputsLowPrecDivisor;
} }
auto input_data = HasTo2DReshapeData(layer) ? Get2DReshapedData(inputs, 8) : inputs; auto input_data = HasTo2DReshapeData(layer) ?
Get2DReshapedData(inputs, GNALimitations::GetMinBatchToFitInBuffer(inputs), 8) : inputs;
auto in_dims = input_data->getDims(); auto in_dims = input_data->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front(); auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size; uint32_t num_rows_in = InferenceEngine::details::product(in_dims) / batch_size;

View File

@ -15,7 +15,9 @@ namespace GNAPluginNS {
* @param input a pointer to data to be reshaped * @param input a pointer to data to be reshaped
* @param maxZeroDimSize the maximum size of zero dimension * @param maxZeroDimSize the maximum size of zero dimension
*/ */
inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input, size_t maxZeroDimSize) { inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input, size_t minZeroDimSize,
size_t maxZeroDimSize) {
IE_ASSERT(minZeroDimSize > 0);
auto dims = input->getDims(); auto dims = input->getDims();
uint32_t numRowsIn = InferenceEngine::details::product(begin(dims), end(dims)); uint32_t numRowsIn = InferenceEngine::details::product(begin(dims), end(dims));
uint32_t numColumnsIn = 1; uint32_t numColumnsIn = 1;
@ -23,7 +25,7 @@ inline InferenceEngine::DataPtr Get2DReshapedData(InferenceEngine::DataPtr input
if (numRowsIn % 8 == 0) { if (numRowsIn % 8 == 0) {
if (dims.size() >= 2 || dims[0] >= maxZeroDimSize) { if (dims.size() >= 2 || dims[0] >= maxZeroDimSize) {
size_t indexDivide = maxZeroDimSize; size_t indexDivide = maxZeroDimSize;
while (indexDivide > 1) { while (indexDivide > minZeroDimSize) {
if ((numRowsIn / 8) % indexDivide == 0) break; if ((numRowsIn / 8) % indexDivide == 0) break;
--indexDivide; --indexDivide;
} }
@ -55,4 +57,5 @@ inline bool HasTo2DReshapeData(InferenceEngine::CNNLayerPtr layer) {
// Don't reshape diagonallayers with bias connection // Don't reshape diagonallayers with bias connection
return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput(); return !GNAPluginNS::LayerInfo(getCreatorLayer(layer->insData.front().lock()).lock()).has32BOutput();
} }
} // namespace GNAPluginNS } // namespace GNAPluginNS

View File

@ -45,4 +45,18 @@ public:
}; };
std::vector<SplitConnectedLayerInfo> splitOutputLayers; std::vector<SplitConnectedLayerInfo> splitOutputLayers;
}; };
// @brief Returns sizes of split outputs to split the input tensor into aligned parts
// not greater than the specified size.
// All parts except possibly the last are multiples of the alignment; the remainder
// (if any) becomes the final, smaller part.
// @param totalSize total number of elements to be split
// @param maxSplitSize upper bound for a single part
// @param alignment required alignment of every full part (default 64 for GNA)
// @return per-part sizes summing to totalSize; empty if maxSplitSize == 0
static std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, uint32_t alignment = 64) {
    std::vector<uint32_t> splitSizes;
    // Treat alignment == 0 as "no alignment" to avoid undefined behavior of % 0.
    if (alignment == 0) {
        alignment = 1;
    }
    // Largest chunk that is a whole multiple of the alignment.
    uint32_t maxAlignedSplitSize = maxSplitSize - maxSplitSize % alignment;
    // If maxSplitSize < alignment, the aligned chunk degenerates to 0 and the loop
    // below would never terminate; fall back to unaligned chunks of maxSplitSize.
    if (maxAlignedSplitSize == 0) {
        maxAlignedSplitSize = maxSplitSize;
    }
    // Nothing can fit at all (maxSplitSize == 0): return no parts instead of looping forever.
    if (maxAlignedSplitSize == 0) {
        return splitSizes;
    }
    uint32_t usedSize = 0;
    while (usedSize < totalSize) {
        uint32_t partSize = std::min(totalSize - usedSize, maxAlignedSplitSize);
        splitSizes.push_back(partSize);
        usedSize += partSize;
    }
    return splitSizes;
}
} // namespace GNAPluginNS } // namespace GNAPluginNS

View File

@ -87,7 +87,7 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
}); });
IE_ASSERT(inputLayer != nullptr); IE_ASSERT(inputLayer != nullptr);
size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() : size_t weightsSize = LayerInfo(prevLayer).has32BOutput() ? nextLayer->outData[0]->getDims().back() :
Get2DReshapedData(nextLayer->outData[0], 8)->getDims()[1]; Get2DReshapedData(nextLayer->outData[0], GNALimitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)->getDims()[1];
std::vector<float> weightsValues(weightsSize, fillValue); std::vector<float> weightsValues(weightsSize, fillValue);
IE_ASSERT(diagLayer != nullptr); IE_ASSERT(diagLayer != nullptr);
diagLayer->_weights = make_shared_blob<float>( diagLayer->_weights = make_shared_blob<float>(
@ -1113,6 +1113,9 @@ void InsertConcatAligningFilterPass::run() {
SizeVector({filterWeights.size()}), SizeVector({filterWeights.size()}),
Layout::C)); Layout::C));
concatAligningFilter->_weights->allocate(); concatAligningFilter->_weights->allocate();
if (!concatAligningFilter->_weights->buffer().as<float*>()) {
THROW_GNA_EXCEPTION << "Failed to allocate weights of size " << filterWeights.size() << " for " << filterName;
}
CopyVectorToBlob(concatAligningFilter->_weights, filterWeights); CopyVectorToBlob(concatAligningFilter->_weights, filterWeights);
@ -1395,15 +1398,20 @@ void EltwiseSplitOverChannelsPass::run() {
THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1"; THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1";
} }
auto oData = l->outData.front(); auto oData = l->outData.front();
auto out_width = GetDataDimSize(oData, DataDimName::W); auto oDims = oData->getDims();
auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end()); auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims));
// gna limit this to be OxFFFF if (totalElementsSize <= GNALimitations::bufferMaxSize) {
auto maxAffineElements = 65536 - 64;
if (totalElementsForOutput <= maxAffineElements) {
continue; continue;
} }
auto totalSplits = 1 + totalElementsForOutput / maxAffineElements; auto firstValuableDim = std::find_if(std::begin(oDims), std::end(oDims), [](size_t val) { return val > 1; });
IE_ASSERT(firstValuableDim != std::end(oDims));
auto splittedElementsSize = *firstValuableDim;
auto splittedDimIx = std::distance(std::begin(oDims), firstValuableDim);
// Split output size should be a multiple of 64 to avoid insertion of align filters
auto splitSizes = GetAlignedSplitSizes(splittedElementsSize,
GNALimitations::bufferMaxSize * splittedElementsSize / totalElementsSize);
pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n"; pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n";
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l); auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l);
@ -1421,27 +1429,13 @@ void EltwiseSplitOverChannelsPass::run() {
auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc(); auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc();
// create split layer outputs // create split layer outputs
size_t usedElements = 0; for (auto elementsNum : splitSizes) {
for (size_t i = 0; i < totalSplits; i++) { auto newDims = oDims;
SizeVector newDims; newDims[splittedDimIx] = elementsNum;
size_t elements_num = std::min(totalElementsForOutput - usedElements,
static_cast<size_t>(maxAffineElements));
if (inputDesc.getDims().size() == 2) {
newDims = SizeVector{1, elements_num};
} else {
elements_num = elements_num - elements_num % out_width;
newDims = SizeVector{1, elements_num / out_width, out_width};
}
auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout()); auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout());
auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc); auto data = std::make_shared<Data>(l->name + "/" + std::to_string(kThEltwiseInput) + "/1", newDesc);
getCreatorLayer(data) = split; getCreatorLayer(data) = split;
split->outData.push_back(data); split->outData.push_back(data);
usedElements += elements_num;
if (usedElements == totalElementsForOutput) {
break;
}
} }
// replacing connection X->eltwise to X->split // replacing connection X->eltwise to X->split
auto oData = CNNLayerFindOutData(l, kThEltwiseInput); auto oData = CNNLayerFindOutData(l, kThEltwiseInput);
@ -1461,7 +1455,7 @@ void EltwiseSplitOverChannelsPass::run() {
concat->outData.push_back(masterEltwise->outData.front()); concat->outData.push_back(masterEltwise->outData.front());
getCreatorLayer(masterEltwise->outData.front()) = concat; getCreatorLayer(masterEltwise->outData.front()) = concat;
for (size_t k = 0; k != totalSplits; k++) { for (size_t k = 0; k != splitSizes.size(); k++) {
auto eltwiseRaw = std::make_shared<EltwiseLayer>( auto eltwiseRaw = std::make_shared<EltwiseLayer>(
LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32}); LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32});
IE_ASSERT(eltwiseRaw != nullptr); IE_ASSERT(eltwiseRaw != nullptr);
@ -1521,7 +1515,9 @@ void SubstituteScaleShiftBroadCastPass::run() {
if (was_reshaped) { if (was_reshaped) {
dataDims = reshaped_data[insData->getName()]; dataDims = reshaped_data[insData->getName()];
} else { } else {
dataDims = HasTo2DReshapeData(l) ? Get2DReshapedData(insData, 8)->getDims() : insData->getDims(); dataDims = HasTo2DReshapeData(l) ?
Get2DReshapedData(insData, GNALimitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() :
insData->getDims();
} }
if (dataDims.size() <= 2) { if (dataDims.size() <= 2) {

View File

@ -12,6 +12,7 @@
#include <ngraph/pattern/op/wrap_type.hpp> #include <ngraph/pattern/op/wrap_type.hpp>
#include <ngraph/rt_info.hpp> #include <ngraph/rt_info.hpp>
#include "backend/gna_limitations.hpp" #include "backend/gna_limitations.hpp"
#include "layers/gna_split_layer.hpp"
using namespace GNAPluginNS; using namespace GNAPluginNS;
@ -19,22 +20,6 @@ NGRAPH_RTTI_DEFINITION(SplitConvolution, "SplitConvolution", 0);
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0); NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0);
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0); NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0);
// Splits the convolution input width into chunks, each a multiple of 64 and no larger
// than what fits into the GNA buffer for the given number of input channels.
static std::vector<int64_t> GetConvSplitSizes(std::shared_ptr<ngraph::Node> conv) {
    const auto& input_shape = conv->get_input_shape(0);
    const uint32_t total_width = input_shape.back();
    const uint32_t channels = input_shape.at(1);
    // Maximum width chunk that still fits into the buffer, rounded down to 64 alignment.
    uint32_t chunk_limit = GNALimitations::bufferMaxSize / channels;
    chunk_limit = chunk_limit - chunk_limit % 64;
    std::vector<int64_t> sizes;
    uint32_t covered = 0;
    while (covered < total_width) {
        const uint32_t chunk = std::min(total_width - covered, chunk_limit);
        sizes.push_back(chunk);
        covered += chunk;
    }
    IE_ASSERT(covered == total_width);
    return sizes;
}
static bool Convert(std::shared_ptr<ngraph::Node> conv, static bool Convert(std::shared_ptr<ngraph::Node> conv,
std::shared_ptr<ngraph::Node> add, std::shared_ptr<ngraph::Node> add,
std::shared_ptr<ngraph::Node> bias, std::shared_ptr<ngraph::Node> bias,
@ -45,15 +30,21 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
return false; return false;
} }
auto split_sizes = GetConvSplitSizes(conv); uint32_t width = conv->get_input_shape(0).back();
uint32_t in_channels = conv->get_input_shape(0).at(1);
auto split_sizes = GetAlignedSplitSizes(width, GNALimitations::bufferMaxSize / in_channels);
IE_ASSERT(split_sizes.size() > 1); IE_ASSERT(split_sizes.size() > 1);
std::vector<int64_t> split_sizes_casted(split_sizes.size());
std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) {
return static_cast<int64_t>(size);
});
/* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1, /* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1,
otherwise this split axis isn't supported */ otherwise this split axis isn't supported */
const int64_t width_axis = conv->get_input_shape(0).size() - 1; const int64_t width_axis = conv->get_input_shape(0).size() - 1;
auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(conv->input_value(0), auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(conv->input_value(0),
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}), ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}),
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes.size()}), split_sizes)); ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes_casted.size()}), split_sizes_casted));
ngraph::copy_runtime_info(conv, split_node); ngraph::copy_runtime_info(conv, split_node);
split_node->set_friendly_name(conv->get_friendly_name() + "/split"); split_node->set_friendly_name(conv->get_friendly_name() + "/split");
ngraph::OutputVector convOutputs; ngraph::OutputVector convOutputs;

View File

@ -54,8 +54,8 @@ protected:
auto params = ngraph::builder::makeParams(ngPrc, { inputShape }); auto params = ngraph::builder::makeParams(ngPrc, { inputShape });
auto const_mult2 = ngraph::builder::makeConstant<float>(ngPrc, inputShape, {-1.0f}); auto const_mult2 = ngraph::builder::makeConstant<float>(ngPrc, inputShape, {-1.0f});
auto sum = ngraph::builder::makeEltwise(params[0], const_mult2, ngraph::helpers::EltwiseTypes::MULTIPLY); auto mul = ngraph::builder::makeEltwise(params[0], const_mult2, ngraph::helpers::EltwiseTypes::MULTIPLY);
function = std::make_shared<ngraph::Function>(sum, params, "EltwiseSplitOverChannelsPassTest"); function = std::make_shared<ngraph::Function>(mul, params, "EltwiseSplitOverChannelsPassTest");
} }
}; };
@ -77,7 +77,8 @@ const std::vector<std::map<std::string, std::string>> configs = {
const std::vector<std::vector<size_t>> inputShape = { const std::vector<std::vector<size_t>> inputShape = {
{1, 67000}, {1, 67000},
{1, 500000} {1, 500000},
{1, 936, 513}
}; };
INSTANTIATE_TEST_SUITE_P(smoke_EltwiseSplitOverChennels, EltwiseSplitOverChannelsPassTest, INSTANTIATE_TEST_SUITE_P(smoke_EltwiseSplitOverChennels, EltwiseSplitOverChannelsPassTest,

View File

@ -47,6 +47,7 @@ std::map<std::vector<size_t>, std::vector<std::vector<size_t>>> basic = {
{{1, 4, 4, 128}, {{}}}, {{1, 4, 4, 128}, {{}}},
{{8}, {{}}}, {{8}, {{}}},
{{5}, {{}}}, {{5}, {{}}},
{{1, 936, 513}, {{}}}
}; };
const auto basicCases = ::testing::Combine( const auto basicCases = ::testing::Combine(

View File

@ -65,7 +65,7 @@ class Get2DReshapedDataTest : public ::testing::Test {
InferenceEngine::Layout layout) const { InferenceEngine::Layout layout) const {
auto data = std::make_shared<InferenceEngine::Data>(input_name, auto data = std::make_shared<InferenceEngine::Data>(input_name,
InferenceEngine::TensorDesc(precision, input_shape.first, layout)); InferenceEngine::TensorDesc(precision, input_shape.first, layout));
auto new_data = GNAPluginNS::Get2DReshapedData(data, max_batch_size); auto new_data = GNAPluginNS::Get2DReshapedData(data, 1, max_batch_size);
ASSERT_EQ(new_data->getDims(), input_shape.second); ASSERT_EQ(new_data->getDims(), input_shape.second);
ASSERT_EQ(new_data->getPrecision(), precision); ASSERT_EQ(new_data->getPrecision(), precision);
ASSERT_EQ(new_data->getLayout(), layout); ASSERT_EQ(new_data->getLayout(), layout);

View File

@ -0,0 +1,36 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <gtest/gtest.h>
// to suppress deprecated definition errors
#define IMPLEMENT_INFERENCE_ENGINE_PLUGIN
#include "layers/gna_split_layer.hpp"
namespace {
// Test vector: (total size, maximum split size, alignment, expected split sizes).
using GetAlignedSplitSizesData = std::tuple<
    uint32_t,              // total size
    uint32_t,              // maximum split size
    uint32_t,              // alignment
    std::vector<uint32_t>  // expected sizes
>;
const std::vector<GetAlignedSplitSizesData> data = {
    GetAlignedSplitSizesData{1024, 100, 64, std::vector<uint32_t>(16, 64)},
    GetAlignedSplitSizesData{151, 100, 64, std::vector<uint32_t>{64, 64, 23}},
    GetAlignedSplitSizesData{151, 65, 32, std::vector<uint32_t>{64, 64, 23}},
    GetAlignedSplitSizesData{151, 65, 1, std::vector<uint32_t>{65, 65, 21}}
};
TEST(GetAlignedSplitSizesTest, testAlignedSplitSizes) {
    // Verify each tuple: actual split sizes must match the expected vector exactly.
    for (const auto &testCase : data) {
        uint32_t totalSize = 0, maxSplitSize = 0, alignment = 0;
        std::vector<uint32_t> expected;
        std::tie(totalSize, maxSplitSize, alignment, expected) = testCase;
        ASSERT_EQ(GNAPluginNS::GetAlignedSplitSizes(totalSize, maxSplitSize, alignment), expected);
    }
}
} // namespace