[GNA] Added fix for eltwise layer with more than 65k elements (#1943)

This commit is contained in:
Andrey Dmitriev 2020-08-28 18:26:31 +03:00 committed by GitHub
parent 245920a95d
commit bdbb04f47b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 231 additions and 0 deletions

View File

@ -256,6 +256,22 @@ inline int CNNLayerFindOutDataIdx(CNNLayerPtr layer, int insDataIdx) {
return std::distance(prevLayer->outData.begin(), outDataIt);
}
/// @brief locates the producer's output DataPtr that feeds the given input index of @p layer,
/// paired with an iterator pointing at @p layer inside that data's inputTo map
/// @throws if @p layer is not registered as a consumer of the located data
inline std::pair<DataPtr, std::map<std::string, CNNLayerPtr>::iterator> CNNLayerFindOutData(CNNLayerPtr layer, int insDataIdx) {
    auto outIdx = CNNLayerFindOutDataIdx(layer, insDataIdx);
    auto producer = CNNNetPrevLayer(layer, insDataIdx);
    auto outData = producer->outData[outIdx];
    // scan the consumers map once, keeping the iterator so the caller can rewire the link
    auto & consumers = getInputTo(outData);
    for (auto it = consumers.begin(); it != consumers.end(); ++it) {
        if (it->second == layer) {
            return {outData, it};
        }
    }
    THROW_GNA_LAYER_EXCEPTION(layer) << "cannot locate input data for: " << insDataIdx;
}
/**
* @brief swap two layer in graph - with modifying input/output references
* also if layers have different dimensions they are preserved, so layers should be dimensions agnostic

View File

@ -368,6 +368,7 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
passes->registerPass<SubstituteSoftSignPass>();
passes->registerPass<ReorderMaxPoolPass>();
passes->registerPass<EltwiseSplitOverChannelsPass>();
passes->registerPass<InsertSplitAligningFilterPass>();
passes->registerPass<InsertConcatAligningFilterPass>();

View File

@ -49,6 +49,19 @@ class Policy {
REMOVE_LAST,
REMOVE_ALL
} NHWCToNCHWPolicy = NHWCToNCHW::REMOVE_ALL;
/**
 * @brief maximum number of elements allowed for a GNA diagonal/affine layer;
 * layers exceeding this limit are split (see EltwiseSplitOverChannelsPass)
 */
class GNAAffineDiagonal {
public:
    enum : uint32_t {
        UNLIMIT,  // 0 disables the limit (and the split pass)
        // GNA HW limits this to 0xFFFF; the default keeps 64 elements of headroom: 65536 - 64 = 65472
        LIMITED_TO_DEFAULT_GNA2_65536 = 65536 - 64
    };
    uint32_t limitedTo = LIMITED_TO_DEFAULT_GNA2_65536;
} GNAAffineDiagonalPolicy;
};
inline std::ostream& operator<<(std::ostream& os, Policy::ScaleShift policy) {

View File

@ -626,6 +626,20 @@ void InsertIdentityLayerPass::run() {
CNNLayerPtr activationLayer =
std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
// TODO: why index is 0 ? - better use direct indexing in getCandidateFunction
// detecting ins-data-idx
size_t insDataIdx = std::numeric_limits<size_t>::max();
for (size_t i = 0; i != l->insData.size(); i++) {
if (getCreatorLayer(l->insData[i].lock()).lock() == prev) {
insDataIdx = i;
break;
}
}
if (insDataIdx == std::numeric_limits<size_t>::max()) {
THROW_GNA_EXCEPTION << "cannot insert identity layer after" << prev->name << " and before " << l->name;
}
auto inputData = l->insData[0].lock();
auto dataPtr = std::make_shared<Data>("identity_data_" + std::to_string(numOfIdentityLayers), inputData->getTensorDesc());
@ -1009,6 +1023,107 @@ static InferenceEngine::Blob::Ptr tileBlob(Blob::Ptr& blob, size_t TileTo) {
return tiledBlob;
}
// Splits an eltwise layer whose output exceeds the GNA affine/diagonal HW element
// limit into smaller eltwise operations over channel chunks:
//   input0 -> Split --\
//                      Eltwise(0..k) -> Concat -> original output data
//   input1 -> Split --/
void EltwiseSplitOverChannelsPass::run() {
    if (getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo == Policy::GNAAffineDiagonal::UNLIMIT) {
        return;
    }

    for (auto & l : *pLayers) {
        if (!LayerInfo(l).isEltwise()) {
            continue;
        }
        auto masterEltwise = std::dynamic_pointer_cast<EltwiseLayer>(l);
        if (l->outData.size() != 1) {
            THROW_GNA_LAYER_EXCEPTION(l) << "number of outputs expected to be 1";
        }
        // the transformation below splits exactly two inputs; fail loudly instead of
        // reading out of bounds (or silently dropping extra inputs)
        if (l->insData.size() != 2) {
            THROW_GNA_LAYER_EXCEPTION(l) << "number of inputs expected to be 2";
        }
        auto oData = l->outData.front();
        auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end());
        auto maxAffineElements = getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo;
        if (totalElementsForOutput <= maxAffineElements) {
            continue;
        }

        // TODO: for now lets put split of 2 elements as restrictions
        auto totalSplits = 1 + totalElementsForOutput / maxAffineElements;
        if (totalSplits > 2) {
            THROW_GNA_LAYER_EXCEPTION(l) << "split layer over output channels on more than 2 layers unsupported";
        }

        pass_trace() << "transforming " << LAYER_NAME(l) << " by splitting it to multiple eltwise operations\n";
        auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(l);

        std::vector<CNNLayerPtr> splitLayers(2);
        for (size_t kThEltwiseInput = 0; kThEltwiseInput != 2; kThEltwiseInput++) {
            // create split layer
            auto splitRaw = std::make_shared<SplitLayer>(
                    LayerParams{l->name + "/split/" + std::to_string(kThEltwiseInput), "Split", Precision::FP32});
            auto split = quantized ? InferenceEngine::injectData<QuantizedLayerParams>(splitRaw) : splitRaw;
            splitLayers[kThEltwiseInput] = split;

            split->insData.push_back(l->insData[kThEltwiseInput]);
            auto inputDesc = l->insData[kThEltwiseInput].lock()->getTensorDesc();
            // need to split this desc
            if (inputDesc.getLayout() != Layout::NC) {
                THROW_GNA_LAYER_EXCEPTION(l)
                    << "cannot split over channel: input " << std::to_string(kThEltwiseInput)
                    << " layout need to be NC";
            }

            // create split layer outputs: full-size chunks followed by one final
            // (possibly partial) chunk which terminates the loop
            for (size_t i = 0;; i++) {
                auto elements_num = std::min(totalElementsForOutput - i * maxAffineElements,
                        static_cast<size_t>(maxAffineElements));

                SizeVector newDims = {1, elements_num};
                auto newDesc = TensorDesc(inputDesc.getPrecision(), newDims, inputDesc.getLayout());
                // name must carry the chunk index i: a fixed "/1" suffix here produced
                // duplicated data names for every output of the same split layer
                auto data = std::make_shared<Data>(
                        l->name + "/" + std::to_string(kThEltwiseInput) + "/" + std::to_string(i), newDesc);
                getCreatorLayer(data) = split;
                split->outData.push_back(data);

                if (elements_num != maxAffineElements) {
                    break;
                }
            }
            // replacing connection X->eltwise to X->split
            auto oData = CNNLayerFindOutData(l, kThEltwiseInput);
            oData.second->second = split;
        }

        // create concatlayer that reassembles the chunked results into the original output data
        auto concatRaw = std::make_shared<ConcatLayer>(
                LayerParams{l->name + "/concat", "Concat", Precision::FP32});
        auto concat = quantized ? InferenceEngine::injectData<QuantizedLayerParams>(concatRaw) : concatRaw;

        concat->outData.push_back(masterEltwise->outData.front());
        getCreatorLayer(masterEltwise->outData.front()) = concat;

        // create new eltwise layers - here 2 hardcode
        for (size_t k = 0; k != totalSplits; k++) {
            auto eltwiseRaw = std::make_shared<EltwiseLayer>(
                    LayerParams{l->name + "/eltwise/" + std::to_string(k), "Eltwise", Precision::FP32});
            eltwiseRaw->_operation = masterEltwise->_operation;
            eltwiseRaw->coeff = masterEltwise->coeff;
            auto eltwise = quantized ? InferenceEngine::injectData<QuantizedLayerParams>(eltwiseRaw) : eltwiseRaw;

            // wire k-th chunk of both inputs into the k-th eltwise
            eltwise->insData.push_back(splitLayers[0]->outData[k]);
            eltwise->insData.push_back(splitLayers[1]->outData[k]);
            getInputTo(splitLayers[0]->outData[k])[eltwise->name] = eltwise;
            getInputTo(splitLayers[1]->outData[k])[eltwise->name] = eltwise;

            SizeVector newDims = splitLayers[1]->outData[k]->getDims();
            auto newDesc = TensorDesc(splitLayers[1]->outData[k]->getPrecision(), newDims,
                    splitLayers[1]->outData[k]->getLayout());
            auto data = std::make_shared<Data>(l->name + "/elwise/out/" + std::to_string(k), newDesc);
            getCreatorLayer(data) = eltwise;
            eltwise->outData.push_back(data);
            getInputTo(data)[concat->name] = concat;
            concat->insData.push_back(data);
        }
    }
}
void SubstituteScaleShiftBroadCastPass::run() {
for (auto & l : *pLayers) {
LayerInfo layerInfo(l);

View File

@ -101,6 +101,10 @@ DECL_PASS(SubstitutePRelu);
*/
DECL_PASS(SubstituteSoftSign);
/**
* @brief split over channels for Elementwise-layer to avoid GNA-HW limitation of 65k elements per eltwise
*/
DECL_PASS(EltwiseSplitOverChannels);
/**
* diagonal layer insertion required in cases where activation followed by split layers, or any other
* topology changing layers

View File

@ -0,0 +1,82 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <memory>
#include <tuple>
#include <string>
#include <ie_core.hpp>
#include "functional_test_utils/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
typedef std::tuple<
InferenceEngine::Precision, // Network Precision
std::string, // Target Device
std::map<std::string, std::string> //Configuration
> EltwiseSplitOverChannelsPassParams;
namespace LayerTestsDefinitions {
// Functional test fixture: builds a 1x67000 multiply, which exceeds the GNA
// affine/diagonal element limit (65536 - 64) and therefore must be handled by
// EltwiseSplitOverChannelsPass.
class EltwiseSplitOverChannelsPassTest : public testing::WithParamInterface<EltwiseSplitOverChannelsPassParams>,
    public LayerTestsUtils::LayerTestsCommon {
public:
    /// @brief builds a readable test name from precision / device / plugin config
    static std::string getTestCaseName(testing::TestParamInfo<EltwiseSplitOverChannelsPassParams> obj) {
        InferenceEngine::Precision netPrecision;
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        std::tie(netPrecision, targetDevice, configuration) = obj.param;

        std::ostringstream result;
        result << "netPRC=" << netPrecision.name() << "_";
        result << "targetDevice=" << targetDevice << "_";
        for (auto const& configItem : configuration) {
            result << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        return result.str();
    }

protected:
    void SetUp() override {
        InferenceEngine::Precision netPrecision;
        std::tie(netPrecision, targetDevice, configuration) = this->GetParam();
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        // 67000 elements > 65472 limit, so the pass has to split the eltwise
        auto params = ngraph::builder::makeParams(ngPrc, { {1, 67000} });
        auto const_mult2 = ngraph::builder::makeConstant(ngPrc, {1, 67000}, {-1.0f});

        auto sum = ngraph::builder::makeEltwise(params[0], const_mult2, ngraph::helpers::EltwiseTypes::MULTIPLY);
        // function name fixed: was "RemovePermutationPass", copy-pasted from another test
        function = std::make_shared<ngraph::Function>(sum, params, "EltwiseSplitOverChannelsPass");
    }
};
// Runs inference on the GNA plugin and compares results against the reference implementation
TEST_P(EltwiseSplitOverChannelsPassTest, CompareWithRefImpl) {
    Run();
};
// network precisions the test is instantiated for
const std::vector<InferenceEngine::Precision> netPrecisions = {
        InferenceEngine::Precision::FP32,
        InferenceEngine::Precision::FP16,
};

// GNA plugin configurations: exact-bit software emulation, compact mode off
const std::vector<std::map<std::string, std::string>> configs = {
    {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
        {"GNA_COMPACT_MODE", "NO"}
    }
};
// Instantiates the parameterized test over all precision/device/config combinations.
// Instantiation prefix fixed: was misspelled "EltwiseSplitOverChennels".
INSTANTIATE_TEST_CASE_P(EltwiseSplitOverChannels, EltwiseSplitOverChannelsPassTest,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configs)),
    EltwiseSplitOverChannelsPassTest::getTestCaseName);
} // namespace LayerTestsDefinitions