[GNA] add support for 4d eltwise (#1353)
* [GNA] add support for 4d eltwise * cpplint fix * refactor * fix FP16 broadcast pass * change anonymous function to InferenceEngine::details::product * introduce anonymous function to the pass & add layer checks * refactor tileBlob lambda
This commit is contained in:
@@ -957,8 +957,30 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
|
||||
|
||||
auto outputs = *layer->outData.begin();
|
||||
|
||||
uint32_t num_rows_in = FROM_IR_DIM(inputs4Bytes, 1);
|
||||
uint32_t num_columns_in = FROM_IR_DIM(inputs4Bytes, 2);
|
||||
auto in_4b_order = getFromIRDimsOrderNCHW(inputs4Bytes->getLayout());
|
||||
auto in_4b_batch = FROM_IR_DIM(inputs4Bytes, in_4b_order[0]);
|
||||
auto in_4b_channels = FROM_IR_DIM(inputs4Bytes, in_4b_order[1]);
|
||||
auto in_4b_height = FROM_IR_DIM(inputs4Bytes, in_4b_order[2]);
|
||||
auto in_4b_width = FROM_IR_DIM(inputs4Bytes, in_4b_order[3]);
|
||||
auto in_4b_total_size = in_4b_batch * in_4b_channels * in_4b_height * in_4b_width;
|
||||
|
||||
auto in_2b_order = getFromIRDimsOrderNCHW(inputs2Bytes->getLayout());
|
||||
auto in_2b_batch = FROM_IR_DIM(inputs2Bytes, in_2b_order[0]);
|
||||
auto in_2b_channels = FROM_IR_DIM(inputs2Bytes, in_2b_order[1]);
|
||||
auto in_2b_height = FROM_IR_DIM(inputs2Bytes, in_2b_order[2]);
|
||||
auto in_2b_width = FROM_IR_DIM(inputs2Bytes, in_2b_order[3]);
|
||||
auto in_2b_total_size = in_2b_batch * in_2b_channels * in_2b_height * in_2b_width;
|
||||
|
||||
if ((in_2b_batch > 1) || (in_4b_batch > 1)) {
|
||||
THROW_GNA_LAYER_EXCEPTION(layer) << " Inputs with batch size that not equals 1 is not supported";
|
||||
}
|
||||
|
||||
if (in_4b_total_size != in_2b_total_size) {
|
||||
THROW_GNA_LAYER_EXCEPTION(layer) << " Inputs size mismatch " << in_4b_total_size << " != " << in_2b_total_size;
|
||||
}
|
||||
|
||||
uint32_t num_rows_in = in_4b_channels * in_4b_height * in_4b_width;
|
||||
uint32_t num_columns_in = in_4b_batch;
|
||||
uint32_t num_rows_out = num_rows_in;
|
||||
uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
|
||||
|
||||
|
||||
@@ -384,6 +384,7 @@ void GNAPlugin::LoadNetwork(ICNNNetwork & _network) {
|
||||
passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
|
||||
passes->registerPass<SubstituteScaleShiftBroadCastPass>();
|
||||
passes->registerPass<FuseMultipleIdentitiesPass>();
|
||||
passes->registerPass<BroadcastConstPass>();
|
||||
passIdx = passes->run(passIdx);
|
||||
};
|
||||
|
||||
|
||||
@@ -986,7 +986,6 @@ void InsertSplitAligningFilterPass::run() {
|
||||
CNNNetworkInsertLayer(l, nullptr, filterWithQuant, splitOutIndex);
|
||||
}
|
||||
|
||||
|
||||
// search data that starts from unaligned location
|
||||
currentOffset += outputSize * bytesPerSplitElement;
|
||||
splitOutIndex++;
|
||||
@@ -994,6 +993,22 @@ void InsertSplitAligningFilterPass::run() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Creates a new blob of TileTo elements filled with repeated copies of the source blob.
 * @param blob   source blob to replicate (precision is preserved)
 * @param TileTo total number of elements in the result; must be a whole multiple of
 *               blob->size(), otherwise the tail of the result would stay uninitialized
 * @return newly allocated blob containing TileTo / blob->size() copies of the source data
 * @throws IE exception if the source blob is empty or TileTo is not a multiple of its size
 */
static InferenceEngine::Blob::Ptr tileBlob(Blob::Ptr& blob, size_t TileTo) {
    auto weightsElements = blob->size();
    auto weightsBytes = blob->byteSize();
    if (weightsElements == 0) {
        THROW_IE_EXCEPTION << "Blob size is 0";
    }
    // Reject sizes that do not divide evenly: a partial last copy would leave
    // uninitialized bytes at the end of the tiled blob.
    if (TileTo % weightsElements != 0) {
        THROW_IE_EXCEPTION << "Cannot tile blob of " << weightsElements << " elements to " << TileTo;
    }

    auto tiledBlob = make_plain_blob(blob->getTensorDesc().getPrecision(), { TileTo });
    tiledBlob->allocate();

    // size_t index avoids the signed/unsigned comparison of an int loop counter
    for (size_t i = 0; i < TileTo / weightsElements; ++i) {
        ie_memcpy(tiledBlob->buffer().as<uint8_t*>() + i * weightsBytes, weightsBytes, blob->cbuffer(), weightsBytes);
    }
    return tiledBlob;
}
|
||||
|
||||
void SubstituteScaleShiftBroadCastPass::run() {
|
||||
for (auto & l : *pLayers) {
|
||||
LayerInfo layerInfo(l);
|
||||
@@ -1036,34 +1051,15 @@ void SubstituteScaleShiftBroadCastPass::run() {
|
||||
gnalog() << "Substitution ScaleShift broadcast for layer: " << l->name << "\n";
|
||||
// approach 1 - weights tiling
|
||||
if (getPassManager()->getPolicy().ScaleShiftPolicy == Policy::ScaleShift::WEIGHTS_TILING) {
|
||||
auto tileBlob = [](Blob::Ptr &blob, size_t TileTo){
|
||||
auto weightsElements = blob->size();
|
||||
auto weightsBytes = blob->byteSize();
|
||||
if (weightsElements == 0) {
|
||||
THROW_IE_EXCEPTION << "Blob size is 0";
|
||||
}
|
||||
if (TileTo % weightsElements) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto tiledBlob = make_plain_blob(blob->getTensorDesc().getPrecision(), {TileTo});
|
||||
tiledBlob->allocate();
|
||||
|
||||
|
||||
for (int i=0; i != TileTo / weightsElements; i++) {
|
||||
ie_memcpy(tiledBlob->buffer().as<uint8_t*>() + i * weightsBytes, weightsBytes, blob->cbuffer(), weightsBytes);
|
||||
}
|
||||
blob = tiledBlob;
|
||||
return true;
|
||||
};
|
||||
|
||||
if (!tileBlob(scaleShift->_weights, nElements)) {
|
||||
if (nElements % scaleShift->_weights->size()) {
|
||||
THROW_GNA_EXCEPTION << "Cannot tile weights for layer: " << l->name << ", due to weights size not GCD of dims product";
|
||||
}
|
||||
scaleShift->_weights = tileBlob(scaleShift->_weights, nElements);
|
||||
if (scaleShift->_biases) {
|
||||
if (!tileBlob(scaleShift->_biases, nElements)) {
|
||||
if (nElements % scaleShift->_biases->size()) {
|
||||
THROW_GNA_EXCEPTION << "Cannot tile biases for layer: " << l->name << ", due to biases size not GCD of dims product";
|
||||
}
|
||||
scaleShift->_biases = tileBlob(scaleShift->_biases, nElements);
|
||||
}
|
||||
|
||||
// currently data type no providing reshape method of tensor desc
|
||||
@@ -1076,6 +1072,51 @@ void SubstituteScaleShiftBroadCastPass::run() {
|
||||
}
|
||||
}
|
||||
|
||||
void BroadcastConstPass::run() {
|
||||
for (auto& constLayer : *pLayers) {
|
||||
if (!LayerInfo(constLayer).isConst()) {
|
||||
continue;
|
||||
}
|
||||
auto isNonFunctional = [](CNNLayerPtr l) {
|
||||
return LayerInfo(l).isNonFunctional();
|
||||
};
|
||||
if (!CNNNetHasNextLayerSkipCertain(constLayer, 0, 0, isNonFunctional)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto nextLayer = CNNNetGetNextLayerSkipCertain(constLayer, 0, 0, isNonFunctional).first;
|
||||
|
||||
if (!LayerInfo(nextLayer).isEltwise()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto constDims = constLayer->outData.front()->getTensorDesc().getDims();
|
||||
auto constDimsSize = product(constDims.begin(), constDims.end());
|
||||
auto eltwiseDims = nextLayer->outData.front()->getTensorDesc().getDims();
|
||||
auto eltwiseDimsSize = product(eltwiseDims.begin(), eltwiseDims.end());
|
||||
|
||||
if (constDimsSize == eltwiseDimsSize) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (eltwiseDimsSize % constDimsSize) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (constLayer->blobs.find("custom") == constLayer->blobs.end()) {
|
||||
THROW_GNA_LAYER_EXCEPTION(constLayer) << "Const layer " << constLayer->name << " is missing 'custom' parameter";
|
||||
}
|
||||
|
||||
auto currentConstBlob = constLayer->blobs.find("custom")->second;
|
||||
|
||||
constLayer->blobs.find("custom")->second = tileBlob(currentConstBlob, eltwiseDimsSize);
|
||||
|
||||
constLayer->outData.front()->setDims(nextLayer->outData.front()->getDims());
|
||||
constLayer->outData.front()->setLayout(nextLayer->outData.front()->getLayout());
|
||||
gnalog() << "Const layer '" << constLayer->name << "' was changed to match output of '" << nextLayer->name << "'\n";
|
||||
}
|
||||
}
|
||||
|
||||
void UnrollLSTMCellPass::run() {
|
||||
InferenceEngine::NetPass::UnrollRNN_if(*getPassManager()->getNetwork(), [] (const RNNCellBase& rnn) -> bool {
|
||||
if (rnn.clip != 0.0f)
|
||||
|
||||
@@ -160,6 +160,11 @@ DECL_PASS_BEFORE_COPY(RemoveConst);
|
||||
*/
|
||||
DECL_PASS(FuseMultipleIdentities);
|
||||
|
||||
/**
|
||||
* @brief Broadcast data in Const layer
|
||||
*/
|
||||
DECL_PASS(BroadcastConst);
|
||||
|
||||
struct PassManagerSettings {
|
||||
Policy policy;
|
||||
/// @brief whether to run passes before copy
|
||||
|
||||
@@ -0,0 +1,176 @@
|
||||
// Copyright (C) 2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include <ie_core.hpp>
|
||||
|
||||
#include "common_test_utils/common_utils.hpp"
|
||||
#include "functional_test_utils/plugin_cache.hpp"
|
||||
#include "functional_test_utils/layer_test_utils.hpp"
|
||||
#include "functional_test_utils/blob_utils.hpp"
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
#include "ngraph_functions/pass/convert_prc.hpp"
|
||||
|
||||
typedef std::tuple<
|
||||
InferenceEngine::Precision, // Network Precision
|
||||
std::string, // Target Device
|
||||
std::map<std::string, std::string>, // Configuration
|
||||
ngraph::helpers::EltwiseTypes // Type of eltwise
|
||||
> eltwiseParams;
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
// Checks GNA support for a 4d eltwise operation where the second operand is a
// constant that must be broadcast to the shape of the first operand.
class Eltwise4dBroadcast : public testing::WithParamInterface<eltwiseParams>,
    public LayerTestsUtils::LayerTestsCommon {
public:
    // Produces a human-readable test name from the parameter tuple.
    static std::string getTestCaseName(testing::TestParamInfo<eltwiseParams> obj) {
        InferenceEngine::Precision netPrecision;
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        ngraph::helpers::EltwiseTypes eltwiseType;
        std::tie(netPrecision, targetDevice, configuration, eltwiseType) = obj.param;

        std::ostringstream name;
        name << "netPRC=" << netPrecision.name() << "_";
        name << "targetDevice=" << targetDevice << "_";
        for (auto const& configItem : configuration) {
            name << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        name << "_eltwiseType=" << eltwiseType;
        return name.str();
    }

protected:
    // Builds: Parameter{1,72} -> Reshape{1,1,6,12} -> Eltwise(Const{1,1,1,12}) -> Reshape{1,72} -> Result
    void SetUp() override {
        InferenceEngine::Precision netPrecision;
        ngraph::helpers::EltwiseTypes eltwiseType;
        std::tie(netPrecision, targetDevice, configuration, eltwiseType) = this->GetParam();
        const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        outPrc = InferenceEngine::Precision::FP32;

        auto params = ngraph::builder::makeParams(ngPrc, { {1, 72} });

        const std::vector<size_t> shape4d = { 1, 1, 6, 12 };
        auto toShape4d = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, shape4d);
        auto input4d = std::make_shared<ngraph::opset1::Reshape>(params[0], toShape4d, false);

        // Random constant with a trailing dim that matches the input's - forces broadcast
        auto broadcastConst = ngraph::builder::makeConstant(ngPrc, { 1, 1, 1, 12 }, {}, true);
        auto eltwise = ngraph::builder::makeEltwise(input4d, broadcastConst, eltwiseType);

        const std::vector<size_t> shape2d = { 1, 72 };
        auto toShape2d = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, shape2d);
        auto output2d = std::make_shared<ngraph::opset1::Reshape>(eltwise, toShape2d, false);

        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(output2d) };
        function = std::make_shared<ngraph::Function>(results, params, "Eltwise4dBroadcast");
    }
};
|
||||
|
||||
// Checks GNA support for a 4d eltwise operation on two same-shaped network inputs.
class Eltwise4dMultipleInput : public testing::WithParamInterface<eltwiseParams>,
    public LayerTestsUtils::LayerTestsCommon {
public:
    // Produces a human-readable test name from the parameter tuple.
    static std::string getTestCaseName(testing::TestParamInfo<eltwiseParams> obj) {
        InferenceEngine::Precision netPrecision;
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        ngraph::helpers::EltwiseTypes eltwiseType;
        std::tie(netPrecision, targetDevice, configuration, eltwiseType) = obj.param;

        std::ostringstream name;
        name << "netPRC=" << netPrecision.name() << "_";
        name << "targetDevice=" << targetDevice << "_";
        for (auto const& configItem : configuration) {
            name << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        name << "_eltwiseType=" << eltwiseType;
        return name.str();
    }

protected:
    // Builds: two Parameters{1,72} -> Reshape{1,1,6,12} each -> Eltwise -> Reshape{1,72} -> Result
    void SetUp() override {
        InferenceEngine::Precision netPrecision;
        ngraph::helpers::EltwiseTypes eltwiseType;
        std::tie(netPrecision, targetDevice, configuration, eltwiseType) = this->GetParam();
        const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        outPrc = InferenceEngine::Precision::FP32;

        auto params = ngraph::builder::makeParams(ngPrc, { {1, 72}, {1, 72} });

        // One shape constant is shared by both input reshapes
        const std::vector<size_t> shape4d = { 1, 1, 6, 12 };
        auto toShape4d = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, shape4d);
        auto lhs4d = std::make_shared<ngraph::opset1::Reshape>(params[0], toShape4d, false);

        auto rhs4d = std::make_shared<ngraph::opset1::Reshape>(params[1], toShape4d, false);

        auto eltwise = ngraph::builder::makeEltwise(lhs4d, rhs4d, eltwiseType);

        const std::vector<size_t> shape2d = { 1, 72 };
        auto toShape2d = std::make_shared<ngraph::opset1::Constant>(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, shape2d);
        auto output2d = std::make_shared<ngraph::opset1::Reshape>(eltwise, toShape2d, false);

        ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(output2d) };
        function = std::make_shared<ngraph::Function>(results, params, "Eltwise4dMultipleInput");
    }
};
|
||||
|
||||
// Builds the network defined in SetUp() and compares plugin results against
// the reference implementation. (Stray semicolons after the test bodies
// removed - they trigger cpplint / -Wextra-semi warnings.)
TEST_P(Eltwise4dBroadcast, CompareWithRefImpl) {
    Run();
}

TEST_P(Eltwise4dMultipleInput, CompareWithRefImpl) {
    Run();
}
|
||||
|
||||
// Network precisions exercised by both test fixtures.
const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16
};

// Plugin configuration for the single-input (broadcast) case:
// software-exact GNA mode with one input scale factor.
const std::vector<std::map<std::string, std::string>> configs = {
    {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
        {"GNA_SCALE_FACTOR_0", "1638.4"}
    }
};

// Plugin configuration for the two-input case: one scale factor per input.
const std::vector<std::map<std::string, std::string>> configsMultiple = {
    {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
        {"GNA_SCALE_FACTOR_0", "1638.4"},
        {"GNA_SCALE_FACTOR_1", "1638.4"}
    }
};

// Eltwise operation kinds covered by the tests.
const std::vector<ngraph::helpers::EltwiseTypes> eltwiseOpTypes = {
    ngraph::helpers::EltwiseTypes::MULTIPLY,
    ngraph::helpers::EltwiseTypes::SUBTRACT,
    ngraph::helpers::EltwiseTypes::ADD
};
|
||||
|
||||
// Instantiates the broadcast fixture over the Cartesian product of
// precision x device x config x eltwise type.
INSTANTIATE_TEST_CASE_P(Eltwise4d, Eltwise4dBroadcast,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configs),
        ::testing::ValuesIn(eltwiseOpTypes)),
    Eltwise4dBroadcast::getTestCaseName);

// Same product for the two-input fixture, using the per-input scale-factor configs.
INSTANTIATE_TEST_CASE_P(Eltwise4d, Eltwise4dMultipleInput,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configsMultiple),
        ::testing::ValuesIn(eltwiseOpTypes)),
    Eltwise4dMultipleInput::getTestCaseName);
|
||||
|
||||
} // namespace LayerTestsDefinitions
|
||||
Reference in New Issue
Block a user