[GNA]: Cleanup policy (#6216)
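This change removes the GNAPluginNS::Policy class (gna_plugin_policy.hpp) and everything that carried it around: GNAGraphCompiler::setPolicy, GNAPlugin::SetPolicy, PassManagerSettings::policy, IPassManager::getPolicy and the test-side withPolicy hooks. The former default policy values become the only behaviour: concat alignment always takes the FAST filter path, NHWC-to-NCHW permutations are always removed (ReversePermutationsPass is dropped), trivial-concat flattening stays enabled, and the eltwise split threshold is hard-coded. The gist of the change, with lines taken from the hunks below:

    // before: the policy object was threaded through the pass manager and the graph compiler
    auto passes = make_shared<PassManager>(PassManagerSettings{policy, runBeforeCopy, lowPrecision}, network);
    graphCompiler.setPolicy(policy);

    // after: no policy argument; the former defaults are inlined at the use sites,
    // e.g. in EltwiseSplitOverChannelsPass (GNA limits this to 0xFFFF)
    auto passes = make_shared<PassManager>(PassManagerSettings{runBeforeCopy, lowPrecision}, network);
    auto maxAffineElements = 65536 - 64;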
@@ -34,7 +34,6 @@
#include "layers/gna_crop_layer.hpp"
#include "layers/gna_fake_quantize_layer.hpp"
#include "round_float_define.hpp"
-#include "gna_plugin_policy.hpp"
#include "gna_groups.hpp"
#include "backend/gna_limitations.hpp"

@@ -62,10 +61,6 @@ void GNAGraphCompiler::setGNAFlagsPtr(std::shared_ptr<GNAPluginNS::GNAFlags> gna
this->gnaFlags = std::move(gnaFlagsPtr);
}

-void GNAGraphCompiler::setPolicy(GNAPluginNS::Policy policyToSet) {
-this->policy = policyToSet;
-}
-
intel_dnn_component_t * GNAGraphCompiler::find_first_unused_input(InferenceEngine::CNNLayerPtr current) {
if (current->insData.empty())
return nullptr;
@@ -547,10 +542,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
auto effectiveInputWidth = in_width;
auto effectiveInputHeight = in_height;

-if (policy.cnn2dInputPaddingSupported) {
-effectiveInputWidth += convolution._padding_x * 2;
-effectiveInputHeight += convolution._padding_y * 2;
-} else if (convolution._padding_x != 0 || convolution._padding_y != 0 ||
+if (convolution._padding_x != 0 || convolution._padding_y != 0 ||
convolution._pads_end.at(X_AXIS) != 0 || convolution._pads_end.at(Y_AXIS) != 0) {
THROW_GNA_LAYER_EXCEPTION(layer) << "Convolution's input padding is not supported";
}
@@ -1641,7 +1633,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
uint32_t num_rows_copied = 0;
// in case of left alignment succeed, but due to number of elements not multiple of 8 we need to insert align_filter
// we are improving it by inserting copy layer of size that covers most of elements - remained max of 32x31 affine filter
-if (policy.ConcatAlignmentPolicy == Policy::ConcatAlignment::FAST && 0 == numRowsPadded && ALIGN(num_rows_in, 32) > 32) {
+if (0 == numRowsPadded && ALIGN(num_rows_in, 32) > 32) {
// can we use copy at all
num_rows_copied = ALIGN(num_rows_in, 32) - 32;

@@ -27,7 +27,6 @@
#include "backend/gna_limitations.hpp"
#include "gna_device.hpp"
#include "gna_data_types.hpp"
-#include "gna_plugin_policy.hpp"

namespace GNAPluginNS {
class GNAGraphCompiler {
@@ -36,7 +35,6 @@ private:
std::shared_ptr<GNAPluginNS::gna_memory_type> gnamem;
std::shared_ptr<GNAPluginNS::InputDesc> inputDesc;
std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlags;
-Policy policy;

// layers with extra storage for connections and additional
// non trivial processing
@@ -64,7 +62,6 @@ public:
void setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr);
void setInputDescPtr(std::shared_ptr<GNAPluginNS::InputDesc> inputDescPtr);
void setGNAFlagsPtr(std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlagsPtr);
-void setPolicy(GNAPluginNS::Policy policy);

void fillMemoryConnections(std::unordered_map<std::string,
std::vector<InferenceEngine::CNNLayerPtr>> &memoryPairs);

@@ -738,7 +738,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
// network optimisation phases
int passIdx = 0;
auto run_passes = [&] (const CNNNetwork& network, bool runBeforeCopy, bool lowPrecision) {
-auto passes = make_shared<PassManager>(PassManagerSettings{policy, runBeforeCopy, lowPrecision}, network);
+auto passes = make_shared<PassManager>(PassManagerSettings{runBeforeCopy, lowPrecision}, network);
passes->registerPass<RemoveConstPass>();
passes->registerPass<UnrollTIPass>();
passes->registerPass<RemoveConstPass>();
@@ -765,13 +765,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
passes->registerPass<FlattenTrivialConcatPass>();
passes->registerPass<InsertConcatAligningFilterPass>();
passes->registerPass<ReorderConcatInputsPass>();
-if (policy.PermutePolicy != Policy::Permute::DISABLED) {
-passes->registerPass<ReversePermutationsPass>();
-}
-if (policy.NHWCToNCHWPolicy != Policy::NHWCToNCHW::DISABLED) {
-passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
-}
-
+passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
passes->registerPass<InsertIdentityLayerPass>();
passes->registerPass<BreakFusingOfOutputLayersPass>();
passes->registerPass<InsertDiagonalLayerPass>();
@@ -840,9 +834,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {

auto sortedNet = CNNNetSortTopologicallyEx(newNet, make_fuzed_order);

-// passing policy to compiler
-graphCompiler.setPolicy(policy);
-
if (sortedNet.empty()) {
THROW_GNA_EXCEPTION << "Sorted network is empty";
}

@@ -21,7 +21,6 @@
#include "backend/am_intel_dnn.hpp"
#include "gna_data_types.hpp"
#include "gna_graph_compiler.hpp"
-#include "gna_plugin_policy.hpp"
#include "gna_plugin_log.hpp"
#include "gna_plugin_config.hpp"
#include <legacy/ie_util_internal.hpp>
@@ -69,8 +68,6 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {

intel_dnn_number_type_t output_type = kDnnInt;

-GNAPluginNS::Policy policy;
-
#if GNA_LIB_VER == 2
void createRequestConfigsForGnaModels();
#endif
@@ -158,11 +155,6 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
INFERENCE_ENGINE_DEPRECATED("Use InferRequest::QueryState instead")
std::vector<InferenceEngine::IVariableStateInternal::Ptr> QueryState();

-/**
-* test-wise API
-*/
-void SetPolicy(GNAPluginNS::Policy p) {policy = p;}
-
/**
* QueryMetrics API
*/

@@ -1,96 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <ostream>
-namespace GNAPluginNS {
-/**
-* @brief policy agregates various settings that cannot be tweak using configuration options right now,
-* and essential to keep test coverage for options both in on and off cases
-*/
-class Policy {
-public:
-/**
-* @brief for scaleshift substitution, weight tiling simplify final graph but have extra weights overhead
-* if not defined scaleshift broadcast will result in creating multiple diagonal layers instead of weight tiling
-*/
-enum class ScaleShift {
-WEIGHTS_TILING,
-/**
-* GNA has limited amount of batch so even existed topologies cannot be substituted with only batching,
-* this option combines batch and weights tiling
-*/
-BATCH_AND_WEIGHTS_TILING,
-DIAGLAYER_TILING
-} ScaleShiftPolicy = ScaleShift::WEIGHTS_TILING;
-
-/**
-* Policy on whether to substitute permute layers or not
-*/
-enum class Permute {
-DISABLED,
-AUTO_PERMUTE
-} PermutePolicy = Permute::DISABLED;
-
-enum class FlattenTrivialConcatConversion {
-DISABLED,
-ENABLED
-} ConcatConversionPolicy = FlattenTrivialConcatConversion::ENABLED;
-
-enum class ConcatAlignment {
-DISABLED,
-DISABLED_FOR_FP32,
-ENABLED,
-FAST
-} ConcatAlignmentPolicy = ConcatAlignment::FAST;
-
-/**
-* Policy to support --disable_nhwc_to_nchw option in MO
-*/
-enum class NHWCToNCHW {
-DISABLED,
-REMOVE_LAST,
-REMOVE_ALL
-} NHWCToNCHWPolicy = NHWCToNCHW::REMOVE_ALL;
-
-/**
-* @brief trim of gna diagonal affine layer maximum elements number
-*/
-class GNAAffineDiagonal {
-public:
-enum : uint32_t {
-UNLIMIT,
-// gna limit this to be OxFFFF
-LIMITED_TO_DEFAULT_GNA2_65536 = 65536 - 64
-};
-uint32_t limitedTo = LIMITED_TO_DEFAULT_GNA2_65536;
-} GNAAffineDiagonalPolicy;
-
-bool cnn2dInputPaddingSupported = false;
-};
-
-inline std::ostream& operator<<(std::ostream& os, Policy::ScaleShift policy) {
-switch (policy) {
-case Policy::ScaleShift::WEIGHTS_TILING : os << "WEIGHTS_TILING"; break;
-case Policy::ScaleShift::BATCH_AND_WEIGHTS_TILING: os << "BATCH_AND_WEIGHTS_TILING"; break;
-case Policy::ScaleShift::DIAGLAYER_TILING : os << "DIAGLAYER_TILING"; break;
-default : os.setstate(std::ios_base::failbit);
-}
-return os;
-}
-
-inline std::ostream& operator<<(std::ostream& os, Policy::ConcatAlignment policy) {
-switch (policy) {
-case Policy::ConcatAlignment::DISABLED : os << "DISABLED"; break;
-case Policy::ConcatAlignment::DISABLED_FOR_FP32 : os << "DISABLED_FOR_FP32"; break;
-case Policy::ConcatAlignment::ENABLED : os << "ENABLED"; break;
-case Policy::ConcatAlignment::FAST : os << "FAST"; break;
-default : os.setstate(std::ios_base::failbit);
-}
-return os;
-}
-
-
-} // namespace GNAPluginNS

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

-#include "gna_plugin_policy.hpp"
#include <vector>
#include <string>
#include <memory>
@@ -613,99 +612,6 @@ void SubstitutePReluPass::run() {
}
}

-void ReversePermutationsPass::run() {
-OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReversePermutationsPass");
-std::function<CNNLayerPtr(CNNLayerPtr, std::function<bool(CNNLayerPtr)>)> prevLayerSkipCertain
-= [&prevLayerSkipCertain](CNNLayerPtr layer, std::function<bool(CNNLayerPtr)> shouldSkip) -> CNNLayerPtr {
-if (CNNNetHasPrevLayer(layer.get())) {
-return nullptr;
-}
-auto prev = CNNNetPrevLayer(layer);
-
-if (!shouldSkip(prev)) return prevLayerSkipCertain(prev, shouldSkip);
-
-return prev;
-};
-
-std::function<CNNLayerPtr(CNNLayerPtr)> nextLayerSkipReshape = [&nextLayerSkipReshape](CNNLayerPtr layer) -> CNNLayerPtr {
-if (layer->outData.empty()) {
-return nullptr;
-}
-if (getInputTo(layer->outData.front()).size() != 1) {
-return nullptr;
-}
-auto next = getInputTo(layer->outData.front()).begin()->second;
-
-if (LayerInfo(next).isNonFunctional()) return nextLayerSkipReshape(next);
-
-return next;
-};
-
-auto prevConv = [&prevLayerSkipCertain](CNNLayerPtr layer) -> CNNLayerPtr {
-return prevLayerSkipCertain(layer, [] (CNNLayerPtr l2) {
-return
-LayerInfo(l2).isNonFunctional() ||
-LayerInfo(l2).isPooling() ||
-LayerInfo(l2).isActivation();
-});
-};
-
-std::unordered_set<std::string> affineWithPermutedWeights;
-std::list<CNNLayerPtr> permutationstoRemove;
-
-for (auto & l : *pLayers) {
-if (!LayerInfo(l).isPermute()) {
-continue;
-}
-
-auto layerOrder = l->GetParamAsInts("order");
-
-if (layerOrder != std::vector<int>({0, 3, 2, 1})) {
-THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << ", order: was " << l->GetParamAsString("order") <<
-", but support order is 0,3,2,1";
-}
-
-// search for it's input convolution
-auto prev = prevConv(l);
-
-// pooling no used in speech models without convolution
-if (!prev) {
-THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << " no valid input to that layer";
-}
-
-// we can remove that permutation if it is input to ScaleShift or FC layer
-auto next = nextLayerSkipReshape(l);
-if (!next || !LayerInfo(next).isFullyConnected()) {
-THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << " no valid output of that layer";
-}
-
-permutationstoRemove.push_back(l);
-
-// removing that permutation layer and saving information about affine
-affineWithPermutedWeights.insert(next->name);
-}
-
-for (auto && toRemove : permutationstoRemove) {
-CNNNetworkRemoveLayer(toRemove);
-}
-
-// search for conv->affine sequences
-for (auto & l : *pLayers) {
-if (!LayerInfo(l).isFullyConnected() || 0 != affineWithPermutedWeights.count(l->name)) {
-continue;
-}
-// found an affine layer that not involved in permutations removing
-// searching whether it has direct input from convolution
-auto prevConvLayer = prevConv(l);
-if (!prevConvLayer) continue;
-
-auto directPrev = CNNNetPrevLayer(l);
-
-// TODO : make new permute
-CNNNetworkInsertLayer(l, directPrev, CNNLayerPtr(nullptr));
-}
-}
-
void RemovePermutationsNHWCToNCHWPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "RemovePermutationsNHWCToNCHWPass");
std::set<CNNLayerPtr> permutations_to_remove;
@@ -720,7 +626,7 @@ void RemovePermutationsNHWCToNCHWPass::run() {

if (prev == nullptr || next == nullptr) continue;

-if (LayerInfo(prev).isPermute() && getPassManager()->getPolicy().NHWCToNCHWPolicy == Policy::NHWCToNCHW::REMOVE_ALL) {
+if (LayerInfo(prev).isPermute()) {
permutations_to_remove.insert(prev);
}

@@ -1040,9 +946,6 @@ void FlattenTrivialConcatPass::run() {
// 1, 1, 5, 3 then for axis 0, 1, 2 the change will be made and inputs will be reshaped to 1, 15,
// but for shape 2, 1, 5, 3 only axis 0 is valid and inputs will reshape to 1, 30
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
-if (getPassManager()->getPolicy().ConcatConversionPolicy == Policy::FlattenTrivialConcatConversion::DISABLED) return;
-if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED) return;
-if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) return;

auto getLayerByIndex = [](int idx, ConcatLayer* concatLayer) {
auto input = concatLayer->insData[idx];
@@ -1118,14 +1021,6 @@ void FlattenTrivialConcatPass::run() {
void InsertConcatAligningFilterPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertConcatAligningFilterPass");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
-
-if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED) {
-return;
-}
-// aligning specific not required in fp32 mode
-if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) {
-return;
-}
// currently concat layer only supports 2 bytes in int16 and int8 mode. In fp32 mode this no necessary but usefull for testing
const int bytesPerConcatElement = 2;

@@ -1244,10 +1139,6 @@ void InsertConcatAligningFilterPass::run() {
void ReorderConcatInputsPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReorderConcatInputsPass");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
-// aligning specific not required in fp32 mode
-if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) {
-return;
-}
int numOfLinkLayers = 0;

for (auto& l : *pLayers) {
@@ -1461,9 +1352,6 @@ static InferenceEngine::Blob::Ptr tileBlob(Blob::Ptr& blob, size_t TileTo) {

void EltwiseSplitOverChannelsPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "EltwiseSplitOverChannelsPass");
-if (getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo == Policy::GNAAffineDiagonal::UNLIMIT) {
-return;
-}

for (auto & l : *pLayers) {
if (!LayerInfo(l).isEltwise()) {
@@ -1478,7 +1366,8 @@ void EltwiseSplitOverChannelsPass::run() {
auto oData = l->outData.front();
auto out_width = GetDataDimSize(oData, DataDimName::W);
auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end());
-auto maxAffineElements = getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo;
+// gna limit this to be OxFFFF
+auto maxAffineElements = 65536 - 64;
if (totalElementsForOutput <= maxAffineElements) {
continue;
}
@@ -1629,31 +1518,25 @@ void SubstituteScaleShiftBroadCastPass::run() {
}

gnalog() << "Substitution ScaleShift broadcast for layer: " << l->name << "\n";
// approach 1 - weights tiling
-if (getPassManager()->getPolicy().ScaleShiftPolicy == Policy::ScaleShift::WEIGHTS_TILING) {
-if (nElements % scaleShift->_weights->size()) {
-THROW_GNA_EXCEPTION << "Cannot tile weights for layer: " << l->name << ", due to weights size not GCD of dims product";
-}
-scaleShift->_weights = tileBlob(scaleShift->_weights, nElements);
-if (scaleShift->_biases) {
-if (nElements % scaleShift->_biases->size()) {
-THROW_GNA_EXCEPTION << "Cannot tile biases for layer: " << l->name << ", due to biases size not GCD of dims product";
-}
-scaleShift->_biases = tileBlob(scaleShift->_biases, nElements);
-}
-
-auto tensor = InferenceEngine::TensorDesc(insData->getTensorDesc());
-tensor.reshape(SizeVector{ batchSize, nElements }, Layout::NC);
-auto reshapeName = scaleShift->name + "_input_" + std::to_string(0) + "_reshape";
-auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
-auto layer_before_scale_shift = getCreatorLayer(insData);
-
-CNNNetworkInsertLayer(layer_before_scale_shift.lock(), l, reshape);
-gnalog() << "\tInserted " << reshapeName << " between " << layer_before_scale_shift.lock()->name << " and " << l->name << std::endl;
-} else {
-THROW_GNA_EXCEPTION << "Not implemented substitution of scaleshift broadcast policy of "
-<< getPassManager()->getPolicy().ScaleShiftPolicy << "using layers tiling, layer: " << l->name;
+if (nElements % scaleShift->_weights->size()) {
+THROW_GNA_EXCEPTION << "Cannot tile weights for layer: " << l->name << ", due to weights size not GCD of dims product";
+}
+scaleShift->_weights = tileBlob(scaleShift->_weights, nElements);
+if (scaleShift->_biases) {
+if (nElements % scaleShift->_biases->size()) {
+THROW_GNA_EXCEPTION << "Cannot tile biases for layer: " << l->name << ", due to biases size not GCD of dims product";
+}
+scaleShift->_biases = tileBlob(scaleShift->_biases, nElements);
+}
+
+auto tensor = InferenceEngine::TensorDesc(insData->getTensorDesc());
+tensor.reshape(SizeVector{ batchSize, nElements }, Layout::NC);
+auto reshapeName = scaleShift->name + "_input_" + std::to_string(0) + "_reshape";
+auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
+auto layer_before_scale_shift = getCreatorLayer(insData);
+
+CNNNetworkInsertLayer(layer_before_scale_shift.lock(), l, reshape);
+gnalog() << "\tInserted " << reshapeName << " between " << layer_before_scale_shift.lock()->name << " and " << l->name << std::endl;
}
}

@@ -8,7 +8,6 @@
#include <string>
#include <map>
#include <ie_common.h>
-#include "gna_plugin_policy.hpp"

namespace GNAPluginNS {
/**
@@ -29,7 +28,6 @@ class IPassManager {
public:
virtual ~IPassManager() = default;
virtual int &getIntVar(std::string name) = 0;
-virtual const Policy &getPolicy() const = 0;
virtual const bool& isLowPrecision() const = 0;
virtual InferenceEngine::CNNNetwork &getNetwork() = 0;
};
@@ -75,17 +73,6 @@ DECL_PASS(InsertIdentityLayer);
*/
DECL_PASS(SubstituteScaleShiftBroadCast);

-/**
-* @brief GNA convolution layers have deinterleaved layout, while affine one doesn't
-* so between convolution and affine layers permute layers need to be inserted,
-* current MO approach is to insert such permutations
-* since GNA-HW already support conv->affine in permuted for, this pass inverses MO behavior
-* so its remove permutations of certain form conv->conv, and between conv->affine
-* and insert permutation between conv->affine if they are missed in IR
-* @param layers
-*/
-DECL_PASS(ReversePermutations);
-
/**
* @brief Pass support --disable_nhwc_to_nchw option in MO
* @param layers
@@ -220,7 +207,6 @@ DECL_PASS(MoveFakeQuantizeLayerIntoQuantParams);
DECL_PASS(TransposeWeightsFromNCHWToNHWC);

struct PassManagerSettings {
-Policy policy;
/// @brief whether to run passes before copy
bool runBeforeCopy;
bool lowPrecision;
@@ -245,9 +231,6 @@ public:
int & getIntVar(std::string name) override {
return intMap[name];
}
-const Policy & getPolicy() const override {
-return settings.policy;
-}
const bool& isLowPrecision() const override {
return settings.lowPrecision;
}

@@ -107,7 +107,6 @@ void GNAPropagateMatcher :: match() {
try {
// matching gna propagate forward call.
GNAPlugin plugin(_env.config);
-plugin.SetPolicy(_env.policy);
size_t inputSize = 10;
size_t outputSize = 10;
InputsDataMap inputsInfo;
@@ -508,7 +507,7 @@ void GNAPluginCreationMatcher :: match() {
void GNAPluginAOTMatcher :: match() {
// matching gna_propagate forward call.
MockICNNNetwork net;

size_t weightsSize = 656384;
auto weights = make_shared_blob<uint8_t >({ Precision::U8, {weightsSize}, Layout::C });
weights->allocate();
@@ -744,7 +743,7 @@ void GNAQueryStateMatcher :: match() {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {weightsSize}, Layout::C });
weights->allocate();
fillWeights(weights);

InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW_IE_EXCEPTION(network = core.ReadNetwork(_env.model, weights));

@@ -23,7 +23,6 @@
#include <cpp/ie_cnn_network.h>

#include <backend/dnn_types.h>
-#include <gna_plugin_policy.hpp>
#include <backend/gna_types.h>
#include <gna/gna_config.hpp>
#include <gna_plugin.hpp>
@@ -95,7 +94,6 @@ class GnaPluginTestEnvironment {
bool exportNetworkOnly = false;
std::function<void (InferenceEngine::CNNNetwork &)> cb;
std::map<std::string, std::string> config;
-GNAPluginNS::Policy policy;
bool matchThrows = false;
uint32_t proc_type = static_cast<intel_gna_proc_t>(GNA_SOFTWARE & GNA_HARDWARE);
std::string importedModelFileName;
@@ -168,10 +166,6 @@ class GNATestConfigurability : public GNATestBase{
_env.config[GNA_CONFIG_KEY(DEVICE_MODE)] = GNA_CONFIG_VALUE(SW_FP32);
return *dynamic_cast<T*>(this);
}
-T & withPolicy(GNAPluginNS::Policy::ConcatAlignment concatAlignmentPolicy) {
-_env.policy.ConcatAlignmentPolicy = concatAlignmentPolicy;
-return *dynamic_cast<T*>(this);
-}
T & withGNADeviceMode(std::string value) {
_env.config[GNA_CONFIG_KEY(DEVICE_MODE)] = value;
return *dynamic_cast<T*>(this);

@@ -11,7 +11,7 @@
#include <ie_precision.hpp>
#include "../gna_matcher.hpp"

-using GNAAlignFilterTestParams = std::tuple<InferenceEngine::Precision, GNAPluginNS::Policy::ConcatAlignment, std::size_t, std::size_t>;
+using GNAAlignFilterTestParams = std::tuple<InferenceEngine::Precision, std::size_t, std::size_t>;
using namespace GNAPluginNS;

class GNAAlignFilterTest : public GNATest<>,
@@ -20,11 +20,9 @@ class GNAAlignFilterTest : public GNATest<>,

static std::string getTestName(const testing::TestParamInfo<GNAAlignFilterTestParams>& params) {
std::string test_name;
-if (std::get<1>(params.param) == GNAPluginNS::Policy::ConcatAlignment::FAST) {
-test_name += "fast_";
-}
-test_name += "concat_of(" + std::to_string(std::get<2>(params.param));
-test_name += "_" + std::to_string(std::get<3>(params.param));
+test_name += "fast_";
+test_name += "concat_of(" + std::to_string(std::get<1>(params.param));
+test_name += "_" + std::to_string(std::get<2>(params.param));
test_name += ")_on_";
test_name += std::get<0>(params.param).name();
return test_name;
@@ -34,10 +32,9 @@ class GNAAlignFilterTest : public GNATest<>,

InferenceEngine::Precision precision = InferenceEngine::Precision::FP32;
std::size_t concat_inputs[2];
-GNAPluginNS::Policy::ConcatAlignment alignmentPolicy;

void SetUp() override {
-std::tie(precision, alignmentPolicy, concat_inputs[0], concat_inputs[1]) = GetParam();
+std::tie(precision, concat_inputs[0], concat_inputs[1]) = GetParam();
}

std::shared_ptr<ngraph::Function> getNgraphModel() {
@@ -83,40 +80,18 @@ TEST_P(GNAAlignFilterTest, concatWith_2_Inputs_Small_mem_footprint) {
return getFastAffineFilterParams(sz).second;
};

-switch(alignmentPolicy) {
-case Policy::ConcatAlignment::ENABLED : {
-//align first input by 8
-auto firstFilter = ALIGN(concat_inputs[0], 8) * concat_inputs[0];
-//align first input by 8
-auto extraLeftElementsForSecond = concat_inputs[0] + 32 - ALIGN(concat_inputs[0], 32);
+expected_copy_layers = getNumCopyElements(concat_inputs[0]);
+expected_affine_size = getsNumFilterWeights(concat_inputs[0]);

-auto secondFilter = ALIGN(concat_inputs[1], 8) * (extraLeftElementsForSecond + concat_inputs[1]);
+// calculation size for second filter
+auto offset = ALIGN(concat_inputs[0], 32) - 32;
+auto zerolen = concat_inputs[0] - offset;
+auto second_output_len = zerolen + concat_inputs[1];

-expected_affine_size = firstFilter + secondFilter;
-break;
-}
-case Policy::ConcatAlignment::FAST : {
-
-expected_copy_layers = getNumCopyElements(concat_inputs[0]);
-expected_affine_size = getsNumFilterWeights(concat_inputs[0]);
-
-// calculation size for second filter
-auto offset = ALIGN(concat_inputs[0], 32) - 32;
-auto zerolen = concat_inputs[0] - offset;
-auto second_output_len = zerolen + concat_inputs[1];
-
-expected_affine_size += second_output_len * ALIGN(concat_inputs[1], 8);
-break;
-}
-
-default : {
-FAIL() << "unsupported align policy: " << alignmentPolicy;
-}
-}
+expected_affine_size += second_output_len * ALIGN(concat_inputs[1], 8);

assert_that().onInferNgraphModel(ngraf)
.inNotCompactMode()
-.withPolicy(alignmentPolicy)
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_0", 1.0f)
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_1", 1.0f)
.withGNAConfig(GNA_CONFIG_KEY(PRECISION), precision.name())
@@ -152,7 +127,6 @@ TEST_P(GNAAlignFilterTest, concatWith_2_Inputs_accurate) {
.gna()
.propagate_forward()
.onCPU()
-.withPolicy(alignmentPolicy)
.called_with()
.input(ngraf->get_parameters().at(0)->get_name(), input_data[0])
.input(ngraf->get_parameters().at(1)->get_name(), input_data[1])
@@ -161,7 +135,6 @@ TEST_P(GNAAlignFilterTest, concatWith_2_Inputs_accurate) {
assert_that().onInferNgraphModel(ngraf)
.inNotCompactMode()
.gna()
-.withPolicy(alignmentPolicy)
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_0", 1.0f)
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_1", 1.0f)
.withGNAConfig(GNA_CONFIG_KEY(PRECISION), "I16")
@@ -175,8 +148,6 @@ INSTANTIATE_TEST_SUITE_P(
GNAAlignFilterTest,
testing::Combine(
testing::Values(InferenceEngine::Precision::FP32, InferenceEngine::Precision::I16),
-//fast or not fast alignment policy
-testing::Values(GNAPluginNS::Policy::ConcatAlignment::FAST, GNAPluginNS::Policy::ConcatAlignment::ENABLED),
// Size of first Split layer output
testing::Values(31, 49),
// Size of second Split layer output