[GNA]: Cleanup policy (#6216)

This commit is contained in:
Nadezhda Ageeva
2021-06-23 11:20:20 +03:00
committed by GitHub
parent afe033a39a
commit 3307b1f17c
10 changed files with 39 additions and 333 deletions

View File

@@ -34,7 +34,6 @@
#include "layers/gna_crop_layer.hpp"
#include "layers/gna_fake_quantize_layer.hpp"
#include "round_float_define.hpp"
#include "gna_plugin_policy.hpp"
#include "gna_groups.hpp"
#include "backend/gna_limitations.hpp"
@@ -62,10 +61,6 @@ void GNAGraphCompiler::setGNAFlagsPtr(std::shared_ptr<GNAPluginNS::GNAFlags> gna
this->gnaFlags = std::move(gnaFlagsPtr);
}
/// Stores a copy of the supplied policy in this graph compiler.
void GNAGraphCompiler::setPolicy(GNAPluginNS::Policy policyToSet) {
    policy = policyToSet;
}
intel_dnn_component_t * GNAGraphCompiler::find_first_unused_input(InferenceEngine::CNNLayerPtr current) {
if (current->insData.empty())
return nullptr;
@@ -547,10 +542,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
auto effectiveInputWidth = in_width;
auto effectiveInputHeight = in_height;
if (policy.cnn2dInputPaddingSupported) {
effectiveInputWidth += convolution._padding_x * 2;
effectiveInputHeight += convolution._padding_y * 2;
} else if (convolution._padding_x != 0 || convolution._padding_y != 0 ||
if (convolution._padding_x != 0 || convolution._padding_y != 0 ||
convolution._pads_end.at(X_AXIS) != 0 || convolution._pads_end.at(Y_AXIS) != 0) {
THROW_GNA_LAYER_EXCEPTION(layer) << "Convolution's input padding is not supported";
}
@@ -1641,7 +1633,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
uint32_t num_rows_copied = 0;
// in case of left alignment succeed, but due to number of elements not multiple of 8 we need to insert align_filter
// we are improving it by inserting copy layer of size that covers most of elements - remained max of 32x31 affine filter
if (policy.ConcatAlignmentPolicy == Policy::ConcatAlignment::FAST && 0 == numRowsPadded && ALIGN(num_rows_in, 32) > 32) {
if (0 == numRowsPadded && ALIGN(num_rows_in, 32) > 32) {
// can we use copy at all
num_rows_copied = ALIGN(num_rows_in, 32) - 32;

View File

@@ -27,7 +27,6 @@
#include "backend/gna_limitations.hpp"
#include "gna_device.hpp"
#include "gna_data_types.hpp"
#include "gna_plugin_policy.hpp"
namespace GNAPluginNS {
class GNAGraphCompiler {
@@ -36,7 +35,6 @@ private:
std::shared_ptr<GNAPluginNS::gna_memory_type> gnamem;
std::shared_ptr<GNAPluginNS::InputDesc> inputDesc;
std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlags;
Policy policy;
// layers with extra storage for connections and additional
// non trivial processing
@@ -64,7 +62,6 @@ public:
void setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr);
void setInputDescPtr(std::shared_ptr<GNAPluginNS::InputDesc> inputDescPtr);
void setGNAFlagsPtr(std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlagsPtr);
void setPolicy(GNAPluginNS::Policy policy);
void fillMemoryConnections(std::unordered_map<std::string,
std::vector<InferenceEngine::CNNLayerPtr>> &memoryPairs);

View File

@@ -738,7 +738,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
// network optimisation phases
int passIdx = 0;
auto run_passes = [&] (const CNNNetwork& network, bool runBeforeCopy, bool lowPrecision) {
auto passes = make_shared<PassManager>(PassManagerSettings{policy, runBeforeCopy, lowPrecision}, network);
auto passes = make_shared<PassManager>(PassManagerSettings{runBeforeCopy, lowPrecision}, network);
passes->registerPass<RemoveConstPass>();
passes->registerPass<UnrollTIPass>();
passes->registerPass<RemoveConstPass>();
@@ -765,13 +765,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
passes->registerPass<FlattenTrivialConcatPass>();
passes->registerPass<InsertConcatAligningFilterPass>();
passes->registerPass<ReorderConcatInputsPass>();
if (policy.PermutePolicy != Policy::Permute::DISABLED) {
passes->registerPass<ReversePermutationsPass>();
}
if (policy.NHWCToNCHWPolicy != Policy::NHWCToNCHW::DISABLED) {
passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
}
passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
passes->registerPass<InsertIdentityLayerPass>();
passes->registerPass<BreakFusingOfOutputLayersPass>();
passes->registerPass<InsertDiagonalLayerPass>();
@@ -840,9 +834,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
auto sortedNet = CNNNetSortTopologicallyEx(newNet, make_fuzed_order);
// passing policy to compiler
graphCompiler.setPolicy(policy);
if (sortedNet.empty()) {
THROW_GNA_EXCEPTION << "Sorted network is empty";
}

View File

@@ -21,7 +21,6 @@
#include "backend/am_intel_dnn.hpp"
#include "gna_data_types.hpp"
#include "gna_graph_compiler.hpp"
#include "gna_plugin_policy.hpp"
#include "gna_plugin_log.hpp"
#include "gna_plugin_config.hpp"
#include <legacy/ie_util_internal.hpp>
@@ -69,8 +68,6 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
intel_dnn_number_type_t output_type = kDnnInt;
GNAPluginNS::Policy policy;
#if GNA_LIB_VER == 2
void createRequestConfigsForGnaModels();
#endif
@@ -158,11 +155,6 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
INFERENCE_ENGINE_DEPRECATED("Use InferRequest::QueryState instead")
std::vector<InferenceEngine::IVariableStateInternal::Ptr> QueryState();
/**
 * Test-oriented API: overwrites the plugin's policy with the supplied value.
 */
void SetPolicy(GNAPluginNS::Policy p) {
    this->policy = p;
}
/**
* QueryMetrics API
*/

View File

@@ -1,96 +0,0 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <ostream>
namespace GNAPluginNS {
/**
 * @brief Policy aggregates various settings that cannot be tweaked through configuration options right now
 * but must stay switchable so that tests can cover both their enabled and disabled variants.
 */
class Policy {
public:
    /**
     * @brief Strategy for substituting a broadcasting ScaleShift.
     * Weight tiling simplifies the final graph but costs extra weights; when it is not selected,
     * the broadcast results in multiple diagonal layers instead of tiled weights.
     */
    enum class ScaleShift {
        WEIGHTS_TILING,
        /**
         * GNA supports only a limited batch size, so even existing topologies cannot be substituted
         * with batching alone; this option combines batching with weights tiling.
         */
        BATCH_AND_WEIGHTS_TILING,
        DIAGLAYER_TILING
    } ScaleShiftPolicy = ScaleShift::WEIGHTS_TILING;

    /**
     * @brief Whether permute layers are substituted or left alone.
     */
    enum class Permute {
        DISABLED,
        AUTO_PERMUTE
    } PermutePolicy = Permute::DISABLED;

    /**
     * @brief Switch for the conversion (flattening) of trivial concat layers.
     */
    enum class FlattenTrivialConcatConversion {
        DISABLED,
        ENABLED
    } ConcatConversionPolicy = FlattenTrivialConcatConversion::ENABLED;

    /**
     * @brief Concat input alignment mode; FAST is the default.
     */
    enum class ConcatAlignment {
        DISABLED,
        DISABLED_FOR_FP32,
        ENABLED,
        FAST
    } ConcatAlignmentPolicy = ConcatAlignment::FAST;

    /**
     * @brief Policy backing the --disable_nhwc_to_nchw option of MO.
     */
    enum class NHWCToNCHW {
        DISABLED,
        REMOVE_LAST,
        REMOVE_ALL
    } NHWCToNCHWPolicy = NHWCToNCHW::REMOVE_ALL;

    /**
     * @brief Cap on the number of elements of a GNA diagonal affine layer.
     */
    class GNAAffineDiagonal {
    public:
        enum : uint32_t {
            UNLIMIT,
            // GNA limits this to 0xFFFF; the default stays 64 elements below 65536
            LIMITED_TO_DEFAULT_GNA2_65536 = 65536 - 64
        };
        uint32_t limitedTo = LIMITED_TO_DEFAULT_GNA2_65536;
    } GNAAffineDiagonalPolicy;

    /// Whether input padding of 2D convolutions is treated as supported (off by default).
    bool cnn2dInputPaddingSupported = false;
};

/**
 * @brief Prints the enumerator name of a ScaleShift policy.
 * For a value outside the enumeration nothing is written and failbit is set on the stream.
 */
inline std::ostream& operator<<(std::ostream& os, Policy::ScaleShift policy) {
    const char* label = nullptr;
    switch (policy) {
        case Policy::ScaleShift::WEIGHTS_TILING:
            label = "WEIGHTS_TILING";
            break;
        case Policy::ScaleShift::BATCH_AND_WEIGHTS_TILING:
            label = "BATCH_AND_WEIGHTS_TILING";
            break;
        case Policy::ScaleShift::DIAGLAYER_TILING:
            label = "DIAGLAYER_TILING";
            break;
        default:
            break;
    }
    if (label == nullptr) {
        os.setstate(std::ios_base::failbit);
        return os;
    }
    return os << label;
}

/**
 * @brief Prints the enumerator name of a ConcatAlignment policy.
 * For a value outside the enumeration nothing is written and failbit is set on the stream.
 */
inline std::ostream& operator<<(std::ostream& os, Policy::ConcatAlignment policy) {
    const char* label = nullptr;
    switch (policy) {
        case Policy::ConcatAlignment::DISABLED:
            label = "DISABLED";
            break;
        case Policy::ConcatAlignment::DISABLED_FOR_FP32:
            label = "DISABLED_FOR_FP32";
            break;
        case Policy::ConcatAlignment::ENABLED:
            label = "ENABLED";
            break;
        case Policy::ConcatAlignment::FAST:
            label = "FAST";
            break;
        default:
            break;
    }
    if (label == nullptr) {
        os.setstate(std::ios_base::failbit);
        return os;
    }
    return os << label;
}
}  // namespace GNAPluginNS

View File

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "gna_plugin_policy.hpp"
#include <vector>
#include <string>
#include <memory>
@@ -613,99 +612,6 @@ void SubstitutePReluPass::run() {
}
}
/**
 * @brief Processes permute layers found in pLayers.
 * Every permute layer must have order 0,3,2,1, a predecessor returned by prevConv and a
 * fully connected successor (reached through non-functional layers); otherwise an exception is thrown.
 * Accepted permute layers are removed from the network and the name of the following
 * fully connected layer is recorded. A second sweep visits the fully connected layers that
 * were not recorded and calls CNNNetworkInsertLayer for them (still a TODO, see below).
 */
void ReversePermutationsPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReversePermutationsPass");
// recursive helper: walks towards the inputs, using shouldSkip to decide where to stop
// NOTE(review): the guard below returns nullptr when CNNNetHasPrevLayer reports true, and the
// recursion continues when shouldSkip(prev) is false - both conditions look inverted; confirm
// against the helpers' semantics before relying on this function
std::function<CNNLayerPtr(CNNLayerPtr, std::function<bool(CNNLayerPtr)>)> prevLayerSkipCertain
= [&prevLayerSkipCertain](CNNLayerPtr layer, std::function<bool(CNNLayerPtr)> shouldSkip) -> CNNLayerPtr {
if (CNNNetHasPrevLayer(layer.get())) {
return nullptr;
}
auto prev = CNNNetPrevLayer(layer);
if (!shouldSkip(prev)) return prevLayerSkipCertain(prev, shouldSkip);
return prev;
};
// recursive helper: returns the single consumer of the layer's first output, stepping over
// non-functional layers; yields nullptr when there is no output or not exactly one consumer
std::function<CNNLayerPtr(CNNLayerPtr)> nextLayerSkipReshape = [&nextLayerSkipReshape](CNNLayerPtr layer) -> CNNLayerPtr {
if (layer->outData.empty()) {
return nullptr;
}
if (getInputTo(layer->outData.front()).size() != 1) {
return nullptr;
}
auto next = getInputTo(layer->outData.front()).begin()->second;
if (LayerInfo(next).isNonFunctional()) return nextLayerSkipReshape(next);
return next;
};
// predecessor lookup whose predicate matches non-functional, pooling and activation layers
// NOTE(review): the returned layer is never checked to be a convolution despite the name
auto prevConv = [&prevLayerSkipCertain](CNNLayerPtr layer) -> CNNLayerPtr {
return prevLayerSkipCertain(layer, [] (CNNLayerPtr l2) {
return
LayerInfo(l2).isNonFunctional() ||
LayerInfo(l2).isPooling() ||
LayerInfo(l2).isActivation();
});
};
// names of fully connected layers that directly follow a removed permute
std::unordered_set<std::string> affineWithPermutedWeights;
// removal is deferred until the sweep over pLayers has finished
std::list<CNNLayerPtr> permutationstoRemove;
for (auto & l : *pLayers) {
if (!LayerInfo(l).isPermute()) {
continue;
}
auto layerOrder = l->GetParamAsInts("order");
if (layerOrder != std::vector<int>({0, 3, 2, 1})) {
THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << ", order: was " << l->GetParamAsString("order") <<
", but support order is 0,3,2,1";
}
// search for its input convolution
auto prev = prevConv(l);
// pooling is not used in speech models without convolution
if (!prev) {
THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << " no valid input to that layer";
}
// we can remove that permutation if it is input to ScaleShift or FC layer
// NOTE(review): only the fully connected case is actually checked here
auto next = nextLayerSkipReshape(l);
if (!next || !LayerInfo(next).isFullyConnected()) {
THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << " no valid output of that layer";
}
permutationstoRemove.push_back(l);
// removing that permutation layer and saving information about affine
affineWithPermutedWeights.insert(next->name);
}
for (auto && toRemove : permutationstoRemove) {
CNNNetworkRemoveLayer(toRemove);
}
// search for conv->affine sequences
for (auto & l : *pLayers) {
if (!LayerInfo(l).isFullyConnected() || 0 != affineWithPermutedWeights.count(l->name)) {
continue;
}
// found an affine layer that was not involved in the permutation removal;
// check whether it has a direct input from a convolution
auto prevConvLayer = prevConv(l);
if (!prevConvLayer) continue;
auto directPrev = CNNNetPrevLayer(l);
// TODO : make new permute
// NOTE(review): a null layer pointer is passed as the layer to insert - unfinished code
CNNNetworkInsertLayer(l, directPrev, CNNLayerPtr(nullptr));
}
}
void RemovePermutationsNHWCToNCHWPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "RemovePermutationsNHWCToNCHWPass");
std::set<CNNLayerPtr> permutations_to_remove;
@@ -720,7 +626,7 @@ void RemovePermutationsNHWCToNCHWPass::run() {
if (prev == nullptr || next == nullptr) continue;
if (LayerInfo(prev).isPermute() && getPassManager()->getPolicy().NHWCToNCHWPolicy == Policy::NHWCToNCHW::REMOVE_ALL) {
if (LayerInfo(prev).isPermute()) {
permutations_to_remove.insert(prev);
}
@@ -1040,9 +946,6 @@ void FlattenTrivialConcatPass::run() {
// 1, 1, 5, 3 then for axis 0, 1, 2 the change will be made and inputs will be reshaped to 1, 15,
// but for shape 2, 1, 5, 3 only axis 0 is valid and inputs will reshape to 1, 30
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
if (getPassManager()->getPolicy().ConcatConversionPolicy == Policy::FlattenTrivialConcatConversion::DISABLED) return;
if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED) return;
if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) return;
auto getLayerByIndex = [](int idx, ConcatLayer* concatLayer) {
auto input = concatLayer->insData[idx];
@@ -1118,14 +1021,6 @@ void FlattenTrivialConcatPass::run() {
void InsertConcatAligningFilterPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertConcatAligningFilterPass");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED) {
return;
}
// aligning specific not required in fp32 mode
if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) {
return;
}
// currently the concat layer only supports 2 bytes per element in int16 and int8 mode. In fp32 mode this is not necessary but useful for testing
const int bytesPerConcatElement = 2;
@@ -1244,10 +1139,6 @@ void InsertConcatAligningFilterPass::run() {
void ReorderConcatInputsPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReorderConcatInputsPass");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
// aligning specific not required in fp32 mode
if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) {
return;
}
int numOfLinkLayers = 0;
for (auto& l : *pLayers) {
@@ -1461,9 +1352,6 @@ static InferenceEngine::Blob::Ptr tileBlob(Blob::Ptr& blob, size_t TileTo) {
void EltwiseSplitOverChannelsPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "EltwiseSplitOverChannelsPass");
if (getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo == Policy::GNAAffineDiagonal::UNLIMIT) {
return;
}
for (auto & l : *pLayers) {
if (!LayerInfo(l).isEltwise()) {
@@ -1478,7 +1366,8 @@ void EltwiseSplitOverChannelsPass::run() {
auto oData = l->outData.front();
auto out_width = GetDataDimSize(oData, DataDimName::W);
auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end());
auto maxAffineElements = getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo;
// GNA limits this to 0xFFFF
auto maxAffineElements = 65536 - 64;
if (totalElementsForOutput <= maxAffineElements) {
continue;
}
@@ -1629,31 +1518,25 @@ void SubstituteScaleShiftBroadCastPass::run() {
}
gnalog() << "Substitution ScaleShift broadcast for layer: " << l->name << "\n";
// approach 1 - weights tiling
if (getPassManager()->getPolicy().ScaleShiftPolicy == Policy::ScaleShift::WEIGHTS_TILING) {
if (nElements % scaleShift->_weights->size()) {
THROW_GNA_EXCEPTION << "Cannot tile weights for layer: " << l->name << ", due to weights size not GCD of dims product";
}
scaleShift->_weights = tileBlob(scaleShift->_weights, nElements);
if (scaleShift->_biases) {
if (nElements % scaleShift->_biases->size()) {
THROW_GNA_EXCEPTION << "Cannot tile biases for layer: " << l->name << ", due to biases size not GCD of dims product";
}
scaleShift->_biases = tileBlob(scaleShift->_biases, nElements);
}
auto tensor = InferenceEngine::TensorDesc(insData->getTensorDesc());
tensor.reshape(SizeVector{ batchSize, nElements }, Layout::NC);
auto reshapeName = scaleShift->name + "_input_" + std::to_string(0) + "_reshape";
auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
auto layer_before_scale_shift = getCreatorLayer(insData);
CNNNetworkInsertLayer(layer_before_scale_shift.lock(), l, reshape);
gnalog() << "\tInserted " << reshapeName << " between " << layer_before_scale_shift.lock()->name << " and " << l->name << std::endl;
} else {
THROW_GNA_EXCEPTION << "Not implemented substitution of scaleshift broadcast policy of "
<< getPassManager()->getPolicy().ScaleShiftPolicy << "using layers tiling, layer: " << l->name;
if (nElements % scaleShift->_weights->size()) {
THROW_GNA_EXCEPTION << "Cannot tile weights for layer: " << l->name << ", due to weights size not GCD of dims product";
}
scaleShift->_weights = tileBlob(scaleShift->_weights, nElements);
if (scaleShift->_biases) {
if (nElements % scaleShift->_biases->size()) {
THROW_GNA_EXCEPTION << "Cannot tile biases for layer: " << l->name << ", due to biases size not GCD of dims product";
}
scaleShift->_biases = tileBlob(scaleShift->_biases, nElements);
}
auto tensor = InferenceEngine::TensorDesc(insData->getTensorDesc());
tensor.reshape(SizeVector{ batchSize, nElements }, Layout::NC);
auto reshapeName = scaleShift->name + "_input_" + std::to_string(0) + "_reshape";
auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
auto layer_before_scale_shift = getCreatorLayer(insData);
CNNNetworkInsertLayer(layer_before_scale_shift.lock(), l, reshape);
gnalog() << "\tInserted " << reshapeName << " between " << layer_before_scale_shift.lock()->name << " and " << l->name << std::endl;
}
}

View File

@@ -8,7 +8,6 @@
#include <string>
#include <map>
#include <ie_common.h>
#include "gna_plugin_policy.hpp"
namespace GNAPluginNS {
/**
@@ -29,7 +28,6 @@ class IPassManager {
public:
virtual ~IPassManager() = default;
virtual int &getIntVar(std::string name) = 0;
virtual const Policy &getPolicy() const = 0;
virtual const bool& isLowPrecision() const = 0;
virtual InferenceEngine::CNNNetwork &getNetwork() = 0;
};
@@ -75,17 +73,6 @@ DECL_PASS(InsertIdentityLayer);
*/
DECL_PASS(SubstituteScaleShiftBroadCast);
/**
* @brief GNA convolution layers have a deinterleaved layout, while affine layers do not,
* so permute layers need to be inserted between convolution and affine layers;
* the current MO approach is to insert such permutations.
* Since GNA-HW already supports conv->affine in permuted form, this pass inverts the MO behavior:
* it removes permutations of a certain form between conv->conv and between conv->affine,
* and inserts a permutation between conv->affine if it is missing in the IR
* @param layers
*/
DECL_PASS(ReversePermutations);
/**
* @brief Pass support --disable_nhwc_to_nchw option in MO
* @param layers
@@ -220,7 +207,6 @@ DECL_PASS(MoveFakeQuantizeLayerIntoQuantParams);
DECL_PASS(TransposeWeightsFromNCHWToNHWC);
struct PassManagerSettings {
Policy policy;
/// @brief whether to run passes before copy
bool runBeforeCopy;
bool lowPrecision;
@@ -245,9 +231,6 @@ public:
/// Returns a mutable reference to the integer stored in intMap under the given name.
int & getIntVar(std::string name) override {
    auto& slot = intMap[name];
    return slot;
}
/// Gives read-only access to the policy held in the pass manager settings.
const Policy & getPolicy() const override {
    const Policy& currentPolicy = settings.policy;
    return currentPolicy;
}
/// Gives read-only access to the lowPrecision flag held in the pass manager settings.
const bool& isLowPrecision() const override {
    const bool& lowPrecisionFlag = settings.lowPrecision;
    return lowPrecisionFlag;
}

View File

@@ -107,7 +107,6 @@ void GNAPropagateMatcher :: match() {
try {
// matching gna propagate forward call.
GNAPlugin plugin(_env.config);
plugin.SetPolicy(_env.policy);
size_t inputSize = 10;
size_t outputSize = 10;
InputsDataMap inputsInfo;
@@ -508,7 +507,7 @@ void GNAPluginCreationMatcher :: match() {
void GNAPluginAOTMatcher :: match() {
// matching gna_propagate forward call.
MockICNNNetwork net;
size_t weightsSize = 656384;
auto weights = make_shared_blob<uint8_t >({ Precision::U8, {weightsSize}, Layout::C });
weights->allocate();
@@ -744,7 +743,7 @@ void GNAQueryStateMatcher :: match() {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {weightsSize}, Layout::C });
weights->allocate();
fillWeights(weights);
InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW_IE_EXCEPTION(network = core.ReadNetwork(_env.model, weights));

View File

@@ -23,7 +23,6 @@
#include <cpp/ie_cnn_network.h>
#include <backend/dnn_types.h>
#include <gna_plugin_policy.hpp>
#include <backend/gna_types.h>
#include <gna/gna_config.hpp>
#include <gna_plugin.hpp>
@@ -95,7 +94,6 @@ class GnaPluginTestEnvironment {
bool exportNetworkOnly = false;
std::function<void (InferenceEngine::CNNNetwork &)> cb;
std::map<std::string, std::string> config;
GNAPluginNS::Policy policy;
bool matchThrows = false;
uint32_t proc_type = static_cast<intel_gna_proc_t>(GNA_SOFTWARE & GNA_HARDWARE);
std::string importedModelFileName;
@@ -168,10 +166,6 @@ class GNATestConfigurability : public GNATestBase{
_env.config[GNA_CONFIG_KEY(DEVICE_MODE)] = GNA_CONFIG_VALUE(SW_FP32);
return *dynamic_cast<T*>(this);
}
/// Records the concat alignment policy in the test environment and returns this object as T for chaining.
T & withPolicy(GNAPluginNS::Policy::ConcatAlignment concatAlignmentPolicy) {
    _env.policy.ConcatAlignmentPolicy = concatAlignmentPolicy;
    T* self = dynamic_cast<T*>(this);
    return *self;
}
T & withGNADeviceMode(std::string value) {
_env.config[GNA_CONFIG_KEY(DEVICE_MODE)] = value;
return *dynamic_cast<T*>(this);

View File

@@ -11,7 +11,7 @@
#include <ie_precision.hpp>
#include "../gna_matcher.hpp"
using GNAAlignFilterTestParams = std::tuple<InferenceEngine::Precision, GNAPluginNS::Policy::ConcatAlignment, std::size_t, std::size_t>;
using GNAAlignFilterTestParams = std::tuple<InferenceEngine::Precision, std::size_t, std::size_t>;
using namespace GNAPluginNS;
class GNAAlignFilterTest : public GNATest<>,
@@ -20,11 +20,9 @@ class GNAAlignFilterTest : public GNATest<>,
static std::string getTestName(const testing::TestParamInfo<GNAAlignFilterTestParams>& params) {
std::string test_name;
if (std::get<1>(params.param) == GNAPluginNS::Policy::ConcatAlignment::FAST) {
test_name += "fast_";
}
test_name += "concat_of(" + std::to_string(std::get<2>(params.param));
test_name += "_" + std::to_string(std::get<3>(params.param));
test_name += "fast_";
test_name += "concat_of(" + std::to_string(std::get<1>(params.param));
test_name += "_" + std::to_string(std::get<2>(params.param));
test_name += ")_on_";
test_name += std::get<0>(params.param).name();
return test_name;
@@ -34,10 +32,9 @@ class GNAAlignFilterTest : public GNATest<>,
InferenceEngine::Precision precision = InferenceEngine::Precision::FP32;
std::size_t concat_inputs[2];
GNAPluginNS::Policy::ConcatAlignment alignmentPolicy;
void SetUp() override {
std::tie(precision, alignmentPolicy, concat_inputs[0], concat_inputs[1]) = GetParam();
std::tie(precision, concat_inputs[0], concat_inputs[1]) = GetParam();
}
std::shared_ptr<ngraph::Function> getNgraphModel() {
@@ -83,40 +80,18 @@ TEST_P(GNAAlignFilterTest, concatWith_2_Inputs_Small_mem_footprint) {
return getFastAffineFilterParams(sz).second;
};
switch(alignmentPolicy) {
case Policy::ConcatAlignment::ENABLED : {
//align first input by 8
auto firstFilter = ALIGN(concat_inputs[0], 8) * concat_inputs[0];
//align first input by 8
auto extraLeftElementsForSecond = concat_inputs[0] + 32 - ALIGN(concat_inputs[0], 32);
expected_copy_layers = getNumCopyElements(concat_inputs[0]);
expected_affine_size = getsNumFilterWeights(concat_inputs[0]);
auto secondFilter = ALIGN(concat_inputs[1], 8) * (extraLeftElementsForSecond + concat_inputs[1]);
// calculation size for second filter
auto offset = ALIGN(concat_inputs[0], 32) - 32;
auto zerolen = concat_inputs[0] - offset;
auto second_output_len = zerolen + concat_inputs[1];
expected_affine_size = firstFilter + secondFilter;
break;
}
case Policy::ConcatAlignment::FAST : {
expected_copy_layers = getNumCopyElements(concat_inputs[0]);
expected_affine_size = getsNumFilterWeights(concat_inputs[0]);
// calculation size for second filter
auto offset = ALIGN(concat_inputs[0], 32) - 32;
auto zerolen = concat_inputs[0] - offset;
auto second_output_len = zerolen + concat_inputs[1];
expected_affine_size += second_output_len * ALIGN(concat_inputs[1], 8);
break;
}
default : {
FAIL() << "unsupported align policy: " << alignmentPolicy;
}
}
expected_affine_size += second_output_len * ALIGN(concat_inputs[1], 8);
assert_that().onInferNgraphModel(ngraf)
.inNotCompactMode()
.withPolicy(alignmentPolicy)
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_0", 1.0f)
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_1", 1.0f)
.withGNAConfig(GNA_CONFIG_KEY(PRECISION), precision.name())
@@ -152,7 +127,6 @@ TEST_P(GNAAlignFilterTest, concatWith_2_Inputs_accurate) {
.gna()
.propagate_forward()
.onCPU()
.withPolicy(alignmentPolicy)
.called_with()
.input(ngraf->get_parameters().at(0)->get_name(), input_data[0])
.input(ngraf->get_parameters().at(1)->get_name(), input_data[1])
@@ -161,7 +135,6 @@ TEST_P(GNAAlignFilterTest, concatWith_2_Inputs_accurate) {
assert_that().onInferNgraphModel(ngraf)
.inNotCompactMode()
.gna()
.withPolicy(alignmentPolicy)
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_0", 1.0f)
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_1", 1.0f)
.withGNAConfig(GNA_CONFIG_KEY(PRECISION), "I16")
@@ -175,8 +148,6 @@ INSTANTIATE_TEST_SUITE_P(
GNAAlignFilterTest,
testing::Combine(
testing::Values(InferenceEngine::Precision::FP32, InferenceEngine::Precision::I16),
//fast or not fast alignment policy
testing::Values(GNAPluginNS::Policy::ConcatAlignment::FAST, GNAPluginNS::Policy::ConcatAlignment::ENABLED),
// Size of first Split layer output
testing::Values(31, 49),
// Size of second Split layer output