[GNA] Fixes for GNA 3.0 library (#7180)

* Pass compileTarget to am_intel_dnn

* Enable tests for GNA lib version prefix 3.0

* Fix conv split transform for 2d cnn tests

* Apply review
Krzysztof Bruniecki 2021-08-25 09:06:10 +02:00 committed by GitHub
parent 3a28ffaf57
commit ce21344585
8 changed files with 106 additions and 26 deletions

@@ -23,6 +23,7 @@
#include "dnn.hpp"
#include "am_intel_dnn.hpp"
#include "dnn_types.h"
#include "gna/gna_config.hpp"
#include "gna_types.h"
#include "gna_limitations.hpp"
#include "layers/gna_convolution_layer.hpp"
@@ -248,6 +249,16 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional2DComponentPrivate(intel
ptr_inputs = &comp.ptr_inputs;
ptr_outputs = &comp.ptr_outputs;
}
bool GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(const Gna2Operation& op) {
// GNA compile target GNA_TARGET_3_0 does not support pooling window < pooling stride
return op.Type == Gna2OperationTypeConvolution &&
op.NumberOfParameters > std::max(PoolStrideParamIdx, PoolWinParamIdx) &&
op.Parameters[PoolStrideParamIdx] != nullptr &&
op.Parameters[PoolWinParamIdx] != nullptr &&
static_cast<Gna2Shape*>(op.Parameters[PoolStrideParamIdx])->NumberOfDimensions == 1 &&
static_cast<Gna2Shape*>(op.Parameters[PoolStrideParamIdx])->Dimensions[0] > static_cast<Gna2Shape*>(op.Parameters[PoolWinParamIdx])->Dimensions[0];
}
#endif
void GNAPluginNS::backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_component_t &comp,
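The new check fires for 1D convolutions whose pooling stride exceeds the pooling window, a combination the GNA 3.0 compile target cannot express. A minimal sketch of parameters that would trigger it (Gna2Shape is from the GNA2 model API; PoolWinParamIdx/PoolStrideParamIdx are the plugin's parameter indices):

Gna2Shape window{};
window.NumberOfDimensions = 1;
window.Dimensions[0] = 2;   // pooling window of 2
Gna2Shape stride{};
stride.NumberOfDimensions = 1;
stride.Dimensions[0] = 3;   // pooling stride of 3
// Attached to a Gna2OperationTypeConvolution, stride (3) > window (2)
// with both shapes 1D, so isOperationCnnLegacySpecific() returns true
// and the operation is routed to the legacy (GNA 2.0) path.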
@@ -1361,7 +1372,7 @@ uint32_t GNAPluginNS::backend::AMIntelDNN::CountLayers() {
}
#if GNA_LIB_VER == 2
void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel) {
void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model* gnaModel, const std::string& gnaCompileTarget) {
Gna2Operation * gnaOperation;
if (gnaModel == nullptr)
THROW_GNA_EXCEPTION << "Invalid input parameter";
@@ -1677,9 +1688,16 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
const auto fltStride = fltStrideShape->Dimensions[0];
const auto outFromConv = outputFromConv(inVecCnt, nFltSize, fltStride);
// FLAT input matrix, pooled outputs per filter
// TODO: Issue 50386 check why (outFromConv - 1) and not (outFromConv - poolingWindow)
outputTensor.Shape.Dimensions[1] =
(outFromConv - 1) / poolStride->Dimensions[0] + 1;
auto effectiveCompileTarget = gnaCompileTarget;
if (isOperationCnnLegacySpecific(*gnaOperation)) {
effectiveCompileTarget = InferenceEngine::GNAConfigParams::GNA_TARGET_2_0;
}
if (effectiveCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) {
outputTensor.Shape.Dimensions[1] = outputFromPooling(outFromConv, poolWindow->Dimensions[0], poolStride->Dimensions[0]);
} else {
outputTensor.Shape.Dimensions[1] = outputFromPoolingLegacy(outFromConv, poolStride->Dimensions[0]);
}
} else { // kDnnConvolutional2dOp
// Override GNA operation output pointer with the one from pooling component
outputTensor.Data = comp.ptr_outputs;
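The two pooling-output formulas diverge once the pooling window is wider than 1. A minimal sketch (the legacy formula is taken from the removed line above; the GNA 3.0 formula is assumed to be the usual ceil-based pooling output):

#include <cstdint>

// Legacy (GNA 2.0 and earlier): the pooling window size is ignored.
uint32_t outputFromPoolingLegacy(uint32_t in, uint32_t stride) {
    return (in - 1) / stride + 1;
}

// Assumed GNA 3.0 semantics: ceil((in - window) / stride) + 1.
uint32_t outputFromPooling(uint32_t in, uint32_t window, uint32_t stride) {
    return (in - window + stride - 1) / stride + 1;
}

// e.g. in = 10, window = 4, stride = 2:
//   legacy: (10 - 1) / 2 + 1       = 5
//   GNA3:   ceil((10 - 4) / 2) + 1 = 4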

@@ -10,6 +10,7 @@
#include "dnn_types.h"
#include "gna_types.h"
#include "gna/gna_config.hpp"
#include "gna_plugin_log.hpp"
@@ -152,6 +153,10 @@ public:
(void*&)ptr_filters,
(void*&)ptr_biases);
}
// Checks whether the operation is a convolution and whether its parameters make it specific to the GNA1/GNA2 targets
// It does not guarantee that the operation is fully compatible with GNA1/GNA2, but it is certainly not compatible with the GNA3 target
static bool isOperationCnnLegacySpecific(const Gna2Operation& operation);
#endif
template<class A, class B>
@@ -293,7 +298,7 @@ public:
#if GNA_LIB_VER == 2
void InitGNAStruct(Gna2Model *gnaModel);
void InitGNAStruct(Gna2Model *gnaModel, const std::string& gnaCompileTarget = InferenceEngine::GNAConfigParams::GNA_TARGET_2_0);
void DestroyGNAStruct(Gna2Model *gnaModel);
#else

@@ -31,7 +31,7 @@ bool RangeLimit2D::isValid(const uint32_t h, const uint32_t w) const {
}
std::string RangeLimit2D::GetErrorOrEmpty(const uint32_t h, const uint32_t w) const {
return hLimit.GetErrorOrEmpty(h) + hLimit.GetErrorOrEmpty(w);
return hLimit.GetErrorOrEmpty(h) + wLimit.GetErrorOrEmpty(w);
}
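The fix matters because the original line validated the width against the height limit; a minimal sketch of the observable difference (hypothetical limit ranges):

// Suppose hLimit accepts 1..100 and wLimit accepts 1..64.
// Before the fix, GetErrorOrEmpty(50, 80) produced no error, because
// 80 was (incorrectly) checked against the 1..100 height range twice.
// After the fix, 80 is checked against wLimit and an error is reported.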
RangeMultipleLimit::RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn) : RangeLimit(rlIn), multiplier(multiplierIn) {

@@ -24,6 +24,7 @@
#include "gna-api.h"
#endif
#include "backend/am_intel_dnn.hpp"
#include "gna/gna_config.hpp"
#include "gna_plugin_log.hpp"
@@ -115,13 +116,26 @@ uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId, Gna2Accelera
return reqId;
}
void enforceLegacyCnn(Gna2Operation& operation) {
snprintf(
const_cast<char*>(operation.Operands[1]->Layout),
sizeof(operation.Operands[1]->Layout) / sizeof(char),
"GNA1");
}
void GNADeviceHelper::enforceLegacyCnns(Gna2Model& gnaModel) {
for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) {
if (gnaModel.Operations[i].Type == Gna2OperationTypeConvolution) {
snprintf(
const_cast<char*>(gnaModel.Operations[i].Operands[1]->Layout),
sizeof(gnaModel.Operations[i].Operands[1]->Layout) / sizeof(char),
"GNA1");
enforceLegacyCnn(gnaModel.Operations[i]);
}
}
}
void GNADeviceHelper::enforceLegacyCnnsWhenNeeded(Gna2Model& gnaModel) {
for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) {
auto& op = gnaModel.Operations[i];
if (GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(op)) {
enforceLegacyCnn(op);
}
}
}
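The "GNA1" string written into the weight operand's Layout field is the GNA2 library's marker for legacy convolution handling; the field is a small fixed-size char buffer on Gna2Tensor, hence the snprintf. A hypothetical sanity check one could run after the pass (assumes Operands[1] is the filter tensor, as the code above does):

#include <cassert>
#include <string>

void checkLegacyMarkers(const Gna2Model& model) {
    for (uint32_t i = 0; i < model.NumberOfOperations; i++) {
        const auto& op = model.Operations[i];
        if (GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(op)) {
            // every legacy-specific convolution should now carry the marker
            assert(std::string(op.Operands[1]->Layout) == "GNA1");
        }
    }
}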
@@ -132,6 +146,7 @@ uint32_t GNADeviceHelper::createModel(Gna2Model& gnaModel) const {
if (enforceLegacyCnnNeeded()) {
enforceLegacyCnns(gnaModel);
}
enforceLegacyCnnsWhenNeeded(gnaModel);
#if GNA_LIB_VER == 2 && defined MODEL_DUMP
std::string path =
#ifdef _WIN32
@@ -582,3 +597,14 @@ void GNADeviceHelper::getGnaPerfCounters(std::map<std::string, InferenceEngine::
#endif
retPerfCounters["1.2 Stall scoring time in HW"] = info;
}
std::string GNADeviceHelper::getEffectiveGnaCompileTarget() const {
#if GNA_LIB_VER == 1
return InferenceEngine::GNAConfigParams::GNA_TARGET_2_0;
#else
if (getTargetDevice(false) == Gna2DeviceVersion3_0) {
return InferenceEngine::GNAConfigParams::GNA_TARGET_3_0;
}
return InferenceEngine::GNAConfigParams::GNA_TARGET_2_0;
#endif
}

@@ -180,6 +180,7 @@ public:
void getGnaPerfCounters(std::map<std::string,
InferenceEngine::InferenceEngineProfileInfo>& retPerfCounters);
static std::string GetGnaLibraryVersion();
std::string getEffectiveGnaCompileTarget() const;
private:
void open(uint8_t const n_threads);
@@ -194,6 +195,7 @@ public:
static const std::map <const std::pair<Gna2OperationType, int32_t>, const std::string > operandTypes;
static void enforceLegacyCnns(Gna2Model& gnaModel);
static void enforceLegacyCnnsWhenNeeded(Gna2Model& gnaModel);
Gna2DeviceVersion parseDeclaredTarget(std::string target, const bool execTarget) const;
Gna2DeviceVersion getDefaultTarget() const;
Gna2DeviceVersion getTargetDevice(bool execTarget) const;

@@ -991,10 +991,14 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
#else
nnets.emplace_back(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap());
#endif
std::string effectiveGnaCompileTarget = config.gnaCompileTarget;
if (gnadevice) {
effectiveGnaCompileTarget = gnadevice->getEffectiveGnaCompileTarget();
}
if (!gnaFlags->sw_fp32 && !graphCompiler.dnnComponents.components.empty()) {
// the number of layers gets calculated inside the InitGNAStruct function
#if GNA_LIB_VER == 2
dnn->InitGNAStruct(&std::get<0>(gnaModels.front())->obj);
dnn->InitGNAStruct(&std::get<0>(gnaModels.front())->obj, effectiveGnaCompileTarget);
#else
dnn->InitGNAStruct(&std::get<0>(nnets.front())->obj);
#endif
@@ -1005,7 +1009,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
#if GNA_LIB_VER == 2
gnaModels.push_back(std::make_tuple(make_shared<CPPWrapper<Gna2Model>>()));
// this can be improved by just copying all structures, but we are too lazy
dnn->InitGNAStruct(&std::get<0>(gnaModels.back())->obj);
dnn->InitGNAStruct(&std::get<0>(gnaModels.back())->obj, effectiveGnaCompileTarget);
#else
nnets.emplace_back(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap());
dnn->InitGNAStruct(&std::get<0>(nnets.back())->obj);

@@ -12,6 +12,7 @@
#include "backend/gna_limitations.hpp"
#include "layers/gna_split_layer.hpp"
#include "layers/gna_convolution_layer.hpp"
using namespace GNAPluginNS;
@@ -19,6 +20,34 @@ NGRAPH_RTTI_DEFINITION(SplitConvolution, "SplitConvolution", 0);
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0);
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0);
// Don't split when convolution is 2D and is not mappable to 1D
static bool shouldSplitCnn(const ngraph::Output<ngraph::Node>& node) {
auto convolution = dynamic_cast<ngraph::opset7::Convolution*>(node.get_node());
IE_ASSERT(convolution != nullptr);
auto& input = convolution->get_input_shape(0);
auto& filters = convolution->get_input_shape(1);
uint32_t width = input.back();
uint32_t in_channels = input.at(1);
if (input.size() >= 4 && filters.size() >= 4) {
uint32_t height = input.at(2);
auto kH = filters.at(2);
auto kW = filters.at(3);
auto sW = convolution->get_strides().at(1);
if (GNAConvolutionLayer::isConv2D(height, width, in_channels, kH, kW) &&
!GNAConvolutionLayer::isMappableFrom2DTo1D(height, width, kW, sW)) {
return false;
}
}
return true;
}
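A concrete illustration with hypothetical shapes (the at()/back() indexing above implies NCHW input and OIHW filters):

// input   {1, 8, 16, 100} -> in_channels = 8, height = 16, width = 100
// filters {8, 8, 3, 3}    -> kH = 3, kW = 3, sW = stride along width
// If isConv2D(16, 100, 8, 3, 3) holds and the convolution is not
// mappable to 1D, shouldSplitCnn() returns false, so the matchers
// below leave this 2D convolution alone instead of splitting it.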
std::shared_ptr<ngraph::Node> getConvForMatcher() {
return ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ ngraph::pattern::any_input(),
ngraph::pattern::any_input() }, [](const ngraph::Output<ngraph::Node>& convolution) {
return shouldSplitCnn(convolution);
});
}
static bool Convert(std::shared_ptr<ngraph::Node> conv,
std::shared_ptr<ngraph::Node> add,
std::shared_ptr<ngraph::Node> bias,
@@ -28,9 +57,9 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
if (input_size <= GNALimitations::bufferMaxSize) {
return false;
}
uint32_t width = conv->get_input_shape(0).back();
uint32_t in_channels = conv->get_input_shape(0).at(1);
auto& input = conv->get_input_shape(0);
uint32_t width = input.back();
uint32_t in_channels = input.at(1);
auto split_sizes = GetAlignedSplitSizes(width, GNALimitations::bufferMaxSize / in_channels);
IE_ASSERT(split_sizes.size() > 1);
std::vector<int64_t> split_sizes_casted(split_sizes.size());
@@ -40,7 +69,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
/* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1,
otherwise this split axis isn't supported */
const int64_t width_axis = conv->get_input_shape(0).size() - 1;
const int64_t width_axis = input.size() - 1;
auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(conv->input_value(0),
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}),
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes_casted.size()}), split_sizes_casted));
@@ -68,9 +97,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
}
SplitConvolution::SplitConvolution() {
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
ngraph::pattern::any_input()});
auto conv = getConvForMatcher();
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
const auto& pattern_map = m.get_pattern_value_map();
return Convert(pattern_map.at(conv).get_node_shared_ptr(), nullptr, nullptr, nullptr);
@@ -81,8 +108,7 @@ SplitConvolution::SplitConvolution() {
}
SplitConvolutionWithBias::SplitConvolutionWithBias() {
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
ngraph::pattern::any_input()});
auto conv = getConvForMatcher();
auto bias = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
auto add = ngraph::pattern::wrap_type<ngraph::opset7::Add>({conv, bias});
@@ -97,8 +123,7 @@ SplitConvolutionWithBias::SplitConvolutionWithBias() {
}
SplitConvolutionWithFq::SplitConvolutionWithFq() {
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
ngraph::pattern::any_input()});
auto conv = getConvForMatcher();
auto bias = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
auto add = ngraph::pattern::wrap_type<ngraph::opset7::Add>({conv, bias});
auto conv_output = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, add});
@@ -119,4 +144,4 @@ SplitConvolutionWithFq::SplitConvolutionWithFq() {
auto m = std::make_shared<ngraph::pattern::Matcher>(out_fq, "SplitConvolutionWithFq");
this->register_matcher(m, callback);
}
}

@@ -16,8 +16,8 @@ protected:
if (std::find(metrics.begin(), metrics.end(), METRIC_KEY(GNA_LIBRARY_FULL_VERSION)) != metrics.end()) {
std::string gnaLibVer = ie_core.GetMetric(targetDevice, METRIC_KEY(GNA_LIBRARY_FULL_VERSION));
if (gnaLibVer.rfind("2.1", 0) != 0) {
GTEST_SKIP() << "Disabled test due to GNA library version being < 2.1" << std::endl;
if (gnaLibVer.rfind("2.1", 0) != 0 && gnaLibVer.rfind("3.0", 0) != 0) {
GTEST_SKIP() << "Disabled test due to GNA library version being not 2.1 or 3.0" << std::endl;
}
skipTest = false;
}
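The rfind(prefix, 0) idiom used above is a prefix test; a minimal sketch (hypothetical version string):

#include <string>

std::string gnaLibVer = "3.0.0.1319";   // hypothetical full version string
// rfind(s, 0) == 0 holds iff the string starts with s
bool supported = gnaLibVer.rfind("2.1", 0) == 0 ||
                 gnaLibVer.rfind("3.0", 0) == 0;   // true here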