[GNA] Fixes for GNA 3.0 library (#7180)
* Pass compileTarget to am_intel_dnn
* Enable tests for GNA lib version prefix 3.0
* Fix conv split transform for 2d cnn tests
* Apply review
This commit is contained in:
parent
3a28ffaf57
commit
ce21344585
@ -23,6 +23,7 @@
|
||||
#include "dnn.hpp"
|
||||
#include "am_intel_dnn.hpp"
|
||||
#include "dnn_types.h"
|
||||
#include "gna/gna_config.hpp"
|
||||
#include "gna_types.h"
|
||||
#include "gna_limitations.hpp"
|
||||
#include "layers/gna_convolution_layer.hpp"
|
||||
@ -248,6 +249,16 @@ void GNAPluginNS::backend::AMIntelDNN::InitConvolutional2DComponentPrivate(intel
|
||||
ptr_inputs = &comp.ptr_inputs;
|
||||
ptr_outputs = &comp.ptr_outputs;
|
||||
}
|
||||
|
||||
bool GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(const Gna2Operation& op) {
|
||||
// GNA compile target GNA_TARGET_3_0 does not support pooling window < pooling stride
|
||||
return op.Type == Gna2OperationTypeConvolution &&
|
||||
op.NumberOfParameters > std::max(PoolStrideParamIdx, PoolWinParamIdx) &&
|
||||
op.Parameters[PoolStrideParamIdx] != nullptr &&
|
||||
op.Parameters[PoolWinParamIdx] != nullptr &&
|
||||
static_cast<Gna2Shape*>(op.Parameters[PoolStrideParamIdx])->NumberOfDimensions == 1 &&
|
||||
static_cast<Gna2Shape*>(op.Parameters[PoolStrideParamIdx])->Dimensions[0] > static_cast<Gna2Shape*>(op.Parameters[PoolWinParamIdx])->Dimensions[0];
|
||||
}
|
||||
#endif
|
||||
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitMaxpoolComponentPrivate(intel_dnn_component_t &comp,
|
||||
@ -1361,7 +1372,7 @@ uint32_t GNAPluginNS::backend::AMIntelDNN::CountLayers() {
|
||||
}
|
||||
|
||||
#if GNA_LIB_VER == 2
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel) {
|
||||
void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model* gnaModel, const std::string& gnaCompileTarget) {
|
||||
Gna2Operation * gnaOperation;
|
||||
if (gnaModel == nullptr)
|
||||
THROW_GNA_EXCEPTION << "Invalid input parameter";
|
||||
@ -1677,9 +1688,16 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(intel_nnet_type_t *ptr_nnet
|
||||
const auto fltStride = fltStrideShape->Dimensions[0];
|
||||
const auto outFromConv = outputFromConv(inVecCnt, nFltSize, fltStride);
|
||||
// FLAT input matrix, pooled outputs per filter
|
||||
// TODO: Issue 50386 check why (outFromConv - 1) and not (outFromConv - poolingWindow)
|
||||
outputTensor.Shape.Dimensions[1] =
|
||||
(outFromConv - 1) / poolStride->Dimensions[0] + 1;
|
||||
auto effectiveCompileTarget = gnaCompileTarget;
|
||||
if (isOperationCnnLegacySpecific(*gnaOperation)) {
|
||||
effectiveCompileTarget = InferenceEngine::GNAConfigParams::GNA_TARGET_2_0;
|
||||
}
|
||||
if (effectiveCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) {
|
||||
outputTensor.Shape.Dimensions[1] = outputFromPooling(outFromConv, poolWindow->Dimensions[0], poolStride->Dimensions[0]);
|
||||
} else {
|
||||
outputTensor.Shape.Dimensions[1] = outputFromPoolingLegacy(outFromConv, poolStride->Dimensions[0]);
|
||||
}
|
||||
|
||||
} else { // kDnnConvolutional2dOp
|
||||
// Override GNA operation output pointer with the one from pooling component
|
||||
outputTensor.Data = comp.ptr_outputs;
|
||||
|
@ -10,6 +10,7 @@
|
||||
|
||||
#include "dnn_types.h"
|
||||
#include "gna_types.h"
|
||||
#include "gna/gna_config.hpp"
|
||||
|
||||
#include "gna_plugin_log.hpp"
|
||||
|
||||
@ -152,6 +153,10 @@ public:
|
||||
(void*&)ptr_filters,
|
||||
(void*&)ptr_biases);
|
||||
}
|
||||
|
||||
// Checks whether the operation is a Convolution and its parameters make it specific to GNA1/GNA2 targets
|
||||
// It does not guarantee that the operation is fully compatible with GNA1/GNA2, but it is for sure not compatible with the GNA3 target
|
||||
static bool isOperationCnnLegacySpecific(const Gna2Operation& operation);
|
||||
#endif
|
||||
|
||||
template<class A, class B>
|
||||
@ -293,7 +298,7 @@ public:
|
||||
|
||||
|
||||
#if GNA_LIB_VER == 2
|
||||
void InitGNAStruct(Gna2Model *gnaModel);
|
||||
void InitGNAStruct(Gna2Model *gnaModel, const std::string& gnaCompileTarget = InferenceEngine::GNAConfigParams::GNA_TARGET_2_0);
|
||||
void DestroyGNAStruct(Gna2Model *gnaModel);
|
||||
#else
|
||||
|
||||
|
@ -31,7 +31,7 @@ bool RangeLimit2D::isValid(const uint32_t h, const uint32_t w) const {
|
||||
}
|
||||
|
||||
std::string RangeLimit2D::GetErrorOrEmpty(const uint32_t h, const uint32_t w) const {
|
||||
return hLimit.GetErrorOrEmpty(h) + hLimit.GetErrorOrEmpty(w);
|
||||
return hLimit.GetErrorOrEmpty(h) + wLimit.GetErrorOrEmpty(w);
|
||||
}
|
||||
|
||||
RangeMultipleLimit::RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn) : RangeLimit(rlIn), multiplier(multiplierIn) {
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "gna-api.h"
|
||||
#endif
|
||||
|
||||
#include "backend/am_intel_dnn.hpp"
|
||||
#include "gna/gna_config.hpp"
|
||||
#include "gna_plugin_log.hpp"
|
||||
|
||||
@ -115,13 +116,26 @@ uint32_t GNADeviceHelper::propagate(const uint32_t requestConfigId, Gna2Accelera
|
||||
return reqId;
|
||||
}
|
||||
|
||||
void enforceLegacyCnn(Gna2Operation& operation) {
|
||||
snprintf(
|
||||
const_cast<char*>(operation.Operands[1]->Layout),
|
||||
sizeof(operation.Operands[1]->Layout) / sizeof(char),
|
||||
"GNA1");
|
||||
}
|
||||
|
||||
void GNADeviceHelper::enforceLegacyCnns(Gna2Model& gnaModel) {
|
||||
for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) {
|
||||
if (gnaModel.Operations[i].Type == Gna2OperationTypeConvolution) {
|
||||
snprintf(
|
||||
const_cast<char*>(gnaModel.Operations[i].Operands[1]->Layout),
|
||||
sizeof(gnaModel.Operations[i].Operands[1]->Layout) / sizeof(char),
|
||||
"GNA1");
|
||||
enforceLegacyCnn(gnaModel.Operations[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GNADeviceHelper::enforceLegacyCnnsWhenNeeded(Gna2Model& gnaModel) {
|
||||
for (uint32_t i = 0; i < gnaModel.NumberOfOperations; i++) {
|
||||
auto& op = gnaModel.Operations[i];
|
||||
if (GNAPluginNS::backend::AMIntelDNN::isOperationCnnLegacySpecific(op)) {
|
||||
enforceLegacyCnn(op);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -132,6 +146,7 @@ uint32_t GNADeviceHelper::createModel(Gna2Model& gnaModel) const {
|
||||
if (enforceLegacyCnnNeeded()) {
|
||||
enforceLegacyCnns(gnaModel);
|
||||
}
|
||||
enforceLegacyCnnsWhenNeeded(gnaModel);
|
||||
#if GNA_LIB_VER == 2 && defined MODEL_DUMP
|
||||
std::string path =
|
||||
#ifdef _WIN32
|
||||
@ -582,3 +597,14 @@ void GNADeviceHelper::getGnaPerfCounters(std::map<std::string, InferenceEngine::
|
||||
#endif
|
||||
retPerfCounters["1.2 Stall scoring time in HW"] = info;
|
||||
}
|
||||
|
||||
std::string GNADeviceHelper::getEffectiveGnaCompileTarget() const {
|
||||
#if GNA_LIB_VER == 1
|
||||
return InferenceEngine::GNAConfigParams::GNA_TARGET_2_0;
|
||||
#else
|
||||
if (getTargetDevice(false) == Gna2DeviceVersion3_0) {
|
||||
return InferenceEngine::GNAConfigParams::GNA_TARGET_3_0;
|
||||
}
|
||||
return InferenceEngine::GNAConfigParams::GNA_TARGET_2_0;
|
||||
#endif
|
||||
}
|
||||
|
@ -180,6 +180,7 @@ public:
|
||||
void getGnaPerfCounters(std::map<std::string,
|
||||
InferenceEngine::InferenceEngineProfileInfo>& retPerfCounters);
|
||||
static std::string GetGnaLibraryVersion();
|
||||
std::string getEffectiveGnaCompileTarget() const;
|
||||
private:
|
||||
void open(uint8_t const n_threads);
|
||||
|
||||
@ -194,6 +195,7 @@ public:
|
||||
static const std::map <const std::pair<Gna2OperationType, int32_t>, const std::string > operandTypes;
|
||||
|
||||
static void enforceLegacyCnns(Gna2Model& gnaModel);
|
||||
static void enforceLegacyCnnsWhenNeeded(Gna2Model& gnaModel);
|
||||
Gna2DeviceVersion parseDeclaredTarget(std::string target, const bool execTarget) const;
|
||||
Gna2DeviceVersion getDefaultTarget() const;
|
||||
Gna2DeviceVersion getTargetDevice(bool execTarget) const;
|
||||
|
@ -991,10 +991,14 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
|
||||
#else
|
||||
nnets.emplace_back(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap());
|
||||
#endif
|
||||
std::string effectiveGnaCompileTarget = config.gnaCompileTarget;
|
||||
if (gnadevice) {
|
||||
effectiveGnaCompileTarget = gnadevice->getEffectiveGnaCompileTarget();
|
||||
}
|
||||
if (!gnaFlags->sw_fp32 && !graphCompiler.dnnComponents.components.empty()) {
|
||||
// number of layers gets calculated inside that InitGNAStruct function
|
||||
#if GNA_LIB_VER == 2
|
||||
dnn->InitGNAStruct(&std::get<0>(gnaModels.front())->obj);
|
||||
dnn->InitGNAStruct(&std::get<0>(gnaModels.front())->obj, effectiveGnaCompileTarget);
|
||||
#else
|
||||
dnn->InitGNAStruct(&std::get<0>(nnets.front())->obj);
|
||||
#endif
|
||||
@ -1005,7 +1009,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
|
||||
#if GNA_LIB_VER == 2
|
||||
gnaModels.push_back(std::make_tuple(make_shared<CPPWrapper<Gna2Model>>()));
|
||||
// this can be improved by just copying all structures, but we are too lazy
|
||||
dnn->InitGNAStruct(&std::get<0>(gnaModels.back())->obj);
|
||||
dnn->InitGNAStruct(&std::get<0>(gnaModels.back())->obj, effectiveGnaCompileTarget);
|
||||
#else
|
||||
nnets.emplace_back(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap());
|
||||
dnn->InitGNAStruct(&std::get<0>(nnets.back())->obj);
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include "backend/gna_limitations.hpp"
|
||||
#include "layers/gna_split_layer.hpp"
|
||||
#include "layers/gna_convolution_layer.hpp"
|
||||
|
||||
using namespace GNAPluginNS;
|
||||
|
||||
@ -19,6 +20,34 @@ NGRAPH_RTTI_DEFINITION(SplitConvolution, "SplitConvolution", 0);
|
||||
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithBias, "SplitConvolutionWithBias", 0);
|
||||
NGRAPH_RTTI_DEFINITION(SplitConvolutionWithFq, "SplitConvolutionWithFq", 0);
|
||||
|
||||
// Don't split when convolution is 2D and is not mappable to 1D
|
||||
static bool shouldSplitCnn(const ngraph::Output<ngraph::Node>& node) {
|
||||
auto convolution = dynamic_cast<ngraph::opset7::Convolution*>(node.get_node());
|
||||
IE_ASSERT(convolution != nullptr);
|
||||
auto& input = convolution->get_input_shape(0);
|
||||
auto& filters = convolution->get_input_shape(1);
|
||||
uint32_t width = input.back();
|
||||
uint32_t in_channels = input.at(1);
|
||||
if (input.size() >= 4 && filters.size() >= 4) {
|
||||
uint32_t height = input.at(2);
|
||||
auto kH = filters.at(2);
|
||||
auto kW = filters.at(3);
|
||||
auto sW = convolution->get_strides().at(1);
|
||||
if (GNAConvolutionLayer::isConv2D(height, width, in_channels, kH, kW) &&
|
||||
!GNAConvolutionLayer::isMappableFrom2DTo1D(height, width, kW, sW)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<ngraph::Node> getConvForMatcher() {
|
||||
return ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input() }, [](const ngraph::Output<ngraph::Node>& convolution) {
|
||||
return shouldSplitCnn(convolution);
|
||||
});
|
||||
}
|
||||
|
||||
static bool Convert(std::shared_ptr<ngraph::Node> conv,
|
||||
std::shared_ptr<ngraph::Node> add,
|
||||
std::shared_ptr<ngraph::Node> bias,
|
||||
@ -28,9 +57,9 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
|
||||
if (input_size <= GNALimitations::bufferMaxSize) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t width = conv->get_input_shape(0).back();
|
||||
uint32_t in_channels = conv->get_input_shape(0).at(1);
|
||||
auto& input = conv->get_input_shape(0);
|
||||
uint32_t width = input.back();
|
||||
uint32_t in_channels = input.at(1);
|
||||
auto split_sizes = GetAlignedSplitSizes(width, GNALimitations::bufferMaxSize / in_channels);
|
||||
IE_ASSERT(split_sizes.size() > 1);
|
||||
std::vector<int64_t> split_sizes_casted(split_sizes.size());
|
||||
@ -40,7 +69,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
|
||||
|
||||
/* TODO check if it's NHWC convolution wrapped with transposes or all input dimensions except of width == 1,
|
||||
otherwise this split axis isn't supported */
|
||||
const int64_t width_axis = conv->get_input_shape(0).size() - 1;
|
||||
const int64_t width_axis = input.size() - 1;
|
||||
auto split_node = std::make_shared<ngraph::opset7::VariadicSplit>(conv->input_value(0),
|
||||
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({1}), std::vector<int64_t>{width_axis}),
|
||||
ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape({split_sizes_casted.size()}), split_sizes_casted));
|
||||
@ -68,9 +97,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
|
||||
}
|
||||
|
||||
SplitConvolution::SplitConvolution() {
|
||||
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input()});
|
||||
|
||||
auto conv = getConvForMatcher();
|
||||
ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) {
|
||||
const auto& pattern_map = m.get_pattern_value_map();
|
||||
return Convert(pattern_map.at(conv).get_node_shared_ptr(), nullptr, nullptr, nullptr);
|
||||
@ -81,8 +108,7 @@ SplitConvolution::SplitConvolution() {
|
||||
}
|
||||
|
||||
SplitConvolutionWithBias::SplitConvolutionWithBias() {
|
||||
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input()});
|
||||
auto conv = getConvForMatcher();
|
||||
auto bias = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
|
||||
auto add = ngraph::pattern::wrap_type<ngraph::opset7::Add>({conv, bias});
|
||||
|
||||
@ -97,8 +123,7 @@ SplitConvolutionWithBias::SplitConvolutionWithBias() {
|
||||
}
|
||||
|
||||
SplitConvolutionWithFq::SplitConvolutionWithFq() {
|
||||
auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>({ngraph::pattern::any_input(),
|
||||
ngraph::pattern::any_input()});
|
||||
auto conv = getConvForMatcher();
|
||||
auto bias = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
|
||||
auto add = ngraph::pattern::wrap_type<ngraph::opset7::Add>({conv, bias});
|
||||
auto conv_output = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, add});
|
||||
|
@ -16,8 +16,8 @@ protected:
|
||||
if (std::find(metrics.begin(), metrics.end(), METRIC_KEY(GNA_LIBRARY_FULL_VERSION)) != metrics.end()) {
|
||||
std::string gnaLibVer = ie_core.GetMetric(targetDevice, METRIC_KEY(GNA_LIBRARY_FULL_VERSION));
|
||||
|
||||
if (gnaLibVer.rfind("2.1", 0) != 0) {
|
||||
GTEST_SKIP() << "Disabled test due to GNA library version being < 2.1" << std::endl;
|
||||
if (gnaLibVer.rfind("2.1", 0) != 0 && gnaLibVer.rfind("3.0", 0) != 0) {
|
||||
GTEST_SKIP() << "Disabled test due to GNA library version being not 2.1 or 3.0" << std::endl;
|
||||
}
|
||||
skipTest = false;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user