[GNA] Plugin transition to the library v3.0 Master (#7386)

* GNA Plugin transition to the library v3.0 Master

* [GNA] Enable 2D convolution decomposition on GNA 3.0

* [GNA] Fix cpplint

* [GNA] Fix cpplint

Co-authored-by: Szymon Irzabek <szymon.jakub.irzabek@intel.com>
Andrey Sapozhnikov 2021-09-21 10:59:35 +03:00 committed by GitHub
parent d725a17561
commit 2349caa482
22 changed files with 318 additions and 143 deletions

View File

@ -277,8 +277,8 @@ if(ENABLE_GNA)
set(GNA_HASH "cc954e67525006bf8bd353a6682e38bf208f6d74e973e0fc292850e721f17452")
endif()
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
set(GNA_VERSION "02.00.00.1226")
set(GNA_HASH "d5450af15c993e264c25ac4591a7dab44722e10d15fca4f222a1b84429d4e5b6")
set(GNA_VERSION "03.00.00.1377")
set(GNA_HASH "d45fb48994d8c2803a16e88e29ae48851066325b97c1c6c4a5bf4f4573d55c65")
endif()
set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include)

View File

@ -201,18 +201,6 @@ enum OvGnaType {
OvGnaTypePwl = 8,
};
#if GNA_LIB_VER == 2
enum OvGnaMode {
OvGnaModeDefault = 0,
OvGnaModeDisabled = -1
};
struct OvGnaTensor {
std::vector<uint32_t> dimensions;
OvGnaType type;
OvGnaMode mode;
};
template <class T>
OvGnaType OvGnaTypeIntFromBytes(T bytesPerElement) {
static const std::map<T, OvGnaType> m = {
@ -227,6 +215,18 @@ OvGnaType OvGnaTypeIntFromBytes(T bytesPerElement) {
return r->second;
}
#if GNA_LIB_VER == 2
enum OvGnaMode {
OvGnaModeDefault = 0,
OvGnaModeDisabled = -1
};
struct OvGnaTensor {
std::vector<uint32_t> dimensions;
OvGnaType type;
OvGnaMode mode;
};
inline std::string OvGnaTypeToString(OvGnaType type) {
static const std::map<OvGnaType, std::string> typeToString = {
{OvGnaTypeInt8, "OvGnaTypeInt8"},

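Note on the hunk above: OvGnaTypeIntFromBytes maps an element size in bytes to the matching OvGnaType through a static lookup table. A minimal self-contained sketch of that pattern follows; the enum values, the byte-to-type pairs and the error handling are assumptions for illustration, not the plugin's definitions.

#include <cstdint>
#include <map>
#include <sstream>
#include <stdexcept>

// Local stand-in for OvGnaType; the numeric values are assumed for the sketch.
enum class GnaElementType : int { Int8 = 1, Int16 = 2, Int32 = 4 };

template <class T>
GnaElementType TypeFromBytesSketch(T bytesPerElement) {
    // One entry per supported integer precision.
    static const std::map<T, GnaElementType> m = {
        {1, GnaElementType::Int8},
        {2, GnaElementType::Int16},
        {4, GnaElementType::Int32},
    };
    const auto r = m.find(bytesPerElement);
    if (r == m.end()) {
        std::ostringstream msg;
        msg << "unsupported element size: " << bytesPerElement << " bytes";
        throw std::runtime_error(msg.str());  // error reporting is simplified here
    }
    return r->second;
}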
View File

@ -94,23 +94,36 @@ std::string VectorOrSquareLimitByChannelsAndPrecision::GetErrorOrEmpty(const uin
return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what);
}
void Validator::ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
const uint32_t strideH, const uint32_t strideW, OvGnaType inPrecision) const {
bool Validator::ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kernelH, const uint32_t kernelW, const uint32_t kernelN,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
OvGnaType inPrecision, bool exception) const {
const std::string prefix = "Layer Convolution2D: " + name + ":";
auto error = inputHWLimit.GetErrorOrEmpty(inHeight, inWidth);
error += kernelNumberLimit.GetErrorOrEmpty(kN);
error += kernelNumberLimit.GetErrorOrEmpty(kernelN);
error += inputChannelsNumberLimit.GetErrorOrEmpty(inChannels);
error += kernelLimit.GetErrorOrEmpty(kH, kW, inPrecision, inChannels, "kernel");
error += kernelLimit.GetErrorOrEmpty(kernelH, kernelW, inPrecision, inChannels, "kernel");
error += strideLimit.GetErrorOrEmpty(strideH, strideW, inPrecision, inChannels, "convolution stride");
ThrowIfNotEmpty(prefix, error);
const RangeLimit kernelStrideHLimit{1, kernelH, "kernel stride height (must be up to kernel height)"};
const RangeLimit kernelStrideWLimit{1, kernelW, "kernel stride width (must be up to kernel width)"};
error += kernelStrideHLimit.GetErrorOrEmpty(strideH);
error += kernelStrideWLimit.GetErrorOrEmpty(strideW);
error += dilationLimit.GetErrorOrEmpty(dilationH, dilationW);
if (exception)
ThrowIfNotEmpty(prefix, error);
return error.empty() ? true : false;
}
void Validator::ValidatePooling2D(std::string name,
bool Validator::ValidatePooling2D(std::string name,
const uint32_t windowH, const uint32_t windowW,
const uint32_t strideH, const uint32_t strideW) const {
const uint32_t strideH, const uint32_t strideW,
bool exception) const {
const std::string prefix = "Layer Pooling2D: " + name + ":";
auto error = poolingWindowLimit.GetErrorOrEmpty(windowH, windowW, "pooling window");
@ -120,7 +133,10 @@ void Validator::ValidatePooling2D(std::string name,
error += poolingStrideHLimit.GetErrorOrEmpty(strideH);
error += poolingStrideWLimit.GetErrorOrEmpty(strideW);
ThrowIfNotEmpty(prefix, error);
if (exception)
ThrowIfNotEmpty(prefix, error);
return error.empty() ? true : false;
}
void Validator::ThrowIfNotEmpty(const std::string prefix, const std::string error) {

View File

@ -16,6 +16,8 @@ constexpr uint32_t bufferMaxSize = 65528;
constexpr uint32_t convMinFiltersNum = 4;
constexpr uint32_t convMaxFiltersNum = 65532;
constexpr uint32_t convDilationHeight = 1;
constexpr uint32_t convDilationWidth = 1;
constexpr uint32_t convFiltersNumDivider = 4;
constexpr uint32_t convFilterSizeDivider = 8;
constexpr uint32_t convFilterMaxSize = 768;
@ -97,19 +99,24 @@ class Validator {
{ 240, { 3, 7, 3 }, { 2, 7, 2 } },
{ 120, { 3, 7, 3 }, { 1, 7, 1 } } };
VectorOrSquareLimitByChannelsAndPrecision& strideLimit = kernelLimit;
RangeLimit2D dilationLimit{ {convDilationHeight, convDilationHeight, "dilation height" },
{ convDilationWidth, convDilationWidth, "dilation width" } };
const VectorOrSquareLimit poolingWindowLimit{ 3, 1, 1 };
static void ThrowIfNotEmpty(const std::string prefix, const std::string error);
public:
Validator() = default;
void ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
const uint32_t strideH, const uint32_t strideW, OvGnaType inPrecision) const;
bool ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kernelH, const uint32_t kernelW, const uint32_t kernelN,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
OvGnaType inPrecision, bool exception = true) const;
void ValidatePooling2D(std::string name,
bool ValidatePooling2D(std::string name,
const uint32_t windowH, const uint32_t windowW,
const uint32_t strideH, const uint32_t strideW) const;
const uint32_t strideH, const uint32_t strideW,
bool exception = true) const;
};
} // namespace Cnn2D
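With these declarations, ValidateCnn2D and ValidatePooling2D return a bool and take an exception flag (defaulting to true), so callers can keep the old throw-on-error behaviour or probe support silently, as the decomposition pass does further down in this change. A stripped-down sketch of that validate-and-optionally-throw shape, with a placeholder limit check and illustrative names:

#include <cstdint>
#include <stdexcept>
#include <string>

// Placeholder limit check; the real validator aggregates many such checks.
static std::string CheckKernelSketch(uint32_t kernelH, uint32_t kernelW) {
    return (kernelH <= 7 && kernelW <= 7) ? "" : "kernel exceeds 7x7; ";
}

// Returns true when the parameters pass; throws only when 'exception' is set.
bool ValidateCnn2DSketch(const std::string& name, uint32_t kernelH, uint32_t kernelW,
                         bool exception = true) {
    const std::string prefix = "Layer Convolution2D: " + name + ":";
    const std::string error = CheckKernelSketch(kernelH, kernelW);
    if (exception && !error.empty())
        throw std::runtime_error(prefix + " " + error);
    return error.empty();
}

// Probing use, mirroring how GNA30SupportedConv later asks whether a convolution
// is natively supported without aborting compilation:
//   const bool supported = ValidateCnn2DSketch("conv1", 3, 3, false);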

View File

@ -9,19 +9,21 @@
#include "gna2-model-export-api.h"
#include "gna2-model-suecreek-header.h"
#include "gna_api_wrapper.hpp"
#include "gna2-device-api.h"
#include <cstdint>
#include <fstream>
void * ExportSueLegacyUsingGnaApi2(
uint32_t modelId,
uint32_t deviceIndex,
Gna2ModelSueCreekHeader* modelHeader) {
uint32_t exportConfig;
auto status = Gna2ModelExportConfigCreate(gnaUserAllocatorAlignedPage, &exportConfig);
GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigCreate");
status = Gna2ModelExportConfigSetSource(exportConfig, 0, modelId);
status = Gna2ModelExportConfigSetSource(exportConfig, deviceIndex, modelId);
GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigSetSource");
status = Gna2ModelExportConfigSetTarget(exportConfig, Gna2DeviceVersionEmbedded1_0);
GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigSetTarget");

View File

@ -14,6 +14,7 @@
void * ExportSueLegacyUsingGnaApi2(
uint32_t modelId,
uint32_t deviceIndex,
Gna2ModelSueCreekHeader* modelHeader);
void ExportLdForDeviceVersion(

View File

@ -170,13 +170,9 @@ void GNADeviceHelper::releaseModel(const uint32_t model_id) {
bool GNADeviceHelper::enforceLegacyCnnNeeded() const {
const auto compileTargetDevice = getTargetDevice(false);
return isGnaLibVersion2_1 && isUpTo20HwGnaDevice(compileTargetDevice);
return (isGnaLibVersion3_0 || isGnaLibVersion2_1) && isUpTo20HwGnaDevice(compileTargetDevice);
}
namespace {
const volatile auto Gna2DeviceVersion3_0 = static_cast<Gna2DeviceVersion>(0x30);
} // namespace
Gna2DeviceVersion GNADeviceHelper::parseDeclaredTarget(std::string target, const bool execTarget) const {
auto parsed = Gna2DeviceVersion2_0;
auto throwUnsupportedGnaTarget = [&](std::string extraSuffix) {
@ -184,7 +180,7 @@ Gna2DeviceVersion GNADeviceHelper::parseDeclaredTarget(std::string target, const
THROW_GNA_EXCEPTION << "Unsupported " << key << " = \"" << target << "\"" << extraSuffix;
};
if (target == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) {
if (!isGnaLibVersion2_1)
if (!isGnaLibVersion2_1 && !isGnaLibVersion3_0)
throwUnsupportedGnaTarget(", when GNA Library version is 2.0.X.Y");
parsed = Gna2DeviceVersion3_0;
} else if (target != InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
@ -195,7 +191,7 @@ Gna2DeviceVersion GNADeviceHelper::parseDeclaredTarget(std::string target, const
Gna2DeviceVersion GNADeviceHelper::getDefaultTarget() const {
if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation)
return isGnaLibVersion2_1 ? Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0;
return (isGnaLibVersion3_0 || isGnaLibVersion2_1) ? Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0;
return detectedGnaDevVersion;
}
@ -213,14 +209,6 @@ uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
auto status = Gna2RequestConfigCreate(model_id, &reqConfId);
checkGna2Status(status, "Gna2RequestConfigCreate");
// When the GNA_SW_EXACT mode is chosen inference results should be computed exactly the same way
// (bit exactly) as on the selected GNA execution target generation.
// See the GNA Plugin's GNA_EXEC_TARGET config option description.
if (swExactMode) {
const auto consistentDevice = getTargetDevice(true);
status = Gna2RequestConfigEnableHardwareConsistency(reqConfId, consistentDevice);
checkGna2Status(status, "Gna2RequestConfigEnableHardwareConsistency(" + std::to_string(static_cast<long>(consistentDevice)) + ")");
}
status = Gna2InstrumentationConfigAssignToRequestConfig(instrumentationConfigId, reqConfId);
checkGna2Status(status, "Gna2InstrumentationConfigAssignToRequestConfig");
@ -454,7 +442,7 @@ GNADeviceHelper::DumpResult GNADeviceHelper::dumpXnn(const uint32_t modelId) {
checkStatus();
#else
r.model.reset(
ExportSueLegacyUsingGnaApi2(modelId, &r.header),
ExportSueLegacyUsingGnaApi2(modelId, nGnaDeviceIndex, &r.header),
gnaUserFree);
#endif
@ -473,7 +461,7 @@ void GNADeviceHelper::dumpXnnForDeviceVersion(
const Gna2DeviceVersion targetDeviceVersion) {
Gna2ModelSueCreekHeader sueHeader;
auto ptr = ExportSueLegacyUsingGnaApi2(modelId, &sueHeader);
auto ptr = ExportSueLegacyUsingGnaApi2(modelId, nGnaDeviceIndex, &sueHeader);
gnaUserFree(ptr);
ExportGnaDescriptorPartiallyFilled(sueHeader.NumberOfLayers, outStream);
@ -506,8 +494,15 @@ void GNADeviceHelper::open(uint8_t n_threads) {
#else
auto status = Gna2DeviceGetVersion(nGnaDeviceIndex, &detectedGnaDevVersion);
checkGna2Status(status, "Gna2DeviceGetVersion");
status = Gna2DeviceOpen(nGnaDeviceIndex);
checkGna2Status(status, "Gna2DeviceOpen");
if (useDeviceEmbeddedExport) {
status = Gna2DeviceCreateForExport(exportGeneration, &nGnaDeviceIndex);
GNADeviceHelper::checkGna2Status(status, "Gna2DeviceCreateForExport");
} else {
status = Gna2DeviceOpen(nGnaDeviceIndex);
checkGna2Status(status, "Gna2DeviceOpen");
}
// TODO: GNA2: uncomment when scratchpad repaired
// status = Gna2DeviceSetNumberOfThreads(nGnaDeviceIndex, n_threads);
// checkGna2Status(status);

View File

@ -55,7 +55,10 @@ class GNADeviceHelper {
Gna2DeviceVersion detectedGnaDevVersion = Gna2DeviceVersionSoftwareEmulation;
std::string executionTarget;
std::string compileTarget;
bool useDeviceEmbeddedExport = false;
Gna2DeviceVersion exportGeneration = Gna2DeviceVersionEmbedded1_0;
bool isGnaLibVersion2_1 = false;
bool isGnaLibVersion3_0 = false;
static const uint32_t TotalGna2InstrumentationPoints = 2;
Gna2InstrumentationPoint gna2InstrumentationPoints[TotalGna2InstrumentationPoints] = {
@ -82,12 +85,16 @@ public:
bool swExactModeIn = false,
uint8_t lib_async_n_threads = 1,
bool use_openmp = false,
bool isPerformanceMeasuring = false) :
bool isPerformanceMeasuring = false,
bool deviceEmbedded = false,
int deviceVersionParsed = 0) :
swExactMode(swExactModeIn),
executionTarget(executionTargetIn),
compileTarget(compileTargetIn),
isPerformanceMeasuring(isPerformanceMeasuring),
nGnaDeviceIndex{selectGnaDevice()} {
nGnaDeviceIndex{selectGnaDevice()},
useDeviceEmbeddedExport(deviceEmbedded),
exportGeneration(static_cast<Gna2DeviceVersion>(deviceVersionParsed)) {
#endif
open(lib_async_n_threads);
initGnaPerfCounters();
@ -98,6 +105,9 @@ public:
if (gnaLibVersion.rfind("2.1", 0) == 0) {
isGnaLibVersion2_1 = true;
}
if (gnaLibVersion.rfind("3.0", 0) == 0) {
isGnaLibVersion3_0 = true;
}
#endif
if (use_openmp) {

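The isGnaLibVersion2_1 / isGnaLibVersion3_0 flags above rely on std::string::rfind(prefix, 0) == 0, which acts as a "starts with" test: a backwards search constrained to position 0 can only succeed at the very beginning of the string. A tiny self-contained illustration; the version string is an example value only.

#include <cassert>
#include <string>

int main() {
    const std::string gnaLibVersion = "3.0.0.1377";  // example value
    const bool isGnaLibVersion3_0 = gnaLibVersion.rfind("3.0", 0) == 0;  // true: "3.0" found at index 0
    const bool isGnaLibVersion2_1 = gnaLibVersion.rfind("2.1", 0) == 0;  // false: no match at index 0
    assert(isGnaLibVersion3_0 && !isGnaLibVersion2_1);
    return 0;
}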
View File

@ -580,7 +580,8 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
cnn2dValidator.ValidateCnn2D(layer->name,
in_height, in_width, in_channels,
convolution._kernel_y, convolution._kernel_x, filter_n, convolution._stride_y, convolution._stride_x, inputPrec);
convolution._kernel_y, convolution._kernel_x, filter_n, convolution._stride_y, convolution._stride_x,
convolution._dilation_y, convolution._dilation_x, inputPrec);
float weight_scale_factor = getScaleFactor(layer, QuantizedDataType::weights);
float output_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
@ -2577,4 +2578,4 @@ GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint
}
}
return temp_buffer;
}
}

View File

@ -420,7 +420,9 @@ void GNAPlugin::InitGNADevice() {
config.swExactMode,
gnaFlags->gna_lib_async_threads_num,
gnaFlags->gna_openmp_multithreading,
gnaFlags->performance_counting);
gnaFlags->performance_counting,
!config.dumpXNNPath.empty(),
GetDeviceVersionFromString(config.dumpXNNGeneration));
#endif
size_t page_size_bytes = 4096;
gnamem = std::make_shared<gna_memory_type>(memory::make_polymorph<memory::GNAAllocator>(gnadevice), page_size_bytes);
@ -706,6 +708,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
InitGNADevice();
}
std::string effectiveGnaCompileTarget = config.gnaCompileTarget;
if (gnadevice) {
effectiveGnaCompileTarget = gnadevice->getEffectiveGnaCompileTarget();
}
bool isNgraphPassesUsed = false;
if (_network.getFunction()) {
@ -719,11 +726,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
manager.register_pass<ConvertDWSCToScaleShifts>();
manager.register_pass<ConvertPaddedToValidConv>();
if (config.gnaCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
manager.register_pass<Decompose2DConvTransposedWithBiasAF>();
manager.register_pass<Decompose2DConvTransposedWithBias>();
manager.register_pass<Decompose2DConv>();
}
manager.register_pass<Decompose2DConvTransposedWithBiasAF>(effectiveGnaCompileTarget, config.gnaPrecision);
manager.register_pass<Decompose2DConvTransposedWithBias>(effectiveGnaCompileTarget, config.gnaPrecision);
manager.register_pass<Decompose2DConv>(effectiveGnaCompileTarget, config.gnaPrecision);
// TODO enable this transformation for networks with convolutions
if (!ngraph::op::util::has_op_with_type<ngraph::opset7::Convolution>(graph)) {
manager.register_pass<ConvertMatmulWithFqToPointWiseConvolution>();
@ -1036,10 +1041,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
#else
nnets.emplace_back(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap());
#endif
std::string effectiveGnaCompileTarget = config.gnaCompileTarget;
if (gnadevice) {
effectiveGnaCompileTarget = gnadevice->getEffectiveGnaCompileTarget();
}
if (!gnaFlags->sw_fp32 && !graphCompiler.dnnComponents.components.empty()) {
// number of layer gets calculated inside that InitGNAStruct function
#if GNA_LIB_VER == 2

View File

@ -201,20 +201,25 @@ ConvertPaddedToValidConv::ConvertPaddedToValidConv() {
ngraph::pattern::consumers_count(1));
auto af1 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({bias}, ngraph::pattern::consumers_count(1));
ngraph::opset7::Sign, ngraph::opset7::Clamp>({conv}, ngraph::pattern::consumers_count(1));
auto af2 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({fq_bias}, ngraph::pattern::consumers_count(1));
ngraph::opset7::Sign, ngraph::opset7::Clamp>({bias}, ngraph::pattern::consumers_count(1));
auto af3 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({max_pool1}, ngraph::pattern::consumers_count(1));
ngraph::opset7::Sign, ngraph::opset7::Clamp>({fq_bias}, ngraph::pattern::consumers_count(1));
auto af4 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({max_pool1}, ngraph::pattern::consumers_count(1));
auto af5 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({max_pool2}, ngraph::pattern::consumers_count(1));
auto fq_af = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({af4, const_input, const_input, const_input, const_input},
auto fq_af1 = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({af3, const_input, const_input, const_input, const_input},
ngraph::pattern::consumers_count(1));
auto fq_af2 = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({af5, const_input, const_input, const_input, const_input},
ngraph::pattern::consumers_count(1));
auto transpose_input =
std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq_bias, af1, af2, af3, af4, fq_af});
std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq_bias, af1, af2, af3, af4, af5, fq_af1, fq_af2});
auto trailing_transpose = ngraph::pattern::wrap_type<ngraph::opset7::Transpose>({transpose_input, const_input},
consumers_and_rank(1, 4));

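The extra af1..af5 and fq_af1/fq_af2 patterns above enumerate the alternative producers an activation function (or its FakeQuantize) may follow, and the Or node lets the trailing Transpose match whichever branch actually exists; the same layout is repeated in Decompose2DConv below. A reduced sketch of that matcher-building style, reusing the same ngraph pattern helpers; the function name and the node set are illustrative only.

#include <memory>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pattern/matcher.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

std::shared_ptr<ngraph::pattern::Matcher> MakeConvReluTransposeMatcherSketch() {
    auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
    auto leading_transpose = ngraph::pattern::wrap_type<ngraph::opset7::Transpose>();
    auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>(
        {leading_transpose, const_input}, ngraph::pattern::consumers_count(1));
    // The activation may sit directly after the convolution...
    auto af = ngraph::pattern::wrap_type<ngraph::opset7::Relu>({conv}, ngraph::pattern::consumers_count(1));
    // ...so the trailing transpose accepts either branch: Conv or Relu(Conv).
    auto transpose_input = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, af});
    auto trailing_transpose =
        ngraph::pattern::wrap_type<ngraph::opset7::Transpose>({transpose_input, const_input});
    return std::make_shared<ngraph::pattern::Matcher>(trailing_transpose, "ConvReluTransposeSketch");
}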
View File

@ -12,8 +12,8 @@
#include <ngraph/pattern/op/or.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pass/manager.hpp>
#include <ie_common.h>
#include "utils/transformation_helper.hpp"
#include <gna/gna_config.hpp>
#include "backend/gna_limitations.hpp"
#include "layers/gna_convolution_layer.hpp"
@ -71,10 +71,12 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr<ngraph::opset7:
auto pool_strides = max_pool->get_strides();
// Check Max Pool padding and limitations
// Allow only Max Pool 1D (2D is currently not supported by this transformation)
if ((max_pool->get_auto_pad() != ngraph::op::PadType::VALID &&
(max_pool->get_auto_pad() != ngraph::op::PadType::EXPLICIT ||
max_pool->get_pads_begin() != ngraph::Shape({0, 0}) || max_pool->get_pads_end() != ngraph::Shape({0, 0}))) ||
pool_filter.size() != 2 || pool_strides.size() != 2 ||
pool_filter[0] > 1 || pool_strides[0] > 1 ||
pool_filter[0] > GNALimitations::maxPoolMaxWindowSize)
return false;
@ -83,6 +85,25 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr<ngraph::opset7:
return true;
}
static bool GNA30SupportedConv(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision,
const GraphData& graph_data, const ConvData& conv_data) {
const GNALimitations::Cnn2D::Validator cnn2dValidator;
if (gnaCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0 &&
cnn2dValidator.ValidateCnn2D(graph_data.conv->get_friendly_name(),
conv_data.input_height, conv_data.input_width, conv_data.input_channel_count,
conv_data.filter_height, conv_data.filter_width, conv_data.filter_channel_count,
conv_data.filter_stride_height, conv_data.filter_stride_width, conv_data.filter_dilation_height, conv_data.filter_dilation_width,
OvGnaTypeIntFromBytes(gnaPrecision.size()), false) &&
(!graph_data.max_pool || cnn2dValidator.ValidatePooling2D(graph_data.conv->get_friendly_name(),
graph_data.max_pool->get_kernel()[0], graph_data.max_pool->get_kernel()[1],
graph_data.max_pool->get_strides()[0], graph_data.max_pool->get_strides()[1],
false)))
return true;
return false;
}
static size_t CalculateConvCount(const ConvData& conv_data) {
// Check if split of plane due to GNA HW limitations of 768 filter elements is possible
size_t conv_count = 1;
@ -349,7 +370,7 @@ static std::shared_ptr<ngraph::Node> CreateDecomposedConv(const GraphData& graph
// We need to calculate some parameters in case horizontal stride > 1 is used, because if we use the ones available from the original convolution
// we won't take into account the fact horizontal strides will be supported by the newly created 1D convolution, and not by decomposition
size_t filter_dilation_width = conv_data.filter_width > 1 ? conv_data.filter_dilation_width : 1;
size_t output_width = (conv_data.input_width - (conv_data.filter_width + filter_dilation_width - 2));
size_t output_width = (conv_data.input_width - (filter_dilation_width * (conv_data.filter_width - 1)));
if (conv_data.filter_width > 1) {
for (size_t filter_width = 0; filter_width < conv_data.filter_width; filter_width++) {
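The corrected output_width expression a few lines above matches the VALID-padding relation for a dilated kernel at stride 1: the kernel spans dilation * (width - 1) + 1 input columns, so the number of output columns is input_width - dilation * (width - 1). The old and new expressions agree when dilation is 1 and only diverge for real dilation; a quick check with assumed numbers:

#include <cassert>
#include <cstddef>

int main() {
    // Hypothetical sizes: 16-column input, 3-wide kernel, horizontal dilation 2, stride 1, VALID padding.
    const size_t input_width = 16, filter_width = 3, filter_dilation_width = 2;
    const size_t output_width = input_width - (filter_dilation_width * (filter_width - 1));  // corrected formula
    assert(output_width == 12);  // kernel spans 2*(3-1)+1 = 5 columns, so 16-5+1 = 12 positions
    assert(input_width - (filter_width + filter_dilation_width - 2) == 13);  // previous expression over-counted by 1
    return 0;
}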
@ -442,7 +463,9 @@ static void Decompose(const GraphData& graph_data, ConvData& conv_data) {
conv_result->set_friendly_name(conv_result_name);
}
static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,
static bool Convert(const std::string& gnaCompileTarget,
const InferenceEngine::Precision& gnaPrecision,
std::shared_ptr<ngraph::Node> leading_transpose,
std::shared_ptr<ngraph::Node> fq_conv,
std::shared_ptr<ngraph::Node> conv,
std::shared_ptr<ngraph::Node> trailing_transpose,
@ -468,6 +491,13 @@ static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,
if (!VerifyAndGetConvData(std::dynamic_pointer_cast<ngraph::opset7::Convolution>(conv), conv_data))
return false;
if (max_pool && !VerifyMaxPool(graph_data, std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(max_pool)))
return false;
// If compile target is GNA 3.0 and the convolution is supported on it, then skip decomposition
if (GNA30SupportedConv(gnaCompileTarget, gnaPrecision, graph_data, conv_data))
return false;
// We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC)
// or similar cases, so required network must be in NHWC order like in TF
if (!TransposeOrderMatches(std::dynamic_pointer_cast<ngraph::opset7::Transpose>(leading_transpose), {0, 3, 1, 2}))
@ -476,9 +506,6 @@ static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,
if (!TransposeOrderMatches(std::dynamic_pointer_cast<ngraph::opset7::Transpose>(trailing_transpose), {0, 2, 3, 1}))
return false;
if (max_pool && !VerifyMaxPool(graph_data, std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(max_pool)))
return false;
if (!ShouldDecompose(graph_data, conv_data))
return false;
@ -488,7 +515,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,
return true;
}
Decompose2DConv::Decompose2DConv() {
Decompose2DConv::Decompose2DConv(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConv);
auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
@ -510,20 +537,25 @@ Decompose2DConv::Decompose2DConv() {
ngraph::pattern::consumers_count(1));
auto af1 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({bias}, ngraph::pattern::consumers_count(1));
ngraph::opset7::Sign, ngraph::opset7::Clamp>({conv}, ngraph::pattern::consumers_count(1));
auto af2 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({fq_bias}, ngraph::pattern::consumers_count(1));
ngraph::opset7::Sign, ngraph::opset7::Clamp>({bias}, ngraph::pattern::consumers_count(1));
auto af3 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({max_pool1}, ngraph::pattern::consumers_count(1));
ngraph::opset7::Sign, ngraph::opset7::Clamp>({fq_bias}, ngraph::pattern::consumers_count(1));
auto af4 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({max_pool1}, ngraph::pattern::consumers_count(1));
auto af5 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({max_pool2}, ngraph::pattern::consumers_count(1));
auto fq_af = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({af4, const_input, const_input, const_input, const_input},
auto fq_af1 = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({af3, const_input, const_input, const_input, const_input},
ngraph::pattern::consumers_count(1));
auto fq_af2 = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({af5, const_input, const_input, const_input, const_input},
ngraph::pattern::consumers_count(1));
auto transpose_input =
std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq_bias, af1, af2, af3, af4, fq_af});
std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq_bias, af1, af2, af3, af4, fq_af1, fq_af2});
auto trailing_transpose = ngraph::pattern::wrap_type<ngraph::opset7::Transpose>({transpose_input, const_input},
consumers_and_rank(1, 4));
@ -540,8 +572,10 @@ Decompose2DConv::Decompose2DConv() {
auto fq_bias_it = pattern_map.find(fq_bias);
auto fq_bias_node = (fq_bias_it == std::end(pattern_map) ? nullptr : fq_bias_it->second.get_node_shared_ptr());
auto fq_af_it = pattern_map.find(fq_af);
auto fq_af_node = (fq_af_it == std::end(pattern_map) ? nullptr : fq_af_it->second.get_node_shared_ptr());
auto fq_af1_it = pattern_map.find(fq_af1);
auto fq_af2_it = pattern_map.find(fq_af2);
auto fq_af_node = (fq_af1_it == std::end(pattern_map) ?
((fq_af2_it == std::end(pattern_map) ? nullptr : fq_af2_it->second.get_node_shared_ptr())) : fq_af1_it->second.get_node_shared_ptr());
auto max_pool1_it = pattern_map.find(max_pool1);
auto max_pool2_it = pattern_map.find(max_pool2);
auto max_pool_node = (max_pool1_it == std::end(pattern_map) ?
@ -557,7 +591,8 @@ Decompose2DConv::Decompose2DConv() {
}
}
return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), fq_conv_node, pattern_map.at(conv).get_node_shared_ptr(),
return Convert(gnaCompileTarget, gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(), fq_conv_node, pattern_map.at(conv).get_node_shared_ptr(),
pattern_map.at(trailing_transpose).get_node_shared_ptr(), bias_node, bias_const_node, fq_bias_node, max_pool_node, af_node, fq_af_node,
pattern_map.at(trailing_transpose).get_node_shared_ptr());
};
@ -566,7 +601,7 @@ Decompose2DConv::Decompose2DConv() {
this->register_matcher(m, callback);
}
Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias() {
Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConvTransposedWithBias);
auto const_input_i64 = ngraph::pattern::wrap_type<ngraph::opset7::Constant>(ngraph::pattern::type_matches(ngraph::element::i64));
@ -588,7 +623,8 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias() {
if (!(bias_const_node = VerifyBiasGetConst(pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr())))
return false;
return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, pattern_map.at(conv).get_node_shared_ptr(),
return Convert(gnaCompileTarget, gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, pattern_map.at(conv).get_node_shared_ptr(),
pattern_map.at(trailing_transpose).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr(), bias_const_node, nullptr, nullptr,
nullptr, nullptr, pattern_map.at(bias).get_node_shared_ptr());
};
@ -597,7 +633,7 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias() {
this->register_matcher(m, callback);
}
Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF() {
Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConvTransposedWithBiasAF);
auto const_input_i64 = ngraph::pattern::wrap_type<ngraph::opset7::Constant>(ngraph::pattern::type_matches(ngraph::element::i64));
@ -623,7 +659,8 @@ Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF() {
if (!(bias_const_node = VerifyBiasGetConst(pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr())))
return false;
return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, pattern_map.at(conv).get_node_shared_ptr(),
return Convert(gnaCompileTarget, gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, pattern_map.at(conv).get_node_shared_ptr(),
pattern_map.at(trailing_transpose).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr(), bias_const_node, nullptr,
nullptr, pattern_map.at(af).get_node_shared_ptr(), nullptr, pattern_map.at(af).get_node_shared_ptr());
};

View File

@ -5,6 +5,7 @@
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
#include <ie_precision.hpp>
namespace GNAPluginNS {
@ -30,7 +31,7 @@ namespace GNAPluginNS {
class Decompose2DConv : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Decompose2DConv();
Decompose2DConv(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision);
};
/**
@ -51,7 +52,7 @@ public:
class Decompose2DConvTransposedWithBias : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Decompose2DConvTransposedWithBias();
Decompose2DConvTransposedWithBias(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision);
};
/**
@ -74,7 +75,7 @@ public:
class Decompose2DConvTransposedWithBiasAF : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Decompose2DConvTransposedWithBiasAF();
Decompose2DConvTransposedWithBiasAF(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision);
};
} // namespace GNAPluginNS

View File

@ -73,8 +73,8 @@ std::shared_ptr<ngraph::opset7::StridedSlice> FlatCrop(ngraph::Output<ngraph::No
std::vector<int64_t>{1, 0}); // end mask
}
std::shared_ptr<ngraph::Node> VerifyBiasGetConst(std::shared_ptr<ngraph::Node> conv, std::shared_ptr<ngraph::Node> bias) {
auto add_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(bias->input_value(1).get_node_shared_ptr());
std::shared_ptr<ngraph::Node> VerifyBiasGetConst(std::shared_ptr<ngraph::Node> conv, std::shared_ptr<ngraph::Node> add) {
auto add_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(add->input_value(1).get_node_shared_ptr());
// Check if it's really a bias and not just addition
if (add_const) {

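VerifyBiasGetConst, renamed here to take an "add" node, checks that the Add following the convolution really is a per-channel bias and, if so, returns its Constant input. A reduced sketch of that contract; the shape test below (one value per output channel, NCHW layout) is an assumption for illustration, not necessarily the exact condition the helper applies.

#include <memory>
#include <ngraph/opsets/opset7.hpp>

std::shared_ptr<ngraph::Node> VerifyBiasGetConstSketch(std::shared_ptr<ngraph::Node> conv,
                                                       std::shared_ptr<ngraph::Node> add) {
    // The bias candidate is the second input of the Add.
    auto add_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(
        add->input_value(1).get_node_shared_ptr());
    if (!add_const)
        return nullptr;  // plain addition with a non-constant operand
    // Assumed bias criterion: exactly one value per convolution output channel.
    const auto bias_elements = ngraph::shape_size(add_const->get_output_shape(0));
    const auto out_channels = conv->get_output_shape(0)[1];
    if (bias_elements != out_channels)
        return nullptr;
    return add_const;
}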
View File

@ -65,13 +65,13 @@ std::shared_ptr<ngraph::opset7::StridedSlice> FlatCrop(ngraph::Output<ngraph::No
/**
* @brief checks whether an add present after convolution is a bias and gets its const input
* @param conv convolution layer preceding potential bias
* @param bias potential bias layer passed from ngraph matcher
* @param add potential bias layer passed from ngraph matcher
* @return bias const if the add layer present after convolution is a bias, nullptr otherwise
*/
std::shared_ptr<ngraph::Node> VerifyBiasGetConst(std::shared_ptr<ngraph::Node> conv, std::shared_ptr<ngraph::Node> bias);
std::shared_ptr<ngraph::Node> VerifyBiasGetConst(std::shared_ptr<ngraph::Node> conv, std::shared_ptr<ngraph::Node> add);
/**
* @brief inserts a new fake quantize layer (if it exists) copied from an existing fake quantize layer and conncts it to the output of a given layer
* @brief inserts a new fake quantize layer copied from an existing one and connects it to the output of a given layer
* @param fq_layer existing fake quantize layer to be copied
* @param last_node the node to which output the new fake quantize layer will be connected
* @return new fake quantize layer or the last node

View File

@ -25,10 +25,11 @@ namespace LayerTestsDefinitions {
enum class modelType {
TranspConvTransp = 0, /* Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) */
TranspConvBcastAddTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Transpose(NCHW->NHWC) */
TranspConvActTransp, /* Transpose(NHWC->NCHW) => Conv => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPooling => Transpose(NCHW->NHWC) (2D Max Pool case) */
TranspConvBcastAddActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPool => Activation Function => Transpose(NCHW->NHWC) */
TranspConvTranspBcastAdd, /* Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) => Bias */
TranspConvTranspBcastAdd, /* Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) => Bias */
TranspConvTranspBcastAddAct /* Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) => Bias => Activation Function */
};
@ -142,6 +143,13 @@ protected:
}
break;
case modelType::TranspConvActTransp:
{
auto activation = std::make_shared<Relu>(conv);
lastOp = std::make_shared<Transpose>(activation, transposeOutOrder);
}
break;
case modelType::TranspConvBcastAddMaxPoolTransp:
{
auto bcastAdd = std::make_shared<Add>(conv, biasConst);
@ -257,6 +265,7 @@ const std::vector<op::PadType> padTypes = {
const std::vector<modelType> models = {
modelType::TranspConvTransp,
modelType::TranspConvBcastAddTransp,
modelType::TranspConvActTransp,
modelType::TranspConvBcastAddActTransp,
modelType::TranspConvTranspBcastAdd,
modelType::TranspConvTranspBcastAddAct,
@ -277,8 +286,8 @@ const std::vector<std::vector<size_t >> maxpool1DPools = {{1, 2}};
const std::vector<std::vector<size_t >> maxpool1DStrides = {{1, 1}};
const std::vector<std::vector<size_t>> input2DNHWC = {{1, 16, 16, 32}};
const std::vector<std::vector<size_t >> kernels2D = {{2, 2}, {4, 1}, {1, 3}};
const std::vector<std::vector<size_t >> strides2D = {{1, 1}, {1, 2}, {2, 1}, {2, 2}};
const std::vector<std::vector<size_t >> kernels2D = {{2, 2}, {4, 1}};
const std::vector<std::vector<size_t >> strides2D = {{1, 1}, {2, 1}};
const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{1, 2}};
const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{3, 1}};
const std::vector<std::vector<size_t >> dilations2D = {{1, 1}};

View File

@ -24,6 +24,7 @@ namespace LayerTestsDefinitions {
enum class modelType {
TranspConvTransp = 0, /* Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) */
TranspConvBcastAddTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Transpose(NCHW->NHWC) */
TranspConvActTransp, /* Transpose(NHWC->NCHW) => Conv => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPooling => Transpose(NCHW->NHWC) (2D Max Pool case) */
TranspConvBcastAddActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPool => Activation Function => Transpose(NCHW->NHWC) */
@ -141,13 +142,20 @@ protected:
}
break;
case modelType::TranspConvActTransp:
{
auto activation = std::make_shared<Relu>(conv);
lastOp = std::make_shared<Transpose>(activation, transposeOutOrder);
}
break;
case modelType::TranspConvBcastAddMaxPoolTransp:
{
auto bcastAdd = std::make_shared<Add>(conv, biasConst);
auto maxpool = std::make_shared<MaxPool>(bcastAdd, maxpoolStrides, Shape{0, 0}, Shape{0, 0}, maxpoolShape,
op::RoundingType::FLOOR, op::PadType::VALID);
auto transpose = std::make_shared<Transpose>(maxpool, transposeOutOrder);
auto lastOp = std::make_shared<Relu>(transpose);
lastOp = std::make_shared<Relu>(transpose);
}
break;
@ -221,6 +229,7 @@ const std::vector<op::PadType> padTypes = {
const std::vector<modelType> models = {
modelType::TranspConvTransp,
modelType::TranspConvBcastAddTransp,
modelType::TranspConvActTransp,
modelType::TranspConvBcastAddActTransp,
modelType::TranspConvTranspBcastAdd,
modelType::TranspConvTranspBcastAddAct,
@ -236,9 +245,9 @@ const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{3, 1}};
const std::vector<std::vector<size_t >> dilations2D = {{1, 1}, {1, 2}, {2, 1}, {2, 2}};
const std::vector<size_t> numOutChannels2D = {4};
const std::vector<std::vector<size_t >> biases2D = {{1, 4, 1, 1}};
const std::vector<std::vector<size_t >> transp_biases2D = {{1, 1, 1, 4}};
const std::vector<std::vector<size_t >> maxpool1D_pools = {{1, 2}};
const std::vector<std::vector<size_t >> maxpool1D_strides = {{1, 1}};
const std::vector<std::vector<size_t >> transpBiases2D = {{1, 1, 1, 4}};
const std::vector<std::vector<size_t >> maxpool1DPools = {{1, 2}};
const std::vector<std::vector<size_t >> maxpool1DStrides = {{1, 1}};
const auto conv2DParams = ::testing::Combine(
::testing::ValuesIn(kernels2D),
@ -252,9 +261,9 @@ const auto conv2DParams = ::testing::Combine(
const auto miscParams = ::testing::Combine(
::testing::ValuesIn(biases2D),
::testing::ValuesIn(transp_biases2D),
::testing::ValuesIn(maxpool1D_pools),
::testing::ValuesIn(maxpool1D_strides)
::testing::ValuesIn(transpBiases2D),
::testing::ValuesIn(maxpool1DPools),
::testing::ValuesIn(maxpool1DStrides)
);
INSTANTIATE_TEST_CASE_P(smoke_Decompose2DConv, Decompose2DConvTest,
@ -313,4 +322,60 @@ INSTANTIATE_TEST_CASE_P(smoke_Decompose2DConvStridesDilations, Decompose2DConvTe
::testing::ValuesIn(modelsStrides)),
Decompose2DConvTest::getTestCaseName);
/* ============= GNA 3.0 Supported Convolutions Combination ============= */
const std::vector<std::map<std::string, std::string>> configsGNA30 = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"GNA_SCALE_FACTOR_0", "1"},
{"GNA_EXEC_TARGET", "GNA_TARGET_3_0"}
}
};
const std::vector<op::PadType> padTypesGNA30 = {
op::PadType::VALID,
};
const std::vector<modelType> modelsGNA30 = {
modelType::TranspConvBcastAddMaxPoolTransp,
};
const std::vector<std::vector<size_t>> input2DNHWCGNA30 = {{1, 16, 16, 32}};
const std::vector<std::vector<size_t >> kernels2DGNA30 = {{1, 2}, {1, 4}};
const std::vector<std::vector<size_t >> strides2DGNA30 = {{1, 1}};
const std::vector<std::vector<size_t >> dilations2DGNA30 = {{1, 1}, {1, 2}};
const std::vector<size_t> numOutChannels2DGNA30 = {8};
const std::vector<std::vector<size_t >> biases2DGNA30 = {{1, 8, 1, 1}};
const std::vector<std::vector<size_t >> transpBiases2DGNA30 = {{1, 1, 1, 8}};
const std::vector<std::vector<size_t >> maxpool2DPoolsGNA30 = {{1, 1}, {1, 2}};
const std::vector<std::vector<size_t >> maxpoo2DStridesGNA30 = {{1, 1}};
const auto conv2DParamsGNA30 = ::testing::Combine(
::testing::ValuesIn(kernels2DGNA30),
::testing::ValuesIn(strides2DGNA30),
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2DGNA30),
::testing::ValuesIn(numOutChannels2DGNA30),
::testing::ValuesIn(padTypesGNA30)
);
const auto miscParamsGNA30 = ::testing::Combine(
::testing::ValuesIn(biases2DGNA30),
::testing::ValuesIn(transpBiases2DGNA30),
::testing::ValuesIn(maxpool2DPoolsGNA30),
::testing::ValuesIn(maxpoo2DStridesGNA30)
);
INSTANTIATE_TEST_CASE_P(smoke_Decompose2DConvGNA30, Decompose2DConvTest,
::testing::Combine(
conv2DParamsGNA30,
miscParamsGNA30,
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configsGNA30),
::testing::ValuesIn(input2DNHWCGNA30),
::testing::ValuesIn(modelsGNA30)),
Decompose2DConvTest::getTestCaseName);
} // namespace LayerTestsDefinitions
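The configsGNA30 map above is the set of plugin options a client would pass when it wants compilation for the GNA 3.0 execution target. A hedged usage sketch with an assumed model path; the Core / ReadNetwork / LoadNetwork calls are the standard Inference Engine API, not something introduced by this change.

#include <ie_core.hpp>
#include <map>
#include <string>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // placeholder path, not a file from this change
    const std::map<std::string, std::string> config = {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
        {"GNA_SCALE_FACTOR_0", "1"},
        {"GNA_EXEC_TARGET", "GNA_TARGET_3_0"},
    };
    auto executableNetwork = core.LoadNetwork(network, "GNA", config);
    auto inferRequest = executableNetwork.CreateInferRequest();
    return 0;
}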

View File

@ -45,6 +45,12 @@ GNA2_API enum Gna2Status Gna2MemoryAlloc(
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2DeviceCreateForExport(
Gna2DeviceVersion targetDeviceVersion,
uint32_t * deviceIndex) {
*deviceIndex = 1;
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2DeviceOpen(
uint32_t deviceIndex) {
return Gna2StatusSuccess;
@ -109,12 +115,6 @@ GNA2_API enum Gna2Status Gna2RequestConfigEnableActiveList(
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2RequestConfigEnableHardwareConsistency(
uint32_t requestConfigId,
enum Gna2DeviceVersion deviceVersion) {
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2RequestConfigSetAccelerationMode(
uint32_t requestConfigId,
enum Gna2AccelerationMode accelerationMode) {

View File

@ -20,6 +20,7 @@ namespace {
enum class modelType {
TranspConvTransp = 0, /* Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) */
TranspConvBcastAddTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Transpose(NCHW->NHWC) */
TranspConvActTransp, /* Transpose(NHWC->NCHW) => Conv => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPooling => Transpose(NCHW->NHWC) (2D Max Pool case) */
TranspConvBcastAddActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPool => Activation Function => Transpose(NCHW->NHWC) */
@ -66,7 +67,7 @@ void GetConvParams(std::shared_ptr<ngraph::opset7::Convolution> conv, ConvData&
conv_data.pads_end_width = conv->get_pads_end()[1];
}
std::shared_ptr<ngraph::opset7::FakeQuantize> createFQ(ngraph::Output<ngraph::Node>& in_node) {
std::shared_ptr<ngraph::opset7::FakeQuantize> createFQ(std::shared_ptr<ngraph::Node>& in_node) {
auto input_low = ngraph::opset7::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {1});
auto input_high = ngraph::opset7::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {5});
auto output_low = ngraph::opset7::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {0});
@ -76,7 +77,7 @@ std::shared_ptr<ngraph::opset7::FakeQuantize> createFQ(ngraph::Output<ngraph::No
ngraph::Output<ngraph::Node> createBiasFQ(const ngraph::Output<ngraph::Node>& in_node,
std::shared_ptr<ngraph::opset7::Constant>& bias_const, const bool& fq) {
ngraph::Output<ngraph::Node> bcast_add = std::make_shared<ngraph::opset7::Add>(in_node, bias_const);
std::shared_ptr<ngraph::Node> bcast_add = std::make_shared<ngraph::opset7::Add>(in_node, bias_const);
if (fq) {
bcast_add = createFQ(bcast_add);
@ -100,7 +101,7 @@ std::shared_ptr<ngraph::opset7::Result> createFunction(const bool& fq,
ConvData* conv_data) {
auto transpose_in_order = std::make_shared<ngraph::opset7::Constant>(ngraph::element::i64, ngraph::Shape{4}, std::vector<int64_t>{0, 3, 1, 2});
auto transpose_in = std::make_shared<ngraph::opset7::Transpose>(input_node, transpose_in_order);
ngraph::Output<ngraph::Node> filters = std::make_shared<ngraph::opset7::Constant>(ngraph::element::i64,
std::shared_ptr<ngraph::Node> filters = std::make_shared<ngraph::opset7::Constant>(ngraph::element::i64,
ngraph::Shape{4, input_node.get_shape()[3], filters_shape[0], filters_shape[1]});
if (fq) {
@ -123,6 +124,19 @@ std::shared_ptr<ngraph::opset7::Result> createFunction(const bool& fq,
}
break;
case modelType::TranspConvActTransp:
{
auto bcast_add = createBiasFQ(conv, bias_const, fq);
std::shared_ptr<ngraph::Node> activation = std::make_shared<ngraph::opset7::Relu>(bcast_add);
if (fq) {
activation = createFQ(activation);
}
last_op = std::make_shared<ngraph::opset7::Transpose>(activation, transpose_out_order);
}
break;
case modelType::TranspConvBcastAddMaxPoolTransp:
{
auto bcast_add = createBiasFQ(conv, bias_const, fq);
@ -428,6 +442,9 @@ INSTANTIATE_TEST_SUITE_P(ConvertPaddedToValidConvTestSuite, ConvertPaddedToValid
std::make_tuple(modelType::TranspConvBcastAddTransp, ngraph::PartialShape{1, 1, 16, 8}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::CoordinateDiff{0, 2}, ngraph::CoordinateDiff{0, 3}, ngraph::Strides{1, 1},
ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}, ngraph::op::PadType::EXPLICIT),
std::make_tuple(modelType::TranspConvActTransp, ngraph::PartialShape{1, 1, 16, 8}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::CoordinateDiff{0, 2}, ngraph::CoordinateDiff{0, 3}, ngraph::Strides{1, 1},
ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}, ngraph::op::PadType::EXPLICIT),
std::make_tuple(modelType::TranspConvBcastAddMaxPoolTransp, ngraph::PartialShape{1, 1, 16, 8}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::CoordinateDiff{0, 2}, ngraph::CoordinateDiff{0, 3}, ngraph::Strides{1, 1},
ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}, ngraph::op::PadType::EXPLICIT),
@ -459,6 +476,9 @@ INSTANTIATE_TEST_SUITE_P(ConvertPaddedToValidConvInvalidTestSuite, ConvertPadded
std::make_tuple(modelType::TranspConvBcastAddTransp, ngraph::PartialShape{2, 1, 16, 8}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::CoordinateDiff{0, 2}, ngraph::CoordinateDiff{0, 3}, ngraph::Strides{1, 1},
ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}, ngraph::op::PadType::EXPLICIT),
std::make_tuple(modelType::TranspConvActTransp, ngraph::PartialShape{2, 1, 16, 8}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::CoordinateDiff{0, 2}, ngraph::CoordinateDiff{0, 3}, ngraph::Strides{1, 1},
ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}, ngraph::op::PadType::EXPLICIT),
std::make_tuple(modelType::TranspConvBcastAddMaxPoolTransp, ngraph::PartialShape{2, 16, 16, 8}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::CoordinateDiff{0, 2}, ngraph::CoordinateDiff{0, 3}, ngraph::Strides{1, 1},
ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{5, 1}, ngraph::op::PadType::EXPLICIT),

View File

@ -22,6 +22,7 @@ namespace {
enum class modelType {
TranspConvTransp = 0, /* Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) */
TranspConvBcastAddTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Transpose(NCHW->NHWC) */
TranspConvActTransp, /* Transpose(NHWC->NCHW) => Conv => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPooling => Transpose(NCHW->NHWC) (2D Max Pool case) */
TranspConvBcastAddActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPool => Activation Function => Transpose(NCHW->NHWC) */
@ -157,6 +158,19 @@ std::shared_ptr<ngraph::opset7::Result> createFunction(const bool& fq,
}
break;
case modelType::TranspConvActTransp:
{
fq_bias = createBiasFQ(conv, bias_const, bias, fq);
std::shared_ptr<ngraph::Node> activation = std::make_shared<ngraph::opset7::Relu>(fq_bias);
if (fq) {
activation = createFQ(activation);
}
last_op = std::make_shared<ngraph::opset7::Transpose>(activation, transpose_out_order);
}
break;
case modelType::TranspConvBcastAddMaxPoolTransp:
{
fq_bias = createBiasFQ(conv, bias_const, bias, fq);
@ -555,7 +569,7 @@ std::shared_ptr<ngraph::Node> CreateDeomposedConv(const GraphData& graph_data, C
// We need to calculate some parameters in case horizontal stride > 1 is used, because if we use the ones available from the original convolution
// we won't take into account the fact horizontal strides will be supported by the newly created 1D convolution, and not by decomposition
size_t filter_dilation_width = conv_params.filter_width > 1 ? conv_params.filter_dilation_width : 1;
size_t output_width = (conv_params.input_width - (conv_params.filter_width + filter_dilation_width - 2));
size_t output_width = (conv_params.input_width - (filter_dilation_width * (conv_params.filter_width - 1)));
if (conv_params.filter_width > 1) {
for (size_t filter_width = 0; filter_width < conv_params.filter_width; filter_width++) {
@ -695,21 +709,23 @@ std::shared_ptr<ngraph::Function> Decompose2DConvTestFixture::get_reference(cons
void execute_test(modelType model, std::shared_ptr<ngraph::Function> function, std::shared_ptr<ngraph::Function> reference_function) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
InferenceEngine::Precision gnaPrecision = InferenceEngine::Precision::I16;
switch (model) {
default:
case modelType::TranspConvTransp:
case modelType::TranspConvBcastAddTransp:
case modelType::TranspConvActTransp:
case modelType::TranspConvBcastAddMaxPoolTransp:
case modelType::TranspConvBcastAddActTransp:
case modelType::TranspConvBcastAddMaxPoolActTransp:
manager.register_pass<GNAPluginNS::Decompose2DConv>();
manager.register_pass<GNAPluginNS::Decompose2DConv>("", gnaPrecision);
break;
case modelType::TranspConvTranspBcastAdd:
manager.register_pass<GNAPluginNS::Decompose2DConvTransposedWithBias>();
manager.register_pass<GNAPluginNS::Decompose2DConvTransposedWithBias>("", gnaPrecision);
break;
case modelType::TranspConvTranspBcastAddAct:
manager.register_pass<GNAPluginNS::Decompose2DConvTransposedWithBiasAF>();
manager.register_pass<GNAPluginNS::Decompose2DConvTransposedWithBiasAF>("", gnaPrecision);
break;
}
@ -732,6 +748,8 @@ INSTANTIATE_TEST_SUITE_P(Decompose2DConvTestSuite, Decompose2DConvTestFixture,
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 1}),
std::make_tuple(modelType::TranspConvBcastAddTransp, ngraph::PartialShape{1, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 1}),
std::make_tuple(modelType::TranspConvActTransp, ngraph::PartialShape{1, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 1}),
std::make_tuple(modelType::TranspConvBcastAddMaxPoolTransp, ngraph::PartialShape{1, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 1}),
std::make_tuple(modelType::TranspConvBcastAddActTransp, ngraph::PartialShape{1, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
@ -756,6 +774,8 @@ INSTANTIATE_TEST_SUITE_P(Decompose2DConvInvalidTestSuite, Decompose2DConvTestInv
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}),
std::make_tuple(modelType::TranspConvBcastAddTransp, ngraph::PartialShape{2, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}),
std::make_tuple(modelType::TranspConvActTransp, ngraph::PartialShape{2, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}),
std::make_tuple(modelType::TranspConvBcastAddMaxPoolTransp, ngraph::PartialShape{1, 16, 16, 128}, ngraph::Shape{5, 5}, ngraph::Strides{1, 1},
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{2, 2}),
std::make_tuple(modelType::TranspConvBcastAddActTransp, ngraph::PartialShape{2, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},

View File

@ -44,6 +44,13 @@ GNA2_API enum Gna2Status Gna2MemoryAlloc(
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2DeviceCreateForExport(
Gna2DeviceVersion targetDeviceVersion,
uint32_t * deviceIndex) {
*deviceIndex = 1;
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2DeviceOpen(
uint32_t deviceIndex) {
if (current != nullptr) {
@ -139,15 +146,6 @@ GNA2_API enum Gna2Status Gna2RequestConfigEnableActiveList(
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2RequestConfigEnableHardwareConsistency(
uint32_t requestConfigId,
enum Gna2DeviceVersion deviceVersion) {
if (current != nullptr) {
return current->Gna2RequestConfigEnableHardwareConsistency(requestConfigId, deviceVersion);
}
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2RequestConfigSetAccelerationMode(
uint32_t requestConfigId,
enum Gna2AccelerationMode accelerationMode) {

View File

@ -66,7 +66,7 @@ public:
}
};
#if GNA_LIB_VER == 2
void expect_enqueue_calls(GNACppApi &mockApi, bool enableHardwareConsistency = true){
void expect_enqueue_calls(GNACppApi &mockApi){
EXPECT_CALL(mockApi, Gna2ModelCreate(_,_,_)).Times(AtLeast(1)).WillRepeatedly(Invoke([](
uint32_t deviceIndex,
struct Gna2Model const * model,
@ -82,10 +82,6 @@ void expect_enqueue_calls(GNACppApi &mockApi, bool enableHardwareConsistency = t
return Gna2StatusSuccess;
}));
if (enableHardwareConsistency) {
EXPECT_CALL(mockApi, Gna2RequestConfigEnableHardwareConsistency(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
}
EXPECT_CALL(mockApi, Gna2RequestConfigSetAccelerationMode(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
EXPECT_CALL(mockApi, Gna2InstrumentationConfigAssignToRequestConfig(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
@ -352,11 +348,7 @@ void GNAPropagateMatcher :: match() {
EXPECT_CALL(mockApi, GNAPropagateForward(_, _, _, _, _, Eq(_env.proc_type)))
.WillOnce(Return(GNA_NOERROR));
#elif GNA_LIB_VER == 2
if(_env.proc_type == (GNA_SOFTWARE & GNA_HARDWARE)) {
expect_enqueue_calls(mockApi);
} else {
expect_enqueue_calls(mockApi, false);
}
expect_enqueue_calls(mockApi);
#endif
break;
case GnaPluginTestEnvironment::matchPwlInserted :
@ -574,8 +566,6 @@ void GNAPluginAOTMatcher :: match() {
return Gna2StatusSuccess;
}));
EXPECT_CALL(mockApi, Gna2RequestConfigEnableHardwareConsistency(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
EXPECT_CALL(mockApi, Gna2InstrumentationConfigAssignToRequestConfig(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
#else
#error "Not supported GNA_LIB_VER"
@ -703,8 +693,6 @@ void GNADumpXNNMatcher::match() {
ON_CALL(mockApi, Gna2RequestConfigSetAccelerationMode(_,_)).WillByDefault(Return(Gna2StatusSuccess));
ON_CALL(mockApi, Gna2RequestConfigEnableHardwareConsistency(_,_)).WillByDefault(Return(Gna2StatusSuccess));
ON_CALL(mockApi, Gna2InstrumentationConfigAssignToRequestConfig(_,_)).WillByDefault(Return(Gna2StatusSuccess));
}
#else
@ -808,8 +796,6 @@ void GNAQueryStateMatcher :: match() {
return Gna2StatusSuccess;
}));
EXPECT_CALL(mockApi, Gna2RequestConfigEnableHardwareConsistency(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
EXPECT_CALL(mockApi, Gna2InstrumentationConfigAssignToRequestConfig(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
#endif
IE_SUPPRESS_DEPRECATED_START