[GNA] Limitations refactoring (#16957)

* Limitations refactoring

* fix CI builds/tests

* changes after review

* Move GraphCompiler initialization to constructor

* resolve conflicts after rebase

* update after review

* resolve problem with double initialization for Limitations
Author: Tomasz Adamowicz, 2023-05-29 10:03:58 +02:00, committed by GitHub
parent 3300543eac
commit cccbf7ce7e
38 changed files with 1371 additions and 1073 deletions
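
The last commit-message bullet refers to guarding Limitations against being initialized twice. Below is a rough, standalone sketch of the thread-local singleton pattern the new Limitations class uses; the re-initialization guard and the m_target field are illustrative assumptions, not the exact code from this change.

#include <memory>

enum class DeviceVersion { GNA3_0, GNA3_5 };  // stand-in for target::DeviceVersion

class Limitations {
public:
    static void init(const DeviceVersion& compile_target) {
        // Assumed guard: a second init() with the same target keeps the existing
        // instance instead of rebuilding it (this PR resolves a double-init issue).
        if (k_instance && k_instance->m_target == compile_target) {
            return;
        }
        k_instance = std::shared_ptr<Limitations>(new Limitations(compile_target));
    }

    static std::shared_ptr<Limitations> get_instance() {
        // Callers are expected to run init() first, e.g. from the plugin constructor.
        return k_instance;
    }

private:
    explicit Limitations(const DeviceVersion& target) : m_target(target) {}

    DeviceVersion m_target;  // illustrative field, not taken from the diff
    // One instance per thread, so plugins created for different compile targets
    // in different threads do not overwrite each other's limits.
    static thread_local std::shared_ptr<Limitations> k_instance;
};

thread_local std::shared_ptr<Limitations> Limitations::k_instance{nullptr};

The real class additionally owns the cnn2d validator and the memory-alignment value for the selected target (m_cnn_validator and m_mem_alignment in the .cpp changes below).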


@@ -48,6 +48,8 @@
 using ov::intel_gna::gna_convolution_layer::outputFromConv;
 using ov::intel_gna::gna_convolution_layer::outputFromPooling;
+using namespace ov::intel_gna::limitations;
 namespace ov {
 namespace intel_gna {
 namespace backend {
@@ -180,8 +182,8 @@ void AMIntelDNN::InitConvolutional1DComponentPrivate(intel_dnn_component_t& comp
        THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << num_columns_in
                            << ") is not a multiply by 8";
    }
-    if (num_filters < limitations::convMinFiltersNum || num_filters > limitations::convMaxFiltersNum ||
-        num_filters % limitations::convFiltersNumDivider != 0) {
+    if (num_filters < Limitations::kConvMinFiltersNum || num_filters > Limitations::kConvMaxFiltersNum ||
+        num_filters % Limitations::kConvFiltersNumDivider != 0) {
        THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << num_filters;
    }
    auto max_number_of_out_elements = outputFromConv(num_columns_in, num_filter_coefficients, convStride);


@@ -37,267 +37,62 @@ namespace intel_gna {
using namespace target;
namespace limitations {
class SupportedElementTypes {
public:
static bool IsParameterTypeSupported(ov::element::Type type, bool is_exception_allowed = false);
static bool IsConstantTypeSupported(ov::element::Type type, bool is_exception_allowed = false);
private:
static const std::set<ov::element::Type> supported_parameter_types;
static const std::set<ov::element::Type> supported_constant_types;
};
const std::set<ov::element::Type> SupportedElementTypes::supported_parameter_types = {ov::element::u8,
                                                                                      ov::element::i16,
                                                                                      ov::element::f32};
size_t getMemoryAlignmentBytes(target::DeviceVersion target) {
static const std::unordered_map<target::DeviceVersion, size_t> mem_alignment_map{
{target::DeviceVersion::GNA1_0, 64},
{target::DeviceVersion::GNA2_0, 64},
{target::DeviceVersion::GNA3_0, 64},
{target::DeviceVersion::GNA3_1, 64},
{target::DeviceVersion::GNA3_5, 64},
{target::DeviceVersion::GNAEmbedded3_5, 64},
{target::DeviceVersion::GNA3_6, 16},
{target::DeviceVersion::GNA4_0, 16}};
return common::GetValueForKey<target::DeviceVersion, size_t>(target, mem_alignment_map);
}
bool SupportedElementTypes::is_parameter_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_parameter_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support input precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_parameter_types << "\n";
}
return false;
}
return true;
}
const std::set<ov::element::Type> SupportedElementTypes::supported_constant_types = {ov::element::i8,
ov::element::u8,
ov::element::i16,
ov::element::u16,
ov::element::i32,
ov::element::f32,
ov::element::f64};
bool SupportedElementTypes::is_constant_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_constant_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support constant precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_constant_types << "\n";
}
return false;
}
return true;
}
bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node) {
OPENVINO_ASSERT(node, "Transpose node is empty!");
const ov::Shape squeezed_shape = graph_utils::squeeze_shape(node->get_input_shape(0));
const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]);
const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]);
// GNA transpose limitations:
// - supports 2d transposes only
// - smaller dimension should be less or equal to 8
// - bigger dimension should be a multiple of limitations::noOfInputsDivisor
if (squeezed_shape.size() == 2 && min_input_dim <= 8 &&
ALIGN(max_input_dim, limitations::noOfInputsDivisor) == max_input_dim) {
return true;
}
return false;
}
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!");
size_t batch_size = conv_ie->input_value(0).get_shape()[0];
if (batch_size != 1) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() +
", type: " + conv_ie->get_type_name() + ", and batch size(" +
std::to_string(batch_size) + ") != 1 not supported";
}
return false;
}
auto check_dilation = [&](size_t filter_dilation_height, size_t filter_stride_width) -> bool {
cnn2d::RangeLimit2D dilation_limit{{convDilationHeight, convDilationHeight, "dilation height"},
{convDilationWidth, convDilationWidth, "dilation width"}};
std::string error = dilation_limit.GetErrorOrEmpty(filter_dilation_height, filter_stride_width);
return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed,
error,
conv_ie->get_friendly_name(),
conv_ie->get_type_name());
};
auto input_shape = conv_ie->input_value(0).get_shape();
auto filter_shape = conv_ie->input_value(1).get_shape();
if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) ||
(4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) {
pass::helper::ConvData conv_data;
pass::helper::GetConvData(conv_ie, conv_data);
if (gna_convolution_layer::isMappableFrom2DTo1D(conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_stride_height,
conv_data.filter_stride_width)) {
return check_dilation(conv_data.filter_dilation_height, conv_data.filter_dilation_width);
}
const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target);
if (cnn2dValidatorPtr) {
return cnn2dValidatorPtr->ValidateCnn2D(conv_ie->get_friendly_name(),
conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_channel_count,
conv_data.filter_stride_height,
conv_data.filter_stride_width,
conv_data.filter_dilation_height,
conv_data.filter_dilation_width,
OvGnaTypeIntFromBytes(gna_precision.size()),
is_exception_allowed);
}
}
return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]);
}
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
const DeviceVersion& effective_compile_target,
bool is_exception_allowed) {
OPENVINO_ASSERT(max_pool, "MaxPool node is empty!");
auto kernels = max_pool->get_kernel();
if (2 == kernels.size() && kernels[0] > 1 && kernels[1] > 1) {
const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target);
if (cnn2dValidatorPtr) {
auto strides = max_pool->get_strides();
return cnn2dValidatorPtr->ValidatePooling2D(max_pool->get_friendly_name(),
kernels[0],
kernels[1],
strides[0],
strides[1],
is_exception_allowed);
}
}
return true;
}
bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected, bool is_exception_allowed) {
OPENVINO_ASSERT(fully_connected, "FullyConnected node is empty!");
size_t output_batch_size = fully_connected->get_output_shape(0)[0];
if (output_batch_size > 8) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + fully_connected->get_friendly_name() +
", type: " + fully_connected->get_type_name() + ", and batch size(" +
std::to_string(output_batch_size) + ") not supported";
}
return false;
}
return true;
}
bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed) {
OPENVINO_ASSERT(node, "Split node is empty!");
bool is_aligned = true;
for (size_t i = 0; i < node->get_output_size(); i++) {
is_aligned &= ov::intel_gna::graph_utils::is_aligned_split(node, i);
}
return is_aligned;
}
bool is_op_supported(const std::shared_ptr<ov::Node>& node,
const DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
if (ov::op::util::is_parameter(node)) {
return SupportedElementTypes::is_parameter_type_supported(node->get_element_type(), is_exception_allowed);
} else if (ov::op::util::is_constant(node)) {
return SupportedElementTypes::is_constant_type_supported(node->get_element_type(), is_exception_allowed);
} else if (auto conv_ie = std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
return is_conv_supported(conv_ie, effective_compile_target, gna_precision, is_exception_allowed);
} else if (auto fully_connected = std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node)) {
return is_fc_supported(fully_connected, is_exception_allowed);
} else if (ov::intel_gna::graph_utils::is_pooling(node)) {
return is_pooling_supported(std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node),
effective_compile_target,
is_exception_allowed);
} else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) ||
ov::intel_gna::graph_utils::is_eltwise_add(node) || ov::intel_gna::graph_utils::is_eltwise_mul(node) ||
ov::intel_gna::graph_utils::is_crop_affined(node) ||
ov::intel_gna::graph_utils::is_activation(node.get()) ||
ov::intel_gna::graph_utils::is_gna_precision_agnostic(
node) || // check concat/split are aligned when transformations will be moved to ngraph
(std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::PowerIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr)) {
return true;
} else if (ov::intel_gna::graph_utils::is_gna_precision_agnostic(node)) {
if ((std::dynamic_pointer_cast<ngraph::opset9::Split>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::VariadicSplit>(node) != nullptr)) {
return is_split_supported(node, is_exception_allowed);
}
// TODO check concat are aligned when transformation will be moved to ngraph
return true;
}
return false;
}
void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision) {
std::stringstream error;
// Walk through the transformed model
for (auto& op : model->get_ops()) {
if (!is_op_supported(op, effective_compile_target, gna_precision, true)) {
error << "The plugin does not support layer " << op->get_friendly_name() << " (type " << op->get_type_name()
<< ")!" << std::endl;
}
}
if (!error.str().empty()) {
THROW_GNA_EXCEPTION << error.str();
}
}
namespace cnn2d {
-bool IsEqualToLimit::isValid(const uint32_t val) const {
+bool IsEqualToLimit::IsValid(const uint32_t val) const {
    return val == compared_value;
}
std::string IsEqualToLimit::GetErrorOrEmpty(const uint32_t val) const {
    std::ostringstream out;
-    if (!isValid(val)) {
+    if (!IsValid(val)) {
        out << "Unsupported " << what << ", actual value: " << val << ", but should be equal to " << compared_value
            << "\n";
    }
    return out.str();
}
-bool IsLessThanLimit ::isValid(const uint32_t val) const {
+bool IsLessThanLimit::IsValid(const uint32_t val) const {
    return val < compared_value;
}
-std::string IsLessThanLimit ::GetErrorOrEmpty(const uint32_t val) const {
+std::string IsLessThanLimit::GetErrorOrEmpty(const uint32_t val) const {
    std::ostringstream out;
-    if (!isValid(val)) {
+    if (!IsValid(val)) {
        out << "Unsupported " << what << ", actual value: " << val << ", but should be less than " << compared_value
            << "\n";
    }
    return out.str();
}
-bool RangeLimit::isValid(const uint32_t val) const {
+bool RangeLimit::IsValid(const uint32_t val) const {
    return val >= min && val <= max;
}
std::string RangeLimit::GetErrorOrEmpty(const uint32_t val) const {
    std::ostringstream out;
-    if (!isValid(val)) {
+    if (!IsValid(val)) {
        out << "Unsupported " << what << ", actual value: " << val << ", valid range [" << min << ", " << max << "]\n";
    }
    return out.str();
}
-bool RangeLimit2D::isValid(const uint32_t h, const uint32_t w) const {
-    return hLimit.isValid(h) && wLimit.isValid(w);
+bool RangeLimit2D::IsValid(const uint32_t h, const uint32_t w) const {
+    return hLimit.IsValid(h) && wLimit.IsValid(w);
}
std::string RangeLimit2D::GetErrorOrEmpty(const uint32_t h, const uint32_t w) const {
@@ -308,8 +103,8 @@ RangeMultipleLimit::RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn)
    : RangeLimit(rlIn),
      multiplier(multiplierIn) {}
-bool RangeMultipleLimit::isValid(const uint32_t val) const {
-    return RangeLimit::isValid(val) && (val % multiplier == 0);
+bool RangeMultipleLimit::IsValid(const uint32_t val) const {
+    return RangeLimit::IsValid(val) && (val % multiplier == 0);
}
std::string RangeMultipleLimit::GetErrorOrEmpty(const uint32_t val) const {
@@ -321,7 +116,7 @@ std::string RangeMultipleLimit::GetErrorOrEmpty(const uint32_t val) const {
    return e + out.str();
}
-bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) const {
+bool VectorOrSquareLimit::IsValid(const uint32_t h, const uint32_t w) const {
    if (w == 1 && h >= 1 && h <= maxVectorHeight)
        return true;
    if (h == 1 && w >= 1 && w <= maxVectorWidth)
@@ -333,7 +128,7 @@ bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) const {
std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
    std::ostringstream out;
-    if (!isValid(h, w)) {
+    if (!IsValid(h, w)) {
        out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w << ", only vertical vector up to "
            << maxVectorHeight << "x1, horizontal up to 1x" << maxVectorWidth << " or square up to " << maxSquare << "x"
            << maxSquare << " are valid\n";
@@ -341,7 +136,7 @@ std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_
    return out.str();
}
-bool RectLimit::isValid(const uint32_t h, const uint32_t w) const {
+bool RectLimit::IsValid(const uint32_t h, const uint32_t w) const {
    if (h >= 1 && h <= maxVectorHeight && w >= 1 && w <= maxVectorWidth)
        return true;
    return false;
@@ -349,7 +144,7 @@ bool RectLimit::isValid(const uint32_t h, const uint32_t w) const {
std::string RectLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
    std::ostringstream out;
-    if (!isValid(h, w)) {
+    if (!IsValid(h, w)) {
        out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w << ", only rectangular shapes up to "
            << maxVectorHeight << "x" << maxVectorWidth << " are valid\n";
    }
@@ -365,8 +160,8 @@ RectLimit RectLimitByChannels::GetByChannels(const uint32_t channels) const {
    return RectLimit{0, 0};
}
-bool RectLimitByChannels::isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
-    return GetByChannels(channels).isValid(h, w);
+bool RectLimitByChannels::IsValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
+    return GetByChannels(channels).IsValid(h, w);
}
std::string RectLimitByChannels::GetErrorOrEmpty(const uint32_t h,
@@ -380,11 +175,11 @@ RectLimitByChannels RectLimitByChannelsAndPrecision::GetByPrecision(const OvGnaT
    return precision == OvGnaTypeInt8 ? limit_for_int8 : limit_for_int16;
}
-bool RectLimitByChannelsAndPrecision::isValid(const uint32_t h,
+bool RectLimitByChannelsAndPrecision::IsValid(const uint32_t h,
                                              const uint32_t w,
                                              const OvGnaType precision,
                                              const uint32_t channels) const {
-    return GetByPrecision(precision).isValid(h, w, channels);
+    return GetByPrecision(precision).IsValid(h, w, channels);
}
std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h,
@@ -395,6 +190,66 @@ std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h,
    return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what);
}
class Validator_30 : public AbstractValidator {
static const RangeLimit2D kInputHWLimit;
static const RangeMultipleLimit kInputChannelsNumberLimit;
static const RangeMultipleLimit kKernelNumberLimit;
static const RectLimitByChannelsAndPrecision kKernelLimit;
static const RangeLimit2D kDilationLimit;
static const VectorOrSquareLimit kPoolingWindowLimit;
public:
Validator_30() = default;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
const RangeLimit2D Validator_30::kInputHWLimit{{16, 384, "input height"}, {16, 240, "input width"}};
const RangeMultipleLimit Validator_30::kInputChannelsNumberLimit{{8, 384, "number of input channels"}, 8};
@@ -404,8 +259,8 @@ const RectLimitByChannelsAndPrecision Validator_30::kKernelLimit{
    {{{48, {7, 7}}, {64, {7, 5}}, {80, {7, 4}}, {120, {7, 3}}, {384, {7, 1}}}},
};
-const RangeLimit2D Validator_30::kDilationLimit{{convDilationHeight, convDilationHeight, "dilation height"},
-                                                {convDilationWidth, convDilationWidth, "dilation width"}};
+const RangeLimit2D Validator_30::kDilationLimit{
+    {Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"},
+    {Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}};
bool Validator_30::ValidateCnn2D(const std::string& name,
                                 const uint32_t inHeight,
@@ -493,6 +349,95 @@ bool Validator_30::ShouldUseOnlyConv2DGnaIface() const {
    return false;
}
class Validator_35 : public AbstractValidator {
struct CnnLimits {
const RangeLimit2D kInputHWLimit;
const RangeLimit kInputChannelsNumberLimit1B;
const RangeLimit kInputChannelsNumberLimit2B;
const RangeLimit kKernelNumberLimit;
const RangeLimit2D kKerneHWlLimit1B;
const RangeLimit2D kKerneHWlLimit2B;
const RangeLimit2D kStrideHWLimit1B;
const RangeLimit2D kStrideHWLimit2B;
const RangeLimit2D kDilationLimit;
const RangeLimit2D kPoolingWindowHWLimit;
const RangeLimit2D kPoolingStrideHWLimit;
};
static const CnnLimits kCnn2DLimits;
static const CnnLimits kCnn1DLimits;
std::string ValidateCnn(const CnnLimits& limits,
const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision) const;
std::string ValidatePooling(const CnnLimits& limits,
const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW) const;
public:
Validator_35() = default;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
const Validator_35::CnnLimits Validator_35::kCnn2DLimits{
    {{1, 65535, "input height"}, {1, 65535, "input width"}},  // kInputHWLimit
    {1, 2048, "number of input channels"},                    // kInputChannelsNumberLimit1B
@@ -502,8 +447,8 @@ const Validator_35::CnnLimits Validator_35::kCnn2DLimits{
    {{1, 255, "kernel height"}, {1, 256, "kernel width"}},                          // kKerneHWlLimit2B
    {{1, 255, "convolution stride height"}, {1, 256, "convolution stride width"}},  // kStrideHWLimit1B
    {{1, 255, "convolution stride height"}, {1, 256, "convolution stride width"}},  // kStrideHWLimit2B
-    {{convDilationHeight, convDilationHeight, "dilation height"},  // kDilationLimit
-     {convDilationWidth, convDilationWidth, "dilation width"}},
+    {{Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"},  // kDilationLimit
+     {Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}},
    {{1, 255, "pooling window height"}, {1, 255, "pooling window width"}},  // kPoolingWindowHWLimit
    {{1, 255, "pooling stride height"}, {1, 255, "pooling stride width"}}   // kPoolingStrideHWLimit
};
@@ -517,8 +462,8 @@ const Validator_35::CnnLimits Validator_35::kCnn1DLimits{
    {{1, 1, "kernel height"}, {1, 2048, "kernel width"}},                          // kKerneHWlLimit2B
    {{1, 1, "convolution stride height"}, {1, 4096, "convolution stride width"}},  // kStrideHWLimit1B
    {{1, 1, "convolution stride height"}, {1, 2048, "convolution stride width"}},  // kStrideHWLimit2B
-    {{convDilationHeight, convDilationHeight, "dilation height"},  // kDilationLimit
-     {convDilationWidth, convDilationWidth, "dilation width"}},
+    {{Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"},  // kDilationLimit
+     {Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}},
    {{1, 1, "pooling window height"}, {1, 255, "pooling window width"}},  // kPoolingWindowHWLimit
    {{1, 1, "pooling stride height"}, {1, 255, "pooling stride width"}}   // kPoolingStrideHWLimit
};
@@ -672,16 +617,16 @@ bool Validator_35::ShouldUseOnlyConv2DGnaIface() const {
    return true;
}
-std::unique_ptr<AbstractValidator> AbstractValidator::Create(const DeviceVersion& target) {
+std::shared_ptr<AbstractValidator> AbstractValidator::Create(const DeviceVersion& target) {
    switch (target) {
    case DeviceVersion::GNA3_0:
    case DeviceVersion::GNA3_1:
-        return tools::make_unique<Validator_30>();
+        return std::make_shared<Validator_30>();
    case DeviceVersion::GNA3_5:
    case DeviceVersion::GNAEmbedded3_5:
    case DeviceVersion::GNA3_6:
    case DeviceVersion::GNA4_0:
-        return tools::make_unique<Validator_35>();
+        return std::make_shared<Validator_35>();
    default:
        return nullptr;
    }
@@ -705,15 +650,280 @@ bool AbstractValidator::ValidationSuccesful(const bool throwOnError,
    return error.empty();
}
bool UseOnly16BitConvolutionWeights(const DeviceVersion& compile_target) {
return compile_target == DeviceVersion::GNA1_0 || compile_target == DeviceVersion::GNA2_0 ||
compile_target == DeviceVersion::GNA3_0 || compile_target == DeviceVersion::GNA3_1;
}
}  // namespace cnn2d
constexpr uint32_t Limitations::kBufferMaxSize;
constexpr uint32_t Limitations::kConvMinFiltersNum;
constexpr uint32_t Limitations::kConvMaxFiltersNum;
constexpr uint32_t Limitations::kConvDilationHeight;
constexpr uint32_t Limitations::kConvDilationWidth;
constexpr uint32_t Limitations::kConvFiltersNumDivider;
constexpr uint32_t Limitations::kConvFilterSizeDivider;
constexpr uint32_t Limitations::kConvFilterMaxSize;
constexpr uint32_t Limitations::kConvEachKernelByteAlignment;
constexpr uint32_t Limitations::kInputByteAlignment;
constexpr uint32_t Limitations::kNoOfInputsDivisor;
constexpr uint32_t Limitations::kNoOfInputsLowPrecDivisor;
constexpr uint32_t Limitations::kAffineMaxBatchSize;
constexpr uint32_t Limitations::kMaxPoolMaxWindowSize;
constexpr uint32_t Limitations::kCopyMaxGrouping;
constexpr uint32_t Limitations::kTransposeMaxSize;
constexpr uint32_t Limitations::kMaxLayersCountGNA1_0;
constexpr uint32_t Limitations::kMaxLayersCountGNA2_0;
constexpr uint32_t Limitations::kMaxLayersCountGNA3_X;
constexpr uint32_t Limitations::kBytesPerSplitElement;
constexpr uint32_t Limitations::kBytesPerCropElement;
constexpr uint32_t Limitations::kMemoryPageSize;
thread_local std::shared_ptr<Limitations> Limitations::k_instance{nullptr};
Limitations::Limitations(const DeviceVersion& target) {
m_use_only_16bit_conv_weights = (target == DeviceVersion::GNA1_0 || target == DeviceVersion::GNA2_0 ||
target == DeviceVersion::GNA3_0 || target == DeviceVersion::GNA3_1);
m_mem_alignment = get_memory_alignment_bytes(target);
m_cnn_validator = cnn2d::AbstractValidator::Create(target);
}
void Limitations::init(const DeviceVersion& compile_target) {
k_instance = std::shared_ptr<Limitations>(new Limitations(compile_target));
}
bool Limitations::is_transpose_2d(const std::vector<size_t>& shape) {
return std::count_if(std::begin(shape), std::end(shape), [](size_t dim) {
return dim != 1;
}) == 2;
}
bool Limitations::is_transpose_supported(const std::vector<size_t>& shape) {
if (!is_transpose_2d(shape))
return false;
auto shape_no_1 = shape;
shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end());
size_t min, max;
std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]);
return min <= 8 && max % 8 == 0 && max >= 8 && max <= kTransposeMaxSize;
}
size_t Limitations::get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input) {
auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims()));
return total_size / kBufferMaxSize + 1;
}
size_t Limitations::get_memory_alignment_bytes(const DeviceVersion& target) const {
static const std::unordered_map<DeviceVersion, size_t> mem_alignment_map{{DeviceVersion::GNA1_0, 64},
{DeviceVersion::GNA2_0, 64},
{DeviceVersion::GNA3_0, 64},
{DeviceVersion::GNA3_1, 64},
{DeviceVersion::GNA3_5, 64},
{DeviceVersion::GNAEmbedded3_5, 64},
{DeviceVersion::GNA3_6, 16},
{DeviceVersion::GNA4_0, 16}};
return common::GetValueForKey<DeviceVersion, size_t>(target, mem_alignment_map);
}
bool SupportedElementTypes::IsParameterTypeSupported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_parameter_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support input precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_parameter_types << "\n";
}
return false;
}
return true;
}
const std::set<ov::element::Type> SupportedElementTypes::supported_constant_types = {ov::element::i8,
ov::element::u8,
ov::element::i16,
ov::element::u16,
ov::element::i32,
ov::element::f32,
ov::element::f64};
bool SupportedElementTypes::IsConstantTypeSupported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_constant_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support constant precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_constant_types << "\n";
}
return false;
}
return true;
}
bool Limitations::is_transpose_supported(const std::shared_ptr<const ov::Node>& node) {
OPENVINO_ASSERT(node, "Transpose node is empty!");
const ov::Shape squeezed_shape = graph_utils::squeeze_shape(node->get_input_shape(0));
const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]);
const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]);
// GNA transpose limitations:
// - supports 2d transposes only
// - smaller dimension should be less or equal to 8
// - bigger dimension should be a multiple of Limitations::kNoOfInputsDivisor
if (squeezed_shape.size() == 2 && min_input_dim <= 8 && ALIGN(max_input_dim, kNoOfInputsDivisor) == max_input_dim) {
return true;
}
return false;
}
bool Limitations::is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!");
size_t batch_size = conv_ie->input_value(0).get_shape()[0];
if (batch_size != 1) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() +
", type: " + conv_ie->get_type_name() + ", and batch size(" +
std::to_string(batch_size) + ") != 1 not supported";
}
return false;
}
auto check_dilation = [&](size_t filter_dilation_height, size_t filter_stride_width) -> bool {
cnn2d::RangeLimit2D dilation_limit{{kConvDilationHeight, kConvDilationHeight, "dilation height"},
{kConvDilationWidth, kConvDilationWidth, "dilation width"}};
std::string error = dilation_limit.GetErrorOrEmpty(filter_dilation_height, filter_stride_width);
return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed,
error,
conv_ie->get_friendly_name(),
conv_ie->get_type_name());
};
auto input_shape = conv_ie->input_value(0).get_shape();
auto filter_shape = conv_ie->input_value(1).get_shape();
if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) ||
(4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) {
pass::helper::ConvData conv_data;
pass::helper::GetConvData(conv_ie, conv_data);
if (gna_convolution_layer::isMappableFrom2DTo1D(conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_stride_height,
conv_data.filter_stride_width)) {
return check_dilation(conv_data.filter_dilation_height, conv_data.filter_dilation_width);
}
if (m_cnn_validator) {
return m_cnn_validator->ValidateCnn2D(conv_ie->get_friendly_name(),
conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_channel_count,
conv_data.filter_stride_height,
conv_data.filter_stride_width,
conv_data.filter_dilation_height,
conv_data.filter_dilation_width,
OvGnaTypeIntFromBytes(gna_precision.size()),
is_exception_allowed);
}
}
return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]);
}
bool Limitations::is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
bool is_exception_allowed) {
OPENVINO_ASSERT(max_pool, "MaxPool node is empty!");
auto kernels = max_pool->get_kernel();
if (2 == kernels.size() && kernels[0] > 1 && kernels[1] > 1) {
if (m_cnn_validator) {
auto strides = max_pool->get_strides();
return m_cnn_validator->ValidatePooling2D(max_pool->get_friendly_name(),
kernels[0],
kernels[1],
strides[0],
strides[1],
is_exception_allowed);
}
}
return true;
}
bool Limitations::is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
bool is_exception_allowed) {
OPENVINO_ASSERT(fully_connected, "FullyConnected node is empty!");
size_t output_batch_size = fully_connected->get_output_shape(0)[0];
if (output_batch_size > 8) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + fully_connected->get_friendly_name() +
", type: " + fully_connected->get_type_name() + ", and batch size(" +
std::to_string(output_batch_size) + ") not supported";
}
return false;
}
return true;
}
bool Limitations::is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed) {
OPENVINO_ASSERT(node, "Split node is empty!");
bool is_aligned = true;
for (size_t i = 0; i < node->get_output_size(); i++) {
is_aligned &= ov::intel_gna::graph_utils::is_aligned_split(node, i);
}
return is_aligned;
}
bool Limitations::is_op_supported(const std::shared_ptr<ov::Node>& node,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
if (ov::op::util::is_parameter(node)) {
return SupportedElementTypes::IsParameterTypeSupported(node->get_element_type(), is_exception_allowed);
} else if (ov::op::util::is_constant(node)) {
return SupportedElementTypes::IsConstantTypeSupported(node->get_element_type(), is_exception_allowed);
} else if (auto conv_ie = std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
return is_conv_supported(conv_ie, gna_precision, is_exception_allowed);
} else if (auto fully_connected = std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node)) {
return is_fc_supported(fully_connected, is_exception_allowed);
} else if (ov::intel_gna::graph_utils::is_pooling(node)) {
return is_pooling_supported(std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node), is_exception_allowed);
} else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) ||
ov::intel_gna::graph_utils::is_eltwise_add(node) || ov::intel_gna::graph_utils::is_eltwise_mul(node) ||
ov::intel_gna::graph_utils::is_crop_affined(node) ||
ov::intel_gna::graph_utils::is_activation(node.get()) ||
ov::intel_gna::graph_utils::is_gna_precision_agnostic(
node) || // check concat/split are aligned when transformations will be moved to ngraph
(std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::PowerIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr)) {
return true;
} else if (ov::intel_gna::graph_utils::is_gna_precision_agnostic(node)) {
if ((std::dynamic_pointer_cast<ngraph::opset9::Split>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::VariadicSplit>(node) != nullptr)) {
return is_split_supported(node, is_exception_allowed);
}
// TODO check concat are aligned when transformation will be moved to ngraph
return true;
}
return false;
}
void Limitations::check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const InferenceEngine::Precision gna_precision) {
std::stringstream error;
// Walk through the transformed model
for (auto& op : model->get_ops()) {
if (!is_op_supported(op, gna_precision, true)) {
error << "The plugin does not support layer " << op->get_friendly_name() << " (type " << op->get_type_name()
<< ")!" << std::endl;
}
}
if (!error.str().empty()) {
THROW_GNA_EXCEPTION << error.str();
}
}
bool Limitations::use_only_16bit_convolution_weights() const {
return m_use_only_16bit_conv_weights;
}
IE_SUPPRESS_DEPRECATED_START
-static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) {
+bool Limitations::validate_concat_axis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) {
    LayerInfo info(layer);
    auto concat_layer = info.as<InferenceEngine::ConcatLayer*>();
    IE_ASSERT(concat_layer);
@@ -747,7 +957,8 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st
    // when all transformations are migrated to ngraph
    bool is_not_trivial_concat = false;
-    // Concatentaion of consts and input parameters only is supported, even if first dimentsion of input parameter >
+    // Concatentaion of consts and input parameters only is supported, even if first dimentsion of input
+    // parameter >
    // 1
    bool concat_all_const_or_inputs = false;
@@ -846,7 +1057,7 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st
    return true;
}
-bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concat_layer) {
+bool Limitations::validate_conv_concat_axis(const InferenceEngine::ConcatLayer* concat_layer) {
    IE_ASSERT(concat_layer);
    auto dims_size = concat_layer->insData[0].lock()->getDims().size();
@@ -898,7 +1109,7 @@ bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concat_layer) {
    return true;
}
-bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage) {
+bool Limitations::are_layers_supported(InferenceEngine::CNNNetwork& network, std::string& errMessage) {
    IE_SUPPRESS_DEPRECATED_START
    InferenceEngine::InputsDataMap inputs = network.getInputsInfo();
    std::unordered_set<InferenceEngine::CNNLayer*> allLayers;
@@ -909,7 +1120,7 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
        // If there are no inputs start search from an output
        startLayer = getCreatorLayer(outputs.begin()->second).lock();
    } else {
-        SupportedElementTypes::is_parameter_type_supported(
+        SupportedElementTypes::IsParameterTypeSupported(
            InferenceEngine::details::convertPrecision(inputs.begin()->second->getPrecision()),
            true);
@@ -944,7 +1155,7 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
                check_result = false;
            }
        } else if (info.isConcat()) {
-            if (!ValidateConcatAxis(layer, errMessage)) {
+            if (!validate_concat_axis(layer, errMessage)) {
                THROW_GNA_EXCEPTION << errMessage;
            }
        }
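
Aside from the diff itself: the transpose restriction that now lives in Limitations::is_transpose_supported (defined in this file) can be read as a small standalone check. The constant and the logic below mirror the code above; the example shapes and the main() driver are only illustrative.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <tuple>
#include <vector>

constexpr size_t kTransposeMaxSize = 65528;  // same value as Limitations::kTransposeMaxSize

// A shape is a 2D transpose candidate when exactly two dimensions differ from 1.
bool is_transpose_2d(const std::vector<size_t>& shape) {
    return std::count_if(shape.begin(), shape.end(), [](size_t dim) {
               return dim != 1;
           }) == 2;
}

// Supported when the smaller non-unit dimension is <= 8 and the bigger one is a
// multiple of 8 that does not exceed kTransposeMaxSize.
bool is_transpose_supported(const std::vector<size_t>& shape) {
    if (!is_transpose_2d(shape))
        return false;
    auto shape_no_1 = shape;
    shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end());
    size_t min, max;
    std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]);
    return min <= 8 && max % 8 == 0 && max >= 8 && max <= kTransposeMaxSize;
}

int main() {
    // {1, 8, 64}: squeezes to 8x64 -> min 8 <= 8, max 64 is a multiple of 8 -> supported
    // {1, 5, 7}:  squeezes to 5x7  -> 7 is not a multiple of 8              -> rejected
    // {2, 3, 4}:  three non-unit dimensions, not a 2D transpose             -> rejected
    std::cout << is_transpose_supported({1, 8, 64}) << " " << is_transpose_supported({1, 5, 7}) << " "
              << is_transpose_supported({2, 3, 4}) << "\n";  // prints: 1 0 0
}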


@@ -9,6 +9,8 @@
#include <cstdint>
#include <ie_algorithm.hpp>
+#include <memory>
+#include <thread>
#include "common/gna_target.hpp"
#include "common/misc_utils.hpp"
@@ -23,158 +25,19 @@ namespace ov {
namespace intel_gna {
namespace limitations {
constexpr uint32_t bufferMaxSize = 65528;
constexpr uint32_t convMinFiltersNum = 4;
constexpr uint32_t convMaxFiltersNum = 65532;
constexpr uint32_t convDilationHeight = 1;
constexpr uint32_t convDilationWidth = 1;
constexpr uint32_t convFiltersNumDivider = 4;
constexpr uint32_t convFilterSizeDivider = 8;
constexpr uint32_t convFilterMaxSize = 768;
constexpr uint32_t convEachKernelByteAlignment = 16;
constexpr uint32_t inputByteAlignment = 64;
constexpr uint32_t noOfInputsDivisor = 8;
constexpr uint32_t noOfInputsLowPrecDivisor = 16;
constexpr uint32_t affineMaxBatchSize = 8;
constexpr uint32_t maxPoolMaxWindowSize = 6;
constexpr uint32_t copyMaxGrouping = 8;
constexpr uint32_t transposeMaxSize = 65528;
// TODO In the future there should be created class/struct representing all limitations for specific device versions.
constexpr uint32_t kMaxLayersCountGNA1_0 = 1023;
constexpr uint32_t kMaxLayersCountGNA2_0 = 4096;
constexpr uint32_t kMaxLayersCountGNA3_X = 8192;
// Currently split layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr uint32_t bytesPerSplitElement = 2;
// Currently crop layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr uint32_t bytesPerCropElement = 2;
constexpr uint32_t kMemoryPageSize = 4096;
inline bool isCropAffinedOffset(size_t numberOfElements) {
const auto cropOffset = numberOfElements * bytesPerCropElement;
return (ALIGN64(cropOffset) != cropOffset);
}
inline bool IsTranspose2d(const std::vector<size_t>& shape) {
return std::count_if(std::begin(shape), std::end(shape), [](size_t dim) {
return dim != 1;
}) == 2;
}
inline bool IsTransposeSupported(const std::vector<size_t>& shape) {
if (!IsTranspose2d(shape))
return false;
auto shape_no_1 = shape;
shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end());
size_t min, max;
std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]);
return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize;
}
size_t getMemoryAlignmentBytes(target::DeviceVersion target);
class SupportedElementTypes {
public:
static bool is_parameter_type_supported(ov::element::Type type, bool is_exception_allowed = false);
static bool is_constant_type_supported(ov::element::Type type, bool is_exception_allowed = false);
private:
static const std::set<ov::element::Type> supported_parameter_types;
static const std::set<ov::element::Type> supported_constant_types;
};
/**
* @brief Validates if transpose is supported by GNA
* @param node transpose
* @return true if supported
*/
bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node);
/**
* @brief Validates if legacy convolution is supported by GNA
* @param conv_ie convolution
* @param effective_compile_target GNA compile targets
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const target::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Validates if max pooling is supported by GNA
* @param max_pool max pooling
* @param effective_compile_target GNA compile targets
* @param supported_types list of supported types
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if precision is found in supported
*/
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
const target::DeviceVersion& effective_compile_target,
bool is_exception_allowed = false);
/**
* @brief Validates if fully connected is supported by GNA
* @param fully_connected fully connected
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
bool is_exception_allowed = false);
/**
* @brief Validates if split is supported by GNA
* @param node split
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed = false);
/**
* @brief Validates if operation is supported by GNA
* @param node operation
* @param gna_compile_target GNA compile target
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_op_supported(const std::shared_ptr<ov::Node>& node,
const target::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Check if all operations are supported by GNA
* @param model ngraph model
* @param gna_compile_target GNA compile target
* @param gna_precision GNA inference precision
*/
void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const target::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision);
namespace cnn2d {
struct IsEqualToLimit {
    uint32_t compared_value;
    std::string what;
-    bool isValid(const uint32_t val) const;
+    bool IsValid(const uint32_t val) const;
    std::string GetErrorOrEmpty(const uint32_t val) const;
};
struct IsLessThanLimit {
    uint32_t compared_value;
    std::string what;
-    bool isValid(const uint32_t val) const;
+    bool IsValid(const uint32_t val) const;
    std::string GetErrorOrEmpty(const uint32_t val) const;
};
@@ -182,28 +45,28 @@ struct RangeLimit {
    uint32_t min;
    uint32_t max;
    std::string what;
-    bool isValid(const uint32_t val) const;
+    bool IsValid(const uint32_t val) const;
    std::string GetErrorOrEmpty(const uint32_t val) const;
};
struct RangeLimit2D {
    RangeLimit hLimit;
    RangeLimit wLimit;
-    bool isValid(const uint32_t h, const uint32_t w) const;
+    bool IsValid(const uint32_t h, const uint32_t w) const;
    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w) const;
};
struct RangeMultipleLimit : public RangeLimit {
    uint32_t multiplier;
    RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn);
-    bool isValid(const uint32_t val) const;
+    bool IsValid(const uint32_t val) const;
    std::string GetErrorOrEmpty(const uint32_t val) const;
};
struct RectLimit {
    uint32_t maxVectorHeight;
    uint32_t maxVectorWidth;
-    bool isValid(const uint32_t h, const uint32_t w) const;
+    bool IsValid(const uint32_t h, const uint32_t w) const;
    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
};
@@ -211,14 +74,14 @@ struct VectorOrSquareLimit {
    uint32_t maxSquare;
    uint32_t maxVectorHeight;
    uint32_t maxVectorWidth;
-    bool isValid(const uint32_t h, const uint32_t w) const;
+    bool IsValid(const uint32_t h, const uint32_t w) const;
    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
};
struct RectLimitByChannels {
    std::vector<std::pair<uint32_t, RectLimit>> limitPerChannel;
    RectLimit GetByChannels(const uint32_t channels) const;
-    bool isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
+    bool IsValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, const uint32_t channels, std::string what) const;
};
@@ -226,7 +89,7 @@ struct RectLimitByChannelsAndPrecision {
    RectLimitByChannels limit_for_int8;
    RectLimitByChannels limit_for_int16;
    RectLimitByChannels GetByPrecision(const OvGnaType precision) const;
-    bool isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
+    bool IsValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
    std::string GetErrorOrEmpty(const uint32_t h,
                                const uint32_t w,
                                const OvGnaType precision,
@@ -291,177 +154,168 @@ public:
                       OvGnaType inPrecision,
                       bool exception = true) const = 0;
-    static std::unique_ptr<AbstractValidator> Create(const target::DeviceVersion& target);
+    static std::shared_ptr<AbstractValidator> Create(const target::DeviceVersion& target);
};
class Validator_30 : public AbstractValidator {
static const RangeLimit2D kInputHWLimit;
static const RangeMultipleLimit kInputChannelsNumberLimit;
static const RangeMultipleLimit kKernelNumberLimit;
static const RectLimitByChannelsAndPrecision kKernelLimit;
static const RangeLimit2D kDilationLimit;
static const VectorOrSquareLimit kPoolingWindowLimit;
public:
Validator_30() = default;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
class Validator_35 : public AbstractValidator {
struct CnnLimits {
const RangeLimit2D kInputHWLimit;
const RangeLimit kInputChannelsNumberLimit1B;
const RangeLimit kInputChannelsNumberLimit2B;
const RangeLimit kKernelNumberLimit;
const RangeLimit2D kKerneHWlLimit1B;
const RangeLimit2D kKerneHWlLimit2B;
const RangeLimit2D kStrideHWLimit1B;
const RangeLimit2D kStrideHWLimit2B;
const RangeLimit2D kDilationLimit;
const RangeLimit2D kPoolingWindowHWLimit;
const RangeLimit2D kPoolingStrideHWLimit;
};
static const CnnLimits kCnn2DLimits;
static const CnnLimits kCnn1DLimits;
std::string ValidateCnn(const CnnLimits& limits,
const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision) const;
std::string ValidatePooling(const CnnLimits& limits,
const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW) const;
public:
Validator_35() = default;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
bool UseOnly16BitConvolutionWeights(const target::DeviceVersion& compile_target);
} // namespace cnn2d

class Limitations {
public:
/**
* @brief Create instance of the Limitations class. Due to Limitations being a singleton, multiple instances of the
* plugin with different compilation targets cannot exist at the same time
* @param compile_target GNA compile target
*/
static void init(const target::DeviceVersion& compile_target);
/**
* @brief Returns the instance of the Limitations object. Requires init() to be called before the first use
*/
static inline std::shared_ptr<Limitations> get_instance();
static bool is_transpose_2d(const std::vector<size_t>& shape);
static bool is_transpose_supported(const std::vector<size_t>& shape);
static size_t get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input);
/**
* @brief Validates if concat layer axis is supported by GNA
* @param layer concat layer
* @return true if concat layer axis is valid
*/
IE_SUPPRESS_DEPRECATED_START
static bool validate_conv_concat_axis(const InferenceEngine::ConcatLayer* concatLayer);
static bool are_layers_supported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
IE_SUPPRESS_DEPRECATED_END
/**
* @brief Validates if fully connected is supported by GNA
* @param fully_connected fully connected
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
static bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
bool is_exception_allowed = false);
/**
* @brief Validates if split is supported by GNA
* @param node split
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
static bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed = false);
/**
* @brief Validates if transpose is supported by GNA
* @param node transpose
* @return true if supported
*/
static bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node);
/**
* @brief Validates if legacy convolution is supported by GNA
* @param conv_ie convolution
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Validates if max pooling is supported by GNA
* @param max_pool max pooling
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if precision is found in supported
*/
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
bool is_exception_allowed = false);
/**
* @brief Validates if operation is supported by GNA
* @param node operation
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_op_supported(const std::shared_ptr<ov::Node>& node,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Check if all operations are supported by GNA
* @param model ngraph model
* @param gna_precision GNA inference precision
*/
void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const InferenceEngine::Precision gna_precision);
bool use_only_16bit_convolution_weights() const;
bool is_crop_affined_offset(size_t numberOfElements) const;
size_t get_memory_alignment() const;
std::shared_ptr<cnn2d::AbstractValidator> get_cnn_validator() const;
constexpr static uint32_t kBufferMaxSize = 65528;
constexpr static uint32_t kConvMinFiltersNum = 4;
constexpr static uint32_t kConvMaxFiltersNum = 65532;
constexpr static uint32_t kConvDilationHeight = 1;
constexpr static uint32_t kConvDilationWidth = 1;
constexpr static uint32_t kConvFiltersNumDivider = 4;
constexpr static uint32_t kConvFilterSizeDivider = 8;
constexpr static uint32_t kConvFilterMaxSize = 768;
constexpr static uint32_t kConvEachKernelByteAlignment = 16;
constexpr static uint32_t kInputByteAlignment = 64;
constexpr static uint32_t kNoOfInputsDivisor = 8;
constexpr static uint32_t kNoOfInputsLowPrecDivisor = 16;
constexpr static uint32_t kAffineMaxBatchSize = 8;
constexpr static uint32_t kMaxPoolMaxWindowSize = 6;
constexpr static uint32_t kCopyMaxGrouping = 8;
constexpr static uint32_t kTransposeMaxSize = 65528;
constexpr static uint32_t kMaxLayersCountGNA1_0 = 1023;
constexpr static uint32_t kMaxLayersCountGNA2_0 = 4096;
constexpr static uint32_t kMaxLayersCountGNA3_X = 8192;
// Currently split layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr static uint32_t kBytesPerSplitElement = 2;
// Currently crop layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr static uint32_t kBytesPerCropElement = 2;
constexpr static uint32_t kMemoryPageSize = 4096;
private:
Limitations(const target::DeviceVersion& target);
Limitations(const Limitations&) = delete;
Limitations& operator=(const Limitations&) = delete;
size_t get_memory_alignment_bytes(const target::DeviceVersion& target) const;
IE_SUPPRESS_DEPRECATED_START
static bool validate_concat_axis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage);
IE_SUPPRESS_DEPRECATED_END
bool m_use_only_16bit_conv_weights = false;
size_t m_mem_alignment = 0;
std::shared_ptr<cnn2d::AbstractValidator> m_cnn_validator;
static thread_local std::shared_ptr<Limitations> k_instance;
};
inline std::shared_ptr<Limitations> Limitations::get_instance() {
if (!k_instance) {
THROW_GNA_EXCEPTION << "Limitations instance is not initialized.\n";
}
return k_instance;
}
inline bool Limitations::is_crop_affined_offset(size_t numberOfElements) const {
const auto cropOffset = numberOfElements * kBytesPerCropElement;
return (ALIGN64(cropOffset) != cropOffset);
}
inline size_t Limitations::get_memory_alignment() const {
return m_mem_alignment;
}
inline std::shared_ptr<cnn2d::AbstractValidator> Limitations::get_cnn_validator() const {
return m_cnn_validator;
}
} // namespace limitations
} // namespace intel_gna
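Read on its own, the refactored header above reduces to a small init-then-query protocol. Below is a minimal usage sketch; it is illustrative only — the free function is hypothetical, the include and the calls mirror the ones GNAPlugin makes later in this commit, and because k_instance is thread_local, init() has to run in every thread that later calls get_instance().

#include "backend/gna_limitations.hpp"

using namespace ov::intel_gna;
using namespace ov::intel_gna::limitations;

// Hypothetical helper, not part of the commit.
void illustrate_limitations_usage(const target::DeviceVersion& compile_target) {
    Limitations::init(compile_target);  // must precede any get_instance() call

    const auto limits = Limitations::get_instance();
    const size_t alignment = limits->get_memory_alignment();               // target-dependent, feeds GNA memory allocation
    const auto cnn_validator = limits->get_cnn_validator();                // Validator_30 / Validator_35 family
    const bool wide_weights = limits->use_only_16bit_convolution_weights();
    (void)alignment;
    (void)cnn_validator;
    (void)wide_weights;
}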

View File

@@ -84,7 +84,7 @@ inline bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
 std::dynamic_pointer_cast<ngraph::opset8::VariadicSplit>(input_op)) {
 for (size_t index = 0; index < input_op_out_index; index++) {
 size_t outputSize = ngraph::shape_size(input_op->get_output_shape(index));
-    offset += outputSize * limitations::bytesPerSplitElement;
+    offset += outputSize * limitations::Limitations::kBytesPerSplitElement;
 }
 }
 return (offset == ALIGN64(offset));
@@ -93,7 +93,7 @@ inline bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
 inline bool is_crop_affined(std::shared_ptr<ngraph::Node> node) {
 auto crop = std::dynamic_pointer_cast<ngraph::op::CropIE>(node);
 if (crop != nullptr && !crop->offset.empty()) {
-    return limitations::isCropAffinedOffset(crop->offset.back());
+    return limitations::Limitations::get_instance()->is_crop_affined_offset(crop->offset.back());
 }
 return false;
 }
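The two hunks above reroute the split and crop helpers through the new constants and singleton, but the underlying check is plain byte arithmetic: at 2 bytes per element, an offset is "free" only when it lands on a 64-byte boundary, otherwise an affine filter has to be inserted. A self-contained sketch, where align_up_64 is a hypothetical stand-in for the plugin's ALIGN64 macro:

#include <cstddef>
#include <cstdio>

constexpr std::size_t kBytesPerCropElement = 2;  // same value as Limitations::kBytesPerCropElement

// Round a byte count up to the next multiple of 64 (stand-in for ALIGN64).
constexpr std::size_t align_up_64(std::size_t bytes) {
    return (bytes + 63) & ~std::size_t{63};
}

constexpr bool crop_needs_affine(std::size_t number_of_elements) {
    const std::size_t crop_offset = number_of_elements * kBytesPerCropElement;
    return align_up_64(crop_offset) != crop_offset;
}

int main() {
    std::printf("96 elements  -> %s\n", crop_needs_affine(96) ? "affine filter" : "plain crop");   // 192 bytes, aligned
    std::printf("100 elements -> %s\n", crop_needs_affine(100) ? "affine filter" : "plain crop");  // 200 bytes, not aligned
    return 0;
}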

View File

@@ -11,6 +11,7 @@
 namespace ov {
 namespace intel_gna {
+using namespace limitations;
 namespace frontend {
 template <class T>
@@ -352,7 +353,7 @@ InferenceEngine::Precision GetWeightsPrecision(const LayerInfo& layer_info,
 const QuantizedLayerParams& quant_layer_params,
 const Config& gna_config) {
 if (((layer_info.isConvolution() || layer_info.isConvolutionFilter()) &&
-     limitations::cnn2d::UseOnly16BitConvolutionWeights(gna_config.target->get_effective_compile_target())) ||
+     Limitations::get_instance()->use_only_16bit_convolution_weights()) ||
 layer_info.isScaleShift()) {
 return InferenceEngine::Precision::I16;
 }

View File

@@ -38,8 +38,7 @@ GNADeviceHelper::GNADeviceHelper(std::shared_ptr<Target> targetIn, bool isPerfor
 : target(targetIn),
 nGnaDeviceIndex{selectGnaDevice()},
 useDeviceEmbeddedExport(deviceEmbedded),
-      isPerformanceMeasuring(isPerformanceMeasuring),
-      m_mem_alignment(limitations::getMemoryAlignmentBytes(targetIn->get_effective_compile_target())) {
+      isPerformanceMeasuring(isPerformanceMeasuring) {
 per_request_diagnostics = log::get_log_level() >= ov::log::Level::TRACE;
 per_model_diagnostics = log::get_log_level() >= ov::log::Level::DEBUG;
 open();
@@ -573,7 +572,7 @@ uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
 switch (target->get_effective_execution_target()) {
 case DeviceVersion::GNA1_0:
 case DeviceVersion::GNA2_0:
-    return kMaxLayersCountGNA2_0;
+    return Limitations::kMaxLayersCountGNA2_0;
 case DeviceVersion::GNA3_0:
 case DeviceVersion::GNA3_1:
 case DeviceVersion::GNA3_5:
@@ -581,7 +580,7 @@ uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
 case DeviceVersion::GNA3_6:
 case DeviceVersion::GNA4_0:
 default:
-    return kMaxLayersCountGNA3_X;
+    return Limitations::kMaxLayersCountGNA3_X;
 }
 }
 } // namespace intel_gna

View File

@@ -67,7 +67,6 @@ class GNADeviceHelper : public GNADevice {
 uint64_t debugLogIndexRequestWait = 0;
 static constexpr const char* kDumpExt = ".bin";
 static constexpr const char* kDumpDelimiter = ".";
-    const size_t m_mem_alignment;
 public:
 explicit GNADeviceHelper(std::shared_ptr<target::Target> target = std::make_shared<target::Target>(),
@@ -128,10 +127,6 @@ public:
 return allAllocations;
 }
-    size_t getMemAlignment() const {
-        return m_mem_alignment;
-    }
 /**
 * @see GNADevice::createModel()
 */

View File

@ -49,6 +49,7 @@ namespace intel_gna {
using namespace frontend; using namespace frontend;
using namespace common; using namespace common;
using namespace memory; using namespace memory;
using namespace limitations;
static bool CheckIFLastComponentIsPrecededByConv2D(const backend::DnnComponents::storage_type& components, static bool CheckIFLastComponentIsPrecededByConv2D(const backend::DnnComponents::storage_type& components,
bool verify_with_pooling = true) { bool verify_with_pooling = true) {
@ -81,20 +82,22 @@ static uint32_t count_conv2D_input_width_for_expected_output_width(uint32_t expe
return (expected_ouput_width - 1) * stride_width - 2 * padding_width + kernel_width; return (expected_ouput_width - 1) * stride_width - 2 * padding_width + kernel_width;
}; };
GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config) : gna_config(gna_config) {} GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config,
std::shared_ptr<backend::AMIntelDNN> dnn_ptr,
std::shared_ptr<GnaInputs> inputs_ptr,
std::shared_ptr<limitations::cnn2d::AbstractValidator> cnn2d_validator_ptr,
std::shared_ptr<gna_memory_type> gna_mem_ptr)
: gna_config(gna_config) {
dnn = std::move(dnn_ptr);
inputs_ptr_ = std::move(inputs_ptr);
m_cnn2d_validator = std::move(cnn2d_validator_ptr);
gnamem = std::move(gna_mem_ptr);
}
void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr) { void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr) {
this->gnamem = std::move(gnaMemPtr); this->gnamem = std::move(gnaMemPtr);
} }
void GNAGraphCompiler::setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr) {
this->dnn = std::move(dnnPtr);
}
void GNAGraphCompiler::setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr) {
this->inputs_ptr_ = std::move(inputsPtr);
}
intel_dnn_component_t* GNAGraphCompiler::find_first_unused_input(InferenceEngine::CNNLayerPtr current) { intel_dnn_component_t* GNAGraphCompiler::find_first_unused_input(InferenceEngine::CNNLayerPtr current) {
if (current->insData.empty()) if (current->insData.empty())
return nullptr; return nullptr;
@ -228,13 +231,8 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
split_connection.emplace(id, layerInfoItem); split_connection.emplace(id, layerInfoItem);
} }
void GNAGraphCompiler::SetValidatorTarget(const target::DeviceVersion& target) {
auto temp = limitations::cnn2d::AbstractValidator::Create(target);
cnn2dValidator.reset(temp.release());
}
bool GNAGraphCompiler::ShouldUseOnlyConv2DGnaIface() const { bool GNAGraphCompiler::ShouldUseOnlyConv2DGnaIface() const {
return cnn2dValidator && cnn2dValidator->ShouldUseOnlyConv2DGnaIface(); return m_cnn2d_validator && m_cnn2d_validator->ShouldUseOnlyConv2DGnaIface();
} }
void GNAGraphCompiler::ValidateCnn2D(const std::string& name, void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
@ -249,8 +247,8 @@ void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
const uint32_t dilH, const uint32_t dilH,
const uint32_t dilW, const uint32_t dilW,
OvGnaType inPrecision) const { OvGnaType inPrecision) const {
if (cnn2dValidator) { if (m_cnn2d_validator) {
if (cnn2dValidator->ValidateCnn1D(name, if (m_cnn2d_validator->ValidateCnn1D(name,
inHeight, inHeight,
inWidth, inWidth,
inChannels, inChannels,
@ -265,7 +263,7 @@ void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
false)) { false)) {
return; return;
} }
cnn2dValidator m_cnn2d_validator
->ValidateCnn2D(name, inHeight, inWidth, inChannels, kH, kW, kN, strideH, strideW, dilH, dilW, inPrecision); ->ValidateCnn2D(name, inHeight, inWidth, inChannels, kH, kW, kN, strideH, strideW, dilH, dilW, inPrecision);
} else { } else {
THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << name; THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << name;
@ -277,8 +275,8 @@ void GNAGraphCompiler::ValidatePooling2D(const std::string& name,
const uint32_t windowW, const uint32_t windowW,
const uint32_t strideH, const uint32_t strideH,
const uint32_t strideW) const { const uint32_t strideW) const {
if (cnn2dValidator) { if (m_cnn2d_validator) {
cnn2dValidator->ValidatePooling2D(name, windowH, windowW, strideH, strideW); m_cnn2d_validator->ValidatePooling2D(name, windowH, windowW, strideH, strideW);
} else { } else {
THROW_GNA_EXCEPTION << "No Pooling2D validator found for layer " << name; THROW_GNA_EXCEPTION << "No Pooling2D validator found for layer " << name;
} }
@ -684,11 +682,11 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
// TODO add function // TODO add function
// printConvolution2DLayer(convolution); // printConvolution2DLayer(convolution);
if (!cnn2dValidator) { if (!m_cnn2d_validator) {
THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << convolution.name; THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << convolution.name;
} }
cnn2dValidator->ValidateInputPadding(convolution.name, m_cnn2d_validator->ValidateInputPadding(convolution.name,
convolution._padding_y, convolution._padding_y,
convolution._pads_end_y, convolution._pads_end_y,
convolution._padding_x, convolution._padding_x,
@ -713,7 +711,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
// have to pad input to let last kernel meets it's corresponding input // have to pad input to let last kernel meets it's corresponding input
const auto num_inputs = in_batch * effective_input_width * in_height * in_channels; const auto num_inputs = in_batch * effective_input_width * in_height * in_channels;
uint32_t num_input_padding = ALIGN(num_inputs, limitations::noOfInputsDivisor) - num_inputs; uint32_t num_input_padding = ALIGN(num_inputs, Limitations::kNoOfInputsDivisor) - num_inputs;
const uint32_t filter_n = convolution._out_depth; const uint32_t filter_n = convolution._out_depth;
@ -813,7 +811,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
// Kernel is extended only for 1D case which allows to add 0-s at the end of the kernel. // Kernel is extended only for 1D case which allows to add 0-s at the end of the kernel.
const auto kernel_pad = const auto kernel_pad =
ALIGN(effective_single_kernel_size, limitations::convEachKernelByteAlignment) - effective_single_kernel_size; ALIGN(effective_single_kernel_size, Limitations::kConvEachKernelByteAlignment) - effective_single_kernel_size;
for (uint32_t k = 0; k < convolution._out_depth; k++) { for (uint32_t k = 0; k < convolution._out_depth; k++) {
uint8_t* ptr_filt_current = convolution._weights->cbuffer().as<uint8_t*>() + k * single_kernel_size; uint8_t* ptr_filt_current = convolution._weights->cbuffer().as<uint8_t*>() + k * single_kernel_size;
auto transposed_part = transposeMatrix(ptr_filt_current, convolution_precision, in_channels, kernelHW); auto transposed_part = transposeMatrix(ptr_filt_current, convolution_precision, in_channels, kernelHW);
@ -846,14 +844,15 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto input = layer->insData[0].lock(); auto input = layer->insData[0].lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(input, limitations::GetMinBatchToFitInBuffer(input), 8)->getDims(); auto reshaped_dims = Get2DReshapedData(input, Limitations::get_min_batch_to_fit_in_buffer(input), 8)->getDims();
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
: limitations::noOfInputsDivisor; ? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0]; uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in; uint32_t num_rows_out = num_rows_in;
uint32_t num_columns_out = num_columns_in; uint32_t num_columns_out = num_columns_in;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
size_t num_data_bytes_out = num_columns_out * (num_rows_out + num_padding) * outputs->getPrecision().size(); size_t num_data_bytes_out = num_columns_out * (num_rows_out + num_padding) * outputs->getPrecision().size();
size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * input->getPrecision().size(); size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * input->getPrecision().size();
@ -1097,7 +1096,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims(); auto reshaped_dims = Get2DReshapedData(inputs, Limitations::get_min_batch_to_fit_in_buffer(inputs), 8)->getDims();
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0]; uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in; uint32_t num_rows_out = num_rows_in;
@ -1159,7 +1158,7 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
} }
// Concat axis validation // Concat axis validation
if (!limitations::ValidateConvConcatAxis(concatLayer)) { if (!Limitations::validate_conv_concat_axis(concatLayer)) {
std::ostringstream in_dims_oss; std::ostringstream in_dims_oss;
auto in_dims = concatLayer->insData[0].lock()->getDims(); auto in_dims = concatLayer->insData[0].lock()->getDims();
std::copy(in_dims.begin(), in_dims.end(), std::ostream_iterator<size_t>(in_dims_oss, ",")); std::copy(in_dims.begin(), in_dims.end(), std::ostream_iterator<size_t>(in_dims_oss, ","));
@ -1270,10 +1269,10 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
uint32_t num_columns_in = 1; uint32_t num_columns_in = 1;
uint32_t num_rows_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())); uint32_t num_rows_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()));
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
? limitations::noOfInputsLowPrecDivisor ? Limitations::kNoOfInputsLowPrecDivisor
: limitations::noOfInputsDivisor; : Limitations::kNoOfInputsDivisor;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
void* ptr_inputs = nullptr; void* ptr_inputs = nullptr;
void* ptr_outputs = nullptr; void* ptr_outputs = nullptr;
@ -1303,7 +1302,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * 4; InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * 4;
size_t num_data_bytes_in = size_t num_data_bytes_in =
num_columns_in * ALIGN(num_rows_in, noOfInputsDivisor) * inputs->getPrecision().size(); num_columns_in * ALIGN(num_rows_in, num_of_inputs_divisor) * inputs->getPrecision().size();
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0); connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
connectOutput(layer, ptr_outputs, num_data_bytes_out); connectOutput(layer, ptr_outputs, num_data_bytes_out);
@ -1326,8 +1325,9 @@ void GNAGraphCompiler::SlicePrimitive(InferenceEngine::CNNLayerPtr layer) {
void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
auto& eltwise = dynamic_cast<EltwiseLayer&>(*layer.get()); auto& eltwise = dynamic_cast<EltwiseLayer&>(*layer.get());
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer); auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
: limitations::noOfInputsDivisor; ? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
// for eltwise sum/sub in 16-bit precision one input should be 4 bytes and one 2 bytes - detecting that below // for eltwise sum/sub in 16-bit precision one input should be 4 bytes and one 2 bytes - detecting that below
// the names of variables are left for clarity although not always reflecting the real precision/size // the names of variables are left for clarity although not always reflecting the real precision/size
@ -1409,7 +1409,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
uint32_t num_columns_in = 1; uint32_t num_columns_in = 1;
uint32_t num_rows_out = num_rows_in; uint32_t num_rows_out = num_rows_in;
uint32_t num_columns_out = num_columns_in; uint32_t num_columns_out = num_columns_in;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
void* ptr_inputs = nullptr; void* ptr_inputs = nullptr;
void* ptr_outputs = nullptr; void* ptr_outputs = nullptr;
@ -1518,7 +1518,6 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto input1_precision = quantized ? Precision(Precision::I16) : input_1->getPrecision(); auto input1_precision = quantized ? Precision(Precision::I16) : input_1->getPrecision();
auto input2_precision = quantized ? Precision(Precision::I16) : input_2->getPrecision(); auto input2_precision = quantized ? Precision(Precision::I16) : input_2->getPrecision();
uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;
auto in_dims = input_1->getDims(); auto in_dims = input_1->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front(); auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
@ -1527,7 +1526,7 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
const auto out_dims = outputs->getDims(); const auto out_dims = outputs->getDims();
const auto out_dims_size = ngraph::shape_size(out_dims); const auto out_dims_size = ngraph::shape_size(out_dims);
uint32_t num_rows_out = InferenceEngine::GetDimFromBack(out_dims, 1); uint32_t num_rows_out = InferenceEngine::GetDimFromBack(out_dims, 1);
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; uint32_t num_padding = ALIGN(num_rows_in, Limitations::kNoOfInputsDivisor) - num_rows_in;
// Gemm gets two inputs // Gemm gets two inputs
void* ptr_input_1 = nullptr; // the first input void* ptr_input_1 = nullptr; // the first input
@ -1578,7 +1577,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
const auto out_dims = outputs->getDims(); const auto out_dims = outputs->getDims();
Precision inputPrecision; Precision inputPrecision;
uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor; uint32_t num_of_inputs_divisor = Limitations::kNoOfInputsDivisor;
if (!quantized) { if (!quantized) {
inputPrecision = inputs->getPrecision(); inputPrecision = inputs->getPrecision();
@ -1586,11 +1585,11 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
inputPrecision = Precision(Precision::I16); inputPrecision = Precision(Precision::I16);
} else { } else {
inputPrecision = Precision(Precision::I8); inputPrecision = Precision(Precision::I8);
noOfInputsDivisor = limitations::noOfInputsLowPrecDivisor; num_of_inputs_divisor = Limitations::kNoOfInputsLowPrecDivisor;
} }
auto input_data = HasTo2DReshapeData(layer) auto input_data = HasTo2DReshapeData(layer)
? Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8) ? Get2DReshapedData(inputs, Limitations::get_min_batch_to_fit_in_buffer(inputs), 8)
: inputs; : inputs;
auto in_dims = input_data->getDims(); auto in_dims = input_data->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front(); auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
@ -1598,7 +1597,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
uint32_t num_columns_in = batch_size; uint32_t num_columns_in = batch_size;
uint32_t num_rows_out = isDiag ? num_rows_in : InferenceEngine::GetDimFromBack(out_dims, 1); uint32_t num_rows_out = isDiag ? num_rows_in : InferenceEngine::GetDimFromBack(out_dims, 1);
uint32_t num_columns_out = num_columns_in; uint32_t num_columns_out = num_columns_in;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
uint32_t num_padding_out = isDiag ? num_padding : 0; uint32_t num_padding_out = isDiag ? num_padding : 0;
void* ptr_inputs = nullptr; void* ptr_inputs = nullptr;
@ -1803,12 +1802,13 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
: limitations::noOfInputsDivisor; ? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
uint32_t num_columns_in = GetDimFromBack(inputs->getDims(), 2); uint32_t num_columns_in = GetDimFromBack(inputs->getDims(), 2);
uint32_t num_rows_out = GetDimFromBack(outputs->getDims(), 1); uint32_t num_rows_out = GetDimFromBack(outputs->getDims(), 1);
uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out; uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
auto numRowsPadded = filterLayer->GetParamAsInt("num_rows_padded"); auto numRowsPadded = filterLayer->GetParamAsInt("num_rows_padded");
// number of rows we handled by inserting copy layer // number of rows we handled by inserting copy layer
@ -1877,7 +1877,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
false); false);
size_t num_data_bytes_out = num_rows_out * num_columns_in * outputs->getPrecision().size(); size_t num_data_bytes_out = num_rows_out * num_columns_in * outputs->getPrecision().size();
size_t num_data_bytes_in = num_columns_in * ALIGN(num_rows_in, noOfInputsDivisor) * inputs->getPrecision().size(); size_t num_data_bytes_in =
num_columns_in * ALIGN(num_rows_in, num_of_inputs_divisor) * inputs->getPrecision().size();
connectInput(layer, ptr_inputs, num_data_bytes_in, num_rows_copied * inputs->getPrecision().size(), 0); connectInput(layer, ptr_inputs, num_data_bytes_in, num_rows_copied * inputs->getPrecision().size(), 0);
connectOutput(layer, ptr_outputs, num_data_bytes_out); connectOutput(layer, ptr_outputs, num_data_bytes_out);
@ -1940,8 +1941,8 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
const auto noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor const auto num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision ? Limitations::kNoOfInputsLowPrecDivisor
: limitations::noOfInputsDivisor; : Limitations::kNoOfInputsDivisor;
const uint32_t orginalInputSize = const uint32_t orginalInputSize =
InferenceEngine::details::product(std::next(inputs->getDims().begin()), inputs->getDims().end()); InferenceEngine::details::product(std::next(inputs->getDims().begin()), inputs->getDims().end());
const uint32_t orginalOutputSize = const uint32_t orginalOutputSize =
@ -1956,7 +1957,7 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
const auto filterWidth = filterLayer->_kernel_x; const auto filterWidth = filterLayer->_kernel_x;
const auto minOutputsPerFilter = ALIGN(orginalOutputSize, numberOfFilters) / numberOfFilters; const auto minOutputsPerFilter = ALIGN(orginalOutputSize, numberOfFilters) / numberOfFilters;
const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth; const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth;
const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, noOfInputsDivisor); const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, num_of_inputs_divisor);
auto numOutputs = auto numOutputs =
gna_convolution_layer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride); gna_convolution_layer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride);
@ -2278,14 +2279,15 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
<< std::min(squeezedInputOrder[0], squeezedInputOrder[1]) << " > 8)"; << std::min(squeezedInputOrder[0], squeezedInputOrder[1]) << " > 8)";
} }
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
: limitations::noOfInputsDivisor; ? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
// now this can be run on GNA // now this can be run on GNA
if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case
if (ALIGN(squeezedInputOrder[1], noOfInputsDivisor) != squeezedInputOrder[1]) { if (ALIGN(squeezedInputOrder[1], num_of_inputs_divisor) != squeezedInputOrder[1]) {
THROW_GNA_LAYER_EXCEPTION(layer) THROW_GNA_LAYER_EXCEPTION(layer)
<< "unsupported permute (row size not a multiple of " << noOfInputsDivisor << ")"; << "unsupported permute (row size not a multiple of " << num_of_inputs_divisor << ")";
} else { } else {
auto& currentComponent = dnnComponents.addComponent(layer->name, "interleave"); auto& currentComponent = dnnComponents.addComponent(layer->name, "interleave");
dnn->InitInterleaveComponent(currentComponent, dnn->InitInterleaveComponent(currentComponent,
@ -2299,9 +2301,9 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
} }
} else { // deinterleave case } else { // deinterleave case
if (ALIGN(squeezedInputOrder[0], noOfInputsDivisor) != squeezedInputOrder[0]) { if (ALIGN(squeezedInputOrder[0], num_of_inputs_divisor) != squeezedInputOrder[0]) {
THROW_GNA_LAYER_EXCEPTION(layer) THROW_GNA_LAYER_EXCEPTION(layer)
<< "[GNA plugin] unsupported permute (column size not a multiple of " << noOfInputsDivisor << ")"; << "[GNA plugin] unsupported permute (column size not a multiple of " << num_of_inputs_divisor << ")";
} else { } else {
auto& currentComponent = dnnComponents.addComponent(layer->name, "deinterleave"); auto& currentComponent = dnnComponents.addComponent(layer->name, "deinterleave");
dnn->InitDeinterleaveComponent(currentComponent, dnn->InitDeinterleaveComponent(currentComponent,
@ -2317,7 +2319,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
size_t num_data_bytes_out = size_t num_data_bytes_out =
ALIGN(InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())), ALIGN(InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())),
noOfInputsDivisor) * num_of_inputs_divisor) *
outputs->getPrecision().size(); outputs->getPrecision().size();
size_t num_data_bytes_in = squeezedInputOrder[0] * squeezedInputOrder[1] * inputs->getPrecision().size(); size_t num_data_bytes_in = squeezedInputOrder[0] * squeezedInputOrder[1] * inputs->getPrecision().size();
@ -2610,12 +2612,12 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// if request for allocation less that realTensorInput - we need to extend request // if request for allocation less that realTensorInput - we need to extend request
auto minInput = inputs_ptr_->at(prevLayer->name).get_required_size(); auto minInput = inputs_ptr_->at(prevLayer->name).get_required_size();
if (num_data_bytes_in < minInput) { if (num_data_bytes_in < minInput) {
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
? limitations::noOfInputsLowPrecDivisor ? Limitations::kNoOfInputsLowPrecDivisor
: limitations::noOfInputsDivisor; : Limitations::kNoOfInputsDivisor;
log::debug() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" log::debug() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to"
<< ALIGN(minInput, noOfInputsDivisor); << ALIGN(minInput, num_of_inputs_divisor);
num_data_bytes_in = ALIGN(minInput, noOfInputsDivisor); num_data_bytes_in = ALIGN(minInput, num_of_inputs_divisor);
} }
// real allocation pointer will be kept in ptr not in ptr_inputs_global // real allocation pointer will be kept in ptr not in ptr_inputs_global
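Two bits of arithmetic recur behind the renamed num_of_inputs_divisor variables and the Get2DReshapedData calls in this file; they are restated below in isolation. Two assumptions are baked in: align_up is a stand-in for the plugin's ALIGN macro, and the relocated Limitations::get_min_batch_to_fit_in_buffer() is assumed to keep the total_size / kBufferMaxSize + 1 formula of the inline helper it replaces.

#include <cstdint>

constexpr uint32_t kNoOfInputsDivisor = 8;          // regular 16-bit input path
constexpr uint32_t kNoOfInputsLowPrecDivisor = 16;  // low-precision (8-bit) input path
constexpr uint32_t kBufferMaxSize = 65528;          // same value as Limitations::kBufferMaxSize

// Stand-in for the ALIGN macro: round value up to a multiple of step.
constexpr uint32_t align_up(uint32_t value, uint32_t step) {
    return ((value + step - 1) / step) * step;
}

// num_padding as computed for the affine/eltwise/crop primitives above.
constexpr uint32_t input_row_padding(uint32_t num_rows_in, bool input_low_precision) {
    const uint32_t divisor = input_low_precision ? kNoOfInputsLowPrecDivisor : kNoOfInputsDivisor;
    return align_up(num_rows_in, divisor) - num_rows_in;
}

// Assumed formula for Limitations::get_min_batch_to_fit_in_buffer().
constexpr uint32_t min_batch_to_fit_in_buffer(uint32_t total_elements) {
    return total_elements / kBufferMaxSize + 1;
}

static_assert(input_row_padding(10, false) == 6, "10 rows are padded up to 16 with the 8-row divisor");
static_assert(input_row_padding(30, true) == 2, "30 rows are padded up to 32 with the 16-row divisor");
static_assert(min_batch_to_fit_in_buffer(65527) == 1, "fits into a single buffer pass");
static_assert(min_batch_to_fit_in_buffer(65529) == 2, "has to be split across two batches");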

View File

@@ -54,20 +54,22 @@ private:
 uint32_t num_rows,
 uint32_t num_cols);
-    std::unique_ptr<const limitations::cnn2d::AbstractValidator> cnn2dValidator;
 bool ShouldUseOnlyConv2DGnaIface() const;
+    std::shared_ptr<limitations::cnn2d::AbstractValidator> m_cnn2d_validator;
 public:
 backend::DnnComponents dnnComponents;
 MemoryConnection memory_connection;
 ConcatConnection concat_connection;
 ConstConnections const_connections;
-    GNAGraphCompiler(const Config& gna_config);
+    GNAGraphCompiler(const Config& gna_config,
+                     std::shared_ptr<backend::AMIntelDNN> dnn_ptr,
+                     std::shared_ptr<GnaInputs> inputs_ptr,
+                     std::shared_ptr<limitations::cnn2d::AbstractValidator> cnn2d_validator,
+                     std::shared_ptr<gna_memory_type> gna_mem_ptr);
 void setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr);
-    void setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr);
-    void setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr);
 void fillMemoryConnections(std::unordered_map<std::string, std::vector<InferenceEngine::CNNLayerPtr>>& memoryPairs);
@@ -93,8 +95,6 @@ public:
 const uint32_t strideH,
 const uint32_t strideW) const;
-    void SetValidatorTarget(const target::DeviceVersion& target);
 /**
 * Connects either memory output, or generic output to a layer
 * @param layer - layer pointer

View File

@ -29,6 +29,7 @@
#include <vector> #include <vector>
#include "backend/am_intel_dnn.hpp" #include "backend/am_intel_dnn.hpp"
#include "backend/gna_limitations.hpp"
#include "common/gna_target.hpp" #include "common/gna_target.hpp"
#include "frontend/model_quantizer.hpp" #include "frontend/model_quantizer.hpp"
#include "frontend/scale_factor_calc.hpp" #include "frontend/scale_factor_calc.hpp"
@ -55,6 +56,7 @@
#include "scale_factor_helper.hpp" #include "scale_factor_helper.hpp"
#include "serial/gna_model_serial.hpp" #include "serial/gna_model_serial.hpp"
using namespace ov::intel_gna::limitations;
using namespace ov::intel_gna::graph_utils; using namespace ov::intel_gna::graph_utils;
inline uint32_t ToByteSize(const Gna2DataType type) { inline uint32_t ToByteSize(const Gna2DataType type) {
@ -357,17 +359,23 @@ void GNAPlugin::PrePostProcess(InferenceEngine::Blob::Ptr input_blob,
} }
} }
GNAPlugin::GNAPlugin() : graphCompiler(config) { GNAPlugin::GNAPlugin() {
Init(); Init();
UpdateFieldsFromConfig(); UpdateFieldsFromConfig();
InitGNADevice(); InitGNADevice();
Limitations::init(config.target->get_effective_compile_target());
InitGNAMemory();
InitGraphCompiler();
} }
GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) : graphCompiler(config) { GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) {
Init(); Init();
SetConfig(configMap); SetConfig(configMap);
log::set_log_level(gnaFlags->log_level); log::set_log_level(gnaFlags->log_level);
InitGNADevice(); InitGNADevice();
Limitations::init(config.target->get_effective_compile_target());
InitGNAMemory();
InitGraphCompiler();
} }
void GNAPlugin::Init() { void GNAPlugin::Init() {
@ -376,27 +384,36 @@ void GNAPlugin::Init() {
gnaFlags = std::make_shared<GNAFlags>(GNAFlags()); gnaFlags = std::make_shared<GNAFlags>(GNAFlags());
inputs_ptr_ = std::make_shared<GnaInputs>(GnaInputs()); inputs_ptr_ = std::make_shared<GnaInputs>(GnaInputs());
outputs_ = GnaOutputs(); outputs_ = GnaOutputs();
graphCompiler.setDNNPtr(dnn);
graphCompiler.setInputsPtr(inputs_ptr_);
requestWorkerPool_ = std::make_shared<request::WorkerPoolImpl>(); requestWorkerPool_ = std::make_shared<request::WorkerPoolImpl>();
} }
void GNAPlugin::InitGNADevice() { void GNAPlugin::InitGNADevice() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNADevice"); OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNADevice");
if (gnaFlags->sw_fp32) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); if (!gnaFlags->sw_fp32) {
} else {
gnadevice = std::make_shared<GNADeviceHelper>(config.target, gnadevice = std::make_shared<GNADeviceHelper>(config.target,
gnaFlags->performance_counting, gnaFlags->performance_counting,
!config.embedded_export_path.empty()); !config.embedded_export_path.empty());
gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice),
gnadevice->getMemAlignment(),
limitations::kMemoryPageSize);
} }
graphCompiler.setGNAMemoryPtr(gnamem); }
void GNAPlugin::InitGNAMemory() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNAMemory");
if (gnaFlags->sw_fp32) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
} else {
gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice),
Limitations::get_instance()->get_memory_alignment(),
Limitations::kMemoryPageSize);
}
}
void GNAPlugin::InitGraphCompiler() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGraphCompiler");
m_graph_compiler = std::make_shared<GNAGraphCompiler>(
GNAGraphCompiler(config, dnn, inputs_ptr_, Limitations::get_instance()->get_cnn_validator(), gnamem));
} }
void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network) { void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network) {
@ -428,8 +445,7 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network
GNAFakeQuantizeLayer fqLayer(next_layer); GNAFakeQuantizeLayer fqLayer(next_layer);
auto inputRange = fqLayer.getInputRange(); auto inputRange = fqLayer.getInputRange();
auto outputRange = fqLayer.getOutputRange(); auto outputRange = fqLayer.getOutputRange();
if (inputRange.second.size() != 1 || inputRange.second.size() != 1 || outputRange.second.size() != 1 || if (inputRange.second.size() != 1 || outputRange.second.size() != 1) {
outputRange.second.size() != 1) {
THROW_GNA_LAYER_EXCEPTION(next_layer) THROW_GNA_LAYER_EXCEPTION(next_layer)
<< "unsupported, per-channel quantization for input layer : " << input.second->name(); << "unsupported, per-channel quantization for input layer : " << input.second->name();
} }
@ -552,12 +568,12 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN
}; };
// probing gna_primitives // probing gna_primitives
auto irLayerAvatar = std::find_if(graphCompiler.dnnComponents.components.begin(), auto irLayerAvatar = std::find_if(m_graph_compiler->dnnComponents.components.begin(),
graphCompiler.dnnComponents.components.end(), m_graph_compiler->dnnComponents.components.end(),
[&layer](const backend::DnnComponents::storage_type::value_type& value) { [&layer](const backend::DnnComponents::storage_type::value_type& value) {
return value.name == layer->name; return value.name == layer->name;
}); });
if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) { if (irLayerAvatar != m_graph_compiler->dnnComponents.components.end()) {
initOutput(irLayerAvatar->dnnComponent.orientation_out, initOutput(irLayerAvatar->dnnComponent.orientation_out,
irLayerAvatar->dnnComponent.num_bytes_per_output, irLayerAvatar->dnnComponent.num_bytes_per_output,
irLayerAvatar->dnnComponent.num_rows_out, irLayerAvatar->dnnComponent.num_rows_out,
@ -567,8 +583,8 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN
// probing concatInfo // probing concatInfo
if (LayerInfo(layer).isConcat()) { if (LayerInfo(layer).isConcat()) {
auto concatConnection = graphCompiler.concat_connection.find(layer->name); auto concatConnection = m_graph_compiler->concat_connection.find(layer->name);
if (concatConnection != graphCompiler.concat_connection.end()) { if (concatConnection != m_graph_compiler->concat_connection.end()) {
auto precision = layer->outData.front()->getPrecision().size(); auto precision = layer->outData.front()->getPrecision().size();
initOutput(kDnnInterleavedOrientation, initOutput(kDnnInterleavedOrientation,
precision, precision,
@ -581,8 +597,8 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN
// probing a constant info, for constant trivial networks support // probing a constant info, for constant trivial networks support
if (LayerInfo(layer).isConst()) { if (LayerInfo(layer).isConst()) {
auto const_blob = layer->blobs["custom"]; auto const_blob = layer->blobs["custom"];
auto constConnection = graphCompiler.const_connections.find(layer->name); auto constConnection = m_graph_compiler->const_connections.find(layer->name);
if (constConnection != graphCompiler.const_connections.end()) { if (constConnection != m_graph_compiler->const_connections.end()) {
initOutput(kDnnInterleavedOrientation, initOutput(kDnnInterleavedOrientation,
layer->outData.front()->getPrecision().size(), layer->outData.front()->getPrecision().size(),
const_blob->size(), const_blob->size(),
@ -696,16 +712,13 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
_network_name = _network.getName(); _network_name = _network.getName();
std::shared_ptr<InferenceEngine::details::CNNNetworkImpl> convertedNetwork; std::shared_ptr<InferenceEngine::details::CNNNetworkImpl> convertedNetwork;
const auto effectiveCompileTarget = config.target->get_effective_compile_target();
graphCompiler.SetValidatorTarget(effectiveCompileTarget);
auto transformer = TransformationsPipeline(config); auto transformer = TransformationsPipeline(config);
if (_network.getFunction()) { if (_network.getFunction()) {
CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network); CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network);
auto model = clonedNetwork.getFunction(); auto model = clonedNetwork.getFunction();
transformer.apply(model, &m_input_output_subgraphs); transformer.apply(model, &m_input_output_subgraphs);
limitations::check_all_ops_supported(model, effectiveCompileTarget, config.gnaPrecision); Limitations::get_instance()->check_all_ops_supported(model, config.gnaPrecision);
convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(model, clonedNetwork); convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(model, clonedNetwork);
} }
IE_SUPPRESS_DEPRECATED_START IE_SUPPRESS_DEPRECATED_START
@ -717,7 +730,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// Check the network // Check the network
std::string error; std::string error;
if (!limitations::AreLayersSupported(network, error)) { if (!Limitations::are_layers_supported(network, error)) {
THROW_GNA_EXCEPTION << error.c_str(); THROW_GNA_EXCEPTION << error.c_str();
} }
@ -805,17 +818,17 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
memoryPairs[id][generic->GetParamAsInt("index")] = layer; memoryPairs[id][generic->GetParamAsInt("index")] = layer;
continue; continue;
} else if (layerInfo.isConcat()) { } else if (layerInfo.isConcat()) {
graphCompiler.fillConcatConnections(layer); m_graph_compiler->fillConcatConnections(layer);
} else if (layerInfo.isSplit() || layerInfo.isSlice()) { } else if (layerInfo.isSplit() || layerInfo.isSlice()) {
graphCompiler.fillSplitConnections(layer); m_graph_compiler->fillSplitConnections(layer);
} }
sortedNoMem.push_back(layer); sortedNoMem.push_back(layer);
} }
// fill in extra storage with memory layers // fill in extra storage with memory layers
graphCompiler.fillMemoryConnections(memoryPairs); m_graph_compiler->fillMemoryConnections(memoryPairs);
if (!graphCompiler.memory_connection.empty() && gnaFlags->num_requests != 1) { if (!m_graph_compiler->memory_connection.empty() && gnaFlags->num_requests != 1) {
gnaFlags->num_requests = 1; gnaFlags->num_requests = 1;
} }
@ -837,17 +850,17 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// Creating Layer primitives // Creating Layer primitives
for (auto& layer : sortedNoMem) { for (auto& layer : sortedNoMem) {
graphCompiler.CreateLayerPrimitive(layer); m_graph_compiler->CreateLayerPrimitive(layer);
} }
for (auto& inputLayer : inputLayers) { for (auto& inputLayer : inputLayers) {
auto layerInfo = LayerInfo(inputLayer); auto layerInfo = LayerInfo(inputLayer);
if (layerInfo.isInput() && 0 == inputs_ptr_->at(inputLayer->name).get_allocated_size()) { if (layerInfo.isInput() && 0 == inputs_ptr_->at(inputLayer->name).get_allocated_size()) {
graphCompiler.connectOutput(inputLayer, &inputs_ptr_->at(inputLayer->name).ptrs.front(), 0); m_graph_compiler->connectOutput(inputLayer, &inputs_ptr_->at(inputLayer->name).ptrs.front(), 0);
} }
} }
if (graphCompiler.dnnComponents.components.empty()) { if (m_graph_compiler->dnnComponents.components.empty()) {
log::warning() << "No GNA primitives created based on topology. This might indicate trivial topology\n"; log::warning() << "No GNA primitives created based on topology. This might indicate trivial topology\n";
trivialTopology = true; trivialTopology = true;
} }
@ -861,7 +874,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// Memory layers are not dnnComponents hence we need to make switch with identity layer // Memory layers are not dnnComponents hence we need to make switch with identity layer
if (outLayer->type == "Memory") { if (outLayer->type == "Memory") {
// traverse memory connection to find corresponding output_memory // traverse memory connection to find corresponding output_memory
for (auto&& memConnection : graphCompiler.memory_connection) { for (auto&& memConnection : m_graph_compiler->memory_connection) {
if (memConnection.second.getInput()->name == outLayer->name) { if (memConnection.second.getInput()->name == outLayer->name) {
// if connection is found, replace memory input layer with memory output layer // if connection is found, replace memory input layer with memory output layer
outLayer = memConnection.second.getOutput(); outLayer = memConnection.second.getOutput();
@ -909,11 +922,11 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
dnn->Init(gnamem.get(), gnaFlags->sw_fp32 ? kDnnFloat : kDnnInt, 1); dnn->Init(gnamem.get(), gnaFlags->sw_fp32 ? kDnnFloat : kDnnInt, 1);
// TODO: this copy is unneeded; in fact, we can directly create gna structs from list // TODO: this copy is unneeded; in fact, we can directly create gna structs from list
auto execOrder = graphCompiler.dnnComponents.getExecutionOrder(); auto execOrder = m_graph_compiler->dnnComponents.getExecutionOrder();
dnn->component.insert(dnn->component.begin(), execOrder.begin(), execOrder.end()); dnn->component.insert(dnn->component.begin(), execOrder.begin(), execOrder.end());
// in fp32 mode last PWL cannot be computed without that // in fp32 mode last PWL cannot be computed without that
if (!graphCompiler.dnnComponents.components.empty()) { if (!m_graph_compiler->dnnComponents.components.empty()) {
dnn->InitActiveList(NULL); dnn->InitActiveList(NULL);
} }
@ -965,7 +978,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
for (auto& inputLayer : inputLayers) { for (auto& inputLayer : inputLayers) {
if (LayerInfo(inputLayer).isInput()) { if (LayerInfo(inputLayer).isInput()) {
ov::intel_gna::helpers::updateModelInputOrientationWithoutConvolution(*inputLayer, ov::intel_gna::helpers::updateModelInputOrientationWithoutConvolution(*inputLayer,
graphCompiler.dnnComponents, m_graph_compiler->dnnComponents,
*inputs_ptr_); *inputs_ptr_);
} }
} }
@ -976,7 +989,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
if (outLayer && LayerInfo(outLayer).isOutput()) { if (outLayer && LayerInfo(outLayer).isOutput()) {
ov::intel_gna::helpers::updateModelOutputOrientation(outPort.first, ov::intel_gna::helpers::updateModelOutputOrientation(outPort.first,
outLayer->name, outLayer->name,
graphCompiler.dnnComponents, m_graph_compiler->dnnComponents,
outputs_); outputs_);
} }
} }
@ -1101,7 +1114,7 @@ void GNAPlugin::DumpXNNToFile() const {
uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap& inputs, InferenceEngine::BlobMap& result) { uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap& inputs, InferenceEngine::BlobMap& result) {
auto freeWorker = requestWorkerPool_->findFreeModelWorker(); auto freeWorker = requestWorkerPool_->findFreeModelWorker();
if (freeWorker == nullptr) { if (freeWorker == nullptr) {
if (!graphCompiler.memory_connection.empty()) { if (!m_graph_compiler->memory_connection.empty()) {
Wait(requestWorkerPool_->firstWorker().representingIndex()); Wait(requestWorkerPool_->firstWorker().representingIndex());
freeWorker = requestWorkerPool_->findFreeModelWorker(); freeWorker = requestWorkerPool_->findFreeModelWorker();
if (freeWorker == nullptr) { if (freeWorker == nullptr) {
@ -1412,7 +1425,7 @@ RequestStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
} }
void GNAPlugin::Reset() { void GNAPlugin::Reset() {
graphCompiler.Reset(); m_graph_compiler->Reset();
} }
bool GNAPlugin::Infer(const InferenceEngine::Blob& input, InferenceEngine::Blob& output) { bool GNAPlugin::Infer(const InferenceEngine::Blob& input, InferenceEngine::Blob& output) {
@ -1479,9 +1492,9 @@ Blob::Ptr GNAPlugin::GetInputBlob(const std::string& name, InferenceEngine::Prec
} }
std::vector<InferenceEngine::IVariableStateInternal::Ptr> GNAPlugin::QueryState() { std::vector<InferenceEngine::IVariableStateInternal::Ptr> GNAPlugin::QueryState() {
if (memoryStates.size() != graphCompiler.memory_connection.size()) { if (memoryStates.size() != m_graph_compiler->memory_connection.size()) {
memoryStates.clear(); memoryStates.clear();
for (auto& connection : graphCompiler.memory_connection) { for (auto& connection : m_graph_compiler->memory_connection) {
auto state = auto state =
std::make_shared<memory::GNAVariableState>(connection.first, std::make_shared<memory::GNAVariableState>(connection.first,
std::make_shared<GNAMemoryLayer>(connection.second)); std::make_shared<GNAMemoryLayer>(connection.second));
@ -1575,7 +1588,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
GNAMemoryLayer memoryLayer(nullptr, nullptr, gnaFlags->sw_fp32 ? 4 : 2); GNAMemoryLayer memoryLayer(nullptr, nullptr, gnaFlags->sw_fp32 ? 4 : 2);
std::string name; std::string name;
std::tie(memoryLayer.gna_ptr, memoryLayer.reserved_size, name, memoryLayer.scale_factor) = memory; std::tie(memoryLayer.gna_ptr, memoryLayer.reserved_size, name, memoryLayer.scale_factor) = memory;
graphCompiler.memory_connection.emplace_back(make_pair(name, memoryLayer)); m_graph_compiler->memory_connection.emplace_back(make_pair(name, memoryLayer));
} }
// TODO update documenation to allow exporting tlv with importing cep only for sue creek // TODO update documenation to allow exporting tlv with importing cep only for sue creek
@ -1607,7 +1620,7 @@ void GNAPlugin::Export(std::ostream& outStream) {
.SetInputRotation(transpose_inputs_info) .SetInputRotation(transpose_inputs_info)
.SetOutputRotation(transpose_outputs_info); .SetOutputRotation(transpose_outputs_info);
for (auto&& memoryConnection : graphCompiler.memory_connection) { for (auto&& memoryConnection : m_graph_compiler->memory_connection) {
auto state = auto state =
std::make_shared<memory::GNAVariableState>(memoryConnection.first, std::make_shared<memory::GNAVariableState>(memoryConnection.first,
std::make_shared<GNAMemoryLayer>(memoryConnection.second)); std::make_shared<GNAMemoryLayer>(memoryConnection.second));
@ -1691,7 +1704,6 @@ InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork(
Config qn_config(config); Config qn_config(config);
qn_config.UpdateFromMap(config_map); qn_config.UpdateFromMap(config_map);
const auto effectiveCompileTarget = qn_config.target->get_effective_compile_target();
auto model = network.getFunction(); auto model = network.getFunction();
if (model) { if (model) {
auto supported = GetSupportedNodes( auto supported = GetSupportedNodes(
@ -1700,7 +1712,8 @@ InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork(
TransformationsPipeline(qn_config).apply(model); TransformationsPipeline(qn_config).apply(model);
}, },
[&](const std::shared_ptr<ngraph::Node>& op) { [&](const std::shared_ptr<ngraph::Node>& op) {
return limitations::is_op_supported(op, effectiveCompileTarget, qn_config.gnaPrecision); const auto res = Limitations::get_instance()->is_op_supported(op, qn_config.gnaPrecision);
return res;
}); });
for (auto&& op_name : supported) { for (auto&& op_name : supported) {
res.supportedLayersMap.emplace(op_name, GetName()); res.supportedLayersMap.emplace(op_name, GetName());
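A minimal sketch of how the refactored Limitations singleton is queried after this change (illustration only, not part of the commit); it uses only calls visible in the diff (init, get_instance, is_op_supported, get_memory_alignment), `op` and `qn_config` stand for the objects already in scope in QueryNetwork, and the device version is a placeholder:

// Illustration: initialize the per-target limits once, then query them through the
// singleton instead of threading a compile target through every pass.
using ov::intel_gna::limitations::Limitations;
Limitations::init(ov::intel_gna::target::DeviceVersion::Default);            // done once (see the updated test SetUp())
const auto limits = Limitations::get_instance();
const bool supported = limits->is_op_supported(op, qn_config.gnaPrecision);  // op: std::shared_ptr<ngraph::Node>
const size_t alignment = limits->get_memory_alignment();                     // target-dependent byte alignment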


@ -47,8 +47,7 @@ protected:
std::shared_ptr<gna_memory_type> gnamem; std::shared_ptr<gna_memory_type> gnamem;
std::shared_ptr<GnaInputs> inputs_ptr_; std::shared_ptr<GnaInputs> inputs_ptr_;
GnaOutputs outputs_; GnaOutputs outputs_;
std::shared_ptr<GNAGraphCompiler> m_graph_compiler;
GNAGraphCompiler graphCompiler;
uint32_t activeLayerIndex = 0xffffffff; uint32_t activeLayerIndex = 0xffffffff;
// TODO: transpose_inputs_info and transpose_outputs_info should be moved to GNAModelSerial class when ngraph // TODO: transpose_inputs_info and transpose_outputs_info should be moved to GNAModelSerial class when ngraph
@ -189,6 +188,8 @@ protected:
void Init(); void Init();
void InitGNADevice(); void InitGNADevice();
void InitGNAMemory();
void InitGraphCompiler();
void DumpXNNToFile() const; void DumpXNNToFile() const;
/** /**


@ -83,11 +83,9 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
manager.register_pass<ov::pass::LSTMCellDecomposition>(); manager.register_pass<ov::pass::LSTMCellDecomposition>();
manager.register_pass<ov::intel_gna::pass::ConvertDWSCToScaleShifts>(); manager.register_pass<ov::intel_gna::pass::ConvertDWSCToScaleShifts>();
manager.register_pass<ov::intel_gna::pass::ConvertPaddedToValidConv>(); manager.register_pass<ov::intel_gna::pass::ConvertPaddedToValidConv>();
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(effective_compile_target, manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(config.gnaPrecision);
config.gnaPrecision); manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(config.gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(effective_compile_target, manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(config.gnaPrecision);
config.gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(effective_compile_target, config.gnaPrecision);
if (!has_convolution) { if (!has_convolution) {
manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithFqToPointWiseConvolution>(); manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithFqToPointWiseConvolution>();
manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithBiasToPointWiseConvolution>(); manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithBiasToPointWiseConvolution>();


@ -385,7 +385,7 @@ public:
auto cropLayer = dynamic_cast<InferenceEngine::CropLayer*>(layer); auto cropLayer = dynamic_cast<InferenceEngine::CropLayer*>(layer);
if (cropLayer != nullptr && !cropLayer->offset.empty()) { if (cropLayer != nullptr && !cropLayer->offset.empty()) {
const auto crop_params = GetCropParams(cropLayer); const auto crop_params = GetCropParams(cropLayer);
return limitations::isCropAffinedOffset(crop_params.start_offset); return limitations::Limitations::get_instance()->is_crop_affined_offset(crop_params.start_offset);
} }
return false; return false;
} }


@ -50,7 +50,7 @@ public:
// @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size // @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size
inline std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, inline std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize,
uint32_t maxSplitSize, uint32_t maxSplitSize,
uint32_t alignment = limitations::inputByteAlignment) { uint32_t alignment = limitations::Limitations::kInputByteAlignment) {
std::vector<uint32_t> splitSizes; std::vector<uint32_t> splitSizes;
uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment); uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment);
uint32_t usedSize = 0; uint32_t usedSize = 0;
@ -73,7 +73,7 @@ inline std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
IE_ASSERT(firstValuableDim != std::end(dims)); IE_ASSERT(firstValuableDim != std::end(dims));
auto splittedElementsSize = *firstValuableDim; auto splittedElementsSize = *firstValuableDim;
auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim); auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim);
auto alignment = limitations::inputByteAlignment; auto alignment = limitations::Limitations::kInputByteAlignment;
// Split output size should be multiple by 64 to avoid align filters insertion, // Split output size should be multiple by 64 to avoid align filters insertion,
// but we need to check if our input size to split exceeds 64; if not we can always // but we need to check if our input size to split exceeds 64; if not we can always
@ -85,8 +85,9 @@ inline std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
return {splittedDimIx, splitSizes}; return {splittedDimIx, splitSizes};
} }
} }
splitSizes = GetAlignedSplitSizes(splittedElementsSize, splitSizes =
limitations::bufferMaxSize * splittedElementsSize / totalElementsSize, GetAlignedSplitSizes(splittedElementsSize,
limitations::Limitations::kBufferMaxSize * splittedElementsSize / totalElementsSize,
alignment); alignment);
return {splittedDimIx, splitSizes}; return {splittedDimIx, splitSizes};
} }
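An illustration of GetAlignedSplitSizes above (not part of the diff); the expected result assumes the remainder of the helper keeps appending chunks of maxAlignedSplitSize until totalSize is covered, which matches the prologue shown here:

// Hypothetical numbers: split 200 elements into 64-aligned chunks of at most 100.
// maxAlignedSplitSize = max(100 - 100 % 64, 64) = 64, so the expected output is
// {64, 64, 64, 8} (the final chunk carries the remainder).
const std::vector<uint32_t> sizes = GetAlignedSplitSizes(200, 100, /*alignment=*/64);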


@ -15,11 +15,14 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "backend/gna_limitations.hpp"
#include "gna2-model-api.h" #include "gna2-model-api.h"
#include "gna2_model_helper.hpp" #include "gna2_model_helper.hpp"
#include "gna_device.hpp" #include "gna_device.hpp"
#include "log.hpp" #include "log.hpp"
using namespace ov::intel_gna::limitations;
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
namespace dump { namespace dump {
@ -486,8 +489,9 @@ void DumpGna2Model(const Gna2Model& gnaModel,
} }
dumpFile << "\tOperand " << j << " (" << GetOperandName(operation.Type, j) << ")" dumpFile << "\tOperand " << j << " (" << GetOperandName(operation.Type, j) << ")"
<< " type: " << GetOperandType(operand.Type) << " shape: " << GetSimpleString(operand.Shape) << " type: " << GetOperandType(operand.Type) << " shape: " << GetSimpleString(operand.Shape)
<< " tag: " << foundName << " offset: " << offset << " tag: " << foundName << " offset: " << offset << " size: "
<< " size: " << Gna2RoundUpTo64(GetGnaShapeSize(operand.Shape, GetTypeByteSize(operand.Type))) << Gna2RoundUp(GetGnaShapeSize(operand.Shape, GetTypeByteSize(operand.Type)),
Limitations::get_instance()->get_memory_alignment())
<< " data: " << operand.Data << " baseAlloc: " << foundPtr << " layout: "; << " data: " << operand.Data << " baseAlloc: " << foundPtr << " layout: ";
DumpCharArray(dumpFile, operand.Layout, GNA2_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS); DumpCharArray(dumpFile, operand.Layout, GNA2_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS);
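A short sketch of the size now reported by the dump (illustration only), assuming Gna2RoundUp(size, align) rounds size up to the nearest multiple of align:

// Hypothetical 100-byte operand: the dump reports the operand size rounded up to the
// alignment of the current compile target instead of a hard-coded 64-byte boundary.
const auto align = Limitations::get_instance()->get_memory_alignment();
const auto reported = Gna2RoundUp(100u, align);   // 128 when align == 64, 112 when align == 16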


@ -50,6 +50,7 @@ using namespace InferenceEngine::details;
using namespace ov::intel_gna::frontend; using namespace ov::intel_gna::frontend;
using namespace ov::intel_gna::common; using namespace ov::intel_gna::common;
using namespace ov::intel_gna::pre_post_processing; using namespace ov::intel_gna::pre_post_processing;
using namespace ov::intel_gna::limitations;
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
@ -149,10 +150,11 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
return LayerInfo(ptr).isNonValuesChangable(); return LayerInfo(ptr).isNonValuesChangable();
}); });
IE_ASSERT(inputLayer != nullptr); IE_ASSERT(inputLayer != nullptr);
size_t weightsSize = size_t weightsSize = LayerInfo(prevLayer).has32BOutput()
LayerInfo(prevLayer).has32BOutput()
? nextLayer->outData[0]->getDims().back() ? nextLayer->outData[0]->getDims().back()
: Get2DReshapedData(nextLayer->outData[0], limitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8) : Get2DReshapedData(nextLayer->outData[0],
Limitations::get_min_batch_to_fit_in_buffer(nextLayer->outData[0]),
8)
->getDims()[1]; ->getDims()[1];
std::vector<float> weightsValues(weightsSize, fillValue); std::vector<float> weightsValues(weightsSize, fillValue);
IE_ASSERT(diagLayer != nullptr); IE_ASSERT(diagLayer != nullptr);
@ -1531,19 +1533,19 @@ void InsertSplitAligningFilterPass::run() {
// encodes offset to beginning of split layer input // encodes offset to beginning of split layer input
filterLayer->params["offset"] = filterLayer->params["offset"] =
std::to_string(aligned64_offset / limitations::bytesPerSplitElement); std::to_string(aligned64_offset / Limitations::kBytesPerSplitElement);
auto dims = splitOutput->getTensorDesc().getDims(); auto dims = splitOutput->getTensorDesc().getDims();
if (dims.size() > 3) { if (dims.size() > 3) {
THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size(); THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
} }
const auto offsetOfUnalignment = const auto offsetOfUnalignment =
(currentOffset - aligned64_offset) / limitations::bytesPerSplitElement; (currentOffset - aligned64_offset) / Limitations::kBytesPerSplitElement;
// TODO consider using a different number of filters to decrease the number of trailing zeros // TODO consider using a different number of filters to decrease the number of trailing zeros
// (additionalPaddingOfFilter) // (additionalPaddingOfFilter)
const auto numberOfFilters = limitations::convMinFiltersNum; const auto numberOfFilters = Limitations::kConvMinFiltersNum;
const auto filterSize = const auto filterSize =
ALIGN(offsetOfUnalignment + numberOfFilters, limitations::convFilterSizeDivider); ALIGN(offsetOfUnalignment + numberOfFilters, Limitations::kConvFilterSizeDivider);
// filterWeights: numberOfFilters X (offsetOfUnalignment + additionalPaddingOfFilter + // filterWeights: numberOfFilters X (offsetOfUnalignment + additionalPaddingOfFilter +
// numberOfFilters) offsetOfUnalignment - the leading zeros in the filter // numberOfFilters) offsetOfUnalignment - the leading zeros in the filter
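A tiny worked example of the filter sizing above (illustration only); the constant values are assumptions standing in for Limitations::kConvMinFiltersNum and kConvFilterSizeDivider, not the actual limits:

#include <cstdint>
#include <iostream>
#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))   // round x up to a multiple of a

int main() {
    const uint32_t offsetOfUnalignment = 3;  // hypothetical leading zeros in the filter
    const uint32_t numberOfFilters = 4;      // stand-in for Limitations::kConvMinFiltersNum
    const uint32_t filterSizeDivider = 16;   // stand-in for Limitations::kConvFilterSizeDivider
    // filterSize: leading zeros plus the filters, padded up to a multiple of the divider.
    std::cout << ALIGN(offsetOfUnalignment + numberOfFilters, filterSizeDivider) << "\n";  // prints 16
    return 0;
}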
@ -1598,7 +1600,7 @@ void InsertSplitAligningFilterPass::run() {
} }
// search data that starts from unaligned location // search data that starts from unaligned location
currentOffset += outputSize * limitations::bytesPerSplitElement; currentOffset += outputSize * Limitations::kBytesPerSplitElement;
splitOutIndex++; splitOutIndex++;
} }
} }
@ -1636,7 +1638,7 @@ void EltwiseSplitOverChannelsPass::run() {
auto oData = l->outData.front(); auto oData = l->outData.front();
auto oDims = oData->getDims(); auto oDims = oData->getDims();
auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims)); auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims));
if (totalElementsSize <= limitations::bufferMaxSize) { if (totalElementsSize <= Limitations::kBufferMaxSize) {
continue; continue;
} }
auto splitSizesPerAxis = AlignedSplitSizesPerAxis(oDims); auto splitSizesPerAxis = AlignedSplitSizesPerAxis(oDims);
@ -1747,8 +1749,9 @@ void SubstituteScaleShiftBroadCastPass::run() {
if (was_reshaped) { if (was_reshaped) {
dataDims = reshaped_data[insData->getName()]; dataDims = reshaped_data[insData->getName()];
} else { } else {
dataDims = HasTo2DReshapeData(l) dataDims =
? Get2DReshapedData(insData, limitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() HasTo2DReshapeData(l)
? Get2DReshapedData(insData, Limitations::get_min_batch_to_fit_in_buffer(insData), 8)->getDims()
: insData->getDims(); : insData->getDims();
} }


@ -17,6 +17,7 @@
#include "log/debug.hpp" #include "log/debug.hpp"
using namespace ov::intel_gna::gna_convolution_layer; using namespace ov::intel_gna::gna_convolution_layer;
using namespace ov::intel_gna::limitations;
void CNNFilter32(intel_dnn_component_t* component) { void CNNFilter32(intel_dnn_component_t* component) {
auto filters = reinterpret_cast<float*>(component->op.conv1D.ptr_filters); auto filters = reinterpret_cast<float*>(component->op.conv1D.ptr_filters);
@ -306,7 +307,7 @@ void CNN2DFilter32(intel_dnn_component_t* component) {
} }
} }
// kernel padded to 16B = 4 * sizeof(float) // kernel padded to 16B = 4 * sizeof(float)
kernelIndex += ALIGN(kh * kw * kc, ov::intel_gna::limitations::convEachKernelByteAlignment / sizeof(float)); kernelIndex += ALIGN(kh * kw * kc, Limitations::kConvEachKernelByteAlignment / sizeof(float));
} }
} }


@ -15,6 +15,7 @@
using namespace ov::intel_gna; using namespace ov::intel_gna;
using namespace ov::intel_gna::pass; using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::limitations;
static bool BiasValidation(const ngraph::Output<ngraph::Node>& output) { static bool BiasValidation(const ngraph::Output<ngraph::Node>& output) {
auto bias_output_shape = output.get_node()->get_output_shape(0); auto bias_output_shape = output.get_node()->get_output_shape(0);
@ -49,9 +50,9 @@ static std::tuple<bool, uint32_t, uint32_t, uint32_t> VerifyAndGetConvParams(
const uint32_t width = input1_shape.front(); const uint32_t width = input1_shape.front();
const uint32_t in_channels = input2_shape.back(); const uint32_t in_channels = input2_shape.back();
const uint32_t out_channels = input2_shape.front(); const uint32_t out_channels = input2_shape.front();
if (input1_shape.front() <= limitations::affineMaxBatchSize || if (input1_shape.front() <= Limitations::kAffineMaxBatchSize ||
out_channels % limitations::convFiltersNumDivider != 0 || out_channels > limitations::convMaxFiltersNum || out_channels % Limitations::kConvFiltersNumDivider != 0 || out_channels > Limitations::kConvMaxFiltersNum ||
in_channels > limitations::convFilterMaxSize) { in_channels > Limitations::kConvFilterMaxSize) {
return std::make_tuple(false, 0, 0, 0); return std::make_tuple(false, 0, 0, 0);
} }


@ -20,6 +20,7 @@
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
using namespace target; using namespace target;
using namespace limitations;
namespace pass { namespace pass {
using namespace helper; using namespace helper;
@ -55,7 +56,7 @@ static bool VerifyAndGetConvData(std::shared_ptr<ngraph::opset7::Convolution> co
size_t filter_height = filters.get_shape()[2]; size_t filter_height = filters.get_shape()[2];
size_t filter_width = filters.get_shape()[3]; size_t filter_width = filters.get_shape()[3];
if (filter_width > limitations::copyMaxGrouping || filter_height > limitations::copyMaxGrouping) { if (filter_width > Limitations::kCopyMaxGrouping || filter_height > Limitations::kCopyMaxGrouping) {
return false; return false;
} }
@ -76,7 +77,7 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr<ngraph::opset7:
(max_pool->get_auto_pad() != ngraph::op::PadType::EXPLICIT || (max_pool->get_auto_pad() != ngraph::op::PadType::EXPLICIT ||
max_pool->get_pads_begin() != ngraph::Shape({0, 0}) || max_pool->get_pads_end() != ngraph::Shape({0, 0}))) || max_pool->get_pads_begin() != ngraph::Shape({0, 0}) || max_pool->get_pads_end() != ngraph::Shape({0, 0}))) ||
pool_filter.size() != 2 || pool_strides.size() != 2 || pool_filter[0] > 1 || pool_strides[0] > 1 || pool_filter.size() != 2 || pool_strides.size() != 2 || pool_filter[0] > 1 || pool_strides[0] > 1 ||
pool_filter[0] > limitations::maxPoolMaxWindowSize) pool_filter[0] > Limitations::kMaxPoolMaxWindowSize)
return false; return false;
graph_data.pool_size_width = pool_filter[1]; graph_data.pool_size_width = pool_filter[1];
@ -84,16 +85,15 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr<ngraph::opset7:
return true; return true;
} }
static bool GNA30SupportedConv(const DeviceVersion& compile_target, static bool GNA30SupportedConv(const InferenceEngine::Precision& gnaPrecision,
const InferenceEngine::Precision& gnaPrecision,
const GraphData& graph_data, const GraphData& graph_data,
const ConvData& conv_data) { const ConvData& conv_data) {
const auto cnn2dValidatorPtr = limitations::cnn2d::AbstractValidator::Create(compile_target); const auto cnn2dValidatorPtr = Limitations::get_instance()->get_cnn_validator();
if (!cnn2dValidatorPtr) { if (!cnn2dValidatorPtr) {
return false; return false;
} }
const auto& cnn2dValidator = *cnn2dValidatorPtr; const auto cnnIsValid = cnn2dValidatorPtr->ValidateCnn2D(graph_data.conv->get_friendly_name(),
const auto cnnIsValid = cnn2dValidator.ValidateCnn2D(graph_data.conv->get_friendly_name(),
conv_data.input_height, conv_data.input_height,
conv_data.input_width, conv_data.input_width,
conv_data.input_channel_count, conv_data.input_channel_count,
@ -112,7 +112,7 @@ static bool GNA30SupportedConv(const DeviceVersion& compile_target,
if (!graph_data.max_pool) { if (!graph_data.max_pool) {
return true; return true;
} }
const auto poolingValid = cnn2dValidator.ValidatePooling2D(graph_data.conv->get_friendly_name(), const auto poolingValid = cnn2dValidatorPtr->ValidatePooling2D(graph_data.conv->get_friendly_name(),
graph_data.max_pool->get_kernel()[0], graph_data.max_pool->get_kernel()[0],
graph_data.max_pool->get_kernel()[1], graph_data.max_pool->get_kernel()[1],
graph_data.max_pool->get_strides()[0], graph_data.max_pool->get_strides()[0],
@ -126,7 +126,7 @@ static size_t CalculateConvCount(const ConvData& conv_data) {
size_t conv_count = 1; size_t conv_count = 1;
size_t total_factorized_conv_channel_count = size_t total_factorized_conv_channel_count =
(conv_data.input_channel_count * conv_data.filter_height * conv_data.filter_width); (conv_data.input_channel_count * conv_data.filter_height * conv_data.filter_width);
while (total_factorized_conv_channel_count / conv_count > limitations::convFilterMaxSize || while (total_factorized_conv_channel_count / conv_count > Limitations::kConvFilterMaxSize ||
total_factorized_conv_channel_count % conv_count != 0 || conv_data.filter_channel_count % conv_count != 0) total_factorized_conv_channel_count % conv_count != 0 || conv_data.filter_channel_count % conv_count != 0)
conv_count++; conv_count++;
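A standalone sketch of the factorization rule in CalculateConvCount above (illustration only); kConvFilterMaxSizeAssumed is a hypothetical stand-in for Limitations::kConvFilterMaxSize, chosen just to make the numbers concrete:

#include <cstddef>
#include <iostream>

int main() {
    const size_t kConvFilterMaxSizeAssumed = 768;  // hypothetical HW filter-size limit
    const size_t in_channels = 128, filter_h = 3, filter_w = 3, filter_channels = 128;
    const size_t total = in_channels * filter_h * filter_w;  // 1152 factorized channels
    size_t conv_count = 1;
    // Same loop as above: grow conv_count until the factorized channels fit the limit
    // and divide evenly between the split convolutions.
    while (total / conv_count > kConvFilterMaxSizeAssumed || total % conv_count != 0 ||
           filter_channels % conv_count != 0)
        ++conv_count;
    std::cout << conv_count << "\n";  // prints 2: each split convolution gets 576 channels
    return 0;
}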
@ -139,7 +139,7 @@ static bool ShouldDecompose(GraphData& graph_data, const ConvData& conv_data) {
// Concat (copy) layer limitation allows to split up to a certain limit // Concat (copy) layer limitation allows to split up to a certain limit
// Currently we are able to split only convolutions without pooling in horizontal dimension // Currently we are able to split only convolutions without pooling in horizontal dimension
if (graph_data.conv_count > limitations::copyMaxGrouping || if (graph_data.conv_count > Limitations::kCopyMaxGrouping ||
((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1)) ((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1))
return false; return false;
@ -561,8 +561,7 @@ static void Decompose(const GraphData& graph_data, ConvData& conv_data) {
conv_result->set_friendly_name(conv_result_name); conv_result->set_friendly_name(conv_result_name);
} }
static bool Convert(const DeviceVersion& compile_target, static bool Convert(const InferenceEngine::Precision& gnaPrecision,
const InferenceEngine::Precision& gnaPrecision,
std::shared_ptr<ngraph::Node> leading_transpose, std::shared_ptr<ngraph::Node> leading_transpose,
std::shared_ptr<ngraph::Node> fq_filters, std::shared_ptr<ngraph::Node> fq_filters,
std::shared_ptr<ngraph::Node> conv, std::shared_ptr<ngraph::Node> conv,
@ -598,7 +597,7 @@ static bool Convert(const DeviceVersion& compile_target,
return false; return false;
// If compile target is GNA 3.0 and the convolution is supported on it, then skip decomposition // If compile target is GNA 3.0 and the convolution is supported on it, then skip decomposition
if (GNA30SupportedConv(compile_target, gnaPrecision, graph_data, conv_data)) if (GNA30SupportedConv(gnaPrecision, graph_data, conv_data))
return false; return false;
// We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) // We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC)
@ -618,7 +617,7 @@ static bool Convert(const DeviceVersion& compile_target,
return true; return true;
} }
Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const InferenceEngine::Precision& gnaPrecision) { Decompose2DConv::Decompose2DConv(const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConv); MATCHER_SCOPE(Decompose2DConv);
auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>(); auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
@ -735,8 +734,7 @@ Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const Infe
} }
} }
return Convert(compile_target, return Convert(gnaPrecision,
gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(leading_transpose).get_node_shared_ptr(),
fq_filters_node, fq_filters_node,
pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(),
@ -755,8 +753,7 @@ Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const Infe
this->register_matcher(m, callback); this->register_matcher(m, callback);
} }
Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const DeviceVersion& compile_target, Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const InferenceEngine::Precision& gnaPrecision) {
const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConvTransposedWithBias); MATCHER_SCOPE(Decompose2DConvTransposedWithBias);
auto const_input_i64 = auto const_input_i64 =
@ -781,8 +778,7 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const Devic
pattern_map.at(bias).get_node_shared_ptr()))) pattern_map.at(bias).get_node_shared_ptr())))
return false; return false;
return Convert(compile_target, return Convert(gnaPrecision,
gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(leading_transpose).get_node_shared_ptr(),
nullptr, nullptr,
pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(),
@ -802,7 +798,6 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const Devic
} }
Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF( Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF(
const DeviceVersion& compile_target,
const InferenceEngine::Precision& gnaPrecision) { const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConvTransposedWithBiasAF); MATCHER_SCOPE(Decompose2DConvTransposedWithBiasAF);
@ -836,8 +831,7 @@ Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF(
pattern_map.at(bias).get_node_shared_ptr()))) pattern_map.at(bias).get_node_shared_ptr())))
return false; return false;
return Convert(compile_target, return Convert(gnaPrecision,
gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(leading_transpose).get_node_shared_ptr(),
nullptr, nullptr,
pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(),


@ -35,7 +35,7 @@ namespace pass {
class Decompose2DConv : public ngraph::pass::MatcherPass { class Decompose2DConv : public ngraph::pass::MatcherPass {
public: public:
OPENVINO_RTTI("Decompose2DConv", "0"); OPENVINO_RTTI("Decompose2DConv", "0");
Decompose2DConv(const target::DeviceVersion& compile_target, const InferenceEngine::Precision& gnaPrecision); Decompose2DConv(const InferenceEngine::Precision& gnaPrecision);
}; };
/** /**
@ -56,8 +56,7 @@ public:
class Decompose2DConvTransposedWithBias : public ngraph::pass::MatcherPass { class Decompose2DConvTransposedWithBias : public ngraph::pass::MatcherPass {
public: public:
OPENVINO_RTTI("Decompose2DConvTransposedWithBias", "0"); OPENVINO_RTTI("Decompose2DConvTransposedWithBias", "0");
Decompose2DConvTransposedWithBias(const target::DeviceVersion& compile_target, Decompose2DConvTransposedWithBias(const InferenceEngine::Precision& gnaPrecision);
const InferenceEngine::Precision& gnaPrecision);
}; };
/** /**
@ -80,8 +79,7 @@ public:
class Decompose2DConvTransposedWithBiasAF : public ngraph::pass::MatcherPass { class Decompose2DConvTransposedWithBiasAF : public ngraph::pass::MatcherPass {
public: public:
OPENVINO_RTTI("Decompose2DConvTransposedWithBiasAF", "0"); OPENVINO_RTTI("Decompose2DConvTransposedWithBiasAF", "0");
Decompose2DConvTransposedWithBiasAF(const target::DeviceVersion& compile_target, Decompose2DConvTransposedWithBiasAF(const InferenceEngine::Precision& gnaPrecision);
const InferenceEngine::Precision& gnaPrecision);
}; };
} // namespace pass } // namespace pass


@ -13,6 +13,7 @@
#include "backend/gna_limitations.hpp" #include "backend/gna_limitations.hpp"
using namespace ngraph; using namespace ngraph;
using namespace ov::intel_gna::limitations;
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
@ -81,7 +82,7 @@ static bool GetVerifiedMVNData(const std::shared_ptr<opset8::MVN> mvn, MVNData&
// Check if average must be split // Check if average must be split
mvn_data.num_parts = 1; mvn_data.num_parts = 1;
while (mvn_data.W / mvn_data.num_parts > limitations::convFilterMaxSize) { while (mvn_data.W / mvn_data.num_parts > Limitations::kConvFilterMaxSize) {
mvn_data.num_parts *= 2; mvn_data.num_parts *= 2;
} }


@ -16,6 +16,7 @@
#include "backend/gna_limitations.hpp" #include "backend/gna_limitations.hpp"
using namespace ov::intel_gna::pass; using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::limitations;
namespace { namespace {
@ -160,7 +161,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() {
} }
if (prev_node) { if (prev_node) {
if (limitations::IsTransposeSupported(prev_node->get_output_shape(0))) { if (Limitations::is_transpose_supported(prev_node->get_output_shape(0))) {
InsertTranspose(prev_node, matmul_node->get_friendly_name(), true); InsertTranspose(prev_node, matmul_node->get_friendly_name(), true);
} }
} }
@ -170,7 +171,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() {
auto iter = pattern_map.find(fq); auto iter = pattern_map.find(fq);
if (iter != pattern_map.end() || (iter = pattern_map.find(constant)) != pattern_map.end()) { if (iter != pattern_map.end() || (iter = pattern_map.find(constant)) != pattern_map.end()) {
auto prev_node = iter->second.get_node_shared_ptr(); auto prev_node = iter->second.get_node_shared_ptr();
if (limitations::IsTranspose2d(prev_node->get_output_shape(0))) { if (Limitations::is_transpose_2d(prev_node->get_output_shape(0))) {
InsertTranspose(prev_node, prev_node->get_friendly_name(), true); InsertTranspose(prev_node, prev_node->get_friendly_name(), true);
} }
} }
@ -187,7 +188,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() {
} }
if (prev_node) { if (prev_node) {
if (limitations::IsTransposeSupported(prev_node->get_output_shape(0))) { if (Limitations::is_transpose_supported(prev_node->get_output_shape(0))) {
InsertTranspose(prev_node, matmul_node->get_friendly_name(), true); InsertTranspose(prev_node, matmul_node->get_friendly_name(), true);
} }
} }
@ -243,7 +244,7 @@ HandleTransposeAfterMatMul::HandleTransposeAfterMatMul() {
ReplaceTransposeWithReshape(transpose_it->second.get_node_shared_ptr()); ReplaceTransposeWithReshape(transpose_it->second.get_node_shared_ptr());
} else { } else {
auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr(); auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
if (!limitations::IsTransposeSupported(reshape_node->get_input_shape(0))) if (!Limitations::is_transpose_supported(reshape_node->get_input_shape(0)))
return false; return false;
auto iter = pattern_map.find(act); auto iter = pattern_map.find(act);
if (iter == pattern_map.end() && (iter = pattern_map.find(fq2)) == pattern_map.end() && if (iter == pattern_map.end() && (iter = pattern_map.find(fq2)) == pattern_map.end() &&


@ -4,6 +4,7 @@
#include "transformations/remove_in_out_processing.hpp" #include "transformations/remove_in_out_processing.hpp"
#include "backend/gna_limitations.hpp"
#include "common/graph_utils.hpp" #include "common/graph_utils.hpp"
#include "openvino/cc/pass/itt.hpp" #include "openvino/cc/pass/itt.hpp"
#include "openvino/opsets/opset1.hpp" #include "openvino/opsets/opset1.hpp"
@ -17,6 +18,7 @@
using namespace ov::opset10; using namespace ov::opset10;
using namespace ov::intel_gna::pass; using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::limitations;
namespace { namespace {
@ -29,7 +31,7 @@ inline bool is_preprocessing_layer_not_supported(std::shared_ptr<ov::Node>& laye
// Verify that transpose layer cannot be executed on GNA // Verify that transpose layer cannot be executed on GNA
if (std::dynamic_pointer_cast<ov::opset1::Transpose>(layer)) { if (std::dynamic_pointer_cast<ov::opset1::Transpose>(layer)) {
return !limitations::is_transpose_supported(layer); return !Limitations::is_transpose_supported(layer);
} }
return false; return false;


@ -14,6 +14,8 @@
#include "layers/gna_convolution_layer.hpp" #include "layers/gna_convolution_layer.hpp"
#include "layers/gna_split_layer.hpp" #include "layers/gna_split_layer.hpp"
using namespace ov::intel_gna::limitations;
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
namespace pass { namespace pass {
@ -56,13 +58,13 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
std::end(conv->get_input_shape(0)), std::end(conv->get_input_shape(0)),
size_t(1), size_t(1),
std::multiplies<size_t>()); std::multiplies<size_t>());
if (input_size <= limitations::bufferMaxSize) { if (input_size <= Limitations::kBufferMaxSize) {
return false; return false;
} }
auto& input = conv->get_input_shape(0); auto& input = conv->get_input_shape(0);
uint32_t width = input.back(); uint32_t width = input.back();
uint32_t in_channels = input.at(1); uint32_t in_channels = input.at(1);
auto split_sizes = GetAlignedSplitSizes(width, limitations::bufferMaxSize / in_channels); auto split_sizes = GetAlignedSplitSizes(width, Limitations::kBufferMaxSize / in_channels);
IE_ASSERT(split_sizes.size() > 1); IE_ASSERT(split_sizes.size() > 1);
std::vector<int64_t> split_sizes_casted(split_sizes.size()); std::vector<int64_t> split_sizes_casted(split_sizes.size());
std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) { std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) {


@ -15,6 +15,8 @@
#include "legacy/ngraph_ops/eltwise.hpp" #include "legacy/ngraph_ops/eltwise.hpp"
#include "log/log.hpp" #include "log/log.hpp"
using namespace ov::intel_gna::limitations;
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
namespace pass { namespace pass {
@ -25,7 +27,7 @@ inline bool is_eltwise_has_to_be_splitted(const ngraph::Output<ngraph::Node>& no
return false; return false;
auto o_dims = eltwise->get_output_shape(0); auto o_dims = eltwise->get_output_shape(0);
auto total_elem_size = std::accumulate(std::begin(o_dims), std::end(o_dims), 1, std::multiplies<size_t>()); auto total_elem_size = std::accumulate(std::begin(o_dims), std::end(o_dims), 1, std::multiplies<size_t>());
return (total_elem_size > limitations::bufferMaxSize); return (total_elem_size > Limitations::kBufferMaxSize);
} }
static std::shared_ptr<ngraph::opset9::VariadicSplit> split_input( static std::shared_ptr<ngraph::opset9::VariadicSplit> split_input(


@ -11,7 +11,7 @@ namespace intel_gna {
namespace pass { namespace pass {
/** /**
* @brief Split over channels for Eltwise to avoid GNA-HW bufferMaxSize limitation per eltwise * @brief Split over channels for Eltwise to avoid GNA-HW kBufferMaxSize limitation per eltwise
*/ */
class SplitEltwise : public ov::pass::MatcherPass { class SplitEltwise : public ov::pass::MatcherPass {
public: public:


@ -2,20 +2,24 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
#include <vector>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <legacy/layer_transform.hpp>
#include "frontend/model_quantizer.hpp"
#include "frontend/layer_quantizer.hpp"
#include "gna_matcher.hpp"
#include <ie_core.hpp> #include <ie_core.hpp>
#include <legacy/layer_transform.hpp>
#include <vector>
#include "backend/gna_limitations.hpp"
#include "frontend/layer_quantizer.hpp"
#include "frontend/model_quantizer.hpp"
#include "gna_matcher.hpp"
using namespace InferenceEngine; using namespace InferenceEngine;
using namespace ov::intel_gna::limitations;
using namespace ov::intel_gna::frontend; using namespace ov::intel_gna::frontend;
using namespace GNATestIRs; using namespace GNATestIRs;
class I8QuantisationTest : public GNATest<> { class I8QuantisationTest : public GNATest<> {
protected: protected:
InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) { InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) {
auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp); auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
Config gna_config; Config gna_config;
@ -26,7 +30,8 @@ class I8QuantisationTest : public GNATest<> {
return newLayer; return newLayer;
}; };
InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model, float scale_factor) const { InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model,
float scale_factor) const {
auto scale_factors = std::vector<float>({scale_factor}); auto scale_factors = std::vector<float>({scale_factor});
GnaInputs inputs; GnaInputs inputs;
@ -41,30 +46,30 @@ class I8QuantisationTest : public GNATest<> {
auto transformer = ov::intel_gna::TransformationsPipeline(gna_config); auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);
return ModelQuantizer(transformer).quantize( return ModelQuantizer(transformer).quantize(model, inputs);
model,
inputs);
} }
void SetUp() override {} void SetUp() override {
Limitations::init(target::DeviceVersion::Default);
}
}; };
// TODO: add test for FC weights after quantization // TODO: add test for FC weights after quantization
TEST_F(I8QuantisationTest, canQuantizeFCLayer){ TEST_F(I8QuantisationTest, canQuantizeFCLayer) {
auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32}); auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32});
fc->_out_num = 9; fc->_out_num = 9;
auto weights = make_shared_blob<float>({ Precision::FP32, {1, 1}, Layout::NC }); auto weights = make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC});
fc->_weights = weights; fc->_weights = weights;
fc->_biases = make_shared_blob<float>({ Precision::FP32, {1, 1}, Layout::NC }); fc->_biases = make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC});
fc->_weights->allocate(); fc->_weights->allocate();
fc->_biases->allocate(); fc->_biases->allocate();
std::shared_ptr<Data> outData = std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({ 1, 1 }), Layout::NC)); std::shared_ptr<Data> outData =
std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC));
fc->outData.push_back(outData); fc->outData.push_back(outData);
fc->insData.push_back(outData); fc->insData.push_back(outData);
// actual quantisation algorithm is involved // actual quantisation algorithm is involved
for (auto && w : *weights) { for (auto&& w : *weights) {
w = MAX_OUT_MULTIPLIER * MAX_VAL_1B_WEIGHT; w = MAX_OUT_MULTIPLIER * MAX_VAL_1B_WEIGHT;
} }
@ -73,17 +78,16 @@ TEST_F(I8QuantisationTest, canQuantizeFCLayer){
ASSERT_NO_THROW(quantize(fc)); ASSERT_NO_THROW(quantize(fc));
} }
TEST_F(I8QuantisationTest, canQuantizeActivation){ TEST_F(I8QuantisationTest, canQuantizeActivation) {
auto sigmoid = std::make_shared<GenericLayer>(LayerParams{"name", "type", Precision::FP32});
auto sigmoid = std::make_shared<GenericLayer >(LayerParams{"name", "type", Precision::FP32});
sigmoid->params["value"] = 2; sigmoid->params["value"] = 2;
sigmoid->type = "Activation"; sigmoid->type = "Activation";
ASSERT_NO_THROW(quantize(sigmoid)); ASSERT_NO_THROW(quantize(sigmoid));
} }
TEST_F(I8QuantisationTest, inputPrecisionIs16Bits){ TEST_F(I8QuantisationTest, inputPrecisionIs16Bits) {
auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -92,13 +96,15 @@ TEST_F(I8QuantisationTest, inputPrecisionIs16Bits){
auto newNet = quantize_single_input_model(network, 1000); auto newNet = quantize_single_input_model(network, 1000);
InputsDataMap inputs = newNet.getInputsInfo(); InputsDataMap inputs = newNet.getInputsInfo();
auto inputLayer = getCreatorLayer(getInputTo(inputs.begin()->second->getInputData()).begin()->second->insData.front().lock()).lock(); auto inputLayer =
getCreatorLayer(getInputTo(inputs.begin()->second->getInputData()).begin()->second->insData.front().lock())
.lock();
ASSERT_EQ(inputLayer->precision, Precision::I16); ASSERT_EQ(inputLayer->precision, Precision::I16);
} }
TEST_F(I8QuantisationTest, FCDimensionIs1){ TEST_F(I8QuantisationTest, FCDimensionIs1) {
auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -108,8 +114,8 @@ TEST_F(I8QuantisationTest, FCDimensionIs1){
ASSERT_NO_THROW(quantize_single_input_model(network, 1000)); ASSERT_NO_THROW(quantize_single_input_model(network, 1000));
} }
TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits){ TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits) {
auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -124,7 +130,7 @@ TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits){
} }
TEST_F(I8QuantisationTest, fp16tofp32_on_fullyConnected_model) { TEST_F(I8QuantisationTest, fp16tofp32_on_fullyConnected_model) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {220}, Layout::C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {220}, Layout::C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -135,7 +141,7 @@ TEST_F(I8QuantisationTest, fp16tofp32_on_fullyConnected_model) {
} }
TEST_F(I8QuantisationTest, LSTMCell_quantize) { TEST_F(I8QuantisationTest, LSTMCell_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {33664}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {33664}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -146,7 +152,7 @@ TEST_F(I8QuantisationTest, LSTMCell_quantize) {
} }
TEST_F(I8QuantisationTest, LSTMCell_unaligned_quantize) { TEST_F(I8QuantisationTest, LSTMCell_unaligned_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {3480}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {3480}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -157,7 +163,7 @@ TEST_F(I8QuantisationTest, LSTMCell_unaligned_quantize) {
} }
TEST_F(I8QuantisationTest, TI_quantize) { TEST_F(I8QuantisationTest, TI_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {249748}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {249748}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);


@ -2,23 +2,27 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
#include <vector>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <legacy/layer_transform.hpp>
#include "backend/gna_types.hpp"
#include "frontend/model_quantizer.hpp"
#include "frontend/layer_quantizer.hpp"
#include "gna_matcher.hpp"
#include <ie_core.hpp> #include <ie_core.hpp>
#include <legacy/layer_transform.hpp>
#include <vector>
#include "backend/gna_limitations.hpp"
#include "backend/gna_types.hpp"
#include "frontend/layer_quantizer.hpp"
#include "frontend/model_quantizer.hpp"
#include "gna_matcher.hpp"
#include "ngraph_functions/builders.hpp" #include "ngraph_functions/builders.hpp"
using namespace InferenceEngine; using namespace InferenceEngine;
using namespace ov::intel_gna::limitations;
using namespace ov::intel_gna::frontend; using namespace ov::intel_gna::frontend;
using namespace GNATestIRs; using namespace GNATestIRs;
class I16QuantisationTest : public GNATest<> { class I16QuantisationTest : public GNATest<> {
protected: protected:
InferenceEngine::CNNLayerPtr quantize (InferenceEngine::CNNLayerPtr lp) { InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) {
auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp); auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
Config gna_config; Config gna_config;
gna_config.gnaPrecision = InferenceEngine::Precision::I16; gna_config.gnaPrecision = InferenceEngine::Precision::I16;
@ -28,7 +32,8 @@ class I16QuantisationTest : public GNATest<> {
return newLayer; return newLayer;
}; };
InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model, float scale_factor) const { InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model,
float scale_factor) const {
auto scale_factors = std::vector<float>({scale_factor}); auto scale_factors = std::vector<float>({scale_factor});
GnaInputs inputs; GnaInputs inputs;
@ -43,21 +48,20 @@ class I16QuantisationTest : public GNATest<> {
auto transformer = ov::intel_gna::TransformationsPipeline(gna_config); auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);
return ModelQuantizer(transformer).quantize( return ModelQuantizer(transformer).quantize(model, inputs);
model,
inputs);
} }
void SetUp() override { void SetUp() override {
Limitations::init(target::DeviceVersion::Default);
} }
}; };
template <class T> template <class T>
T setWeights(T blob) { T setWeights(T blob) {
blob->allocate(); blob->allocate();
// actual quantisation algorithm is involved - we need to provide weights that will be quantized with scale factor of 1 // actual quantisation algorithm is involved - we need to provide weights that will be quantized with scale factor
for (auto && w : *blob) { // of 1
for (auto&& w : *blob) {
w = MAX_VAL_2B_WEIGHT; w = MAX_VAL_2B_WEIGHT;
} }
return blob; return blob;
@ -75,36 +79,34 @@ TBlob<uint8_t>::Ptr setWeights(TBlob<uint8_t>::Ptr blob) {
return blob; return blob;
} }
// TODO: add test for FC weights after quantization // TODO: add test for FC weights after quantization
TEST_F(I16QuantisationTest, canQuantizeFCLayer){ TEST_F(I16QuantisationTest, canQuantizeFCLayer) {
auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32}); auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32});
fc->_out_num = 9; fc->_out_num = 9;
fc->_weights = setWeights(make_shared_blob<float>({ Precision::FP32, {1, 1}, Layout::NC })); fc->_weights = setWeights(make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC}));
fillWeights(fc->_weights); fillWeights(fc->_weights);
fc->_biases = make_shared_blob<float>({ Precision::FP32, {1, 1}, Layout::NC }); fc->_biases = make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC});
fc->_biases->allocate(); fc->_biases->allocate();
fillWeights(fc->_biases); fillWeights(fc->_biases);
std::shared_ptr<Data> outData = std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC)); std::shared_ptr<Data> outData =
std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC));
fc->outData.push_back(outData); fc->outData.push_back(outData);
fc->insData.push_back(outData); fc->insData.push_back(outData);
ASSERT_NO_THROW(quantize(fc)); ASSERT_NO_THROW(quantize(fc));
} }
TEST_F(I16QuantisationTest, canQuantizeActivation){ TEST_F(I16QuantisationTest, canQuantizeActivation) {
auto sigmoid = std::make_shared<GenericLayer>(LayerParams{"name", "type", Precision::FP32});
auto sigmoid = std::make_shared<GenericLayer >(LayerParams{"name", "type", Precision::FP32});
sigmoid->params["value"] = 2; sigmoid->params["value"] = 2;
sigmoid->type = "Activation"; sigmoid->type = "Activation";
ASSERT_NO_THROW(quantize(sigmoid)); ASSERT_NO_THROW(quantize(sigmoid));
} }
TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits){ TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {440}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -118,10 +120,9 @@ TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits){
ASSERT_EQ(affineDataPtr->getTensorDesc().getPrecision(), Precision::I32); ASSERT_EQ(affineDataPtr->getTensorDesc().getPrecision(), Precision::I32);
} }
TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) { TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) {
auto weights = setWeights(make_shared_blob<uint8_t >({ Precision::U8, {440}, C })); auto weights = setWeights(make_shared_blob<uint8_t>({Precision::U8, {440}, C}));
//std::fill_n(weights->buffer().as<float*>(), weights->byteSize()/sizeof(float), 0); // std::fill_n(weights->buffer().as<float*>(), weights->byteSize()/sizeof(float), 0);
Core ie; Core ie;
auto network = ie.ReadNetwork(affineToMemoryModel(), weights); auto network = ie.ReadNetwork(affineToMemoryModel(), weights);
@ -129,13 +130,13 @@ TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) {
ASSERT_NO_THROW(quantize_single_input_model(network, 1000)); ASSERT_NO_THROW(quantize_single_input_model(network, 1000));
} }
TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){ TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect) {
const float inputScaleFactorTest = 1000; const float inputScaleFactorTest = 1000;
const float weightValueTest = 100; const float weightValueTest = 100;
auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
weights->allocate(); weights->allocate();
fillWeights(weights, { weightValueTest }); fillWeights(weights, {weightValueTest});
Core ie; Core ie;
auto network = ie.ReadNetwork(Fc2DOutputModel(), weights); auto network = ie.ReadNetwork(Fc2DOutputModel(), weights);
@ -153,51 +154,70 @@ TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){
TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion) { TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion) {
assert_that() assert_that()
.onInferModel(Fc2DOutputModel()) .onInferModel(Fc2DOutputModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .inNotCompactMode()
.gna().propagate_forward().called_without().pwl_inserted_into_nnet(); .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_without()
.pwl_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion_ProfilingEnabled) { TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion_ProfilingEnabled) {
assert_that() assert_that()
.onInferModel(Fc2DOutputModel()) .onInferModel(Fc2DOutputModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .inNotCompactMode()
.gna().propagate_forward().called_without().pwl_inserted_into_nnet().profiling_counters(); .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_without()
.pwl_inserted_into_nnet()
.profiling_counters();
} }
TEST_F(I16QuantisationTest, OnlyAffineWithNanScaleFactorFails) { TEST_F(I16QuantisationTest, OnlyAffineWithNanScaleFactorFails) {
gna() gna().onInferModel(Fc2DOutputModel()).withNanScaleFactor().propagate_forward().throws();
.onInferModel(Fc2DOutputModel())
.withNanScaleFactor()
.propagate_forward().throws();
} }
TEST_F(I16QuantisationTest, OnlyAffineWithInfScaleFactorFails) { TEST_F(I16QuantisationTest, OnlyAffineWithInfScaleFactorFails) {
gna() gna().onInferModel(Fc2DOutputModel()).withInfScaleFactor().propagate_forward().throws();
.onInferModel(Fc2DOutputModel())
.withInfScaleFactor()
.propagate_forward().throws();
} }
TEST_F(I16QuantisationTest, AffineToMemoryWillResultInActivationInsertion) { TEST_F(I16QuantisationTest, AffineToMemoryWillResultInActivationInsertion) {
assert_that() assert_that()
.onInferModel(affineToMemoryModel()) .onInferModel(affineToMemoryModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .inNotCompactMode()
.gna().propagate_forward().called_with().pwl_inserted_into_nnet(); .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, EltwiseToMemoryWithNoOutputActivationInsertion) { TEST_F(I16QuantisationTest, EltwiseToMemoryWithNoOutputActivationInsertion) {
assert_that().inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.onInferModel(eltwiseToMemoryModelNoOutput(), [](CNNNetwork & net){ .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.onInferModel(eltwiseToMemoryModelNoOutput(),
[](CNNNetwork& net) {
net.addOutput("Eltwise_8"); net.addOutput("Eltwise_8");
}).gna().propagate_forward().called_with().pwl_inserted_into_nnet(); })
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, EltwiseToMemory_ActivationInsertion) { TEST_F(I16QuantisationTest, EltwiseToMemory_ActivationInsertion) {
assert_that().onInferModel(eltwiseToMemoryModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet(); .onInferModel(eltwiseToMemoryModel())
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInsertion) { TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInsertion) {
auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 20}); auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 20});
const auto axis_node = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1}); const auto axis_node = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});
@ -205,41 +225,73 @@ TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInserti
auto tanh = std::make_shared<ngraph::opset8::Tanh>(split->outputs()[0]); auto tanh = std::make_shared<ngraph::opset8::Tanh>(split->outputs()[0]);
auto add = std::make_shared<ngraph::opset8::Add>(split->outputs()[1], tanh); auto add = std::make_shared<ngraph::opset8::Add>(split->outputs()[1], tanh);
auto result = std::make_shared<ngraph::opset8::Result>(add); auto result = std::make_shared<ngraph::opset8::Result>(add);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
assert_that().onInferNgraphModel(function) std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet(); .onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, SliceFollowedBy2FCsAnd2Eltwises_AlignedFilterInsertion) { TEST_F(I16QuantisationTest, SliceFollowedBy2FCsAnd2Eltwises_AlignedFilterInsertion) {
assert_that().onInferModel(twoFCWithPaddingAfterSliceModel()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(twoFCWithPaddingAfterSliceModel())
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet();
} }
// ToDo requires implementation of aligning filter for concat inputs and improvement of // ToDo requires implementation of aligning filter for concat inputs and improvement of
// quantization/scaling algorithm for concat // quantization/scaling algorithm for concat
TEST_F(I16QuantisationTest, DISABLED_DoubleConcatPropageteForwardWithSuccess_AlignedFilterInsertion) { TEST_F(I16QuantisationTest, DISABLED_DoubleConcatPropageteForwardWithSuccess_AlignedFilterInsertion) {
assert_that().onInferModel(doubleConcatModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet(); .onInferModel(doubleConcatModel())
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, EltwiseSumm_onlyOneIdentityInsertion) { TEST_F(I16QuantisationTest, EltwiseSumm_onlyOneIdentityInsertion) {
assert_that().onInferModel(eltwiseSummModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once(); .onInferModel(eltwiseSummModel())
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.once();
} }
TEST_F(I16QuantisationTest, canDetectLeakyRelu) { TEST_F(I16QuantisationTest, canDetectLeakyRelu) {
assert_that().onInferModel(TFLeakyReluModel()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(TFLeakyReluModel())
.gna().propagate_forward().called_with().pwl_inserted_into_nnet(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) { TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) {
assert_that().onInferModel(maxpoolAfterRelu()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(maxpoolAfterRelu())
.gna().propagate_forward().called_with() .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.convolution_inserted_into_nnet() .convolution_inserted_into_nnet()
.And() .And()
.pwl_inserted_into_nnet() .pwl_inserted_into_nnet()
@ -248,28 +300,53 @@ TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) {
} }
TEST_F(I16QuantisationTest, EltwiseMull_willInsertTwoIdentities) { TEST_F(I16QuantisationTest, EltwiseMull_willInsertTwoIdentities) {
assert_that().onInferModel(eltwiseMulModel()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(eltwiseMulModel())
.gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.twice();
} }
TEST_F(I16QuantisationTest, multiple_inputs_supported) { TEST_F(I16QuantisationTest, multiple_inputs_supported) {
std::string configKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_"); std::string configKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_");
assert_that().onInferModel(two_inputs_to_affine()) assert_that()
.inNotCompactMode().withGNAConfig(configKey + std::to_string(0), 1.0f) .onInferModel(two_inputs_to_affine())
.withGNAConfig(configKey + std::to_string(1), 2.0f).gna().propagate_forward() .inNotCompactMode()
.called_with().pwl_inserted_into_nnet().once(); .withGNAConfig(configKey + std::to_string(0), 1.0f)
.withGNAConfig(configKey + std::to_string(1), 2.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.once();
} }
TEST_F(I16QuantisationTest, DISABLED_multiple_inputs_into_concat_supported) { TEST_F(I16QuantisationTest, DISABLED_multiple_inputs_into_concat_supported) {
assert_that().onInferModel(two_inputs_to_concat()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f).gna().propagate_forward().called_with().pwl_inserted_into_nnet().once(); .onInferModel(two_inputs_to_concat())
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.once();
} }
TEST_F(I16QuantisationTest, ScaleShift_Affine_WillResultInIdentityInsertion) { TEST_F(I16QuantisationTest, ScaleShift_Affine_WillResultInIdentityInsertion) {
assert_that().onInferModel(scaleShiftAffineModel()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(scaleShiftAffineModel())
.gna().propagate_forward().called_with().pwl_inserted_into_nnet().once(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.once();
} }
TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) { TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) {
@ -277,10 +354,17 @@ TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) {
auto clamp = std::make_shared<ngraph::opset8::Clamp>(input_params, -50, 50); auto clamp = std::make_shared<ngraph::opset8::Clamp>(input_params, -50, 50);
auto tanh = std::make_shared<ngraph::opset8::Tanh>(clamp); auto tanh = std::make_shared<ngraph::opset8::Tanh>(clamp);
auto result = std::make_shared<ngraph::opset8::Result>(tanh); auto result = std::make_shared<ngraph::opset8::Result>(tanh);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
assert_that().onInferNgraphModel(function) std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice(); .onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet()
.twice();
} }
TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInTwoDiagonalsInsertion) { TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInTwoDiagonalsInsertion) {
@ -296,76 +380,127 @@ TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInTwoDiago
auto result = std::make_shared<ngraph::opset8::Result>(add); auto result = std::make_shared<ngraph::opset8::Result>(add);
mem_w->add_control_dependency(mem_r); mem_w->add_control_dependency(mem_r);
result->add_control_dependency(mem_w); result->add_control_dependency(mem_w);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
assert_that().onInferNgraphModel(function) std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice(); .onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet()
.twice();
} }
TEST_F(I16QuantisationTest, AffineWith2AffineOutputs_ResultInOnlyOneIdentityInsertion) { TEST_F(I16QuantisationTest, AffineWith2AffineOutputs_ResultInOnlyOneIdentityInsertion) {
// one Identity activation from first FC, and one Identity activation for eltwise // one Identity activation from first FC, and one Identity activation for eltwise
assert_that().onInferModel(AffineWith2AffineOutputsModel()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(AffineWith2AffineOutputsModel())
.gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.twice();
} }
TEST_F(I16QuantisationTest, ScaleShiftWithBroadcast_ResultInDiagonalInsertion) { TEST_F(I16QuantisationTest, ScaleShiftWithBroadcast_ResultInDiagonalInsertion) {
auto& affineWeights = storage<std::vector<uint16_t>>();
auto & affineWeights = storage<std::vector<uint16_t>>();
affineWeights = { affineWeights = {
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288,
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192,
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
}; };
assert_that().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f).onInferModel(ScaleShift3DModel()) assert_that()
.withWeigthsPattern({1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f}) .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode().gna().propagate_forward().called_with().called_with().affine_weights_eq(affineWeights); .onInferModel(ScaleShift3DModel())
.withWeigthsPattern({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f})
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.called_with()
.affine_weights_eq(affineWeights);
} }
TEST_F(I16QuantisationTest, MemoryAfterConcat_ResultInCopyInsertion) { TEST_F(I16QuantisationTest, MemoryAfterConcat_ResultInCopyInsertion) {
assert_that().onInferModel(MemoryAfterConcatModel()).inNotCompactMode().gna().propagate_forward(). assert_that()
called_with().copy_inserted_into_nnet(); .onInferModel(MemoryAfterConcatModel())
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.copy_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, MemoryAndConcatAfterOneNode_ResultInCopyInsertion) { TEST_F(I16QuantisationTest, MemoryAndConcatAfterOneNode_ResultInCopyInsertion) {
assert_that().onInferModel(MemoryAndConcatAfterOneNode()).inNotCompactMode().gna().propagate_forward(). assert_that()
called_with().copy_inserted_into_nnet(); .onInferModel(MemoryAndConcatAfterOneNode())
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.copy_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, DISABLED_permutationOfWeightsBetweenConvAndAffine) { TEST_F(I16QuantisationTest, DISABLED_permutationOfWeightsBetweenConvAndAffine) {
auto & affineWeights = storage<std::vector<uint16_t>>(); auto& affineWeights = storage<std::vector<uint16_t>>();
// least likely that width and height both are multiple of 7 // least likely that width and height both are multiple of 7
auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}; auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
// here weights are transpozed // here weights are transpozed
save().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern) save()
.inNotCompactMode().from().propagate_forward().affine_weights_transpozed({128, 61}).to(affineWeights); .onInferModel(affineAfterConvNoPermute())
.withWeigthsPattern(weigthsPattern)
.inNotCompactMode()
.from()
.propagate_forward()
.affine_weights_transpozed({128, 61})
.to(affineWeights);
// here weights shouldn't be transposed // here weights shouldn't be transposed
assert_that().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern) assert_that()
.inNotCompactMode().gna().propagate_forward().called_with().affine_weights_eq(affineWeights); .onInferModel(affineAfterConvWithPermute())
.withWeigthsPattern(weigthsPattern)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.affine_weights_eq(affineWeights);
} }
TEST_F(I16QuantisationTest, DISABLED_noPermutationOfWeightsBetweenConvAndAffineIfPermuteLayerWithCorrectArgs) { TEST_F(I16QuantisationTest, DISABLED_noPermutationOfWeightsBetweenConvAndAffineIfPermuteLayerWithCorrectArgs) {
auto & affineWeights = storage<std::vector<uint16_t>>(); auto& affineWeights = storage<std::vector<uint16_t>>();
// least likely that width and height both are multiple of 7 // least likely that width and height both are multiple of 7
auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}; auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
save().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern) save()
.inNotCompactMode().from().propagate_forward().affine_weights().to(affineWeights); .onInferModel(affineAfterConvWithPermute())
.withWeigthsPattern(weigthsPattern)
.inNotCompactMode()
.from()
.propagate_forward()
.affine_weights()
.to(affineWeights);
assert_that().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern) assert_that()
.inNotCompactMode().gna().propagate_forward().called_with().affine_weights_transposed(affineWeights, {128, 61}); .onInferModel(affineAfterConvNoPermute())
.withWeigthsPattern(weigthsPattern)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.affine_weights_transposed(affineWeights, {128, 61});
} }
TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) { TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {220}, Layout::C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {220}, Layout::C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -375,7 +510,8 @@ TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) {
quantize_single_input_model(network, 1000); quantize_single_input_model(network, 1000);
} }
TEST_F(I16QuantisationTest, MultipleActivationsAfterAffineWithIdentityActivation_MultipleDiagonalLayersWithActivaitons) { TEST_F(I16QuantisationTest,
MultipleActivationsAfterAffineWithIdentityActivation_MultipleDiagonalLayersWithActivaitons) {
auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 10}); auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 10});
const auto constant = ngraph::opset8::Constant::create(ngraph::element::f32, ngraph::Shape{10, 10}, {1}); const auto constant = ngraph::opset8::Constant::create(ngraph::element::f32, ngraph::Shape{10, 10}, {1});
auto matmul1 = std::make_shared<ngraph::opset8::MatMul>(input_params, constant); auto matmul1 = std::make_shared<ngraph::opset8::MatMul>(input_params, constant);
@ -386,11 +522,17 @@ TEST_F(I16QuantisationTest, MultipleActivationsAfterAffineWithIdentityActivation
auto mul = std::make_shared<ngraph::opset8::Multiply>(sigmoid, relu); auto mul = std::make_shared<ngraph::opset8::Multiply>(sigmoid, relu);
auto add2 = std::make_shared<ngraph::opset8::Add>(add, mul); auto add2 = std::make_shared<ngraph::opset8::Add>(add, mul);
auto result = std::make_shared<ngraph::opset8::Result>(add); auto result = std::make_shared<ngraph::opset8::Result>(add);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
// identiy came from automatic insertion due to // identiy came from automatic insertion due to
assert_that().onInferNgraphModel(function) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferNgraphModel(function)
.gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActSigmoid, kActRelu, kActIdentity, kActIdentity}); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActSigmoid, kActRelu, kActIdentity, kActIdentity});
} }
TEST_F(I16QuantisationTest, MultipleActivationsAfterAffine_ResultInMultipleDiagonalLayersWithActivaitons) { TEST_F(I16QuantisationTest, MultipleActivationsAfterAffine_ResultInMultipleDiagonalLayersWithActivaitons) {
@ -401,24 +543,36 @@ TEST_F(I16QuantisationTest, MultipleActivationsAfterAffine_ResultInMultipleDiago
auto relu = std::make_shared<ngraph::opset8::Relu>(matmul); auto relu = std::make_shared<ngraph::opset8::Relu>(matmul);
auto mul = std::make_shared<ngraph::opset8::Multiply>(sigmoid, relu); auto mul = std::make_shared<ngraph::opset8::Multiply>(sigmoid, relu);
auto result = std::make_shared<ngraph::opset8::Result>(mul); auto result = std::make_shared<ngraph::opset8::Result>(mul);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
// extra identity inserted for affine // extra identity inserted for affine
assert_that().onInferNgraphModel(function) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferNgraphModel(function)
.gna().propagate_forward().called_with() .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
// 1 diag for second activation, 1 for eltwise // 1 diag for second activation, 1 for eltwise
.pwls_inserted_into_nnet({kActRelu, kActSigmoid}).diagonal_inserted_into_nnet().times(3); .pwls_inserted_into_nnet({kActRelu, kActSigmoid})
.diagonal_inserted_into_nnet()
.times(3);
} }
// TODO: build a regression test on top of it using real quantisation accuracy checking // TODO: build a regression test on top of it using real quantisation accuracy checking
TEST_F(I16QuantisationTest, ConcatWithConstInputPropagatedForward) { TEST_F(I16QuantisationTest, ConcatWithConstInputPropagatedForward) {
assert_that().onInferModel(concatModelWithConstLayer()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(concatModelWithConstLayer())
.gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity}); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActIdentity});
} }
TEST_F(I16QuantisationTest, LSTMCell_quantize) { TEST_F(I16QuantisationTest, LSTMCell_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {33664}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {33664}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -429,7 +583,7 @@ TEST_F(I16QuantisationTest, LSTMCell_quantize) {
} }
TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) { TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {3480}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {3480}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -440,15 +594,27 @@ TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) {
} }
TEST_F(I16QuantisationTest, EltwisetWithConstInputPropagatedForward) { TEST_F(I16QuantisationTest, EltwisetWithConstInputPropagatedForward) {
assert_that().onInferModel(eltwiseSumModelWithConstLayer()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(eltwiseSumModelWithConstLayer())
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, PowerWithScaleFactorPropagateForward) { TEST_F(I16QuantisationTest, PowerWithScaleFactorPropagateForward) {
assert_that().onInferModel(PowerWithScaleFactor1()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(PowerWithScaleFactor1())
.gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity}).And().diagonal_inserted_into_nnet(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActIdentity})
.And()
.diagonal_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, ConcatWithDifferentInputScaleFactorsPropagateForward) { TEST_F(I16QuantisationTest, ConcatWithDifferentInputScaleFactorsPropagateForward) {
@ -459,14 +625,20 @@ TEST_F(I16QuantisationTest, ConcatWithDifferentInputScaleFactorsPropagateForward
auto tanh = std::make_shared<ngraph::opset8::Tanh>(split->outputs()[1]); auto tanh = std::make_shared<ngraph::opset8::Tanh>(split->outputs()[1]);
auto concat = std::make_shared<ngraph::opset8::Concat>(ngraph::OutputVector{sigmoid, tanh}, 1); auto concat = std::make_shared<ngraph::opset8::Concat>(ngraph::OutputVector{sigmoid, tanh}, 1);
auto result = std::make_shared<ngraph::opset8::Result>(concat); auto result = std::make_shared<ngraph::opset8::Result>(concat);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
assert_that().onInferNgraphModel(function) std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity}); .onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActIdentity});
} }
TEST_F(I16QuantisationTest, TI_quantize) { TEST_F(I16QuantisationTest, TI_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {249748}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {249748}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -477,40 +649,52 @@ TEST_F(I16QuantisationTest, TI_quantize) {
} }
TEST_F(I16QuantisationTest, TI_PropagateForward) { TEST_F(I16QuantisationTest, TI_PropagateForward) {
auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 10 }); auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 10});
auto mul = std::make_shared<ngraph::opset8::Multiply>(input_params, auto mul = std::make_shared<ngraph::opset8::Multiply>(
std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{ 1, 10 })); input_params,
auto add = std::make_shared<ngraph::opset8::Add>(mul, std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1, 10}));
std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{ 1, 10 })); auto add = std::make_shared<ngraph::opset8::Add>(
auto reshape = std::make_shared<ngraph::opset8::Reshape>(add, mul,
std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{ 3 }, std::vector<size_t>{ 1, 1, 10 }), false); std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1, 10}));
auto reshape = std::make_shared<ngraph::opset8::Reshape>(
add,
std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{3}, std::vector<size_t>{1, 1, 10}),
false);
auto reshape_shape = reshape->output(0).get_shape(); auto reshape_shape = reshape->output(0).get_shape();
const size_t batch_size = 1; const size_t batch_size = 1;
const size_t hiddenSize = 10; const size_t hiddenSize = 10;
auto H_init = ngraph::builder::makeConstant<float>(ngraph::element::f32, { batch_size, hiddenSize }, {}, true); auto H_init = ngraph::builder::makeConstant<float>(ngraph::element::f32, {batch_size, hiddenSize}, {}, true);
auto C_init = ngraph::builder::makeConstant<float>(ngraph::element::f32, { batch_size, hiddenSize }, {}, true); auto C_init = ngraph::builder::makeConstant<float>(ngraph::element::f32, {batch_size, hiddenSize}, {}, true);
auto H_t = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{ batch_size, hiddenSize }); auto H_t = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{batch_size, hiddenSize});
auto C_t = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{ batch_size, hiddenSize }); auto C_t = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{batch_size, hiddenSize});
//Body // Body
auto X = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{ batch_size, 1, reshape_shape[2] }); auto X = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32,
auto weightsNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, { 4 * hiddenSize, reshape_shape[2] }, {}, true); ngraph::Shape{batch_size, 1, reshape_shape[2]});
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, { 4 * hiddenSize, hiddenSize }, {}, true); auto weightsNode =
ngraph::builder::makeConstant<float>(ngraph::element::f32, {4 * hiddenSize, reshape_shape[2]}, {}, true);
auto reccurrenceWeightsNode =
ngraph::builder::makeConstant<float>(ngraph::element::f32, {4 * hiddenSize, hiddenSize}, {}, true);
// lstm // lstm
auto constantX = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{ 2 }, { batch_size, reshape_shape[2] }); auto constantX =
auto lstm1 = std::make_shared<ngraph::opset8::LSTMCell>(std::make_shared<ngraph::opset8::Reshape>(X, constantX, false), ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {batch_size, reshape_shape[2]});
H_t, C_t, auto lstm1 =
weightsNode, reccurrenceWeightsNode, hiddenSize); std::make_shared<ngraph::opset8::LSTMCell>(std::make_shared<ngraph::opset8::Reshape>(X, constantX, false),
H_t,
C_t,
weightsNode,
reccurrenceWeightsNode,
hiddenSize);
auto H_o = lstm1->output(0); auto H_o = lstm1->output(0);
auto C_o = lstm1->output(1); auto C_o = lstm1->output(1);
auto body = std::make_shared<ngraph::Function>( auto body =
ngraph::OutputVector{ H_o, C_o }, ngraph::ParameterVector{ X, H_t, C_t }); std::make_shared<ngraph::Function>(ngraph::OutputVector{H_o, C_o}, ngraph::ParameterVector{X, H_t, C_t});
auto tensor_iterator = std::make_shared<ngraph::opset8::TensorIterator>(); auto tensor_iterator = std::make_shared<ngraph::opset8::TensorIterator>();
tensor_iterator->set_body(body); tensor_iterator->set_body(body);
@ -522,16 +706,29 @@ TEST_F(I16QuantisationTest, TI_PropagateForward) {
auto out0 = tensor_iterator->get_iter_value(H_o, -1); auto out0 = tensor_iterator->get_iter_value(H_o, -1);
const size_t output_size = 12; const size_t output_size = 12;
auto fc = ngraph::builder::makeFullyConnected(out0, ngraph::element::f32, output_size, true, { hiddenSize, output_size }, { 1 }, { 1 }); auto fc = ngraph::builder::makeFullyConnected(out0,
ngraph::element::f32,
output_size,
true,
{hiddenSize, output_size},
{1},
{1});
auto result = std::make_shared<ngraph::opset8::Result>(fc); auto result = std::make_shared<ngraph::opset8::Result>(fc);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
assert_that().onInferNgraphModel(function).withWeigthsPattern({0.1f}) std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
.inNotCompactMode().gna().propagate_forward() assert_that()
.called_with().pwls_inserted_into_nnet({kActIdentity}); .onInferNgraphModel(function)
.withWeigthsPattern({0.1f})
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActIdentity});
} }
TEST_F(I16QuantisationTest, SplitToConcatWith2Inputs1360NotAlignedNoFC) { TEST_F(I16QuantisationTest, SplitToConcatWith2Inputs1360NotAlignedNoFC) {
assert_that().onInferModel(SplitToConcatWith2Inputs1360NotAlignedNoFC()) assert_that()
.onInferModel(SplitToConcatWith2Inputs1360NotAlignedNoFC())
.inNotCompactMode() .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna() .gna()

View File

@ -282,10 +282,12 @@ struct Validatecnn2dParams {
class GNAcnn2dValidatorTest : public ::testing::TestWithParam<GNACnn2DValidatorTestParam> { class GNAcnn2dValidatorTest : public ::testing::TestWithParam<GNACnn2DValidatorTestParam> {
protected: protected:
void SetUp() override { void SetUp() override {
validator = cnn2d::AbstractValidator::Create(GetParam().target); Limitations::init(GetParam().target);
ASSERT_TRUE(validator != nullptr); validator = Limitations::get_instance()->get_cnn_validator();
ASSERT_TRUE(validator);
} }
std::unique_ptr<cnn2d::AbstractValidator> validator;
std::shared_ptr<cnn2d::AbstractValidator> validator;
}; };
class GNAcnn2dValidatorTestPadding : public GNAcnn2dValidatorTest { class GNAcnn2dValidatorTestPadding : public GNAcnn2dValidatorTest {
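Note on the fixture above: after the refactoring, tests no longer create a cnn2d::AbstractValidator directly; they initialize the Limitations singleton for the requested hardware generation and borrow its validator. A minimal sketch of that pattern, assuming backend/gna_limitations.hpp is included and GNA3_5 is just an example target:

    using namespace ov::intel_gna::limitations;
    using ov::intel_gna::target::DeviceVersion;

    // Initialize (or re-initialize) the singleton for the desired device generation.
    Limitations::init(DeviceVersion::GNA3_5);
    // The validator is owned by Limitations, hence the shared_ptr member in the fixture.
    std::shared_ptr<cnn2d::AbstractValidator> validator = Limitations::get_instance()->get_cnn_validator();
    ASSERT_TRUE(validator);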

View File

@ -18,7 +18,7 @@ class GNAPluginForNetworkMetricsTest : public GNAPlugin {
public: public:
GNAPluginForNetworkMetricsTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) { GNAPluginForNetworkMetricsTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
graphCompiler.setGNAMemoryPtr(gnamem); m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset(); gnadevice.reset();
} }
}; };
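The same rename shows up in every test-only plugin subclass: the graph compiler is now reached through the m_graph_compiler member. A sketch of the shared constructor pattern used by these fixtures, with the class name here being hypothetical:

    class GNAPluginForTests : public GNAPlugin {
    public:
        explicit GNAPluginForTests(const std::map<std::string, std::string>& config) : GNAPlugin(config) {
            // Use a host-side float allocator so the graph can be compiled without GNA hardware,
            // hand it to the renamed graph compiler member, and drop the device.
            gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
            m_graph_compiler->setGNAMemoryPtr(gnamem);
            gnadevice.reset();
        }
    };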

View File

@ -81,11 +81,11 @@ class GNAPluginForPWLExtraSegmentsTest : public GNAPlugin {
public: public:
GNAPluginForPWLExtraSegmentsTest(const std::map<std::string, std::string>& config) : GNAPlugin(config) { GNAPluginForPWLExtraSegmentsTest(const std::map<std::string, std::string>& config) : GNAPlugin(config) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
graphCompiler.setGNAMemoryPtr(gnamem); m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset(); gnadevice.reset();
} }
void Test(const size_t expected_segments) { void Test(const size_t expected_segments) {
for (const auto& component : graphCompiler.dnnComponents.components) { for (const auto& component : m_graph_compiler->dnnComponents.components) {
if (component.dnnComponent.operation == kDnnPiecewiselinearOp) { if (component.dnnComponent.operation == kDnnPiecewiselinearOp) {
EXPECT_EQ(expected_segments, component.dnnComponent.op.pwl.num_segments); EXPECT_EQ(expected_segments, component.dnnComponent.op.pwl.num_segments);
} }

View File

@ -58,7 +58,7 @@ TEST(CheckSplitSupported, CheckVariadicSplitSupported) {
ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::opset9::Constant::create(ngraph::element::i64,
ngraph::Shape({split_lengths.size()}), ngraph::Shape({split_lengths.size()}),
split_lengths)); split_lengths));
ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result); ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result);
} }
} }
@ -86,7 +86,7 @@ TEST(CheckSplitSupported, CheckSplitSupported) {
std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape), std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape),
ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}), ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}),
num_splits); num_splits);
ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result); ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result);
} }
} }
} // namespace } // namespace
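A minimal sketch of the entry point these tests now call: is_split_supported moved from a free function to a static member of Limitations. The shapes below are illustrative only; whether a split is accepted still depends on the GNA buffer limits checked inside:

    auto input = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, ngraph::Shape{1, 1024});
    auto axis = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {1});
    auto split = std::make_shared<ngraph::opset9::Split>(input, axis, 4);
    // The second argument mirrors the tests above; presumably it controls whether
    // unsupported cases throw instead of simply returning false.
    bool supported = ov::intel_gna::limitations::Limitations::is_split_supported(split, false);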

View File

@ -17,7 +17,7 @@ class GNAPluginForPrecisionTest : public GNAPlugin {
public: public:
GNAPluginForPrecisionTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) { GNAPluginForPrecisionTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
graphCompiler.setGNAMemoryPtr(gnamem); m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset(); gnadevice.reset();
} }
std::vector<intel_dnn_component_t> get_components() { std::vector<intel_dnn_component_t> get_components() {

View File

@ -45,9 +45,9 @@ public:
GNAPluginForMemoryAlignmentTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) { GNAPluginForMemoryAlignmentTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
if (gnadevice) { if (gnadevice) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}, gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{},
gnadevice->getMemAlignment(), Limitations::get_instance()->get_memory_alignment(),
limitations::kMemoryPageSize)); Limitations::kMemoryPageSize));
graphCompiler.setGNAMemoryPtr(gnamem); m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset(); gnadevice.reset();
} }
} }
@ -149,16 +149,14 @@ INSTANTIATE_TEST_SUITE_P(MemoryAlignment_GNA_4_0,
class MemoryAlignmentTest : public ::testing::Test {}; class MemoryAlignmentTest : public ::testing::Test {};
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_ExpectExceptionWhenTargetIsUnset) { TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect64ByteAlignmentWhenTargetIsGNA3_5) {
EXPECT_ANY_THROW(getMemoryAlignmentBytes(DeviceVersion::NotSet)); Limitations::init(DeviceVersion::GNA3_5);
} EXPECT_EQ(Limitations::get_instance()->get_memory_alignment(), 64);
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect64ByteAlignmentWhenTargetIsGNA3_0) {
EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_0), 64);
} }
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect16ByteAlignmentWhenTargetIsGNA3_6) { TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect16ByteAlignmentWhenTargetIsGNA3_6) {
EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_6), 16); Limitations::init(DeviceVersion::GNA3_6);
EXPECT_EQ(Limitations::get_instance()->get_memory_alignment(), 16);
} }
} // namespace testing } // namespace testing
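The two tests above replace the old free function getMemoryAlignmentBytes(): alignment is now a property of the initialized singleton, so re-initializing for another target changes the reported value (64 bytes for GNA3_5, 16 bytes for GNA3_6, per the expectations above). A compact sketch:

    using namespace ov::intel_gna::limitations;
    using ov::intel_gna::target::DeviceVersion;

    Limitations::init(DeviceVersion::GNA3_5);
    size_t alignment35 = Limitations::get_instance()->get_memory_alignment();  // 64

    Limitations::init(DeviceVersion::GNA3_6);
    size_t alignment36 = Limitations::get_instance()->get_memory_alignment();  // 16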

View File

@ -297,7 +297,7 @@ public:
GNAPluginTested() : GNAPlugin() { GNAPluginTested() : GNAPlugin() {
gnamem_t = std::make_shared<GNAMemoryTested>(); gnamem_t = std::make_shared<GNAMemoryTested>();
gnamem = gnamem_t; gnamem = gnamem_t;
graphCompiler.setGNAMemoryPtr(gnamem); m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset(); gnadevice.reset();
} }
void Test() { void Test() {

View File

@ -15,6 +15,7 @@
#include "common_test_utils/ngraph_test_utils.hpp" #include "common_test_utils/ngraph_test_utils.hpp"
#include "transformations/decompose_2d_convolution.hpp" #include "transformations/decompose_2d_convolution.hpp"
using namespace ov::intel_gna::limitations;
namespace testing { namespace testing {
namespace { namespace {
@ -312,6 +313,8 @@ void Decompose2DConvTestInvalidFixture::SetUp() {
std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) = std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) =
params; params;
Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
function = get_initial_function(fq, function = get_initial_function(fq,
model, model,
input_shape, input_shape,
@ -342,6 +345,7 @@ class Decompose2DConvTestFixture : public CommonTestUtils::TestsCommon,
public ::testing::WithParamInterface<fqDecompose2DConvParams> { public ::testing::WithParamInterface<fqDecompose2DConvParams> {
public: public:
void SetUp() override; void SetUp() override;
std::shared_ptr<ngraph::Function> get_reference(const bool& fq, std::shared_ptr<ngraph::Function> get_reference(const bool& fq,
const modelType& model, const modelType& model,
const ngraph::PartialShape& input_shape, const ngraph::PartialShape& input_shape,
@ -365,6 +369,8 @@ void Decompose2DConvTestFixture::SetUp() {
std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) = std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) =
params; params;
Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
function = get_initial_function(fq, function = get_initial_function(fq,
model, model,
input_shape, input_shape,
@ -779,7 +785,7 @@ static size_t CalculateConvCount(const ConvParams& conv_params) {
size_t conv_count = 1; size_t conv_count = 1;
size_t total_factorized_conv_channel_count = size_t total_factorized_conv_channel_count =
(conv_params.input_channel_count * conv_params.filter_height * conv_params.filter_width); (conv_params.input_channel_count * conv_params.filter_height * conv_params.filter_width);
while (total_factorized_conv_channel_count / conv_count > ov::intel_gna::limitations::convFilterMaxSize || while (total_factorized_conv_channel_count / conv_count > Limitations::kConvFilterMaxSize ||
total_factorized_conv_channel_count % conv_count != 0 || conv_params.filter_channel_count % conv_count != 0) total_factorized_conv_channel_count % conv_count != 0 || conv_params.filter_channel_count % conv_count != 0)
conv_count++; conv_count++;
@ -792,7 +798,7 @@ static bool ShouldDecompose(GraphData& graph_data, const ConvParams& conv_params
// Concat (copy) layer limitation allows to split up to a certain limit // Concat (copy) layer limitation allows to split up to a certain limit
// Currently we are able to split only convolutions without pooling in horizontal dimension // Currently we are able to split only convolutions without pooling in horizontal dimension
if (graph_data.conv_count > ov::intel_gna::limitations::copyMaxGrouping || if (graph_data.conv_count > Limitations::kCopyMaxGrouping ||
((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1)) ((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1))
return false; return false;
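Both thresholds referenced above are now class constants. A compact sketch of the factorization logic these hunks exercise, assuming conv_params carries the channel and filter dimensions as in ConvParams and ignoring the pooling restriction handled separately above:

    size_t conv_count = 1;
    const size_t total_channels =
        conv_params.input_channel_count * conv_params.filter_height * conv_params.filter_width;
    // Grow conv_count until every split fits a single GNA convolution filter
    // and both channel counts divide evenly.
    while (total_channels / conv_count > Limitations::kConvFilterMaxSize ||
           total_channels % conv_count != 0 ||
           conv_params.filter_channel_count % conv_count != 0) {
        ++conv_count;
    }
    // Decomposition is rejected once the copy-layer grouping limit is exceeded.
    const bool decompose = conv_count <= Limitations::kCopyMaxGrouping;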
@ -884,18 +890,13 @@ void execute_test(modelType model,
case modelType::TranspConvBcastAddMaxPoolTransp: case modelType::TranspConvBcastAddMaxPoolTransp:
case modelType::TranspConvBcastAddActTransp: case modelType::TranspConvBcastAddActTransp:
case modelType::TranspConvBcastAddMaxPoolActTransp: case modelType::TranspConvBcastAddMaxPoolActTransp:
manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(ov::intel_gna::target::DeviceVersion::Default, manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(gnaPrecision);
gnaPrecision);
break; break;
case modelType::TranspConvTranspBcastAdd: case modelType::TranspConvTranspBcastAdd:
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>( manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(gnaPrecision);
ov::intel_gna::target::DeviceVersion::Default,
gnaPrecision);
break; break;
case modelType::TranspConvTranspBcastAddAct: case modelType::TranspConvTranspBcastAddAct:
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>( manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(gnaPrecision);
ov::intel_gna::target::DeviceVersion::Default,
gnaPrecision);
break; break;
} }
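After the refactoring the decomposition passes no longer take a DeviceVersion argument; they presumably pick the target up from the already-initialized Limitations singleton, which is why the fixtures above call Limitations::init in SetUp. A sketch of registering one of them now, where gnaPrecision is the element type used throughout these tests:

    Limitations::init(ov::intel_gna::target::DeviceVersion::Default);  // before constructing the passes
    ngraph::pass::Manager manager;
    manager.register_pass<ov::pass::InitNodeInfo>();
    manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(gnaPrecision);
    manager.run_passes(function);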

View File

@ -13,6 +13,8 @@
#include "transformations/decompose_mvn.hpp" #include "transformations/decompose_mvn.hpp"
#include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp" #include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp"
using namespace ov::intel_gna::limitations;
namespace decomposeMVN { namespace decomposeMVN {
typedef std::tuple<ngraph::Shape, // Input shape typedef std::tuple<ngraph::Shape, // Input shape
@ -264,7 +266,7 @@ std::shared_ptr<ngraph::Function> getReferenceFunction(const ngraph::Shape& inpu
mvn_data.normalize_variance = normalize_variance; mvn_data.normalize_variance = normalize_variance;
mvn_data.num_parts = 1; mvn_data.num_parts = 1;
while (mvn_data.W / mvn_data.num_parts > ov::intel_gna::limitations::convFilterMaxSize) { while (mvn_data.W / mvn_data.num_parts > Limitations::kConvFilterMaxSize) {
mvn_data.num_parts *= 2; mvn_data.num_parts *= 2;
} }
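The reference model mirrors the plugin's own splitting rule: num_parts doubles until each horizontal slice fits a single convolution filter. For illustration, if kConvFilterMaxSize were 768, a width of 2048 would end up with num_parts == 4 (2048 / 4 == 512):

    size_t num_parts = 1;
    while (mvn_data.W / num_parts > Limitations::kConvFilterMaxSize) {
        num_parts *= 2;  // keep halving the per-part width until it fits
    }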

View File

@ -11,6 +11,7 @@
#include <transformations/init_node_info.hpp> #include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp> #include <transformations/utils/utils.hpp>
#include "backend/gna_limitations.hpp"
#include "common_test_utils/ngraph_test_utils.hpp" #include "common_test_utils/ngraph_test_utils.hpp"
#include "ngraph_functions/builders.hpp" #include "ngraph_functions/builders.hpp"
#include "ops/copy.hpp" #include "ops/copy.hpp"
@ -54,10 +55,10 @@ void InsertCopyLayerTest::Validate() {
void InsertCopyLayerTest::SetUp() { void InsertCopyLayerTest::SetUp() {
std::tie(m_axis, m_inputs_num) = this->GetParam(); std::tie(m_axis, m_inputs_num) = this->GetParam();
ov::intel_gna::limitations::Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
} }
void InsertCopyLayerTest::Run() { void InsertCopyLayerTest::Run() {
SetUp();
Validate(); Validate();
} }
@ -176,6 +177,11 @@ public:
} }
}; };
void RunPasses(ngraph::pass::Manager& m, std::shared_ptr<ov::Model> func) {
ov::intel_gna::limitations::Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
m.run_passes(func);
}
// [Parameter] [Parameter] // [Parameter] [Parameter]
// \ / => | // \ / => |
// [Concat] [Copy] // [Concat] [Copy]
@ -211,7 +217,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -263,7 +269,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -324,7 +330,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMultiNFLConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -382,7 +388,7 @@ TEST(TransformationTests, InsertCopyLayerMultiConstConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -442,7 +448,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -510,7 +516,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerNFLConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -573,7 +579,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -633,7 +639,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -705,7 +711,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -776,7 +782,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -851,7 +857,7 @@ TEST(TransformationTests, InsertCopyLayerCropMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -918,7 +924,7 @@ TEST(TransformationTests, InsertCopyLayerCropNFLMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -987,7 +993,7 @@ TEST(TransformationTests, InsertCopyLayerConcatMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1060,7 +1066,7 @@ TEST(TransformationTests, InsertCopyLayerConcatNFLMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1122,7 +1128,7 @@ TEST(TransformationTests, InsertCopyLayerSplitMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1189,7 +1195,7 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1244,7 +1250,7 @@ TEST(TransformationTests, InsertCopyLayerCropConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1289,7 +1295,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>(); m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1338,7 +1344,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoSubgraphsTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>(); m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1385,7 +1391,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoResultsTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>(); m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1442,7 +1448,7 @@ TEST(TransformationTests, InsertCopyLayerNFLBranchTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>(); m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1499,7 +1505,7 @@ TEST(TransformationTests, InsertCopyLayerNFLvsFLSubgraphTestt) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>(); m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1550,7 +1556,7 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));