[GNA] Limitations refactoring (#16957)

* Limitations refactoring

* Fix CI builds/tests

* Changes after review

* Move GraphCompiler initialization to the constructor

* Resolve conflicts after rebase

* Update after review

* Resolve the problem with double initialization of Limitations
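
At its core, the refactoring replaces the loose constants and free helpers that previously lived at ov::intel_gna::limitations namespace scope with a single Limitations class: device-independent limits become static constexpr members (kBufferMaxSize, kNoOfInputsDivisor, and so on), while target-dependent state (memory alignment, the cnn2d validator, the 16-bit convolution-weights restriction) is computed once in the constructor and exposed through a thread_local singleton. The following is a minimal usage sketch pieced together from the init()/get_instance() API introduced here; the include path and the surrounding function are assumptions for illustration, not part of the diff.

#include "backend/gna_limitations.hpp"  // assumed header location

using namespace ov::intel_gna;
using namespace ov::intel_gna::limitations;

void compile_model_sketch(const target::DeviceVersion& compile_target) {
    // One-time setup per plugin/compile target (cf. the "double initialization"
    // fix mentioned in the commit list); init() recreates the thread_local instance.
    Limitations::init(compile_target);

    // All other code queries the shared instance instead of free functions.
    auto limits = Limitations::get_instance();
    size_t alignment = limits->get_memory_alignment();               // 64 bytes up to GNA3.5, 16 for GNA3.6/4.0
    bool only_16bit = limits->use_only_16bit_convolution_weights();  // true for GNA1.0 through GNA3.1
    auto validator = limits->get_cnn_validator();                    // Validator_30/Validator_35, or nullptr

    // Compile-time constants moved from namespace scope into the class.
    static_assert(Limitations::kNoOfInputsDivisor == 8, "was limitations::noOfInputsDivisor");

    (void)alignment;
    (void)only_16bit;
    (void)validator;
}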
Author: Tomasz Adamowicz, 2023-05-29 10:03:58 +02:00 (committed by GitHub)
Parent: 3300543eac
Commit: cccbf7ce7e
38 changed files with 1371 additions and 1073 deletions


@ -48,6 +48,8 @@
using ov::intel_gna::gna_convolution_layer::outputFromConv;
using ov::intel_gna::gna_convolution_layer::outputFromPooling;
using namespace ov::intel_gna::limitations;
namespace ov {
namespace intel_gna {
namespace backend {
@ -180,8 +182,8 @@ void AMIntelDNN::InitConvolutional1DComponentPrivate(intel_dnn_component_t& comp
THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << num_columns_in
<< ") is not a multiply by 8";
}
if (num_filters < limitations::convMinFiltersNum || num_filters > limitations::convMaxFiltersNum ||
num_filters % limitations::convFiltersNumDivider != 0) {
if (num_filters < Limitations::kConvMinFiltersNum || num_filters > Limitations::kConvMaxFiltersNum ||
num_filters % Limitations::kConvFiltersNumDivider != 0) {
THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << num_filters;
}
auto max_number_of_out_elements = outputFromConv(num_columns_in, num_filter_coefficients, convStride);


@ -37,267 +37,62 @@ namespace intel_gna {
using namespace target;
namespace limitations {
class SupportedElementTypes {
public:
static bool IsParameterTypeSupported(ov::element::Type type, bool is_exception_allowed = false);
static bool IsConstantTypeSupported(ov::element::Type type, bool is_exception_allowed = false);
private:
static const std::set<ov::element::Type> supported_parameter_types;
static const std::set<ov::element::Type> supported_constant_types;
};
const std::set<ov::element::Type> SupportedElementTypes::supported_parameter_types = {ov::element::u8,
ov::element::i16,
ov::element::f32};
size_t getMemoryAlignmentBytes(target::DeviceVersion target) {
static const std::unordered_map<target::DeviceVersion, size_t> mem_alignment_map{
{target::DeviceVersion::GNA1_0, 64},
{target::DeviceVersion::GNA2_0, 64},
{target::DeviceVersion::GNA3_0, 64},
{target::DeviceVersion::GNA3_1, 64},
{target::DeviceVersion::GNA3_5, 64},
{target::DeviceVersion::GNAEmbedded3_5, 64},
{target::DeviceVersion::GNA3_6, 16},
{target::DeviceVersion::GNA4_0, 16}};
return common::GetValueForKey<target::DeviceVersion, size_t>(target, mem_alignment_map);
}
bool SupportedElementTypes::is_parameter_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_parameter_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support input precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_parameter_types << "\n";
}
return false;
}
return true;
}
const std::set<ov::element::Type> SupportedElementTypes::supported_constant_types = {ov::element::i8,
ov::element::u8,
ov::element::i16,
ov::element::u16,
ov::element::i32,
ov::element::f32,
ov::element::f64};
bool SupportedElementTypes::is_constant_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_constant_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support constant precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_constant_types << "\n";
}
return false;
}
return true;
}
bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node) {
OPENVINO_ASSERT(node, "Transpose node is empty!");
const ov::Shape squeezed_shape = graph_utils::squeeze_shape(node->get_input_shape(0));
const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]);
const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]);
// GNA transpose limitations:
// - supports 2d transposes only
// - smaller dimension should be less or equal to 8
// - bigger dimension should be a multiple of limitations::noOfInputsDivisor
if (squeezed_shape.size() == 2 && min_input_dim <= 8 &&
ALIGN(max_input_dim, limitations::noOfInputsDivisor) == max_input_dim) {
return true;
}
return false;
}
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!");
size_t batch_size = conv_ie->input_value(0).get_shape()[0];
if (batch_size != 1) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() +
", type: " + conv_ie->get_type_name() + ", and batch size(" +
std::to_string(batch_size) + ") != 1 not supported";
}
return false;
}
auto check_dilation = [&](size_t filter_dilation_height, size_t filter_stride_width) -> bool {
cnn2d::RangeLimit2D dilation_limit{{convDilationHeight, convDilationHeight, "dilation height"},
{convDilationWidth, convDilationWidth, "dilation width"}};
std::string error = dilation_limit.GetErrorOrEmpty(filter_dilation_height, filter_stride_width);
return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed,
error,
conv_ie->get_friendly_name(),
conv_ie->get_type_name());
};
auto input_shape = conv_ie->input_value(0).get_shape();
auto filter_shape = conv_ie->input_value(1).get_shape();
if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) ||
(4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) {
pass::helper::ConvData conv_data;
pass::helper::GetConvData(conv_ie, conv_data);
if (gna_convolution_layer::isMappableFrom2DTo1D(conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_stride_height,
conv_data.filter_stride_width)) {
return check_dilation(conv_data.filter_dilation_height, conv_data.filter_dilation_width);
}
const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target);
if (cnn2dValidatorPtr) {
return cnn2dValidatorPtr->ValidateCnn2D(conv_ie->get_friendly_name(),
conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_channel_count,
conv_data.filter_stride_height,
conv_data.filter_stride_width,
conv_data.filter_dilation_height,
conv_data.filter_dilation_width,
OvGnaTypeIntFromBytes(gna_precision.size()),
is_exception_allowed);
}
}
return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]);
}
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
const DeviceVersion& effective_compile_target,
bool is_exception_allowed) {
OPENVINO_ASSERT(max_pool, "MaxPool node is empty!");
auto kernels = max_pool->get_kernel();
if (2 == kernels.size() && kernels[0] > 1 && kernels[1] > 1) {
const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target);
if (cnn2dValidatorPtr) {
auto strides = max_pool->get_strides();
return cnn2dValidatorPtr->ValidatePooling2D(max_pool->get_friendly_name(),
kernels[0],
kernels[1],
strides[0],
strides[1],
is_exception_allowed);
}
}
return true;
}
bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected, bool is_exception_allowed) {
OPENVINO_ASSERT(fully_connected, "FullyConnected node is empty!");
size_t output_batch_size = fully_connected->get_output_shape(0)[0];
if (output_batch_size > 8) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + fully_connected->get_friendly_name() +
", type: " + fully_connected->get_type_name() + ", and batch size(" +
std::to_string(output_batch_size) + ") not supported";
}
return false;
}
return true;
}
bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed) {
OPENVINO_ASSERT(node, "Split node is empty!");
bool is_aligned = true;
for (size_t i = 0; i < node->get_output_size(); i++) {
is_aligned &= ov::intel_gna::graph_utils::is_aligned_split(node, i);
}
return is_aligned;
}
bool is_op_supported(const std::shared_ptr<ov::Node>& node,
const DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
if (ov::op::util::is_parameter(node)) {
return SupportedElementTypes::is_parameter_type_supported(node->get_element_type(), is_exception_allowed);
} else if (ov::op::util::is_constant(node)) {
return SupportedElementTypes::is_constant_type_supported(node->get_element_type(), is_exception_allowed);
} else if (auto conv_ie = std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
return is_conv_supported(conv_ie, effective_compile_target, gna_precision, is_exception_allowed);
} else if (auto fully_connected = std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node)) {
return is_fc_supported(fully_connected, is_exception_allowed);
} else if (ov::intel_gna::graph_utils::is_pooling(node)) {
return is_pooling_supported(std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node),
effective_compile_target,
is_exception_allowed);
} else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) ||
ov::intel_gna::graph_utils::is_eltwise_add(node) || ov::intel_gna::graph_utils::is_eltwise_mul(node) ||
ov::intel_gna::graph_utils::is_crop_affined(node) ||
ov::intel_gna::graph_utils::is_activation(node.get()) ||
ov::intel_gna::graph_utils::is_gna_precision_agnostic(
node) || // check concat/split are aligned when transformations will be moved to ngraph
(std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::PowerIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr)) {
return true;
} else if (ov::intel_gna::graph_utils::is_gna_precision_agnostic(node)) {
if ((std::dynamic_pointer_cast<ngraph::opset9::Split>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::VariadicSplit>(node) != nullptr)) {
return is_split_supported(node, is_exception_allowed);
}
// TODO check concat are aligned when transformation will be moved to ngraph
return true;
}
return false;
}
void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision) {
std::stringstream error;
// Walk through the transformed model
for (auto& op : model->get_ops()) {
if (!is_op_supported(op, effective_compile_target, gna_precision, true)) {
error << "The plugin does not support layer " << op->get_friendly_name() << " (type " << op->get_type_name()
<< ")!" << std::endl;
}
}
if (!error.str().empty()) {
THROW_GNA_EXCEPTION << error.str();
}
}
namespace cnn2d {
bool IsEqualToLimit::isValid(const uint32_t val) const {
bool IsEqualToLimit::IsValid(const uint32_t val) const {
return val == compared_value;
}
std::string IsEqualToLimit::GetErrorOrEmpty(const uint32_t val) const {
std::ostringstream out;
if (!isValid(val)) {
if (!IsValid(val)) {
out << "Unsupported " << what << ", actual value: " << val << ", but should be equal to " << compared_value
<< "\n";
}
return out.str();
}
bool IsLessThanLimit ::isValid(const uint32_t val) const {
bool IsLessThanLimit::IsValid(const uint32_t val) const {
return val < compared_value;
}
std::string IsLessThanLimit::GetErrorOrEmpty(const uint32_t val) const {
std::ostringstream out;
if (!isValid(val)) {
if (!IsValid(val)) {
out << "Unsupported " << what << ", actual value: " << val << ", but should be less than " << compared_value
<< "\n";
}
return out.str();
}
bool RangeLimit::isValid(const uint32_t val) const {
bool RangeLimit::IsValid(const uint32_t val) const {
return val >= min && val <= max;
}
std::string RangeLimit::GetErrorOrEmpty(const uint32_t val) const {
std::ostringstream out;
if (!isValid(val)) {
if (!IsValid(val)) {
out << "Unsupported " << what << ", actual value: " << val << ", valid range [" << min << ", " << max << "]\n";
}
return out.str();
}
bool RangeLimit2D::isValid(const uint32_t h, const uint32_t w) const {
return hLimit.isValid(h) && wLimit.isValid(w);
bool RangeLimit2D::IsValid(const uint32_t h, const uint32_t w) const {
return hLimit.IsValid(h) && wLimit.IsValid(w);
}
std::string RangeLimit2D::GetErrorOrEmpty(const uint32_t h, const uint32_t w) const {
@ -308,8 +103,8 @@ RangeMultipleLimit::RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn)
: RangeLimit(rlIn),
multiplier(multiplierIn) {}
bool RangeMultipleLimit::isValid(const uint32_t val) const {
return RangeLimit::isValid(val) && (val % multiplier == 0);
bool RangeMultipleLimit::IsValid(const uint32_t val) const {
return RangeLimit::IsValid(val) && (val % multiplier == 0);
}
std::string RangeMultipleLimit::GetErrorOrEmpty(const uint32_t val) const {
@ -321,7 +116,7 @@ std::string RangeMultipleLimit::GetErrorOrEmpty(const uint32_t val) const {
return e + out.str();
}
bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) const {
bool VectorOrSquareLimit::IsValid(const uint32_t h, const uint32_t w) const {
if (w == 1 && h >= 1 && h <= maxVectorHeight)
return true;
if (h == 1 && w >= 1 && w <= maxVectorWidth)
@ -333,7 +128,7 @@ bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) const {
std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
std::ostringstream out;
if (!isValid(h, w)) {
if (!IsValid(h, w)) {
out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w << ", only vertical vector up to "
<< maxVectorHeight << "x1, horizontal up to 1x" << maxVectorWidth << " or square up to " << maxSquare << "x"
<< maxSquare << " are valid\n";
@ -341,7 +136,7 @@ std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_
return out.str();
}
bool RectLimit::isValid(const uint32_t h, const uint32_t w) const {
bool RectLimit::IsValid(const uint32_t h, const uint32_t w) const {
if (h >= 1 && h <= maxVectorHeight && w >= 1 && w <= maxVectorWidth)
return true;
return false;
@ -349,7 +144,7 @@ bool RectLimit::isValid(const uint32_t h, const uint32_t w) const {
std::string RectLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
std::ostringstream out;
if (!isValid(h, w)) {
if (!IsValid(h, w)) {
out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w << ", only rectangular shapes up to "
<< maxVectorHeight << "x" << maxVectorWidth << " are valid\n";
}
@ -365,8 +160,8 @@ RectLimit RectLimitByChannels::GetByChannels(const uint32_t channels) const {
return RectLimit{0, 0};
}
bool RectLimitByChannels::isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
return GetByChannels(channels).isValid(h, w);
bool RectLimitByChannels::IsValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
return GetByChannels(channels).IsValid(h, w);
}
std::string RectLimitByChannels::GetErrorOrEmpty(const uint32_t h,
@ -380,11 +175,11 @@ RectLimitByChannels RectLimitByChannelsAndPrecision::GetByPrecision(const OvGnaT
return precision == OvGnaTypeInt8 ? limit_for_int8 : limit_for_int16;
}
bool RectLimitByChannelsAndPrecision::isValid(const uint32_t h,
bool RectLimitByChannelsAndPrecision::IsValid(const uint32_t h,
const uint32_t w,
const OvGnaType precision,
const uint32_t channels) const {
return GetByPrecision(precision).isValid(h, w, channels);
return GetByPrecision(precision).IsValid(h, w, channels);
}
std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h,
@ -395,6 +190,66 @@ std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h,
return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what);
}
class Validator_30 : public AbstractValidator {
static const RangeLimit2D kInputHWLimit;
static const RangeMultipleLimit kInputChannelsNumberLimit;
static const RangeMultipleLimit kKernelNumberLimit;
static const RectLimitByChannelsAndPrecision kKernelLimit;
static const RangeLimit2D kDilationLimit;
static const VectorOrSquareLimit kPoolingWindowLimit;
public:
Validator_30() = default;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
const RangeLimit2D Validator_30::kInputHWLimit{{16, 384, "input height"}, {16, 240, "input width"}};
const RangeMultipleLimit Validator_30::kInputChannelsNumberLimit{{8, 384, "number of input channels"}, 8};
@ -404,8 +259,9 @@ const RectLimitByChannelsAndPrecision Validator_30::kKernelLimit{
{{{48, {7, 7}}, {64, {7, 5}}, {80, {7, 4}}, {120, {7, 3}}, {384, {7, 1}}}},
};
const RangeLimit2D Validator_30::kDilationLimit{{convDilationHeight, convDilationHeight, "dilation height"},
{convDilationWidth, convDilationWidth, "dilation width"}};
const RangeLimit2D Validator_30::kDilationLimit{
{Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"},
{Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}};
bool Validator_30::ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
@ -493,6 +349,95 @@ bool Validator_30::ShouldUseOnlyConv2DGnaIface() const {
return false;
}
class Validator_35 : public AbstractValidator {
struct CnnLimits {
const RangeLimit2D kInputHWLimit;
const RangeLimit kInputChannelsNumberLimit1B;
const RangeLimit kInputChannelsNumberLimit2B;
const RangeLimit kKernelNumberLimit;
const RangeLimit2D kKerneHWlLimit1B;
const RangeLimit2D kKerneHWlLimit2B;
const RangeLimit2D kStrideHWLimit1B;
const RangeLimit2D kStrideHWLimit2B;
const RangeLimit2D kDilationLimit;
const RangeLimit2D kPoolingWindowHWLimit;
const RangeLimit2D kPoolingStrideHWLimit;
};
static const CnnLimits kCnn2DLimits;
static const CnnLimits kCnn1DLimits;
std::string ValidateCnn(const CnnLimits& limits,
const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision) const;
std::string ValidatePooling(const CnnLimits& limits,
const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW) const;
public:
Validator_35() = default;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
const Validator_35::CnnLimits Validator_35::kCnn2DLimits{
{{1, 65535, "input height"}, {1, 65535, "input width"}}, // kInputHWLimit
{1, 2048, "number of input channels"}, // kInputChannelsNumberLimit1B
@ -502,8 +447,8 @@ const Validator_35::CnnLimits Validator_35::kCnn2DLimits{
{{1, 255, "kernel height"}, {1, 256, "kernel width"}}, // kKerneHWlLimit2B
{{1, 255, "convolution stride height"}, {1, 256, "convolution stride width"}}, // kStrideHWLimit1B
{{1, 255, "convolution stride height"}, {1, 256, "convolution stride width"}}, // kStrideHWLimit2B
{{convDilationHeight, convDilationHeight, "dilation height"}, // kDilationLimit
{convDilationWidth, convDilationWidth, "dilation width"}},
{{Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"}, // kDilationLimit
{Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}},
{{1, 255, "pooling window height"}, {1, 255, "pooling window width"}}, // kPoolingWindowHWLimit
{{1, 255, "pooling stride height"}, {1, 255, "pooling stride width"}} // kPoolingStrideHWLimit
};
@ -517,8 +462,8 @@ const Validator_35::CnnLimits Validator_35::kCnn1DLimits{
{{1, 1, "kernel height"}, {1, 2048, "kernel width"}}, // kKerneHWlLimit2B
{{1, 1, "convolution stride height"}, {1, 4096, "convolution stride width"}}, // kStrideHWLimit1B
{{1, 1, "convolution stride height"}, {1, 2048, "convolution stride width"}}, // kStrideHWLimit2B
{{convDilationHeight, convDilationHeight, "dilation height"}, // kDilationLimit
{convDilationWidth, convDilationWidth, "dilation width"}},
{{Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"}, // kDilationLimit
{Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}},
{{1, 1, "pooling window height"}, {1, 255, "pooling window width"}}, // kPoolingWindowHWLimit
{{1, 1, "pooling stride height"}, {1, 255, "pooling stride width"}} // kPoolingStrideHWLimit
};
@ -672,16 +617,16 @@ bool Validator_35::ShouldUseOnlyConv2DGnaIface() const {
return true;
}
std::unique_ptr<AbstractValidator> AbstractValidator::Create(const DeviceVersion& target) {
std::shared_ptr<AbstractValidator> AbstractValidator::Create(const DeviceVersion& target) {
switch (target) {
case DeviceVersion::GNA3_0:
case DeviceVersion::GNA3_1:
return tools::make_unique<Validator_30>();
return std::make_shared<Validator_30>();
case DeviceVersion::GNA3_5:
case DeviceVersion::GNAEmbedded3_5:
case DeviceVersion::GNA3_6:
case DeviceVersion::GNA4_0:
return tools::make_unique<Validator_35>();
return std::make_shared<Validator_35>();
default:
return nullptr;
}
@ -705,15 +650,280 @@ bool AbstractValidator::ValidationSuccesful(const bool throwOnError,
return error.empty();
}
bool UseOnly16BitConvolutionWeights(const DeviceVersion& compile_target) {
return compile_target == DeviceVersion::GNA1_0 || compile_target == DeviceVersion::GNA2_0 ||
compile_target == DeviceVersion::GNA3_0 || compile_target == DeviceVersion::GNA3_1;
}
} // namespace cnn2d
constexpr uint32_t Limitations::kBufferMaxSize;
constexpr uint32_t Limitations::kConvMinFiltersNum;
constexpr uint32_t Limitations::kConvMaxFiltersNum;
constexpr uint32_t Limitations::kConvDilationHeight;
constexpr uint32_t Limitations::kConvDilationWidth;
constexpr uint32_t Limitations::kConvFiltersNumDivider;
constexpr uint32_t Limitations::kConvFilterSizeDivider;
constexpr uint32_t Limitations::kConvFilterMaxSize;
constexpr uint32_t Limitations::kConvEachKernelByteAlignment;
constexpr uint32_t Limitations::kInputByteAlignment;
constexpr uint32_t Limitations::kNoOfInputsDivisor;
constexpr uint32_t Limitations::kNoOfInputsLowPrecDivisor;
constexpr uint32_t Limitations::kAffineMaxBatchSize;
constexpr uint32_t Limitations::kMaxPoolMaxWindowSize;
constexpr uint32_t Limitations::kCopyMaxGrouping;
constexpr uint32_t Limitations::kTransposeMaxSize;
constexpr uint32_t Limitations::kMaxLayersCountGNA1_0;
constexpr uint32_t Limitations::kMaxLayersCountGNA2_0;
constexpr uint32_t Limitations::kMaxLayersCountGNA3_X;
constexpr uint32_t Limitations::kBytesPerSplitElement;
constexpr uint32_t Limitations::kBytesPerCropElement;
constexpr uint32_t Limitations::kMemoryPageSize;
thread_local std::shared_ptr<Limitations> Limitations::k_instance{nullptr};
Limitations::Limitations(const DeviceVersion& target) {
m_use_only_16bit_conv_weights = (target == DeviceVersion::GNA1_0 || target == DeviceVersion::GNA2_0 ||
target == DeviceVersion::GNA3_0 || target == DeviceVersion::GNA3_1);
m_mem_alignment = get_memory_alignment_bytes(target);
m_cnn_validator = cnn2d::AbstractValidator::Create(target);
}
void Limitations::init(const DeviceVersion& compile_target) {
k_instance = std::shared_ptr<Limitations>(new Limitations(compile_target));
}
bool Limitations::is_transpose_2d(const std::vector<size_t>& shape) {
return std::count_if(std::begin(shape), std::end(shape), [](size_t dim) {
return dim != 1;
}) == 2;
}
bool Limitations::is_transpose_supported(const std::vector<size_t>& shape) {
if (!is_transpose_2d(shape))
return false;
auto shape_no_1 = shape;
shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end());
size_t min, max;
std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]);
return min <= 8 && max % 8 == 0 && max >= 8 && max <= kTransposeMaxSize;
}
size_t Limitations::get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input) {
auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims()));
return total_size / kBufferMaxSize + 1;
}
size_t Limitations::get_memory_alignment_bytes(const DeviceVersion& target) const {
static const std::unordered_map<DeviceVersion, size_t> mem_alignment_map{{DeviceVersion::GNA1_0, 64},
{DeviceVersion::GNA2_0, 64},
{DeviceVersion::GNA3_0, 64},
{DeviceVersion::GNA3_1, 64},
{DeviceVersion::GNA3_5, 64},
{DeviceVersion::GNAEmbedded3_5, 64},
{DeviceVersion::GNA3_6, 16},
{DeviceVersion::GNA4_0, 16}};
return common::GetValueForKey<DeviceVersion, size_t>(target, mem_alignment_map);
}
bool SupportedElementTypes::IsParameterTypeSupported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_parameter_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support input precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_parameter_types << "\n";
}
return false;
}
return true;
}
const std::set<ov::element::Type> SupportedElementTypes::supported_constant_types = {ov::element::i8,
ov::element::u8,
ov::element::i16,
ov::element::u16,
ov::element::i32,
ov::element::f32,
ov::element::f64};
bool SupportedElementTypes::IsConstantTypeSupported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_constant_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support constant precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_constant_types << "\n";
}
return false;
}
return true;
}
bool Limitations::is_transpose_supported(const std::shared_ptr<const ov::Node>& node) {
OPENVINO_ASSERT(node, "Transpose node is empty!");
const ov::Shape squeezed_shape = graph_utils::squeeze_shape(node->get_input_shape(0));
const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]);
const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]);
// GNA transpose limitations:
// - supports 2d transposes only
// - smaller dimension should be less or equal to 8
// - bigger dimension should be a multiple of Limitations::kNoOfInputsDivisor
if (squeezed_shape.size() == 2 && min_input_dim <= 8 && ALIGN(max_input_dim, kNoOfInputsDivisor) == max_input_dim) {
return true;
}
return false;
}
bool Limitations::is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!");
size_t batch_size = conv_ie->input_value(0).get_shape()[0];
if (batch_size != 1) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() +
", type: " + conv_ie->get_type_name() + ", and batch size(" +
std::to_string(batch_size) + ") != 1 not supported";
}
return false;
}
auto check_dilation = [&](size_t filter_dilation_height, size_t filter_stride_width) -> bool {
cnn2d::RangeLimit2D dilation_limit{{kConvDilationHeight, kConvDilationHeight, "dilation height"},
{kConvDilationWidth, kConvDilationWidth, "dilation width"}};
std::string error = dilation_limit.GetErrorOrEmpty(filter_dilation_height, filter_stride_width);
return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed,
error,
conv_ie->get_friendly_name(),
conv_ie->get_type_name());
};
auto input_shape = conv_ie->input_value(0).get_shape();
auto filter_shape = conv_ie->input_value(1).get_shape();
if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) ||
(4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) {
pass::helper::ConvData conv_data;
pass::helper::GetConvData(conv_ie, conv_data);
if (gna_convolution_layer::isMappableFrom2DTo1D(conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_stride_height,
conv_data.filter_stride_width)) {
return check_dilation(conv_data.filter_dilation_height, conv_data.filter_dilation_width);
}
if (m_cnn_validator) {
return m_cnn_validator->ValidateCnn2D(conv_ie->get_friendly_name(),
conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_channel_count,
conv_data.filter_stride_height,
conv_data.filter_stride_width,
conv_data.filter_dilation_height,
conv_data.filter_dilation_width,
OvGnaTypeIntFromBytes(gna_precision.size()),
is_exception_allowed);
}
}
return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]);
}
bool Limitations::is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
bool is_exception_allowed) {
OPENVINO_ASSERT(max_pool, "MaxPool node is empty!");
auto kernels = max_pool->get_kernel();
if (2 == kernels.size() && kernels[0] > 1 && kernels[1] > 1) {
if (m_cnn_validator) {
auto strides = max_pool->get_strides();
return m_cnn_validator->ValidatePooling2D(max_pool->get_friendly_name(),
kernels[0],
kernels[1],
strides[0],
strides[1],
is_exception_allowed);
}
}
return true;
}
bool Limitations::is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
bool is_exception_allowed) {
OPENVINO_ASSERT(fully_connected, "FullyConnected node is empty!");
size_t output_batch_size = fully_connected->get_output_shape(0)[0];
if (output_batch_size > 8) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + fully_connected->get_friendly_name() +
", type: " + fully_connected->get_type_name() + ", and batch size(" +
std::to_string(output_batch_size) + ") not supported";
}
return false;
}
return true;
}
bool Limitations::is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed) {
OPENVINO_ASSERT(node, "Split node is empty!");
bool is_aligned = true;
for (size_t i = 0; i < node->get_output_size(); i++) {
is_aligned &= ov::intel_gna::graph_utils::is_aligned_split(node, i);
}
return is_aligned;
}
bool Limitations::is_op_supported(const std::shared_ptr<ov::Node>& node,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
if (ov::op::util::is_parameter(node)) {
return SupportedElementTypes::IsParameterTypeSupported(node->get_element_type(), is_exception_allowed);
} else if (ov::op::util::is_constant(node)) {
return SupportedElementTypes::IsConstantTypeSupported(node->get_element_type(), is_exception_allowed);
} else if (auto conv_ie = std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
return is_conv_supported(conv_ie, gna_precision, is_exception_allowed);
} else if (auto fully_connected = std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node)) {
return is_fc_supported(fully_connected, is_exception_allowed);
} else if (ov::intel_gna::graph_utils::is_pooling(node)) {
return is_pooling_supported(std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node), is_exception_allowed);
} else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) ||
ov::intel_gna::graph_utils::is_eltwise_add(node) || ov::intel_gna::graph_utils::is_eltwise_mul(node) ||
ov::intel_gna::graph_utils::is_crop_affined(node) ||
ov::intel_gna::graph_utils::is_activation(node.get()) ||
ov::intel_gna::graph_utils::is_gna_precision_agnostic(
node) || // check concat/split are aligned when transformations will be moved to ngraph
(std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::PowerIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr)) {
return true;
} else if (ov::intel_gna::graph_utils::is_gna_precision_agnostic(node)) {
if ((std::dynamic_pointer_cast<ngraph::opset9::Split>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::VariadicSplit>(node) != nullptr)) {
return is_split_supported(node, is_exception_allowed);
}
// TODO check concat are aligned when transformation will be moved to ngraph
return true;
}
return false;
}
void Limitations::check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const InferenceEngine::Precision gna_precision) {
std::stringstream error;
// Walk through the transformed model
for (auto& op : model->get_ops()) {
if (!is_op_supported(op, gna_precision, true)) {
error << "The plugin does not support layer " << op->get_friendly_name() << " (type " << op->get_type_name()
<< ")!" << std::endl;
}
}
if (!error.str().empty()) {
THROW_GNA_EXCEPTION << error.str();
}
}
bool Limitations::use_only_16bit_convolution_weights() const {
return m_use_only_16bit_conv_weights;
}
IE_SUPPRESS_DEPRECATED_START
static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) {
bool Limitations::validate_concat_axis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) {
LayerInfo info(layer);
auto concat_layer = info.as<InferenceEngine::ConcatLayer*>();
IE_ASSERT(concat_layer);
@ -747,7 +957,8 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st
// when all transformations are migrated to ngraph
bool is_not_trivial_concat = false;
// Concatentaion of consts and input parameters only is supported, even if first dimentsion of input parameter >
// Concatentaion of consts and input parameters only is supported, even if first dimentsion of input
// parameter >
// 1
bool concat_all_const_or_inputs = false;
@ -846,7 +1057,7 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st
return true;
}
bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concat_layer) {
bool Limitations::validate_conv_concat_axis(const InferenceEngine::ConcatLayer* concat_layer) {
IE_ASSERT(concat_layer);
auto dims_size = concat_layer->insData[0].lock()->getDims().size();
@ -898,7 +1109,7 @@ bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concat_layer) {
return true;
}
bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage) {
bool Limitations::are_layers_supported(InferenceEngine::CNNNetwork& network, std::string& errMessage) {
IE_SUPPRESS_DEPRECATED_START
InferenceEngine::InputsDataMap inputs = network.getInputsInfo();
std::unordered_set<InferenceEngine::CNNLayer*> allLayers;
@ -909,7 +1120,7 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
// If there are no inputs start search from an output
startLayer = getCreatorLayer(outputs.begin()->second).lock();
} else {
SupportedElementTypes::is_parameter_type_supported(
SupportedElementTypes::IsParameterTypeSupported(
InferenceEngine::details::convertPrecision(inputs.begin()->second->getPrecision()),
true);
@ -944,7 +1155,7 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
check_result = false;
}
} else if (info.isConcat()) {
if (!ValidateConcatAxis(layer, errMessage)) {
if (!validate_concat_axis(layer, errMessage)) {
THROW_GNA_EXCEPTION << errMessage;
}
}


@ -9,6 +9,8 @@
#include <cstdint>
#include <ie_algorithm.hpp>
#include <memory>
#include <thread>
#include "common/gna_target.hpp"
#include "common/misc_utils.hpp"
@ -23,158 +25,19 @@ namespace ov {
namespace intel_gna {
namespace limitations {
constexpr uint32_t bufferMaxSize = 65528;
constexpr uint32_t convMinFiltersNum = 4;
constexpr uint32_t convMaxFiltersNum = 65532;
constexpr uint32_t convDilationHeight = 1;
constexpr uint32_t convDilationWidth = 1;
constexpr uint32_t convFiltersNumDivider = 4;
constexpr uint32_t convFilterSizeDivider = 8;
constexpr uint32_t convFilterMaxSize = 768;
constexpr uint32_t convEachKernelByteAlignment = 16;
constexpr uint32_t inputByteAlignment = 64;
constexpr uint32_t noOfInputsDivisor = 8;
constexpr uint32_t noOfInputsLowPrecDivisor = 16;
constexpr uint32_t affineMaxBatchSize = 8;
constexpr uint32_t maxPoolMaxWindowSize = 6;
constexpr uint32_t copyMaxGrouping = 8;
constexpr uint32_t transposeMaxSize = 65528;
// TODO In the future there should be created class/struct representing all limitations for specific device versions.
constexpr uint32_t kMaxLayersCountGNA1_0 = 1023;
constexpr uint32_t kMaxLayersCountGNA2_0 = 4096;
constexpr uint32_t kMaxLayersCountGNA3_X = 8192;
// Currently split layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr uint32_t bytesPerSplitElement = 2;
// Currently crop layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr uint32_t bytesPerCropElement = 2;
constexpr uint32_t kMemoryPageSize = 4096;
inline bool isCropAffinedOffset(size_t numberOfElements) {
const auto cropOffset = numberOfElements * bytesPerCropElement;
return (ALIGN64(cropOffset) != cropOffset);
}
inline bool IsTranspose2d(const std::vector<size_t>& shape) {
return std::count_if(std::begin(shape), std::end(shape), [](size_t dim) {
return dim != 1;
}) == 2;
}
inline bool IsTransposeSupported(const std::vector<size_t>& shape) {
if (!IsTranspose2d(shape))
return false;
auto shape_no_1 = shape;
shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end());
size_t min, max;
std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]);
return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize;
}
size_t getMemoryAlignmentBytes(target::DeviceVersion target);
class SupportedElementTypes {
public:
static bool is_parameter_type_supported(ov::element::Type type, bool is_exception_allowed = false);
static bool is_constant_type_supported(ov::element::Type type, bool is_exception_allowed = false);
private:
static const std::set<ov::element::Type> supported_parameter_types;
static const std::set<ov::element::Type> supported_constant_types;
};
/**
* @brief Validates if transpose is supported by GNA
* @param node transpose
* @return true if supported
*/
bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node);
/**
* @brief Validates if legacy convolution is supported by GNA
* @param conv_ie convolution
* @param effective_compile_target GNA compile targets
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const target::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Validates if max pooling is supported by GNA
* @param max_pool max pooling
* @param effective_compile_target GNA compile targets
* @param supported_types list of supported types
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if precision is found in supported
*/
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
const target::DeviceVersion& effective_compile_target,
bool is_exception_allowed = false);
/**
* @brief Validates if fully connected is supported by GNA
* @param fully_connected fully connected
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
bool is_exception_allowed = false);
/**
* @brief Validates if split is supported by GNA
* @param node split
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed = false);
/**
* @brief Validates if operation is supported by GNA
* @param node operation
* @param gna_compile_target GNA compile target
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_op_supported(const std::shared_ptr<ov::Node>& node,
const target::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Check if all operations are supported by GNA
* @param model ngraph model
* @param gna_compile_target GNA compile target
* @param gna_precision GNA inference precision
*/
void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const target::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision);
namespace cnn2d {
struct IsEqualToLimit {
uint32_t compared_value;
std::string what;
bool isValid(const uint32_t val) const;
bool IsValid(const uint32_t val) const;
std::string GetErrorOrEmpty(const uint32_t val) const;
};
struct IsLessThanLimit {
uint32_t compared_value;
std::string what;
bool isValid(const uint32_t val) const;
bool IsValid(const uint32_t val) const;
std::string GetErrorOrEmpty(const uint32_t val) const;
};
@ -182,28 +45,28 @@ struct RangeLimit {
uint32_t min;
uint32_t max;
std::string what;
bool isValid(const uint32_t val) const;
bool IsValid(const uint32_t val) const;
std::string GetErrorOrEmpty(const uint32_t val) const;
};
struct RangeLimit2D {
RangeLimit hLimit;
RangeLimit wLimit;
bool isValid(const uint32_t h, const uint32_t w) const;
bool IsValid(const uint32_t h, const uint32_t w) const;
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w) const;
};
struct RangeMultipleLimit : public RangeLimit {
uint32_t multiplier;
RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn);
bool isValid(const uint32_t val) const;
bool IsValid(const uint32_t val) const;
std::string GetErrorOrEmpty(const uint32_t val) const;
};
struct RectLimit {
uint32_t maxVectorHeight;
uint32_t maxVectorWidth;
bool isValid(const uint32_t h, const uint32_t w) const;
bool IsValid(const uint32_t h, const uint32_t w) const;
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
};
@ -211,14 +74,14 @@ struct VectorOrSquareLimit {
uint32_t maxSquare;
uint32_t maxVectorHeight;
uint32_t maxVectorWidth;
bool isValid(const uint32_t h, const uint32_t w) const;
bool IsValid(const uint32_t h, const uint32_t w) const;
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
};
struct RectLimitByChannels {
std::vector<std::pair<uint32_t, RectLimit>> limitPerChannel;
RectLimit GetByChannels(const uint32_t channels) const;
bool isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
bool IsValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, const uint32_t channels, std::string what) const;
};
@ -226,7 +89,7 @@ struct RectLimitByChannelsAndPrecision {
RectLimitByChannels limit_for_int8;
RectLimitByChannels limit_for_int16;
RectLimitByChannels GetByPrecision(const OvGnaType precision) const;
bool isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
bool IsValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
std::string GetErrorOrEmpty(const uint32_t h,
const uint32_t w,
const OvGnaType precision,
@ -291,168 +154,28 @@ public:
OvGnaType inPrecision,
bool exception = true) const = 0;
static std::unique_ptr<AbstractValidator> Create(const target::DeviceVersion& target);
static std::shared_ptr<AbstractValidator> Create(const target::DeviceVersion& target);
};
class Validator_30 : public AbstractValidator {
static const RangeLimit2D kInputHWLimit;
static const RangeMultipleLimit kInputChannelsNumberLimit;
static const RangeMultipleLimit kKernelNumberLimit;
static const RectLimitByChannelsAndPrecision kKernelLimit;
static const RangeLimit2D kDilationLimit;
static const VectorOrSquareLimit kPoolingWindowLimit;
public:
Validator_30() = default;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
class Validator_35 : public AbstractValidator {
struct CnnLimits {
const RangeLimit2D kInputHWLimit;
const RangeLimit kInputChannelsNumberLimit1B;
const RangeLimit kInputChannelsNumberLimit2B;
const RangeLimit kKernelNumberLimit;
const RangeLimit2D kKerneHWlLimit1B;
const RangeLimit2D kKerneHWlLimit2B;
const RangeLimit2D kStrideHWLimit1B;
const RangeLimit2D kStrideHWLimit2B;
const RangeLimit2D kDilationLimit;
const RangeLimit2D kPoolingWindowHWLimit;
const RangeLimit2D kPoolingStrideHWLimit;
};
static const CnnLimits kCnn2DLimits;
static const CnnLimits kCnn1DLimits;
std::string ValidateCnn(const CnnLimits& limits,
const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision) const;
std::string ValidatePooling(const CnnLimits& limits,
const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW) const;
public:
Validator_35() = default;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
bool UseOnly16BitConvolutionWeights(const target::DeviceVersion& compile_target);
} // namespace cnn2d
bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
class Limitations {
public:
/**
* @brief Create instance of the Limitations class. Due to Limitations being a singleton, multiple instances of the
* plugin with different compilation targets cannot exist at the same time
* @param compile_target GNA compile target
*/
static void init(const target::DeviceVersion& compile_target);
inline size_t GetMinBatchToFitInBuffer(InferenceEngine::DataPtr input) {
auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims()));
return total_size / bufferMaxSize + 1;
}
/**
* @brief Returns the instance of Limitations object. Requires an Init call before the first usage
*/
static inline std::shared_ptr<Limitations> get_instance();
static bool is_transpose_2d(const std::vector<size_t>& shape);
static bool is_transpose_supported(const std::vector<size_t>& shape);
static size_t get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input);
/**
* @brief Validates if concat layer axis is supported by GNA
@ -460,9 +183,140 @@ inline size_t GetMinBatchToFitInBuffer(InferenceEngine::DataPtr input) {
* @return true if concat layer axis is valid
*/
IE_SUPPRESS_DEPRECATED_START
bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concatLayer);
static bool validate_conv_concat_axis(const InferenceEngine::ConcatLayer* concatLayer);
static bool are_layers_supported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
IE_SUPPRESS_DEPRECATED_END
/**
* @brief Validates if fully connected is supported by GNA
* @param fully_connected fully connected
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
static bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
bool is_exception_allowed = false);
/**
* @brief Validates if split is supported by GNA
* @param node split
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
static bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed = false);
/**
* @brief Validates if transpose is supported by GNA
* @param node transpose
* @return true if supported
*/
static bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node);
/**
* @brief Validates if legacy convolution is supported by GNA
* @param conv_ie convolution
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Validates if max pooling is supported by GNA
* @param max_pool max pooling
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if precision is found in supported
*/
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
bool is_exception_allowed = false);
/**
* @brief Validates if operation is supported by GNA
* @param node operation
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_op_supported(const std::shared_ptr<ov::Node>& node,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Check if all operations are supported by GNA
* @param model ngraph model
* @param gna_precision GNA inference precision
*/
void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const InferenceEngine::Precision gna_precision);
bool use_only_16bit_convolution_weights() const;
bool is_crop_affined_offset(size_t numberOfElements) const;
size_t get_memory_alignment() const;
std::shared_ptr<cnn2d::AbstractValidator> get_cnn_validator() const;
constexpr static uint32_t kBufferMaxSize = 65528;
constexpr static uint32_t kConvMinFiltersNum = 4;
constexpr static uint32_t kConvMaxFiltersNum = 65532;
constexpr static uint32_t kConvDilationHeight = 1;
constexpr static uint32_t kConvDilationWidth = 1;
constexpr static uint32_t kConvFiltersNumDivider = 4;
constexpr static uint32_t kConvFilterSizeDivider = 8;
constexpr static uint32_t kConvFilterMaxSize = 768;
constexpr static uint32_t kConvEachKernelByteAlignment = 16;
constexpr static uint32_t kInputByteAlignment = 64;
constexpr static uint32_t kNoOfInputsDivisor = 8;
constexpr static uint32_t kNoOfInputsLowPrecDivisor = 16;
constexpr static uint32_t kAffineMaxBatchSize = 8;
constexpr static uint32_t kMaxPoolMaxWindowSize = 6;
constexpr static uint32_t kCopyMaxGrouping = 8;
constexpr static uint32_t kTransposeMaxSize = 65528;
constexpr static uint32_t kMaxLayersCountGNA1_0 = 1023;
constexpr static uint32_t kMaxLayersCountGNA2_0 = 4096;
constexpr static uint32_t kMaxLayersCountGNA3_X = 8192;
// Currently split layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr static uint32_t kBytesPerSplitElement = 2;
// Currently crop layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr static uint32_t kBytesPerCropElement = 2;
constexpr static uint32_t kMemoryPageSize = 4096;
private:
Limitations(const target::DeviceVersion& target);
Limitations(const Limitations&) = delete;
Limitations& operator=(const Limitations&) = delete;
size_t get_memory_alignment_bytes(const target::DeviceVersion& target) const;
IE_SUPPRESS_DEPRECATED_START
static bool validate_concat_axis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage);
IE_SUPPRESS_DEPRECATED_END
bool m_use_only_16bit_conv_weights = false;
size_t m_mem_alignment = 0;
std::shared_ptr<cnn2d::AbstractValidator> m_cnn_validator;
static thread_local std::shared_ptr<Limitations> k_instance;
};
inline std::shared_ptr<Limitations> Limitations::get_instance() {
if (!k_instance) {
THROW_GNA_EXCEPTION << "Limitations instance is not initialized.\n";
}
return k_instance;
}
inline bool Limitations::is_crop_affined_offset(size_t numberOfElements) const {
const auto cropOffset = numberOfElements * kBytesPerCropElement;
return (ALIGN64(cropOffset) != cropOffset);
}
inline size_t Limitations::get_memory_alignment() const {
return m_mem_alignment;
}
inline std::shared_ptr<cnn2d::AbstractValidator> Limitations::get_cnn_validator() const {
return m_cnn_validator;
}
} // namespace limitations
} // namespace intel_gna
} // namespace ov
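
The remaining hunks apply the same mechanical migration at every call site: namespace-scope constants become Limitations class constants, and target-dependent helpers go through the singleton. A condensed before/after illustration (paraphrasing the graph_utils changes that follow, not an exact quote of the diff):

// before
offset += outputSize * limitations::bytesPerSplitElement;
return limitations::isCropAffinedOffset(crop->offset.back());

// after
offset += outputSize * limitations::Limitations::kBytesPerSplitElement;
return limitations::Limitations::get_instance()->is_crop_affined_offset(crop->offset.back());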


@ -84,7 +84,7 @@ inline bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
std::dynamic_pointer_cast<ngraph::opset8::VariadicSplit>(input_op)) {
for (size_t index = 0; index < input_op_out_index; index++) {
size_t outputSize = ngraph::shape_size(input_op->get_output_shape(index));
offset += outputSize * limitations::bytesPerSplitElement;
offset += outputSize * limitations::Limitations::kBytesPerSplitElement;
}
}
return (offset == ALIGN64(offset));
@ -93,7 +93,7 @@ inline bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
inline bool is_crop_affined(std::shared_ptr<ngraph::Node> node) {
auto crop = std::dynamic_pointer_cast<ngraph::op::CropIE>(node);
if (crop != nullptr && !crop->offset.empty()) {
return limitations::isCropAffinedOffset(crop->offset.back());
return limitations::Limitations::get_instance()->is_crop_affined_offset(crop->offset.back());
}
return false;
}
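For intuition, with kBytesPerCropElement equal to 2 the check above asks whether the crop offset in bytes already lands on a 64-byte boundary (hypothetical numbers, assuming ALIGN64 rounds up to the next multiple of 64):
// crop->offset.back() == 32 -> 64 bytes -> ALIGN64(64) == 64        -> not affined, a plain crop is enough
// crop->offset.back() == 33 -> 66 bytes -> ALIGN64(66) == 128 != 66 -> affined, an extra filter layer is needed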

View File

@ -11,6 +11,7 @@
namespace ov {
namespace intel_gna {
using namespace limitations;
namespace frontend {
template <class T>
@ -352,7 +353,7 @@ InferenceEngine::Precision GetWeightsPrecision(const LayerInfo& layer_info,
const QuantizedLayerParams& quant_layer_params,
const Config& gna_config) {
if (((layer_info.isConvolution() || layer_info.isConvolutionFilter()) &&
limitations::cnn2d::UseOnly16BitConvolutionWeights(gna_config.target->get_effective_compile_target())) ||
Limitations::get_instance()->use_only_16bit_convolution_weights()) ||
layer_info.isScaleShift()) {
return InferenceEngine::Precision::I16;
}

View File

@ -38,8 +38,7 @@ GNADeviceHelper::GNADeviceHelper(std::shared_ptr<Target> targetIn, bool isPerfor
: target(targetIn),
nGnaDeviceIndex{selectGnaDevice()},
useDeviceEmbeddedExport(deviceEmbedded),
isPerformanceMeasuring(isPerformanceMeasuring),
m_mem_alignment(limitations::getMemoryAlignmentBytes(targetIn->get_effective_compile_target())) {
isPerformanceMeasuring(isPerformanceMeasuring) {
per_request_diagnostics = log::get_log_level() >= ov::log::Level::TRACE;
per_model_diagnostics = log::get_log_level() >= ov::log::Level::DEBUG;
open();
@ -573,7 +572,7 @@ uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
switch (target->get_effective_execution_target()) {
case DeviceVersion::GNA1_0:
case DeviceVersion::GNA2_0:
return kMaxLayersCountGNA2_0;
return Limitations::kMaxLayersCountGNA2_0;
case DeviceVersion::GNA3_0:
case DeviceVersion::GNA3_1:
case DeviceVersion::GNA3_5:
@ -581,7 +580,7 @@ uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
case DeviceVersion::GNA3_6:
case DeviceVersion::GNA4_0:
default:
return kMaxLayersCountGNA3_X;
return Limitations::kMaxLayersCountGNA3_X;
}
}
} // namespace intel_gna

View File

@ -67,7 +67,6 @@ class GNADeviceHelper : public GNADevice {
uint64_t debugLogIndexRequestWait = 0;
static constexpr const char* kDumpExt = ".bin";
static constexpr const char* kDumpDelimiter = ".";
const size_t m_mem_alignment;
public:
explicit GNADeviceHelper(std::shared_ptr<target::Target> target = std::make_shared<target::Target>(),
@ -128,10 +127,6 @@ public:
return allAllocations;
}
size_t getMemAlignment() const {
return m_mem_alignment;
}
/**
* @see GNADevice::createModel()
*/

View File

@ -49,6 +49,7 @@ namespace intel_gna {
using namespace frontend;
using namespace common;
using namespace memory;
using namespace limitations;
static bool CheckIFLastComponentIsPrecededByConv2D(const backend::DnnComponents::storage_type& components,
bool verify_with_pooling = true) {
@ -81,20 +82,22 @@ static uint32_t count_conv2D_input_width_for_expected_output_width(uint32_t expe
return (expected_ouput_width - 1) * stride_width - 2 * padding_width + kernel_width;
};
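A quick arithmetic check of that helper (hypothetical values; outputFromConv is assumed to follow the usual (input - kernel) / stride + 1 rule for valid padding):
// expected output width = 8, stride_width = 2, padding_width = 0, kernel_width = 3
// required input width  = (8 - 1) * 2 - 2 * 0 + 3 = 17
// cross-check: (17 - 3) / 2 + 1 = 8 output columns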
GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config) : gna_config(gna_config) {}
GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config,
std::shared_ptr<backend::AMIntelDNN> dnn_ptr,
std::shared_ptr<GnaInputs> inputs_ptr,
std::shared_ptr<limitations::cnn2d::AbstractValidator> cnn2d_validator_ptr,
std::shared_ptr<gna_memory_type> gna_mem_ptr)
: gna_config(gna_config) {
dnn = std::move(dnn_ptr);
inputs_ptr_ = std::move(inputs_ptr);
m_cnn2d_validator = std::move(cnn2d_validator_ptr);
gnamem = std::move(gna_mem_ptr);
}
void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr) {
this->gnamem = std::move(gnaMemPtr);
}
void GNAGraphCompiler::setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr) {
this->dnn = std::move(dnnPtr);
}
void GNAGraphCompiler::setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr) {
this->inputs_ptr_ = std::move(inputsPtr);
}
intel_dnn_component_t* GNAGraphCompiler::find_first_unused_input(InferenceEngine::CNNLayerPtr current) {
if (current->insData.empty())
return nullptr;
@ -228,13 +231,8 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
split_connection.emplace(id, layerInfoItem);
}
void GNAGraphCompiler::SetValidatorTarget(const target::DeviceVersion& target) {
auto temp = limitations::cnn2d::AbstractValidator::Create(target);
cnn2dValidator.reset(temp.release());
}
bool GNAGraphCompiler::ShouldUseOnlyConv2DGnaIface() const {
return cnn2dValidator && cnn2dValidator->ShouldUseOnlyConv2DGnaIface();
return m_cnn2d_validator && m_cnn2d_validator->ShouldUseOnlyConv2DGnaIface();
}
void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
@ -249,8 +247,8 @@ void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
const uint32_t dilH,
const uint32_t dilW,
OvGnaType inPrecision) const {
if (cnn2dValidator) {
if (cnn2dValidator->ValidateCnn1D(name,
if (m_cnn2d_validator) {
if (m_cnn2d_validator->ValidateCnn1D(name,
inHeight,
inWidth,
inChannels,
@ -265,7 +263,7 @@ void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
false)) {
return;
}
cnn2dValidator
m_cnn2d_validator
->ValidateCnn2D(name, inHeight, inWidth, inChannels, kH, kW, kN, strideH, strideW, dilH, dilW, inPrecision);
} else {
THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << name;
@ -277,8 +275,8 @@ void GNAGraphCompiler::ValidatePooling2D(const std::string& name,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW) const {
if (cnn2dValidator) {
cnn2dValidator->ValidatePooling2D(name, windowH, windowW, strideH, strideW);
if (m_cnn2d_validator) {
m_cnn2d_validator->ValidatePooling2D(name, windowH, windowW, strideH, strideW);
} else {
THROW_GNA_EXCEPTION << "No Pooling2D validator found for layer " << name;
}
@ -684,11 +682,11 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
// TODO add function
// printConvolution2DLayer(convolution);
if (!cnn2dValidator) {
if (!m_cnn2d_validator) {
THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << convolution.name;
}
cnn2dValidator->ValidateInputPadding(convolution.name,
m_cnn2d_validator->ValidateInputPadding(convolution.name,
convolution._padding_y,
convolution._pads_end_y,
convolution._padding_x,
@ -713,7 +711,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
// have to pad the input to let the last kernel meet its corresponding input
const auto num_inputs = in_batch * effective_input_width * in_height * in_channels;
uint32_t num_input_padding = ALIGN(num_inputs, limitations::noOfInputsDivisor) - num_inputs;
uint32_t num_input_padding = ALIGN(num_inputs, Limitations::kNoOfInputsDivisor) - num_inputs;
const uint32_t filter_n = convolution._out_depth;
@ -813,7 +811,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
// Kernel is extended only for 1D case which allows to add 0-s at the end of the kernel.
const auto kernel_pad =
ALIGN(effective_single_kernel_size, limitations::convEachKernelByteAlignment) - effective_single_kernel_size;
ALIGN(effective_single_kernel_size, Limitations::kConvEachKernelByteAlignment) - effective_single_kernel_size;
for (uint32_t k = 0; k < convolution._out_depth; k++) {
uint8_t* ptr_filt_current = convolution._weights->cbuffer().as<uint8_t*>() + k * single_kernel_size;
auto transposed_part = transposeMatrix(ptr_filt_current, convolution_precision, in_channels, kernelHW);
@ -846,14 +844,15 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto input = layer->insData[0].lock();
auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(input, limitations::GetMinBatchToFitInBuffer(input), 8)->getDims();
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor
: limitations::noOfInputsDivisor;
auto reshaped_dims = Get2DReshapedData(input, Limitations::get_min_batch_to_fit_in_buffer(input), 8)->getDims();
const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in;
uint32_t num_columns_out = num_columns_in;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
size_t num_data_bytes_out = num_columns_out * (num_rows_out + num_padding) * outputs->getPrecision().size();
size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * input->getPrecision().size();
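The ALIGN-then-pad idiom above repeats in most primitives below; a small worked sketch with hypothetical row counts (assuming ALIGN(x, n) rounds x up to the nearest multiple of n):
// num_rows_in = 20, divisor = kNoOfInputsDivisor (8)         -> ALIGN(20, 8)  = 24 -> num_padding = 4
// num_rows_in = 20, divisor = kNoOfInputsLowPrecDivisor (16) -> ALIGN(20, 16) = 32 -> num_padding = 12
// the padded count (num_rows_in + num_padding) is what gets allocated and handed to GNA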
@ -1097,7 +1096,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto inputs = layer->insData.begin()->lock();
auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims();
auto reshaped_dims = Get2DReshapedData(inputs, Limitations::get_min_batch_to_fit_in_buffer(inputs), 8)->getDims();
uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in;
@ -1159,7 +1158,7 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
}
// Concat axis validation
if (!limitations::ValidateConvConcatAxis(concatLayer)) {
if (!Limitations::validate_conv_concat_axis(concatLayer)) {
std::ostringstream in_dims_oss;
auto in_dims = concatLayer->insData[0].lock()->getDims();
std::copy(in_dims.begin(), in_dims.end(), std::ostream_iterator<size_t>(in_dims_oss, ","));
@ -1270,10 +1269,10 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
uint32_t num_columns_in = 1;
uint32_t num_rows_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()));
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision
? limitations::noOfInputsLowPrecDivisor
: limitations::noOfInputsDivisor;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
void* ptr_inputs = nullptr;
void* ptr_outputs = nullptr;
@ -1303,7 +1302,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * 4;
size_t num_data_bytes_in =
num_columns_in * ALIGN(num_rows_in, noOfInputsDivisor) * inputs->getPrecision().size();
num_columns_in * ALIGN(num_rows_in, num_of_inputs_divisor) * inputs->getPrecision().size();
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
connectOutput(layer, ptr_outputs, num_data_bytes_out);
@ -1326,8 +1325,9 @@ void GNAGraphCompiler::SlicePrimitive(InferenceEngine::CNNLayerPtr layer) {
void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
auto& eltwise = dynamic_cast<EltwiseLayer&>(*layer.get());
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor
: limitations::noOfInputsDivisor;
const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
// for eltwise sum/sub in 16-bit precision one input should be 4 bytes and one 2 bytes - detecting that below
// the names of variables are left for clarity although not always reflecting the real precision/size
@ -1409,7 +1409,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
uint32_t num_columns_in = 1;
uint32_t num_rows_out = num_rows_in;
uint32_t num_columns_out = num_columns_in;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
void* ptr_inputs = nullptr;
void* ptr_outputs = nullptr;
@ -1518,7 +1518,6 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto outputs = *layer->outData.begin();
auto input1_precision = quantized ? Precision(Precision::I16) : input_1->getPrecision();
auto input2_precision = quantized ? Precision(Precision::I16) : input_2->getPrecision();
uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;
auto in_dims = input_1->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
@ -1527,7 +1526,7 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
const auto out_dims = outputs->getDims();
const auto out_dims_size = ngraph::shape_size(out_dims);
uint32_t num_rows_out = InferenceEngine::GetDimFromBack(out_dims, 1);
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
uint32_t num_padding = ALIGN(num_rows_in, Limitations::kNoOfInputsDivisor) - num_rows_in;
// Gemm gets two inputs
void* ptr_input_1 = nullptr; // the first input
@ -1578,7 +1577,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
auto outputs = *layer->outData.begin();
const auto out_dims = outputs->getDims();
Precision inputPrecision;
uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;
uint32_t num_of_inputs_divisor = Limitations::kNoOfInputsDivisor;
if (!quantized) {
inputPrecision = inputs->getPrecision();
@ -1586,11 +1585,11 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
inputPrecision = Precision(Precision::I16);
} else {
inputPrecision = Precision(Precision::I8);
noOfInputsDivisor = limitations::noOfInputsLowPrecDivisor;
num_of_inputs_divisor = Limitations::kNoOfInputsLowPrecDivisor;
}
auto input_data = HasTo2DReshapeData(layer)
? Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8)
? Get2DReshapedData(inputs, Limitations::get_min_batch_to_fit_in_buffer(inputs), 8)
: inputs;
auto in_dims = input_data->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
@ -1598,7 +1597,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
uint32_t num_columns_in = batch_size;
uint32_t num_rows_out = isDiag ? num_rows_in : InferenceEngine::GetDimFromBack(out_dims, 1);
uint32_t num_columns_out = num_columns_in;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
uint32_t num_padding_out = isDiag ? num_padding : 0;
void* ptr_inputs = nullptr;
@ -1803,12 +1802,13 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
auto outputs = *layer->outData.begin();
auto inputs = layer->insData.begin()->lock();
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor
: limitations::noOfInputsDivisor;
const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
uint32_t num_columns_in = GetDimFromBack(inputs->getDims(), 2);
uint32_t num_rows_out = GetDimFromBack(outputs->getDims(), 1);
uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
auto numRowsPadded = filterLayer->GetParamAsInt("num_rows_padded");
// number of rows we handled by inserting copy layer
@ -1877,7 +1877,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
false);
size_t num_data_bytes_out = num_rows_out * num_columns_in * outputs->getPrecision().size();
size_t num_data_bytes_in = num_columns_in * ALIGN(num_rows_in, noOfInputsDivisor) * inputs->getPrecision().size();
size_t num_data_bytes_in =
num_columns_in * ALIGN(num_rows_in, num_of_inputs_divisor) * inputs->getPrecision().size();
connectInput(layer, ptr_inputs, num_data_bytes_in, num_rows_copied * inputs->getPrecision().size(), 0);
connectOutput(layer, ptr_outputs, num_data_bytes_out);
@ -1940,8 +1941,8 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
auto outputs = *layer->outData.begin();
auto inputs = layer->insData.begin()->lock();
const auto noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor
: limitations::noOfInputsDivisor;
const auto num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision ? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
const uint32_t orginalInputSize =
InferenceEngine::details::product(std::next(inputs->getDims().begin()), inputs->getDims().end());
const uint32_t orginalOutputSize =
@ -1956,7 +1957,7 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
const auto filterWidth = filterLayer->_kernel_x;
const auto minOutputsPerFilter = ALIGN(orginalOutputSize, numberOfFilters) / numberOfFilters;
const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth;
const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, noOfInputsDivisor);
const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, num_of_inputs_divisor);
auto numOutputs =
gna_convolution_layer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride);
@ -2278,14 +2279,15 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
<< std::min(squeezedInputOrder[0], squeezedInputOrder[1]) << " > 8)";
}
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor
: limitations::noOfInputsDivisor;
const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
// now this can be run on GNA
if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case
if (ALIGN(squeezedInputOrder[1], noOfInputsDivisor) != squeezedInputOrder[1]) {
if (ALIGN(squeezedInputOrder[1], num_of_inputs_divisor) != squeezedInputOrder[1]) {
THROW_GNA_LAYER_EXCEPTION(layer)
<< "unsupported permute (row size not a multiple of " << noOfInputsDivisor << ")";
<< "unsupported permute (row size not a multiple of " << num_of_inputs_divisor << ")";
} else {
auto& currentComponent = dnnComponents.addComponent(layer->name, "interleave");
dnn->InitInterleaveComponent(currentComponent,
@ -2299,9 +2301,9 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
}
} else { // deinterleave case
if (ALIGN(squeezedInputOrder[0], noOfInputsDivisor) != squeezedInputOrder[0]) {
if (ALIGN(squeezedInputOrder[0], num_of_inputs_divisor) != squeezedInputOrder[0]) {
THROW_GNA_LAYER_EXCEPTION(layer)
<< "[GNA plugin] unsupported permute (column size not a multiple of " << noOfInputsDivisor << ")";
<< "[GNA plugin] unsupported permute (column size not a multiple of " << num_of_inputs_divisor << ")";
} else {
auto& currentComponent = dnnComponents.addComponent(layer->name, "deinterleave");
dnn->InitDeinterleaveComponent(currentComponent,
@ -2317,7 +2319,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
size_t num_data_bytes_out =
ALIGN(InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())),
noOfInputsDivisor) *
num_of_inputs_divisor) *
outputs->getPrecision().size();
size_t num_data_bytes_in = squeezedInputOrder[0] * squeezedInputOrder[1] * inputs->getPrecision().size();
@ -2610,12 +2612,12 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// if the requested allocation is less than realTensorInput - we need to extend the request
auto minInput = inputs_ptr_->at(prevLayer->name).get_required_size();
if (num_data_bytes_in < minInput) {
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision
? limitations::noOfInputsLowPrecDivisor
: limitations::noOfInputsDivisor;
const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
log::debug() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to"
<< ALIGN(minInput, noOfInputsDivisor);
num_data_bytes_in = ALIGN(minInput, noOfInputsDivisor);
<< ALIGN(minInput, num_of_inputs_divisor);
num_data_bytes_in = ALIGN(minInput, num_of_inputs_divisor);
}
// real allocation pointer will be kept in ptr not in ptr_inputs_global

View File

@ -54,20 +54,22 @@ private:
uint32_t num_rows,
uint32_t num_cols);
std::unique_ptr<const limitations::cnn2d::AbstractValidator> cnn2dValidator;
bool ShouldUseOnlyConv2DGnaIface() const;
std::shared_ptr<limitations::cnn2d::AbstractValidator> m_cnn2d_validator;
public:
backend::DnnComponents dnnComponents;
MemoryConnection memory_connection;
ConcatConnection concat_connection;
ConstConnections const_connections;
GNAGraphCompiler(const Config& gna_config);
GNAGraphCompiler(const Config& gna_config,
std::shared_ptr<backend::AMIntelDNN> dnn_ptr,
std::shared_ptr<GnaInputs> inputs_ptr,
std::shared_ptr<limitations::cnn2d::AbstractValidator> cnn2d_validator,
std::shared_ptr<gna_memory_type> gna_mem_ptr);
void setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr);
void setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr);
void setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr);
void fillMemoryConnections(std::unordered_map<std::string, std::vector<InferenceEngine::CNNLayerPtr>>& memoryPairs);
@ -93,8 +95,6 @@ public:
const uint32_t strideH,
const uint32_t strideW) const;
void SetValidatorTarget(const target::DeviceVersion& target);
/**
* Connects either memory output, or generic output to a layer
* @param layer - layer pointer

View File

@ -29,6 +29,7 @@
#include <vector>
#include "backend/am_intel_dnn.hpp"
#include "backend/gna_limitations.hpp"
#include "common/gna_target.hpp"
#include "frontend/model_quantizer.hpp"
#include "frontend/scale_factor_calc.hpp"
@ -55,6 +56,7 @@
#include "scale_factor_helper.hpp"
#include "serial/gna_model_serial.hpp"
using namespace ov::intel_gna::limitations;
using namespace ov::intel_gna::graph_utils;
inline uint32_t ToByteSize(const Gna2DataType type) {
@ -357,17 +359,23 @@ void GNAPlugin::PrePostProcess(InferenceEngine::Blob::Ptr input_blob,
}
}
GNAPlugin::GNAPlugin() : graphCompiler(config) {
GNAPlugin::GNAPlugin() {
Init();
UpdateFieldsFromConfig();
InitGNADevice();
Limitations::init(config.target->get_effective_compile_target());
InitGNAMemory();
InitGraphCompiler();
}
GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) : graphCompiler(config) {
GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) {
Init();
SetConfig(configMap);
log::set_log_level(gnaFlags->log_level);
InitGNADevice();
Limitations::init(config.target->get_effective_compile_target());
InitGNAMemory();
InitGraphCompiler();
}
void GNAPlugin::Init() {
@ -376,27 +384,36 @@ void GNAPlugin::Init() {
gnaFlags = std::make_shared<GNAFlags>(GNAFlags());
inputs_ptr_ = std::make_shared<GnaInputs>(GnaInputs());
outputs_ = GnaOutputs();
graphCompiler.setDNNPtr(dnn);
graphCompiler.setInputsPtr(inputs_ptr_);
requestWorkerPool_ = std::make_shared<request::WorkerPoolImpl>();
}
void GNAPlugin::InitGNADevice() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNADevice");
if (gnaFlags->sw_fp32) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
} else {
if (!gnaFlags->sw_fp32) {
gnadevice = std::make_shared<GNADeviceHelper>(config.target,
gnaFlags->performance_counting,
!config.embedded_export_path.empty());
gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice),
gnadevice->getMemAlignment(),
limitations::kMemoryPageSize);
}
graphCompiler.setGNAMemoryPtr(gnamem);
}
void GNAPlugin::InitGNAMemory() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNAMemory");
if (gnaFlags->sw_fp32) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
} else {
gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice),
Limitations::get_instance()->get_memory_alignment(),
Limitations::kMemoryPageSize);
}
}
void GNAPlugin::InitGraphCompiler() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGraphCompiler");
m_graph_compiler = std::make_shared<GNAGraphCompiler>(
GNAGraphCompiler(config, dnn, inputs_ptr_, Limitations::get_instance()->get_cnn_validator(), gnamem));
}
void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network) {
@ -428,8 +445,7 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network
GNAFakeQuantizeLayer fqLayer(next_layer);
auto inputRange = fqLayer.getInputRange();
auto outputRange = fqLayer.getOutputRange();
if (inputRange.second.size() != 1 || inputRange.second.size() != 1 || outputRange.second.size() != 1 ||
outputRange.second.size() != 1) {
if (inputRange.second.size() != 1 || outputRange.second.size() != 1) {
THROW_GNA_LAYER_EXCEPTION(next_layer)
<< "unsupported, per-channel quantization for input layer : " << input.second->name();
}
@ -552,12 +568,12 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN
};
// probing gna_primitives
auto irLayerAvatar = std::find_if(graphCompiler.dnnComponents.components.begin(),
graphCompiler.dnnComponents.components.end(),
auto irLayerAvatar = std::find_if(m_graph_compiler->dnnComponents.components.begin(),
m_graph_compiler->dnnComponents.components.end(),
[&layer](const backend::DnnComponents::storage_type::value_type& value) {
return value.name == layer->name;
});
if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) {
if (irLayerAvatar != m_graph_compiler->dnnComponents.components.end()) {
initOutput(irLayerAvatar->dnnComponent.orientation_out,
irLayerAvatar->dnnComponent.num_bytes_per_output,
irLayerAvatar->dnnComponent.num_rows_out,
@ -567,8 +583,8 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN
// probing concatInfo
if (LayerInfo(layer).isConcat()) {
auto concatConnection = graphCompiler.concat_connection.find(layer->name);
if (concatConnection != graphCompiler.concat_connection.end()) {
auto concatConnection = m_graph_compiler->concat_connection.find(layer->name);
if (concatConnection != m_graph_compiler->concat_connection.end()) {
auto precision = layer->outData.front()->getPrecision().size();
initOutput(kDnnInterleavedOrientation,
precision,
@ -581,8 +597,8 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN
// probing a constant info, for constant trivial networks support
if (LayerInfo(layer).isConst()) {
auto const_blob = layer->blobs["custom"];
auto constConnection = graphCompiler.const_connections.find(layer->name);
if (constConnection != graphCompiler.const_connections.end()) {
auto constConnection = m_graph_compiler->const_connections.find(layer->name);
if (constConnection != m_graph_compiler->const_connections.end()) {
initOutput(kDnnInterleavedOrientation,
layer->outData.front()->getPrecision().size(),
const_blob->size(),
@ -696,16 +712,13 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
_network_name = _network.getName();
std::shared_ptr<InferenceEngine::details::CNNNetworkImpl> convertedNetwork;
const auto effectiveCompileTarget = config.target->get_effective_compile_target();
graphCompiler.SetValidatorTarget(effectiveCompileTarget);
auto transformer = TransformationsPipeline(config);
if (_network.getFunction()) {
CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network);
auto model = clonedNetwork.getFunction();
transformer.apply(model, &m_input_output_subgraphs);
limitations::check_all_ops_supported(model, effectiveCompileTarget, config.gnaPrecision);
Limitations::get_instance()->check_all_ops_supported(model, config.gnaPrecision);
convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(model, clonedNetwork);
}
IE_SUPPRESS_DEPRECATED_START
@ -717,7 +730,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// Check the network
std::string error;
if (!limitations::AreLayersSupported(network, error)) {
if (!Limitations::are_layers_supported(network, error)) {
THROW_GNA_EXCEPTION << error.c_str();
}
@ -805,17 +818,17 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
memoryPairs[id][generic->GetParamAsInt("index")] = layer;
continue;
} else if (layerInfo.isConcat()) {
graphCompiler.fillConcatConnections(layer);
m_graph_compiler->fillConcatConnections(layer);
} else if (layerInfo.isSplit() || layerInfo.isSlice()) {
graphCompiler.fillSplitConnections(layer);
m_graph_compiler->fillSplitConnections(layer);
}
sortedNoMem.push_back(layer);
}
// fill in extra storage with memory layers
graphCompiler.fillMemoryConnections(memoryPairs);
m_graph_compiler->fillMemoryConnections(memoryPairs);
if (!graphCompiler.memory_connection.empty() && gnaFlags->num_requests != 1) {
if (!m_graph_compiler->memory_connection.empty() && gnaFlags->num_requests != 1) {
gnaFlags->num_requests = 1;
}
@ -837,17 +850,17 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// Creating Layer primitives
for (auto& layer : sortedNoMem) {
graphCompiler.CreateLayerPrimitive(layer);
m_graph_compiler->CreateLayerPrimitive(layer);
}
for (auto& inputLayer : inputLayers) {
auto layerInfo = LayerInfo(inputLayer);
if (layerInfo.isInput() && 0 == inputs_ptr_->at(inputLayer->name).get_allocated_size()) {
graphCompiler.connectOutput(inputLayer, &inputs_ptr_->at(inputLayer->name).ptrs.front(), 0);
m_graph_compiler->connectOutput(inputLayer, &inputs_ptr_->at(inputLayer->name).ptrs.front(), 0);
}
}
if (graphCompiler.dnnComponents.components.empty()) {
if (m_graph_compiler->dnnComponents.components.empty()) {
log::warning() << "No GNA primitives created based on topology. This might indicate trivial topology\n";
trivialTopology = true;
}
@ -861,7 +874,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// Memory layers are not dnnComponents hence we need to make switch with identity layer
if (outLayer->type == "Memory") {
// traverse memory connection to find corresponding output_memory
for (auto&& memConnection : graphCompiler.memory_connection) {
for (auto&& memConnection : m_graph_compiler->memory_connection) {
if (memConnection.second.getInput()->name == outLayer->name) {
// if connection is found, replace memory input layer with memory output layer
outLayer = memConnection.second.getOutput();
@ -909,11 +922,11 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
dnn->Init(gnamem.get(), gnaFlags->sw_fp32 ? kDnnFloat : kDnnInt, 1);
// TODO: this copy is unneeded; in fact, we can directly create gna structs from list
auto execOrder = graphCompiler.dnnComponents.getExecutionOrder();
auto execOrder = m_graph_compiler->dnnComponents.getExecutionOrder();
dnn->component.insert(dnn->component.begin(), execOrder.begin(), execOrder.end());
// in fp32 mode last PWL cannot be computed without that
if (!graphCompiler.dnnComponents.components.empty()) {
if (!m_graph_compiler->dnnComponents.components.empty()) {
dnn->InitActiveList(NULL);
}
@ -965,7 +978,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
for (auto& inputLayer : inputLayers) {
if (LayerInfo(inputLayer).isInput()) {
ov::intel_gna::helpers::updateModelInputOrientationWithoutConvolution(*inputLayer,
graphCompiler.dnnComponents,
m_graph_compiler->dnnComponents,
*inputs_ptr_);
}
}
@ -976,7 +989,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
if (outLayer && LayerInfo(outLayer).isOutput()) {
ov::intel_gna::helpers::updateModelOutputOrientation(outPort.first,
outLayer->name,
graphCompiler.dnnComponents,
m_graph_compiler->dnnComponents,
outputs_);
}
}
@ -1101,7 +1114,7 @@ void GNAPlugin::DumpXNNToFile() const {
uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap& inputs, InferenceEngine::BlobMap& result) {
auto freeWorker = requestWorkerPool_->findFreeModelWorker();
if (freeWorker == nullptr) {
if (!graphCompiler.memory_connection.empty()) {
if (!m_graph_compiler->memory_connection.empty()) {
Wait(requestWorkerPool_->firstWorker().representingIndex());
freeWorker = requestWorkerPool_->findFreeModelWorker();
if (freeWorker == nullptr) {
@ -1412,7 +1425,7 @@ RequestStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
}
void GNAPlugin::Reset() {
graphCompiler.Reset();
m_graph_compiler->Reset();
}
bool GNAPlugin::Infer(const InferenceEngine::Blob& input, InferenceEngine::Blob& output) {
@ -1479,9 +1492,9 @@ Blob::Ptr GNAPlugin::GetInputBlob(const std::string& name, InferenceEngine::Prec
}
std::vector<InferenceEngine::IVariableStateInternal::Ptr> GNAPlugin::QueryState() {
if (memoryStates.size() != graphCompiler.memory_connection.size()) {
if (memoryStates.size() != m_graph_compiler->memory_connection.size()) {
memoryStates.clear();
for (auto& connection : graphCompiler.memory_connection) {
for (auto& connection : m_graph_compiler->memory_connection) {
auto state =
std::make_shared<memory::GNAVariableState>(connection.first,
std::make_shared<GNAMemoryLayer>(connection.second));
@ -1575,7 +1588,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
GNAMemoryLayer memoryLayer(nullptr, nullptr, gnaFlags->sw_fp32 ? 4 : 2);
std::string name;
std::tie(memoryLayer.gna_ptr, memoryLayer.reserved_size, name, memoryLayer.scale_factor) = memory;
graphCompiler.memory_connection.emplace_back(make_pair(name, memoryLayer));
m_graph_compiler->memory_connection.emplace_back(make_pair(name, memoryLayer));
}
// TODO update documentation to allow exporting tlv with importing cep only for sue creek
@ -1607,7 +1620,7 @@ void GNAPlugin::Export(std::ostream& outStream) {
.SetInputRotation(transpose_inputs_info)
.SetOutputRotation(transpose_outputs_info);
for (auto&& memoryConnection : graphCompiler.memory_connection) {
for (auto&& memoryConnection : m_graph_compiler->memory_connection) {
auto state =
std::make_shared<memory::GNAVariableState>(memoryConnection.first,
std::make_shared<GNAMemoryLayer>(memoryConnection.second));
@ -1691,7 +1704,6 @@ InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork(
Config qn_config(config);
qn_config.UpdateFromMap(config_map);
const auto effectiveCompileTarget = qn_config.target->get_effective_compile_target();
auto model = network.getFunction();
if (model) {
auto supported = GetSupportedNodes(
@ -1700,7 +1712,8 @@ InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork(
TransformationsPipeline(qn_config).apply(model);
},
[&](const std::shared_ptr<ngraph::Node>& op) {
return limitations::is_op_supported(op, effectiveCompileTarget, qn_config.gnaPrecision);
const auto res = Limitations::get_instance()->is_op_supported(op, qn_config.gnaPrecision);
return res;
});
for (auto&& op_name : supported) {
res.supportedLayersMap.emplace(op_name, GetName());

View File

@ -47,8 +47,7 @@ protected:
std::shared_ptr<gna_memory_type> gnamem;
std::shared_ptr<GnaInputs> inputs_ptr_;
GnaOutputs outputs_;
GNAGraphCompiler graphCompiler;
std::shared_ptr<GNAGraphCompiler> m_graph_compiler;
uint32_t activeLayerIndex = 0xffffffff;
// TODO: transpose_inputs_info and transpose_outputs_info should be moved to GNAModelSerial class when ngraph
@ -189,6 +188,8 @@ protected:
void Init();
void InitGNADevice();
void InitGNAMemory();
void InitGraphCompiler();
void DumpXNNToFile() const;
/**

View File

@ -83,11 +83,9 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
manager.register_pass<ov::pass::LSTMCellDecomposition>();
manager.register_pass<ov::intel_gna::pass::ConvertDWSCToScaleShifts>();
manager.register_pass<ov::intel_gna::pass::ConvertPaddedToValidConv>();
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(effective_compile_target,
config.gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(effective_compile_target,
config.gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(effective_compile_target, config.gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(config.gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(config.gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(config.gnaPrecision);
if (!has_convolution) {
manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithFqToPointWiseConvolution>();
manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithBiasToPointWiseConvolution>();

View File

@ -385,7 +385,7 @@ public:
auto cropLayer = dynamic_cast<InferenceEngine::CropLayer*>(layer);
if (cropLayer != nullptr && !cropLayer->offset.empty()) {
const auto crop_params = GetCropParams(cropLayer);
return limitations::isCropAffinedOffset(crop_params.start_offset);
return limitations::Limitations::get_instance()->is_crop_affined_offset(crop_params.start_offset);
}
return false;
}

View File

@ -50,7 +50,7 @@ public:
// @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size
inline std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize,
uint32_t maxSplitSize,
uint32_t alignment = limitations::inputByteAlignment) {
uint32_t alignment = limitations::Limitations::kInputByteAlignment) {
std::vector<uint32_t> splitSizes;
uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment);
uint32_t usedSize = 0;
@ -73,7 +73,7 @@ inline std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
IE_ASSERT(firstValuableDim != std::end(dims));
auto splittedElementsSize = *firstValuableDim;
auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim);
auto alignment = limitations::inputByteAlignment;
auto alignment = limitations::Limitations::kInputByteAlignment;
// Split output size should be a multiple of 64 to avoid align filters insertion,
// but we need to check if our input size to split exceeds 64; if not we can always
@ -85,8 +85,9 @@ inline std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
return {splittedDimIx, splitSizes};
}
}
splitSizes = GetAlignedSplitSizes(splittedElementsSize,
limitations::bufferMaxSize * splittedElementsSize / totalElementsSize,
splitSizes =
GetAlignedSplitSizes(splittedElementsSize,
limitations::Limitations::kBufferMaxSize * splittedElementsSize / totalElementsSize,
alignment);
return {splittedDimIx, splitSizes};
}
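A hedged numeric sketch of GetAlignedSplitSizes (assuming the loop after the shown lines greedily emits chunks of maxAlignedSplitSize until usedSize reaches totalSize):
// GetAlignedSplitSizes(150, 100, 64)
//   maxAlignedSplitSize = max(100 - 100 % 64, 64) = 64
//   resulting splitSizes = {64, 64, 22}   // sums to 150; every part except the last is 64-aligned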

View File

@ -15,11 +15,14 @@
#include <string>
#include <vector>
#include "backend/gna_limitations.hpp"
#include "gna2-model-api.h"
#include "gna2_model_helper.hpp"
#include "gna_device.hpp"
#include "log.hpp"
using namespace ov::intel_gna::limitations;
namespace ov {
namespace intel_gna {
namespace dump {
@ -486,8 +489,9 @@ void DumpGna2Model(const Gna2Model& gnaModel,
}
dumpFile << "\tOperand " << j << " (" << GetOperandName(operation.Type, j) << ")"
<< " type: " << GetOperandType(operand.Type) << " shape: " << GetSimpleString(operand.Shape)
<< " tag: " << foundName << " offset: " << offset
<< " size: " << Gna2RoundUpTo64(GetGnaShapeSize(operand.Shape, GetTypeByteSize(operand.Type)))
<< " tag: " << foundName << " offset: " << offset << " size: "
<< Gna2RoundUp(GetGnaShapeSize(operand.Shape, GetTypeByteSize(operand.Type)),
Limitations::get_instance()->get_memory_alignment())
<< " data: " << operand.Data << " baseAlloc: " << foundPtr << " layout: ";
DumpCharArray(dumpFile, operand.Layout, GNA2_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS);

View File

@ -50,6 +50,7 @@ using namespace InferenceEngine::details;
using namespace ov::intel_gna::frontend;
using namespace ov::intel_gna::common;
using namespace ov::intel_gna::pre_post_processing;
using namespace ov::intel_gna::limitations;
namespace ov {
namespace intel_gna {
@ -149,10 +150,11 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
return LayerInfo(ptr).isNonValuesChangable();
});
IE_ASSERT(inputLayer != nullptr);
size_t weightsSize =
LayerInfo(prevLayer).has32BOutput()
size_t weightsSize = LayerInfo(prevLayer).has32BOutput()
? nextLayer->outData[0]->getDims().back()
: Get2DReshapedData(nextLayer->outData[0], limitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)
: Get2DReshapedData(nextLayer->outData[0],
Limitations::get_min_batch_to_fit_in_buffer(nextLayer->outData[0]),
8)
->getDims()[1];
std::vector<float> weightsValues(weightsSize, fillValue);
IE_ASSERT(diagLayer != nullptr);
@ -1531,19 +1533,19 @@ void InsertSplitAligningFilterPass::run() {
// encodes offset to beginning of split layer input
filterLayer->params["offset"] =
std::to_string(aligned64_offset / limitations::bytesPerSplitElement);
std::to_string(aligned64_offset / Limitations::kBytesPerSplitElement);
auto dims = splitOutput->getTensorDesc().getDims();
if (dims.size() > 3) {
THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
}
const auto offsetOfUnalignment =
(currentOffset - aligned64_offset) / limitations::bytesPerSplitElement;
(currentOffset - aligned64_offset) / Limitations::kBytesPerSplitElement;
// TODO consider using a different number of filters to decrease the number of trailing zeros
// (additionalPaddingOfFilter)
const auto numberOfFilters = limitations::convMinFiltersNum;
const auto numberOfFilters = Limitations::kConvMinFiltersNum;
const auto filterSize =
ALIGN(offsetOfUnalignment + numberOfFilters, limitations::convFilterSizeDivider);
ALIGN(offsetOfUnalignment + numberOfFilters, Limitations::kConvFilterSizeDivider);
// filterWeights: numberOfFilters X (offsetOfUnalignment + additionalPaddingOfFilter +
// numberOfFilters) offsetOfUnalignment - the leading zeros in the filter
@ -1598,7 +1600,7 @@ void InsertSplitAligningFilterPass::run() {
}
// search data that starts from unaligned location
currentOffset += outputSize * limitations::bytesPerSplitElement;
currentOffset += outputSize * Limitations::kBytesPerSplitElement;
splitOutIndex++;
}
}
@ -1636,7 +1638,7 @@ void EltwiseSplitOverChannelsPass::run() {
auto oData = l->outData.front();
auto oDims = oData->getDims();
auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims));
if (totalElementsSize <= limitations::bufferMaxSize) {
if (totalElementsSize <= Limitations::kBufferMaxSize) {
continue;
}
auto splitSizesPerAxis = AlignedSplitSizesPerAxis(oDims);
@ -1747,8 +1749,9 @@ void SubstituteScaleShiftBroadCastPass::run() {
if (was_reshaped) {
dataDims = reshaped_data[insData->getName()];
} else {
dataDims = HasTo2DReshapeData(l)
? Get2DReshapedData(insData, limitations::GetMinBatchToFitInBuffer(insData), 8)->getDims()
dataDims =
HasTo2DReshapeData(l)
? Get2DReshapedData(insData, Limitations::get_min_batch_to_fit_in_buffer(insData), 8)->getDims()
: insData->getDims();
}

View File

@ -17,6 +17,7 @@
#include "log/debug.hpp"
using namespace ov::intel_gna::gna_convolution_layer;
using namespace ov::intel_gna::limitations;
void CNNFilter32(intel_dnn_component_t* component) {
auto filters = reinterpret_cast<float*>(component->op.conv1D.ptr_filters);
@ -306,7 +307,7 @@ void CNN2DFilter32(intel_dnn_component_t* component) {
}
}
// kernel padded to 16B = 4 * sizeof(float)
kernelIndex += ALIGN(kh * kw * kc, ov::intel_gna::limitations::convEachKernelByteAlignment / sizeof(float));
kernelIndex += ALIGN(kh * kw * kc, Limitations::kConvEachKernelByteAlignment / sizeof(float));
}
}

View File

@ -15,6 +15,7 @@
using namespace ov::intel_gna;
using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::limitations;
static bool BiasValidation(const ngraph::Output<ngraph::Node>& output) {
auto bias_output_shape = output.get_node()->get_output_shape(0);
@ -49,9 +50,9 @@ static std::tuple<bool, uint32_t, uint32_t, uint32_t> VerifyAndGetConvParams(
const uint32_t width = input1_shape.front();
const uint32_t in_channels = input2_shape.back();
const uint32_t out_channels = input2_shape.front();
if (input1_shape.front() <= limitations::affineMaxBatchSize ||
out_channels % limitations::convFiltersNumDivider != 0 || out_channels > limitations::convMaxFiltersNum ||
in_channels > limitations::convFilterMaxSize) {
if (input1_shape.front() <= Limitations::kAffineMaxBatchSize ||
out_channels % Limitations::kConvFiltersNumDivider != 0 || out_channels > Limitations::kConvMaxFiltersNum ||
in_channels > Limitations::kConvFilterMaxSize) {
return std::make_tuple(false, 0, 0, 0);
}

View File

@ -20,6 +20,7 @@
namespace ov {
namespace intel_gna {
using namespace target;
using namespace limitations;
namespace pass {
using namespace helper;
@ -55,7 +56,7 @@ static bool VerifyAndGetConvData(std::shared_ptr<ngraph::opset7::Convolution> co
size_t filter_height = filters.get_shape()[2];
size_t filter_width = filters.get_shape()[3];
if (filter_width > limitations::copyMaxGrouping || filter_height > limitations::copyMaxGrouping) {
if (filter_width > Limitations::kCopyMaxGrouping || filter_height > Limitations::kCopyMaxGrouping) {
return false;
}
@ -76,7 +77,7 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr<ngraph::opset7:
(max_pool->get_auto_pad() != ngraph::op::PadType::EXPLICIT ||
max_pool->get_pads_begin() != ngraph::Shape({0, 0}) || max_pool->get_pads_end() != ngraph::Shape({0, 0}))) ||
pool_filter.size() != 2 || pool_strides.size() != 2 || pool_filter[0] > 1 || pool_strides[0] > 1 ||
pool_filter[0] > limitations::maxPoolMaxWindowSize)
pool_filter[0] > Limitations::kMaxPoolMaxWindowSize)
return false;
graph_data.pool_size_width = pool_filter[1];
@ -84,16 +85,15 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr<ngraph::opset7:
return true;
}
static bool GNA30SupportedConv(const DeviceVersion& compile_target,
const InferenceEngine::Precision& gnaPrecision,
static bool GNA30SupportedConv(const InferenceEngine::Precision& gnaPrecision,
const GraphData& graph_data,
const ConvData& conv_data) {
const auto cnn2dValidatorPtr = limitations::cnn2d::AbstractValidator::Create(compile_target);
const auto cnn2dValidatorPtr = Limitations::get_instance()->get_cnn_validator();
if (!cnn2dValidatorPtr) {
return false;
}
const auto& cnn2dValidator = *cnn2dValidatorPtr;
const auto cnnIsValid = cnn2dValidator.ValidateCnn2D(graph_data.conv->get_friendly_name(),
const auto cnnIsValid = cnn2dValidatorPtr->ValidateCnn2D(graph_data.conv->get_friendly_name(),
conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
@ -112,7 +112,7 @@ static bool GNA30SupportedConv(const DeviceVersion& compile_target,
if (!graph_data.max_pool) {
return true;
}
const auto poolingValid = cnn2dValidator.ValidatePooling2D(graph_data.conv->get_friendly_name(),
const auto poolingValid = cnn2dValidatorPtr->ValidatePooling2D(graph_data.conv->get_friendly_name(),
graph_data.max_pool->get_kernel()[0],
graph_data.max_pool->get_kernel()[1],
graph_data.max_pool->get_strides()[0],
@ -126,7 +126,7 @@ static size_t CalculateConvCount(const ConvData& conv_data) {
size_t conv_count = 1;
size_t total_factorized_conv_channel_count =
(conv_data.input_channel_count * conv_data.filter_height * conv_data.filter_width);
while (total_factorized_conv_channel_count / conv_count > limitations::convFilterMaxSize ||
while (total_factorized_conv_channel_count / conv_count > Limitations::kConvFilterMaxSize ||
total_factorized_conv_channel_count % conv_count != 0 || conv_data.filter_channel_count % conv_count != 0)
conv_count++;
@ -139,7 +139,7 @@ static bool ShouldDecompose(GraphData& graph_data, const ConvData& conv_data) {
// Concat (copy) layer limitation allows splitting up to a certain limit
// Currently we are able to split only convolutions without pooling in horizontal dimension
if (graph_data.conv_count > limitations::copyMaxGrouping ||
if (graph_data.conv_count > Limitations::kCopyMaxGrouping ||
((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1))
return false;
@ -561,8 +561,7 @@ static void Decompose(const GraphData& graph_data, ConvData& conv_data) {
conv_result->set_friendly_name(conv_result_name);
}
static bool Convert(const DeviceVersion& compile_target,
const InferenceEngine::Precision& gnaPrecision,
static bool Convert(const InferenceEngine::Precision& gnaPrecision,
std::shared_ptr<ngraph::Node> leading_transpose,
std::shared_ptr<ngraph::Node> fq_filters,
std::shared_ptr<ngraph::Node> conv,
@ -598,7 +597,7 @@ static bool Convert(const DeviceVersion& compile_target,
return false;
// If compile target is GNA 3.0 and the convolution is supported on it, then skip decomposition
if (GNA30SupportedConv(compile_target, gnaPrecision, graph_data, conv_data))
if (GNA30SupportedConv(gnaPrecision, graph_data, conv_data))
return false;
// We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC)
@ -618,7 +617,7 @@ static bool Convert(const DeviceVersion& compile_target,
return true;
}
Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const InferenceEngine::Precision& gnaPrecision) {
Decompose2DConv::Decompose2DConv(const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConv);
auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
@ -735,8 +734,7 @@ Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const Infe
}
}
return Convert(compile_target,
gnaPrecision,
return Convert(gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(),
fq_filters_node,
pattern_map.at(conv).get_node_shared_ptr(),
@ -755,8 +753,7 @@ Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const Infe
this->register_matcher(m, callback);
}
Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const DeviceVersion& compile_target,
const InferenceEngine::Precision& gnaPrecision) {
Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConvTransposedWithBias);
auto const_input_i64 =
@ -781,8 +778,7 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const Devic
pattern_map.at(bias).get_node_shared_ptr())))
return false;
return Convert(compile_target,
gnaPrecision,
return Convert(gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(),
nullptr,
pattern_map.at(conv).get_node_shared_ptr(),
@ -802,7 +798,6 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const Devic
}
Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF(
const DeviceVersion& compile_target,
const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConvTransposedWithBiasAF);
@ -836,8 +831,7 @@ Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF(
pattern_map.at(bias).get_node_shared_ptr())))
return false;
return Convert(compile_target,
gnaPrecision,
return Convert(gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(),
nullptr,
pattern_map.at(conv).get_node_shared_ptr(),

View File

@ -35,7 +35,7 @@ namespace pass {
class Decompose2DConv : public ngraph::pass::MatcherPass {
public:
OPENVINO_RTTI("Decompose2DConv", "0");
Decompose2DConv(const target::DeviceVersion& compile_target, const InferenceEngine::Precision& gnaPrecision);
Decompose2DConv(const InferenceEngine::Precision& gnaPrecision);
};
/**
@ -56,8 +56,7 @@ public:
class Decompose2DConvTransposedWithBias : public ngraph::pass::MatcherPass {
public:
OPENVINO_RTTI("Decompose2DConvTransposedWithBias", "0");
Decompose2DConvTransposedWithBias(const target::DeviceVersion& compile_target,
const InferenceEngine::Precision& gnaPrecision);
Decompose2DConvTransposedWithBias(const InferenceEngine::Precision& gnaPrecision);
};
/**
@ -80,8 +79,7 @@ public:
class Decompose2DConvTransposedWithBiasAF : public ngraph::pass::MatcherPass {
public:
OPENVINO_RTTI("Decompose2DConvTransposedWithBiasAF", "0");
Decompose2DConvTransposedWithBiasAF(const target::DeviceVersion& compile_target,
const InferenceEngine::Precision& gnaPrecision);
Decompose2DConvTransposedWithBiasAF(const InferenceEngine::Precision& gnaPrecision);
};
} // namespace pass

View File

@ -13,6 +13,7 @@
#include "backend/gna_limitations.hpp"
using namespace ngraph;
using namespace ov::intel_gna::limitations;
namespace ov {
namespace intel_gna {
@ -81,7 +82,7 @@ static bool GetVerifiedMVNData(const std::shared_ptr<opset8::MVN> mvn, MVNData&
// Check if average must be split
mvn_data.num_parts = 1;
while (mvn_data.W / mvn_data.num_parts > limitations::convFilterMaxSize) {
while (mvn_data.W / mvn_data.num_parts > Limitations::kConvFilterMaxSize) {
mvn_data.num_parts *= 2;
}

View File

@ -16,6 +16,7 @@
#include "backend/gna_limitations.hpp"
using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::limitations;
namespace {
@ -160,7 +161,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() {
}
if (prev_node) {
if (limitations::IsTransposeSupported(prev_node->get_output_shape(0))) {
if (Limitations::is_transpose_supported(prev_node->get_output_shape(0))) {
InsertTranspose(prev_node, matmul_node->get_friendly_name(), true);
}
}
@ -170,7 +171,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() {
auto iter = pattern_map.find(fq);
if (iter != pattern_map.end() || (iter = pattern_map.find(constant)) != pattern_map.end()) {
auto prev_node = iter->second.get_node_shared_ptr();
if (limitations::IsTranspose2d(prev_node->get_output_shape(0))) {
if (Limitations::is_transpose_2d(prev_node->get_output_shape(0))) {
InsertTranspose(prev_node, prev_node->get_friendly_name(), true);
}
}
@ -187,7 +188,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() {
}
if (prev_node) {
if (limitations::IsTransposeSupported(prev_node->get_output_shape(0))) {
if (Limitations::is_transpose_supported(prev_node->get_output_shape(0))) {
InsertTranspose(prev_node, matmul_node->get_friendly_name(), true);
}
}
@ -243,7 +244,7 @@ HandleTransposeAfterMatMul::HandleTransposeAfterMatMul() {
ReplaceTransposeWithReshape(transpose_it->second.get_node_shared_ptr());
} else {
auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
if (!limitations::IsTransposeSupported(reshape_node->get_input_shape(0)))
if (!Limitations::is_transpose_supported(reshape_node->get_input_shape(0)))
return false;
auto iter = pattern_map.find(act);
if (iter == pattern_map.end() && (iter = pattern_map.find(fq2)) == pattern_map.end() &&

View File

@ -4,6 +4,7 @@
#include "transformations/remove_in_out_processing.hpp"
#include "backend/gna_limitations.hpp"
#include "common/graph_utils.hpp"
#include "openvino/cc/pass/itt.hpp"
#include "openvino/opsets/opset1.hpp"
@ -17,6 +18,7 @@
using namespace ov::opset10;
using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::limitations;
namespace {
@ -29,7 +31,7 @@ inline bool is_preprocessing_layer_not_supported(std::shared_ptr<ov::Node>& laye
// Verify that transpose layer cannot be executed on GNA
if (std::dynamic_pointer_cast<ov::opset1::Transpose>(layer)) {
return !limitations::is_transpose_supported(layer);
return !Limitations::is_transpose_supported(layer);
}
return false;

View File

@ -14,6 +14,8 @@
#include "layers/gna_convolution_layer.hpp"
#include "layers/gna_split_layer.hpp"
using namespace ov::intel_gna::limitations;
namespace ov {
namespace intel_gna {
namespace pass {
@ -56,13 +58,13 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
std::end(conv->get_input_shape(0)),
size_t(1),
std::multiplies<size_t>());
if (input_size <= limitations::bufferMaxSize) {
if (input_size <= Limitations::kBufferMaxSize) {
return false;
}
auto& input = conv->get_input_shape(0);
uint32_t width = input.back();
uint32_t in_channels = input.at(1);
auto split_sizes = GetAlignedSplitSizes(width, limitations::bufferMaxSize / in_channels);
auto split_sizes = GetAlignedSplitSizes(width, Limitations::kBufferMaxSize / in_channels);
IE_ASSERT(split_sizes.size() > 1);
std::vector<int64_t> split_sizes_casted(split_sizes.size());
std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) {

View File

@ -15,6 +15,8 @@
#include "legacy/ngraph_ops/eltwise.hpp"
#include "log/log.hpp"
using namespace ov::intel_gna::limitations;
namespace ov {
namespace intel_gna {
namespace pass {
@ -25,7 +27,7 @@ inline bool is_eltwise_has_to_be_splitted(const ngraph::Output<ngraph::Node>& no
return false;
auto o_dims = eltwise->get_output_shape(0);
auto total_elem_size = std::accumulate(std::begin(o_dims), std::end(o_dims), 1, std::multiplies<size_t>());
return (total_elem_size > limitations::bufferMaxSize);
return (total_elem_size > Limitations::kBufferMaxSize);
}
static std::shared_ptr<ngraph::opset9::VariadicSplit> split_input(

View File

@ -11,7 +11,7 @@ namespace intel_gna {
namespace pass {
/**
* @brief Split over channels for Eltwise to avoid GNA-HW bufferMaxSize limitation per eltwise
* @brief Split over channels for Eltwise to avoid GNA-HW kBufferMaxSize limitation per eltwise
*/
class SplitEltwise : public ov::pass::MatcherPass {
public:

View File

@ -2,15 +2,19 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <gtest/gtest.h>
#include <legacy/layer_transform.hpp>
#include "frontend/model_quantizer.hpp"
#include "frontend/layer_quantizer.hpp"
#include "gna_matcher.hpp"
#include <ie_core.hpp>
#include <legacy/layer_transform.hpp>
#include <vector>
#include "backend/gna_limitations.hpp"
#include "frontend/layer_quantizer.hpp"
#include "frontend/model_quantizer.hpp"
#include "gna_matcher.hpp"
using namespace InferenceEngine;
using namespace ov::intel_gna::limitations;
using namespace ov::intel_gna::frontend;
using namespace GNATestIRs;
@ -26,7 +30,8 @@ class I8QuantisationTest : public GNATest<> {
return newLayer;
};
InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model, float scale_factor) const {
InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model,
float scale_factor) const {
auto scale_factors = std::vector<float>({scale_factor});
GnaInputs inputs;
@ -41,17 +46,16 @@ class I8QuantisationTest : public GNATest<> {
auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);
return ModelQuantizer(transformer).quantize(
model,
inputs);
return ModelQuantizer(transformer).quantize(model, inputs);
}
void SetUp() override {}
void SetUp() override {
Limitations::init(target::DeviceVersion::Default);
}
};
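The SetUp change above reflects the core of this refactoring: the device limits are no longer free constants and functions but a per-target Limitations singleton that has to be initialized before quantization or the transformation passes query it. A minimal usage sketch built only from the accessors visible in this diff (the wrapper function name is illustrative):

#include "backend/gna_limitations.hpp"

using ov::intel_gna::limitations::Limitations;
using ov::intel_gna::target::DeviceVersion;

void configure_limitations_for(DeviceVersion target) {
    Limitations::init(target);                               // (re)creates the per-device singleton
    const auto limits = Limitations::get_instance();         // shared accessor used across the plugin
    const auto alignment = limits->get_memory_alignment();   // e.g. 64 bytes on GNA3.5, 16 on GNA3.6 (see the alignment tests below)
    const auto cnn_validator = limits->get_cnn_validator();  // device-specific cnn2d validator
    (void)alignment;
    (void)cnn_validator;
}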
// TODO: add test for FC weights after quantization
TEST_F(I8QuantisationTest, canQuantizeFCLayer) {
auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32});
fc->_out_num = 9;
auto weights = make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC});
@ -59,7 +63,8 @@ TEST_F(I8QuantisationTest, canQuantizeFCLayer){
fc->_biases = make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC});
fc->_weights->allocate();
fc->_biases->allocate();
std::shared_ptr<Data> outData = std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({ 1, 1 }), Layout::NC));
std::shared_ptr<Data> outData =
std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC));
fc->outData.push_back(outData);
fc->insData.push_back(outData);
@ -74,7 +79,6 @@ TEST_F(I8QuantisationTest, canQuantizeFCLayer){
}
TEST_F(I8QuantisationTest, canQuantizeActivation) {
auto sigmoid = std::make_shared<GenericLayer>(LayerParams{"name", "type", Precision::FP32});
sigmoid->params["value"] = 2;
sigmoid->type = "Activation";
@ -92,7 +96,9 @@ TEST_F(I8QuantisationTest, inputPrecisionIs16Bits){
auto newNet = quantize_single_input_model(network, 1000);
InputsDataMap inputs = newNet.getInputsInfo();
auto inputLayer = getCreatorLayer(getInputTo(inputs.begin()->second->getInputData()).begin()->second->insData.front().lock()).lock();
auto inputLayer =
getCreatorLayer(getInputTo(inputs.begin()->second->getInputData()).begin()->second->insData.front().lock())
.lock();
ASSERT_EQ(inputLayer->precision, Precision::I16);
}

View File

@ -2,17 +2,21 @@
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <gtest/gtest.h>
#include <legacy/layer_transform.hpp>
#include "backend/gna_types.hpp"
#include "frontend/model_quantizer.hpp"
#include "frontend/layer_quantizer.hpp"
#include "gna_matcher.hpp"
#include <ie_core.hpp>
#include <legacy/layer_transform.hpp>
#include <vector>
#include "backend/gna_limitations.hpp"
#include "backend/gna_types.hpp"
#include "frontend/layer_quantizer.hpp"
#include "frontend/model_quantizer.hpp"
#include "gna_matcher.hpp"
#include "ngraph_functions/builders.hpp"
using namespace InferenceEngine;
using namespace ov::intel_gna::limitations;
using namespace ov::intel_gna::frontend;
using namespace GNATestIRs;
@ -28,7 +32,8 @@ class I16QuantisationTest : public GNATest<> {
return newLayer;
};
InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model, float scale_factor) const {
InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model,
float scale_factor) const {
auto scale_factors = std::vector<float>({scale_factor});
GnaInputs inputs;
@ -43,20 +48,19 @@ class I16QuantisationTest : public GNATest<> {
auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);
return ModelQuantizer(transformer).quantize(
model,
inputs);
return ModelQuantizer(transformer).quantize(model, inputs);
}
void SetUp() override {
Limitations::init(target::DeviceVersion::Default);
}
};
template <class T>
T setWeights(T blob) {
blob->allocate();
// actual quantisation algorithm is involved - we need to provide weights that will be quantized with scale factor of 1
// actual quantisation algorithm is involved - we need to provide weights that will be quantized with scale factor
// of 1
for (auto&& w : *blob) {
w = MAX_VAL_2B_WEIGHT;
}
@ -75,10 +79,8 @@ TBlob<uint8_t>::Ptr setWeights(TBlob<uint8_t>::Ptr blob) {
return blob;
}
// TODO: add test for FC weights after quantization
TEST_F(I16QuantisationTest, canQuantizeFCLayer) {
auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32});
fc->_out_num = 9;
fc->_weights = setWeights(make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC}));
@ -87,7 +89,8 @@ TEST_F(I16QuantisationTest, canQuantizeFCLayer){
fc->_biases->allocate();
fillWeights(fc->_biases);
std::shared_ptr<Data> outData = std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC));
std::shared_ptr<Data> outData =
std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC));
fc->outData.push_back(outData);
fc->insData.push_back(outData);
@ -95,7 +98,6 @@ TEST_F(I16QuantisationTest, canQuantizeFCLayer){
}
TEST_F(I16QuantisationTest, canQuantizeActivation) {
auto sigmoid = std::make_shared<GenericLayer>(LayerParams{"name", "type", Precision::FP32});
sigmoid->params["value"] = 2;
sigmoid->type = "Activation";
@ -118,7 +120,6 @@ TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits){
ASSERT_EQ(affineDataPtr->getTensorDesc().getPrecision(), Precision::I32);
}
TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) {
auto weights = setWeights(make_shared_blob<uint8_t>({Precision::U8, {440}, C}));
// std::fill_n(weights->buffer().as<float*>(), weights->byteSize()/sizeof(float), 0);
@ -153,51 +154,70 @@ TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){
TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion) {
assert_that()
.onInferModel(Fc2DOutputModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_without().pwl_inserted_into_nnet();
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_without()
.pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion_ProfilingEnabled) {
assert_that()
.onInferModel(Fc2DOutputModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_without().pwl_inserted_into_nnet().profiling_counters();
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_without()
.pwl_inserted_into_nnet()
.profiling_counters();
}
TEST_F(I16QuantisationTest, OnlyAffineWithNanScaleFactorFails) {
gna()
.onInferModel(Fc2DOutputModel())
.withNanScaleFactor()
.propagate_forward().throws();
gna().onInferModel(Fc2DOutputModel()).withNanScaleFactor().propagate_forward().throws();
}
TEST_F(I16QuantisationTest, OnlyAffineWithInfScaleFactorFails) {
gna()
.onInferModel(Fc2DOutputModel())
.withInfScaleFactor()
.propagate_forward().throws();
gna().onInferModel(Fc2DOutputModel()).withInfScaleFactor().propagate_forward().throws();
}
TEST_F(I16QuantisationTest, AffineToMemoryWillResultInActivationInsertion) {
assert_that()
.onInferModel(affineToMemoryModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().pwl_inserted_into_nnet();
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, EltwiseToMemoryWithNoOutputActivationInsertion) {
assert_that().inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.onInferModel(eltwiseToMemoryModelNoOutput(), [](CNNNetwork & net){
assert_that()
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.onInferModel(eltwiseToMemoryModelNoOutput(),
[](CNNNetwork& net) {
net.addOutput("Eltwise_8");
}).gna().propagate_forward().called_with().pwl_inserted_into_nnet();
})
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, EltwiseToMemory_ActivationInsertion) {
assert_that().onInferModel(eltwiseToMemoryModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet();
assert_that()
.onInferModel(eltwiseToMemoryModel())
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInsertion) {
auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 20});
const auto axis_node = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});
@ -205,41 +225,73 @@ TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInserti
auto tanh = std::make_shared<ngraph::opset8::Tanh>(split->outputs()[0]);
auto add = std::make_shared<ngraph::opset8::Add>(split->outputs()[1], tanh);
auto result = std::make_shared<ngraph::opset8::Result>(add);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
assert_that().onInferNgraphModel(function)
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
auto function =
std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
assert_that()
.onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, SliceFollowedBy2FCsAnd2Eltwises_AlignedFilterInsertion) {
assert_that().onInferModel(twoFCWithPaddingAfterSliceModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
assert_that()
.onInferModel(twoFCWithPaddingAfterSliceModel())
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet();
}
// ToDo requires implementation of aligning filter for concat inputs and improvement of
// quantization/scaling algorithm for concat
TEST_F(I16QuantisationTest, DISABLED_DoubleConcatPropageteForwardWithSuccess_AlignedFilterInsertion) {
assert_that().onInferModel(doubleConcatModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
assert_that()
.onInferModel(doubleConcatModel())
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, EltwiseSumm_onlyOneIdentityInsertion) {
assert_that().onInferModel(eltwiseSummModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
assert_that()
.onInferModel(eltwiseSummModel())
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.once();
}
TEST_F(I16QuantisationTest, canDetectLeakyRelu) {
assert_that().onInferModel(TFLeakyReluModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().pwl_inserted_into_nnet();
assert_that()
.onInferModel(TFLeakyReluModel())
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) {
assert_that().onInferModel(maxpoolAfterRelu())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with()
assert_that()
.onInferModel(maxpoolAfterRelu())
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.convolution_inserted_into_nnet()
.And()
.pwl_inserted_into_nnet()
@ -248,28 +300,53 @@ TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) {
}
TEST_F(I16QuantisationTest, EltwiseMull_willInsertTwoIdentities) {
assert_that().onInferModel(eltwiseMulModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice();
assert_that()
.onInferModel(eltwiseMulModel())
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.twice();
}
TEST_F(I16QuantisationTest, multiple_inputs_supported) {
std::string configKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_");
assert_that().onInferModel(two_inputs_to_affine())
.inNotCompactMode().withGNAConfig(configKey + std::to_string(0), 1.0f)
.withGNAConfig(configKey + std::to_string(1), 2.0f).gna().propagate_forward()
.called_with().pwl_inserted_into_nnet().once();
assert_that()
.onInferModel(two_inputs_to_affine())
.inNotCompactMode()
.withGNAConfig(configKey + std::to_string(0), 1.0f)
.withGNAConfig(configKey + std::to_string(1), 2.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.once();
}
TEST_F(I16QuantisationTest, DISABLED_multiple_inputs_into_concat_supported) {
assert_that().onInferModel(two_inputs_to_concat())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f).gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
assert_that()
.onInferModel(two_inputs_to_concat())
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.once();
}
TEST_F(I16QuantisationTest, ScaleShift_Affine_WillResultInIdentityInsertion) {
assert_that().onInferModel(scaleShiftAffineModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
assert_that()
.onInferModel(scaleShiftAffineModel())
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.once();
}
TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) {
@ -277,10 +354,17 @@ TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) {
auto clamp = std::make_shared<ngraph::opset8::Clamp>(input_params, -50, 50);
auto tanh = std::make_shared<ngraph::opset8::Tanh>(clamp);
auto result = std::make_shared<ngraph::opset8::Result>(tanh);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
assert_that().onInferNgraphModel(function)
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice();
auto function =
std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
assert_that()
.onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet()
.twice();
}
TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInTwoDiagonalsInsertion) {
@ -296,44 +380,71 @@ TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInTwoDiago
auto result = std::make_shared<ngraph::opset8::Result>(add);
mem_w->add_control_dependency(mem_r);
result->add_control_dependency(mem_w);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
assert_that().onInferNgraphModel(function)
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice();
auto function =
std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
assert_that()
.onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet()
.twice();
}
TEST_F(I16QuantisationTest, AffineWith2AffineOutputs_ResultInOnlyOneIdentityInsertion) {
// one Identity activation from first FC, and one Identity activation for eltwise
assert_that().onInferModel(AffineWith2AffineOutputsModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice();
assert_that()
.onInferModel(AffineWith2AffineOutputsModel())
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.twice();
}
TEST_F(I16QuantisationTest, ScaleShiftWithBroadcast_ResultInDiagonalInsertion) {
auto& affineWeights = storage<std::vector<uint16_t>>();
affineWeights = {
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288,
14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192,
10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
};
assert_that().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f).onInferModel(ScaleShift3DModel())
assert_that()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.onInferModel(ScaleShift3DModel())
.withWeigthsPattern({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f})
.inNotCompactMode().gna().propagate_forward().called_with().called_with().affine_weights_eq(affineWeights);
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.called_with()
.affine_weights_eq(affineWeights);
}
TEST_F(I16QuantisationTest, MemoryAfterConcat_ResultInCopyInsertion) {
assert_that().onInferModel(MemoryAfterConcatModel()).inNotCompactMode().gna().propagate_forward().
called_with().copy_inserted_into_nnet();
assert_that()
.onInferModel(MemoryAfterConcatModel())
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.copy_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, MemoryAndConcatAfterOneNode_ResultInCopyInsertion) {
assert_that().onInferModel(MemoryAndConcatAfterOneNode()).inNotCompactMode().gna().propagate_forward().
called_with().copy_inserted_into_nnet();
assert_that()
.onInferModel(MemoryAndConcatAfterOneNode())
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.copy_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, DISABLED_permutationOfWeightsBetweenConvAndAffine) {
@ -343,12 +454,24 @@ TEST_F(I16QuantisationTest, DISABLED_permutationOfWeightsBetweenConvAndAffine) {
auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
// here weights are transposed
save().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern)
.inNotCompactMode().from().propagate_forward().affine_weights_transpozed({128, 61}).to(affineWeights);
save()
.onInferModel(affineAfterConvNoPermute())
.withWeigthsPattern(weigthsPattern)
.inNotCompactMode()
.from()
.propagate_forward()
.affine_weights_transpozed({128, 61})
.to(affineWeights);
// here weights shouldn't be transposed
assert_that().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern)
.inNotCompactMode().gna().propagate_forward().called_with().affine_weights_eq(affineWeights);
assert_that()
.onInferModel(affineAfterConvWithPermute())
.withWeigthsPattern(weigthsPattern)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.affine_weights_eq(affineWeights);
}
TEST_F(I16QuantisationTest, DISABLED_noPermutationOfWeightsBetweenConvAndAffineIfPermuteLayerWithCorrectArgs) {
@ -357,11 +480,23 @@ TEST_F(I16QuantisationTest, DISABLED_noPermutationOfWeightsBetweenConvAndAffineI
// least likely that width and height both are multiple of 7
auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
save().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern)
.inNotCompactMode().from().propagate_forward().affine_weights().to(affineWeights);
save()
.onInferModel(affineAfterConvWithPermute())
.withWeigthsPattern(weigthsPattern)
.inNotCompactMode()
.from()
.propagate_forward()
.affine_weights()
.to(affineWeights);
assert_that().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern)
.inNotCompactMode().gna().propagate_forward().called_with().affine_weights_transposed(affineWeights, {128, 61});
assert_that()
.onInferModel(affineAfterConvNoPermute())
.withWeigthsPattern(weigthsPattern)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.affine_weights_transposed(affineWeights, {128, 61});
}
TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) {
@ -375,7 +510,8 @@ TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) {
quantize_single_input_model(network, 1000);
}
TEST_F(I16QuantisationTest, MultipleActivationsAfterAffineWithIdentityActivation_MultipleDiagonalLayersWithActivaitons) {
TEST_F(I16QuantisationTest,
MultipleActivationsAfterAffineWithIdentityActivation_MultipleDiagonalLayersWithActivaitons) {
auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 10});
const auto constant = ngraph::opset8::Constant::create(ngraph::element::f32, ngraph::Shape{10, 10}, {1});
auto matmul1 = std::make_shared<ngraph::opset8::MatMul>(input_params, constant);
@ -386,11 +522,17 @@ TEST_F(I16QuantisationTest, MultipleActivationsAfterAffineWithIdentityActivation
auto mul = std::make_shared<ngraph::opset8::Multiply>(sigmoid, relu);
auto add2 = std::make_shared<ngraph::opset8::Add>(add, mul);
auto result = std::make_shared<ngraph::opset8::Result>(add);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
auto function =
std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
// identity came from automatic insertion due to
assert_that().onInferNgraphModel(function)
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActSigmoid, kActRelu, kActIdentity, kActIdentity});
assert_that()
.onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActSigmoid, kActRelu, kActIdentity, kActIdentity});
}
TEST_F(I16QuantisationTest, MultipleActivationsAfterAffine_ResultInMultipleDiagonalLayersWithActivaitons) {
@ -401,20 +543,32 @@ TEST_F(I16QuantisationTest, MultipleActivationsAfterAffine_ResultInMultipleDiago
auto relu = std::make_shared<ngraph::opset8::Relu>(matmul);
auto mul = std::make_shared<ngraph::opset8::Multiply>(sigmoid, relu);
auto result = std::make_shared<ngraph::opset8::Result>(mul);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
auto function =
std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
// extra identity inserted for affine
assert_that().onInferNgraphModel(function)
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with()
assert_that()
.onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
// 1 diag for second activation, 1 for eltwise
.pwls_inserted_into_nnet({kActRelu, kActSigmoid}).diagonal_inserted_into_nnet().times(3);
.pwls_inserted_into_nnet({kActRelu, kActSigmoid})
.diagonal_inserted_into_nnet()
.times(3);
}
// TODO: build a regression test on top of it using real quantisation accuracy checking
TEST_F(I16QuantisationTest, ConcatWithConstInputPropagatedForward) {
assert_that().onInferModel(concatModelWithConstLayer())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity});
assert_that()
.onInferModel(concatModelWithConstLayer())
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActIdentity});
}
TEST_F(I16QuantisationTest, LSTMCell_quantize) {
@ -440,15 +594,27 @@ TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) {
}
TEST_F(I16QuantisationTest, EltwisetWithConstInputPropagatedForward) {
assert_that().onInferModel(eltwiseSumModelWithConstLayer())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
assert_that()
.onInferModel(eltwiseSumModelWithConstLayer())
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, PowerWithScaleFactorPropagateForward) {
assert_that().onInferModel(PowerWithScaleFactor1())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity}).And().diagonal_inserted_into_nnet();
assert_that()
.onInferModel(PowerWithScaleFactor1())
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActIdentity})
.And()
.diagonal_inserted_into_nnet();
}
TEST_F(I16QuantisationTest, ConcatWithDifferentInputScaleFactorsPropagateForward) {
@ -459,10 +625,16 @@ TEST_F(I16QuantisationTest, ConcatWithDifferentInputScaleFactorsPropagateForward
auto tanh = std::make_shared<ngraph::opset8::Tanh>(split->outputs()[1]);
auto concat = std::make_shared<ngraph::opset8::Concat>(ngraph::OutputVector{sigmoid, tanh}, 1);
auto result = std::make_shared<ngraph::opset8::Result>(concat);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
assert_that().onInferNgraphModel(function)
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity});
auto function =
std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
assert_that()
.onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActIdentity});
}
TEST_F(I16QuantisationTest, TI_quantize) {
@ -478,12 +650,16 @@ TEST_F(I16QuantisationTest, TI_quantize) {
TEST_F(I16QuantisationTest, TI_PropagateForward) {
auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 10});
auto mul = std::make_shared<ngraph::opset8::Multiply>(input_params,
auto mul = std::make_shared<ngraph::opset8::Multiply>(
input_params,
std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1, 10}));
auto add = std::make_shared<ngraph::opset8::Add>(mul,
auto add = std::make_shared<ngraph::opset8::Add>(
mul,
std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1, 10}));
auto reshape = std::make_shared<ngraph::opset8::Reshape>(add,
std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{ 3 }, std::vector<size_t>{ 1, 1, 10 }), false);
auto reshape = std::make_shared<ngraph::opset8::Reshape>(
add,
std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{3}, std::vector<size_t>{1, 1, 10}),
false);
auto reshape_shape = reshape->output(0).get_shape();
const size_t batch_size = 1;
@ -496,21 +672,29 @@ TEST_F(I16QuantisationTest, TI_PropagateForward) {
auto C_t = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{batch_size, hiddenSize});
// Body
auto X = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{ batch_size, 1, reshape_shape[2] });
auto weightsNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, { 4 * hiddenSize, reshape_shape[2] }, {}, true);
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, { 4 * hiddenSize, hiddenSize }, {}, true);
auto X = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32,
ngraph::Shape{batch_size, 1, reshape_shape[2]});
auto weightsNode =
ngraph::builder::makeConstant<float>(ngraph::element::f32, {4 * hiddenSize, reshape_shape[2]}, {}, true);
auto reccurrenceWeightsNode =
ngraph::builder::makeConstant<float>(ngraph::element::f32, {4 * hiddenSize, hiddenSize}, {}, true);
// lstm
auto constantX = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{ 2 }, { batch_size, reshape_shape[2] });
auto lstm1 = std::make_shared<ngraph::opset8::LSTMCell>(std::make_shared<ngraph::opset8::Reshape>(X, constantX, false),
H_t, C_t,
weightsNode, reccurrenceWeightsNode, hiddenSize);
auto constantX =
ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {batch_size, reshape_shape[2]});
auto lstm1 =
std::make_shared<ngraph::opset8::LSTMCell>(std::make_shared<ngraph::opset8::Reshape>(X, constantX, false),
H_t,
C_t,
weightsNode,
reccurrenceWeightsNode,
hiddenSize);
auto H_o = lstm1->output(0);
auto C_o = lstm1->output(1);
auto body = std::make_shared<ngraph::Function>(
ngraph::OutputVector{ H_o, C_o }, ngraph::ParameterVector{ X, H_t, C_t });
auto body =
std::make_shared<ngraph::Function>(ngraph::OutputVector{H_o, C_o}, ngraph::ParameterVector{X, H_t, C_t});
auto tensor_iterator = std::make_shared<ngraph::opset8::TensorIterator>();
tensor_iterator->set_body(body);
@ -522,16 +706,29 @@ TEST_F(I16QuantisationTest, TI_PropagateForward) {
auto out0 = tensor_iterator->get_iter_value(H_o, -1);
const size_t output_size = 12;
auto fc = ngraph::builder::makeFullyConnected(out0, ngraph::element::f32, output_size, true, { hiddenSize, output_size }, { 1 }, { 1 });
auto fc = ngraph::builder::makeFullyConnected(out0,
ngraph::element::f32,
output_size,
true,
{hiddenSize, output_size},
{1},
{1});
auto result = std::make_shared<ngraph::opset8::Result>(fc);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
assert_that().onInferNgraphModel(function).withWeigthsPattern({0.1f})
.inNotCompactMode().gna().propagate_forward()
.called_with().pwls_inserted_into_nnet({kActIdentity});
auto function =
std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
assert_that()
.onInferNgraphModel(function)
.withWeigthsPattern({0.1f})
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActIdentity});
}
TEST_F(I16QuantisationTest, SplitToConcatWith2Inputs1360NotAlignedNoFC) {
assert_that().onInferModel(SplitToConcatWith2Inputs1360NotAlignedNoFC())
assert_that()
.onInferModel(SplitToConcatWith2Inputs1360NotAlignedNoFC())
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()

View File

@ -282,10 +282,12 @@ struct Validatecnn2dParams {
class GNAcnn2dValidatorTest : public ::testing::TestWithParam<GNACnn2DValidatorTestParam> {
protected:
void SetUp() override {
validator = cnn2d::AbstractValidator::Create(GetParam().target);
ASSERT_TRUE(validator != nullptr);
Limitations::init(GetParam().target);
validator = Limitations::get_instance()->get_cnn_validator();
ASSERT_TRUE(validator);
}
std::unique_ptr<cnn2d::AbstractValidator> validator;
std::shared_ptr<cnn2d::AbstractValidator> validator;
};
class GNAcnn2dValidatorTestPadding : public GNAcnn2dValidatorTest {

View File

@ -18,7 +18,7 @@ class GNAPluginForNetworkMetricsTest : public GNAPlugin {
public:
GNAPluginForNetworkMetricsTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
graphCompiler.setGNAMemoryPtr(gnamem);
m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset();
}
};

View File

@ -81,11 +81,11 @@ class GNAPluginForPWLExtraSegmentsTest : public GNAPlugin {
public:
GNAPluginForPWLExtraSegmentsTest(const std::map<std::string, std::string>& config) : GNAPlugin(config) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
graphCompiler.setGNAMemoryPtr(gnamem);
m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset();
}
void Test(const size_t expected_segments) {
for (const auto& component : graphCompiler.dnnComponents.components) {
for (const auto& component : m_graph_compiler->dnnComponents.components) {
if (component.dnnComponent.operation == kDnnPiecewiselinearOp) {
EXPECT_EQ(expected_segments, component.dnnComponent.op.pwl.num_segments);
}

View File

@ -58,7 +58,7 @@ TEST(CheckSplitSupported, CheckVariadicSplitSupported) {
ngraph::opset9::Constant::create(ngraph::element::i64,
ngraph::Shape({split_lengths.size()}),
split_lengths));
ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result);
ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result);
}
}
@ -86,7 +86,7 @@ TEST(CheckSplitSupported, CheckSplitSupported) {
std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape),
ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}),
num_splits);
ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result);
ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result);
}
}
} // namespace

View File

@ -17,7 +17,7 @@ class GNAPluginForPrecisionTest : public GNAPlugin {
public:
GNAPluginForPrecisionTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
graphCompiler.setGNAMemoryPtr(gnamem);
m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset();
}
std::vector<intel_dnn_component_t> get_components() {

View File

@ -45,9 +45,9 @@ public:
GNAPluginForMemoryAlignmentTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
if (gnadevice) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{},
gnadevice->getMemAlignment(),
limitations::kMemoryPageSize));
graphCompiler.setGNAMemoryPtr(gnamem);
Limitations::get_instance()->get_memory_alignment(),
Limitations::kMemoryPageSize));
m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset();
}
}
@ -149,16 +149,14 @@ INSTANTIATE_TEST_SUITE_P(MemoryAlignment_GNA_4_0,
class MemoryAlignmentTest : public ::testing::Test {};
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_ExpectExceptionWhenTargetIsUnset) {
EXPECT_ANY_THROW(getMemoryAlignmentBytes(DeviceVersion::NotSet));
}
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect64ByteAlignmentWhenTargetIsGNA3_0) {
EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_0), 64);
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect64ByteAlignmentWhenTargetIsGNA3_5) {
Limitations::init(DeviceVersion::GNA3_5);
EXPECT_EQ(Limitations::get_instance()->get_memory_alignment(), 64);
}
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect16ByteAlignmentWhenTargetIsGNA3_6) {
EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_6), 16);
Limitations::init(DeviceVersion::GNA3_6);
EXPECT_EQ(Limitations::get_instance()->get_memory_alignment(), 16);
}
} // namespace testing

View File

@ -297,7 +297,7 @@ public:
GNAPluginTested() : GNAPlugin() {
gnamem_t = std::make_shared<GNAMemoryTested>();
gnamem = gnamem_t;
graphCompiler.setGNAMemoryPtr(gnamem);
m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset();
}
void Test() {

View File

@ -15,6 +15,7 @@
#include "common_test_utils/ngraph_test_utils.hpp"
#include "transformations/decompose_2d_convolution.hpp"
using namespace ov::intel_gna::limitations;
namespace testing {
namespace {
@ -312,6 +313,8 @@ void Decompose2DConvTestInvalidFixture::SetUp() {
std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) =
params;
Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
function = get_initial_function(fq,
model,
input_shape,
@ -342,6 +345,7 @@ class Decompose2DConvTestFixture : public CommonTestUtils::TestsCommon,
public ::testing::WithParamInterface<fqDecompose2DConvParams> {
public:
void SetUp() override;
std::shared_ptr<ngraph::Function> get_reference(const bool& fq,
const modelType& model,
const ngraph::PartialShape& input_shape,
@ -365,6 +369,8 @@ void Decompose2DConvTestFixture::SetUp() {
std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) =
params;
Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
function = get_initial_function(fq,
model,
input_shape,
@ -779,7 +785,7 @@ static size_t CalculateConvCount(const ConvParams& conv_params) {
size_t conv_count = 1;
size_t total_factorized_conv_channel_count =
(conv_params.input_channel_count * conv_params.filter_height * conv_params.filter_width);
while (total_factorized_conv_channel_count / conv_count > ov::intel_gna::limitations::convFilterMaxSize ||
while (total_factorized_conv_channel_count / conv_count > Limitations::kConvFilterMaxSize ||
total_factorized_conv_channel_count % conv_count != 0 || conv_params.filter_channel_count % conv_count != 0)
conv_count++;
@ -792,7 +798,7 @@ static bool ShouldDecompose(GraphData& graph_data, const ConvParams& conv_params
// Concat (copy) layer limitation allows to split up to a certain limit
// Currently we are able to split only convolutions without pooling in horizontal dimension
if (graph_data.conv_count > ov::intel_gna::limitations::copyMaxGrouping ||
if (graph_data.conv_count > Limitations::kCopyMaxGrouping ||
((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1))
return false;
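For a feel of the factorization logic above: conv_count simply grows until each factorized convolution's channel-times-kernel volume fits under the filter-size cap and divides both channel counts evenly. A small self-contained illustration (the 768 cap is assumed here purely for the example; the real bound is Limitations::kConvFilterMaxSize):

#include <cstddef>

// Assumed cap of 768 is for the example only; the plugin reads it from Limitations::kConvFilterMaxSize.
size_t conv_count_example(size_t in_channels, size_t kh, size_t kw, size_t filter_channels, size_t cap = 768) {
    const size_t total = in_channels * kh * kw;
    size_t conv_count = 1;
    while (total / conv_count > cap || total % conv_count != 0 || filter_channels % conv_count != 0)
        ++conv_count;
    return conv_count;  // e.g. conv_count_example(128, 3, 3, 128) == 2, since 1152 / 2 = 576 <= 768
}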
@ -884,18 +890,13 @@ void execute_test(modelType model,
case modelType::TranspConvBcastAddMaxPoolTransp:
case modelType::TranspConvBcastAddActTransp:
case modelType::TranspConvBcastAddMaxPoolActTransp:
manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(ov::intel_gna::target::DeviceVersion::Default,
gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(gnaPrecision);
break;
case modelType::TranspConvTranspBcastAdd:
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(
ov::intel_gna::target::DeviceVersion::Default,
gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(gnaPrecision);
break;
case modelType::TranspConvTranspBcastAddAct:
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(
ov::intel_gna::target::DeviceVersion::Default,
gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(gnaPrecision);
break;
}

View File

@ -13,6 +13,8 @@
#include "transformations/decompose_mvn.hpp"
#include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp"
using namespace ov::intel_gna::limitations;
namespace decomposeMVN {
typedef std::tuple<ngraph::Shape, // Input shape
@ -264,7 +266,7 @@ std::shared_ptr<ngraph::Function> getReferenceFunction(const ngraph::Shape& inpu
mvn_data.normalize_variance = normalize_variance;
mvn_data.num_parts = 1;
while (mvn_data.W / mvn_data.num_parts > ov::intel_gna::limitations::convFilterMaxSize) {
while (mvn_data.W / mvn_data.num_parts > Limitations::kConvFilterMaxSize) {
mvn_data.num_parts *= 2;
}
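The MVN decomposition applies the same cap differently: num_parts is doubled until each part of the width fits under it. A tiny sketch with assumed numbers (the 768 cap is again only illustrative):

#include <cstddef>

size_t mvn_num_parts_example(size_t W, size_t cap = 768) {
    size_t num_parts = 1;
    while (W / num_parts > cap)
        num_parts *= 2;
    return num_parts;  // mvn_num_parts_example(4096) == 8, since 4096 / 8 = 512 <= 768
}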

View File

@ -11,6 +11,7 @@
#include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp>
#include "backend/gna_limitations.hpp"
#include "common_test_utils/ngraph_test_utils.hpp"
#include "ngraph_functions/builders.hpp"
#include "ops/copy.hpp"
@ -54,10 +55,10 @@ void InsertCopyLayerTest::Validate() {
void InsertCopyLayerTest::SetUp() {
std::tie(m_axis, m_inputs_num) = this->GetParam();
ov::intel_gna::limitations::Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
}
void InsertCopyLayerTest::Run() {
SetUp();
Validate();
}
@ -176,6 +177,11 @@ public:
}
};
void RunPasses(ngraph::pass::Manager& m, std::shared_ptr<ov::Model> func) {
ov::intel_gna::limitations::Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
m.run_passes(func);
}
// [Parameter] [Parameter]
// \ / => |
// [Concat] [Copy]
@ -211,7 +217,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -263,7 +269,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -324,7 +330,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMultiNFLConcatTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -382,7 +388,7 @@ TEST(TransformationTests, InsertCopyLayerMultiConstConcatTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -442,7 +448,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -510,7 +516,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerNFLConcatTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -573,7 +579,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMemoryTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -633,7 +639,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatMemoryTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -705,7 +711,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatMemoryTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -776,7 +782,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatMemoryTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -851,7 +857,7 @@ TEST(TransformationTests, InsertCopyLayerCropMemoryTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -918,7 +924,7 @@ TEST(TransformationTests, InsertCopyLayerCropNFLMemoryTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -987,7 +993,7 @@ TEST(TransformationTests, InsertCopyLayerConcatMemoryTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -1060,7 +1066,7 @@ TEST(TransformationTests, InsertCopyLayerConcatNFLMemoryTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -1122,7 +1128,7 @@ TEST(TransformationTests, InsertCopyLayerSplitMemoryTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -1189,7 +1195,7 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLMemoryTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -1244,7 +1250,7 @@ TEST(TransformationTests, InsertCopyLayerCropConcatTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -1289,7 +1295,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -1338,7 +1344,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoSubgraphsTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -1385,7 +1391,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoResultsTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -1442,7 +1448,7 @@ TEST(TransformationTests, InsertCopyLayerNFLBranchTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -1499,7 +1505,7 @@ TEST(TransformationTests, InsertCopyLayerNFLvsFLSubgraphTestt) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));
@ -1550,7 +1556,7 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLConcatTest) {
ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
m.run_passes(func);
RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func));