diff --git a/src/plugins/intel_gna/src/backend/am_intel_dnn.cpp b/src/plugins/intel_gna/src/backend/am_intel_dnn.cpp index 43d091582c6..a067c6734cb 100644 --- a/src/plugins/intel_gna/src/backend/am_intel_dnn.cpp +++ b/src/plugins/intel_gna/src/backend/am_intel_dnn.cpp @@ -48,6 +48,8 @@ using ov::intel_gna::gna_convolution_layer::outputFromConv; using ov::intel_gna::gna_convolution_layer::outputFromPooling; +using namespace ov::intel_gna::limitations; + namespace ov { namespace intel_gna { namespace backend { @@ -180,8 +182,8 @@ void AMIntelDNN::InitConvolutional1DComponentPrivate(intel_dnn_component_t& comp THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << num_columns_in << ") is not a multiply by 8"; } - if (num_filters < limitations::convMinFiltersNum || num_filters > limitations::convMaxFiltersNum || - num_filters % limitations::convFiltersNumDivider != 0) { + if (num_filters < Limitations::kConvMinFiltersNum || num_filters > Limitations::kConvMaxFiltersNum || + num_filters % Limitations::kConvFiltersNumDivider != 0) { THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << num_filters; } auto max_number_of_out_elements = outputFromConv(num_columns_in, num_filter_coefficients, convStride); diff --git a/src/plugins/intel_gna/src/backend/gna_limitations.cpp b/src/plugins/intel_gna/src/backend/gna_limitations.cpp index adf9351ad9f..ab4cad4faa5 100644 --- a/src/plugins/intel_gna/src/backend/gna_limitations.cpp +++ b/src/plugins/intel_gna/src/backend/gna_limitations.cpp @@ -37,267 +37,62 @@ namespace intel_gna { using namespace target; namespace limitations { +class SupportedElementTypes { +public: + static bool IsParameterTypeSupported(ov::element::Type type, bool is_exception_allowed = false); + static bool IsConstantTypeSupported(ov::element::Type type, bool is_exception_allowed = false); + +private: + static const std::set supported_parameter_types; + static const std::set supported_constant_types; +}; + const std::set SupportedElementTypes::supported_parameter_types = {ov::element::u8, ov::element::i16, ov::element::f32}; -size_t getMemoryAlignmentBytes(target::DeviceVersion target) { - static const std::unordered_map mem_alignment_map{ - {target::DeviceVersion::GNA1_0, 64}, - {target::DeviceVersion::GNA2_0, 64}, - {target::DeviceVersion::GNA3_0, 64}, - {target::DeviceVersion::GNA3_1, 64}, - {target::DeviceVersion::GNA3_5, 64}, - {target::DeviceVersion::GNAEmbedded3_5, 64}, - {target::DeviceVersion::GNA3_6, 16}, - {target::DeviceVersion::GNA4_0, 16}}; - - return common::GetValueForKey(target, mem_alignment_map); -} - -bool SupportedElementTypes::is_parameter_type_supported(ov::element::Type elem_type, bool is_exception_allowed) { - if (supported_parameter_types.count(elem_type) == 0) { - if (is_exception_allowed) { - THROW_GNA_EXCEPTION << "The plugin does not support input precision with " << elem_type.get_type_name() - << " format. 
Supported precisions " << supported_parameter_types << "\n"; - } - return false; - } - return true; -} - -const std::set SupportedElementTypes::supported_constant_types = {ov::element::i8, - ov::element::u8, - ov::element::i16, - ov::element::u16, - ov::element::i32, - ov::element::f32, - ov::element::f64}; - -bool SupportedElementTypes::is_constant_type_supported(ov::element::Type elem_type, bool is_exception_allowed) { - if (supported_constant_types.count(elem_type) == 0) { - if (is_exception_allowed) { - THROW_GNA_EXCEPTION << "The plugin does not support constant precision with " << elem_type.get_type_name() - << " format. Supported precisions " << supported_constant_types << "\n"; - } - return false; - } - return true; -} - -bool is_transpose_supported(const std::shared_ptr& node) { - OPENVINO_ASSERT(node, "Transpose node is empty!"); - const ov::Shape squeezed_shape = graph_utils::squeeze_shape(node->get_input_shape(0)); - const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]); - const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]); - - // GNA transpose limitations: - // - supports 2d transposes only - // - smaller dimension should be less or equal to 8 - // - bigger dimension should be a multiple of limitations::noOfInputsDivisor - if (squeezed_shape.size() == 2 && min_input_dim <= 8 && - ALIGN(max_input_dim, limitations::noOfInputsDivisor) == max_input_dim) { - return true; - } - return false; -} - -bool is_conv_supported(const std::shared_ptr& conv_ie, - const DeviceVersion& effective_compile_target, - const InferenceEngine::Precision gna_precision, - bool is_exception_allowed) { - OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!"); - size_t batch_size = conv_ie->input_value(0).get_shape()[0]; - if (batch_size != 1) { - if (is_exception_allowed) { - THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() + - ", type: " + conv_ie->get_type_name() + ", and batch size(" + - std::to_string(batch_size) + ") != 1 not supported"; - } - return false; - } - auto check_dilation = [&](size_t filter_dilation_height, size_t filter_stride_width) -> bool { - cnn2d::RangeLimit2D dilation_limit{{convDilationHeight, convDilationHeight, "dilation height"}, - {convDilationWidth, convDilationWidth, "dilation width"}}; - std::string error = dilation_limit.GetErrorOrEmpty(filter_dilation_height, filter_stride_width); - return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed, - error, - conv_ie->get_friendly_name(), - conv_ie->get_type_name()); - }; - auto input_shape = conv_ie->input_value(0).get_shape(); - auto filter_shape = conv_ie->input_value(1).get_shape(); - if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) || - (4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) { - pass::helper::ConvData conv_data; - pass::helper::GetConvData(conv_ie, conv_data); - if (gna_convolution_layer::isMappableFrom2DTo1D(conv_data.input_height, - conv_data.input_width, - conv_data.input_channel_count, - conv_data.filter_height, - conv_data.filter_width, - conv_data.filter_stride_height, - conv_data.filter_stride_width)) { - return check_dilation(conv_data.filter_dilation_height, conv_data.filter_dilation_width); - } - const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target); - if (cnn2dValidatorPtr) { - return cnn2dValidatorPtr->ValidateCnn2D(conv_ie->get_friendly_name(), - conv_data.input_height, - conv_data.input_width, - conv_data.input_channel_count, 
- conv_data.filter_height, - conv_data.filter_width, - conv_data.filter_channel_count, - conv_data.filter_stride_height, - conv_data.filter_stride_width, - conv_data.filter_dilation_height, - conv_data.filter_dilation_width, - OvGnaTypeIntFromBytes(gna_precision.size()), - is_exception_allowed); - } - } - return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]); -} - -bool is_pooling_supported(const std::shared_ptr max_pool, - const DeviceVersion& effective_compile_target, - bool is_exception_allowed) { - OPENVINO_ASSERT(max_pool, "MaxPool node is empty!"); - auto kernels = max_pool->get_kernel(); - if (2 == kernels.size() && kernels[0] > 1 && kernels[1] > 1) { - const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target); - if (cnn2dValidatorPtr) { - auto strides = max_pool->get_strides(); - return cnn2dValidatorPtr->ValidatePooling2D(max_pool->get_friendly_name(), - kernels[0], - kernels[1], - strides[0], - strides[1], - is_exception_allowed); - } - } - return true; -} - -bool is_fc_supported(const std::shared_ptr& fully_connected, bool is_exception_allowed) { - OPENVINO_ASSERT(fully_connected, "FullyConnected node is empty!"); - size_t output_batch_size = fully_connected->get_output_shape(0)[0]; - if (output_batch_size > 8) { - if (is_exception_allowed) { - THROW_GNA_EXCEPTION << "topology with layer: " + fully_connected->get_friendly_name() + - ", type: " + fully_connected->get_type_name() + ", and batch size(" + - std::to_string(output_batch_size) + ") not supported"; - } - return false; - } - return true; -} - -bool is_split_supported(const std::shared_ptr& node, bool is_exception_allowed) { - OPENVINO_ASSERT(node, "Split node is empty!"); - bool is_aligned = true; - for (size_t i = 0; i < node->get_output_size(); i++) { - is_aligned &= ov::intel_gna::graph_utils::is_aligned_split(node, i); - } - return is_aligned; -} - -bool is_op_supported(const std::shared_ptr& node, - const DeviceVersion& effective_compile_target, - const InferenceEngine::Precision gna_precision, - bool is_exception_allowed) { - if (ov::op::util::is_parameter(node)) { - return SupportedElementTypes::is_parameter_type_supported(node->get_element_type(), is_exception_allowed); - } else if (ov::op::util::is_constant(node)) { - return SupportedElementTypes::is_constant_type_supported(node->get_element_type(), is_exception_allowed); - } else if (auto conv_ie = std::dynamic_pointer_cast(node)) { - return is_conv_supported(conv_ie, effective_compile_target, gna_precision, is_exception_allowed); - } else if (auto fully_connected = std::dynamic_pointer_cast(node)) { - return is_fc_supported(fully_connected, is_exception_allowed); - } else if (ov::intel_gna::graph_utils::is_pooling(node)) { - return is_pooling_supported(std::dynamic_pointer_cast(node), - effective_compile_target, - is_exception_allowed); - } else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) || - ov::intel_gna::graph_utils::is_eltwise_add(node) || ov::intel_gna::graph_utils::is_eltwise_mul(node) || - ov::intel_gna::graph_utils::is_crop_affined(node) || - ov::intel_gna::graph_utils::is_activation(node.get()) || - ov::intel_gna::graph_utils::is_gna_precision_agnostic( - node) || // check concat/split are aligned when transformations will be moved to ngraph - (std::dynamic_pointer_cast(node) != nullptr) || - (std::dynamic_pointer_cast(node) != nullptr) || - (std::dynamic_pointer_cast(node) != nullptr) || - (std::dynamic_pointer_cast(node) != nullptr)) { - return true; - } else if 
(ov::intel_gna::graph_utils::is_gna_precision_agnostic(node)) { - if ((std::dynamic_pointer_cast(node) != nullptr) || - (std::dynamic_pointer_cast(node) != nullptr)) { - return is_split_supported(node, is_exception_allowed); - } - // TODO check concat are aligned when transformation will be moved to ngraph - return true; - } - return false; -} - -void check_all_ops_supported(const std::shared_ptr& model, - const DeviceVersion& effective_compile_target, - const InferenceEngine::Precision gna_precision) { - std::stringstream error; - // Walk through the transformed model - for (auto& op : model->get_ops()) { - if (!is_op_supported(op, effective_compile_target, gna_precision, true)) { - error << "The plugin does not support layer " << op->get_friendly_name() << " (type " << op->get_type_name() - << ")!" << std::endl; - } - } - if (!error.str().empty()) { - THROW_GNA_EXCEPTION << error.str(); - } -} namespace cnn2d { -bool IsEqualToLimit::isValid(const uint32_t val) const { +bool IsEqualToLimit::IsValid(const uint32_t val) const { return val == compared_value; } std::string IsEqualToLimit::GetErrorOrEmpty(const uint32_t val) const { std::ostringstream out; - if (!isValid(val)) { + if (!IsValid(val)) { out << "Unsupported " << what << ", actual value: " << val << ", but should be equal to " << compared_value << "\n"; } return out.str(); } -bool IsLessThanLimit ::isValid(const uint32_t val) const { +bool IsLessThanLimit::IsValid(const uint32_t val) const { return val < compared_value; } -std::string IsLessThanLimit ::GetErrorOrEmpty(const uint32_t val) const { +std::string IsLessThanLimit::GetErrorOrEmpty(const uint32_t val) const { std::ostringstream out; - if (!isValid(val)) { + if (!IsValid(val)) { out << "Unsupported " << what << ", actual value: " << val << ", but should be less than " << compared_value << "\n"; } return out.str(); } -bool RangeLimit::isValid(const uint32_t val) const { +bool RangeLimit::IsValid(const uint32_t val) const { return val >= min && val <= max; } std::string RangeLimit::GetErrorOrEmpty(const uint32_t val) const { std::ostringstream out; - if (!isValid(val)) { + if (!IsValid(val)) { out << "Unsupported " << what << ", actual value: " << val << ", valid range [" << min << ", " << max << "]\n"; } return out.str(); } -bool RangeLimit2D::isValid(const uint32_t h, const uint32_t w) const { - return hLimit.isValid(h) && wLimit.isValid(w); +bool RangeLimit2D::IsValid(const uint32_t h, const uint32_t w) const { + return hLimit.IsValid(h) && wLimit.IsValid(w); } std::string RangeLimit2D::GetErrorOrEmpty(const uint32_t h, const uint32_t w) const { @@ -308,8 +103,8 @@ RangeMultipleLimit::RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn) : RangeLimit(rlIn), multiplier(multiplierIn) {} -bool RangeMultipleLimit::isValid(const uint32_t val) const { - return RangeLimit::isValid(val) && (val % multiplier == 0); +bool RangeMultipleLimit::IsValid(const uint32_t val) const { + return RangeLimit::IsValid(val) && (val % multiplier == 0); } std::string RangeMultipleLimit::GetErrorOrEmpty(const uint32_t val) const { @@ -321,7 +116,7 @@ std::string RangeMultipleLimit::GetErrorOrEmpty(const uint32_t val) const { return e + out.str(); } -bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) const { +bool VectorOrSquareLimit::IsValid(const uint32_t h, const uint32_t w) const { if (w == 1 && h >= 1 && h <= maxVectorHeight) return true; if (h == 1 && w >= 1 && w <= maxVectorWidth) @@ -333,7 +128,7 @@ bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) 
const { std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const { std::ostringstream out; - if (!isValid(h, w)) { + if (!IsValid(h, w)) { out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w << ", only vertical vector up to " << maxVectorHeight << "x1, horizontal up to 1x" << maxVectorWidth << " or square up to " << maxSquare << "x" << maxSquare << " are valid\n"; @@ -341,7 +136,7 @@ std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_ return out.str(); } -bool RectLimit::isValid(const uint32_t h, const uint32_t w) const { +bool RectLimit::IsValid(const uint32_t h, const uint32_t w) const { if (h >= 1 && h <= maxVectorHeight && w >= 1 && w <= maxVectorWidth) return true; return false; @@ -349,7 +144,7 @@ bool RectLimit::isValid(const uint32_t h, const uint32_t w) const { std::string RectLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const { std::ostringstream out; - if (!isValid(h, w)) { + if (!IsValid(h, w)) { out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w << ", only rectangular shapes up to " << maxVectorHeight << "x" << maxVectorWidth << " are valid\n"; } @@ -365,8 +160,8 @@ RectLimit RectLimitByChannels::GetByChannels(const uint32_t channels) const { return RectLimit{0, 0}; } -bool RectLimitByChannels::isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const { - return GetByChannels(channels).isValid(h, w); +bool RectLimitByChannels::IsValid(const uint32_t h, const uint32_t w, const uint32_t channels) const { + return GetByChannels(channels).IsValid(h, w); } std::string RectLimitByChannels::GetErrorOrEmpty(const uint32_t h, @@ -380,11 +175,11 @@ RectLimitByChannels RectLimitByChannelsAndPrecision::GetByPrecision(const OvGnaT return precision == OvGnaTypeInt8 ? 
limit_for_int8 : limit_for_int16; } -bool RectLimitByChannelsAndPrecision::isValid(const uint32_t h, +bool RectLimitByChannelsAndPrecision::IsValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const { - return GetByPrecision(precision).isValid(h, w, channels); + return GetByPrecision(precision).IsValid(h, w, channels); } std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h, @@ -395,6 +190,66 @@ std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h, return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what); } +class Validator_30 : public AbstractValidator { + static const RangeLimit2D kInputHWLimit; + static const RangeMultipleLimit kInputChannelsNumberLimit; + + static const RangeMultipleLimit kKernelNumberLimit; + static const RectLimitByChannelsAndPrecision kKernelLimit; + static const RangeLimit2D kDilationLimit; + + static const VectorOrSquareLimit kPoolingWindowLimit; + +public: + Validator_30() = default; + + bool ValidateCnn2D(const std::string& name, + const uint32_t inHeight, + const uint32_t inWidth, + const uint32_t inChannels, + const uint32_t kH, + const uint32_t kW, + const uint32_t kN, + const uint32_t strideH, + const uint32_t strideW, + const uint32_t dilationH, + const uint32_t dilationW, + OvGnaType inPrecision, + bool exception = true) const override; + + bool ValidatePooling2D(const std::string& name, + const uint32_t windowH, + const uint32_t windowW, + const uint32_t strideH, + const uint32_t strideW, + bool exception = true) const override; + + bool ValidateInputPadding(const std::string& name, + const uint32_t pad_h_begin, + const uint32_t pad_h_end, + const uint32_t pad_w_begin, + const uint32_t pad_w_end, + const uint32_t kernel_h, + const uint32_t kernel_w, + const bool throwOnError = true) const override; + + bool ShouldUseOnlyConv2DGnaIface() const override; + + bool ValidateCnn1D(const std::string& name, + const uint32_t inHeight, + const uint32_t inWidth, + const uint32_t inChannels, + const uint32_t kH, + const uint32_t kW, + const uint32_t kN, + const uint32_t strideH, + const uint32_t strideW, + const uint32_t dilationH, + const uint32_t dilationW, + OvGnaType inPrecision, + bool exception = true) const override; +}; + const RangeLimit2D Validator_30::kInputHWLimit{{16, 384, "input height"}, {16, 240, "input width"}}; const RangeMultipleLimit Validator_30::kInputChannelsNumberLimit{{8, 384, "number of input channels"}, 8}; @@ -404,8 +259,9 @@ const RectLimitByChannelsAndPrecision Validator_30::kKernelLimit{ {{{48, {7, 7}}, {64, {7, 5}}, {80, {7, 4}}, {120, {7, 3}}, {384, {7, 1}}}}, }; -const RangeLimit2D Validator_30::kDilationLimit{{convDilationHeight, convDilationHeight, "dilation height"}, - {convDilationWidth, convDilationWidth, "dilation width"}}; +const RangeLimit2D Validator_30::kDilationLimit{ + {Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"}, + {Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}}; bool Validator_30::ValidateCnn2D(const std::string& name, const uint32_t inHeight, @@ -493,6 +349,95 @@ bool Validator_30::ShouldUseOnlyConv2DGnaIface() const { return false; } +class Validator_35 : public AbstractValidator { + struct CnnLimits { + const RangeLimit2D kInputHWLimit; + const RangeLimit kInputChannelsNumberLimit1B; + const RangeLimit kInputChannelsNumberLimit2B; + const RangeLimit kKernelNumberLimit; + const RangeLimit2D kKerneHWlLimit1B; + const RangeLimit2D 
kKerneHWlLimit2B; + const RangeLimit2D kStrideHWLimit1B; + const RangeLimit2D kStrideHWLimit2B; + const RangeLimit2D kDilationLimit; + const RangeLimit2D kPoolingWindowHWLimit; + const RangeLimit2D kPoolingStrideHWLimit; + }; + + static const CnnLimits kCnn2DLimits; + static const CnnLimits kCnn1DLimits; + + std::string ValidateCnn(const CnnLimits& limits, + const std::string& name, + const uint32_t inHeight, + const uint32_t inWidth, + const uint32_t inChannels, + const uint32_t kH, + const uint32_t kW, + const uint32_t kN, + const uint32_t strideH, + const uint32_t strideW, + const uint32_t dilationH, + const uint32_t dilationW, + OvGnaType inPrecision) const; + + std::string ValidatePooling(const CnnLimits& limits, + const std::string& name, + const uint32_t windowH, + const uint32_t windowW, + const uint32_t strideH, + const uint32_t strideW) const; + +public: + Validator_35() = default; + + bool ValidateCnn2D(const std::string& name, + const uint32_t inHeight, + const uint32_t inWidth, + const uint32_t inChannels, + const uint32_t kH, + const uint32_t kW, + const uint32_t kN, + const uint32_t strideH, + const uint32_t strideW, + const uint32_t dilationH, + const uint32_t dilationW, + OvGnaType inPrecision, + bool exception = true) const override; + + bool ValidatePooling2D(const std::string& name, + const uint32_t windowH, + const uint32_t windowW, + const uint32_t strideH, + const uint32_t strideW, + bool exception = true) const override; + + bool ValidateInputPadding(const std::string& name, + const uint32_t pad_h_begin, + const uint32_t pad_h_end, + const uint32_t pad_w_begin, + const uint32_t pad_w_end, + const uint32_t kernel_h, + const uint32_t kernel_w, + const bool throwOnError = true) const override; + + bool ShouldUseOnlyConv2DGnaIface() const override; + + bool ValidateCnn1D(const std::string& name, + const uint32_t inHeight, + const uint32_t inWidth, + const uint32_t inChannels, + const uint32_t kH, + const uint32_t kW, + const uint32_t kN, + const uint32_t strideH, + const uint32_t strideW, + const uint32_t dilationH, + const uint32_t dilationW, + OvGnaType inPrecision, + bool exception = true) const override; +}; + const Validator_35::CnnLimits Validator_35::kCnn2DLimits{ {{1, 65535, "input height"}, {1, 65535, "input width"}}, // kInputHWLimit {1, 2048, "number of input channels"}, // kInputChannelsNumberLimit1B @@ -502,8 +447,8 @@ const Validator_35::CnnLimits Validator_35::kCnn2DLimits{ {{1, 255, "kernel height"}, {1, 256, "kernel width"}}, // kKerneHWlLimit2B {{1, 255, "convolution stride height"}, {1, 256, "convolution stride width"}}, // kStrideHWLimit1B {{1, 255, "convolution stride height"}, {1, 256, "convolution stride width"}}, // kStrideHWLimit2B - {{convDilationHeight, convDilationHeight, "dilation height"}, // kDilationLimit - {convDilationWidth, convDilationWidth, "dilation width"}}, + {{Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"}, // kDilationLimit + {Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}}, {{1, 255, "pooling window height"}, {1, 255, "pooling window width"}}, // kPoolingWindowHWLimit {{1, 255, "pooling stride height"}, {1, 255, "pooling stride width"}} // kPoolingStrideHWLimit }; @@ -517,8 +462,8 @@ const Validator_35::CnnLimits Validator_35::kCnn1DLimits{ {{1, 1, "kernel height"}, {1, 2048, "kernel width"}}, // kKerneHWlLimit2B {{1, 1, "convolution stride height"}, {1, 4096, "convolution stride width"}}, // kStrideHWLimit1B {{1, 1, "convolution stride height"}, {1, 
2048, "convolution stride width"}}, // kStrideHWLimit2B - {{convDilationHeight, convDilationHeight, "dilation height"}, // kDilationLimit - {convDilationWidth, convDilationWidth, "dilation width"}}, + {{Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"}, // kDilationLimit + {Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}}, {{1, 1, "pooling window height"}, {1, 255, "pooling window width"}}, // kPoolingWindowHWLimit {{1, 1, "pooling stride height"}, {1, 255, "pooling stride width"}} // kPoolingStrideHWLimit }; @@ -672,16 +617,16 @@ bool Validator_35::ShouldUseOnlyConv2DGnaIface() const { return true; } -std::unique_ptr AbstractValidator::Create(const DeviceVersion& target) { +std::shared_ptr AbstractValidator::Create(const DeviceVersion& target) { switch (target) { case DeviceVersion::GNA3_0: case DeviceVersion::GNA3_1: - return tools::make_unique(); + return std::make_shared(); case DeviceVersion::GNA3_5: case DeviceVersion::GNAEmbedded3_5: case DeviceVersion::GNA3_6: case DeviceVersion::GNA4_0: - return tools::make_unique(); + return std::make_shared(); default: return nullptr; } @@ -705,15 +650,280 @@ bool AbstractValidator::ValidationSuccesful(const bool throwOnError, return error.empty(); } -bool UseOnly16BitConvolutionWeights(const DeviceVersion& compile_target) { - return compile_target == DeviceVersion::GNA1_0 || compile_target == DeviceVersion::GNA2_0 || - compile_target == DeviceVersion::GNA3_0 || compile_target == DeviceVersion::GNA3_1; -} - } // namespace cnn2d +constexpr uint32_t Limitations::kBufferMaxSize; +constexpr uint32_t Limitations::kConvMinFiltersNum; +constexpr uint32_t Limitations::kConvMaxFiltersNum; +constexpr uint32_t Limitations::kConvDilationHeight; +constexpr uint32_t Limitations::kConvDilationWidth; +constexpr uint32_t Limitations::kConvFiltersNumDivider; +constexpr uint32_t Limitations::kConvFilterSizeDivider; +constexpr uint32_t Limitations::kConvFilterMaxSize; +constexpr uint32_t Limitations::kConvEachKernelByteAlignment; +constexpr uint32_t Limitations::kInputByteAlignment; +constexpr uint32_t Limitations::kNoOfInputsDivisor; +constexpr uint32_t Limitations::kNoOfInputsLowPrecDivisor; +constexpr uint32_t Limitations::kAffineMaxBatchSize; +constexpr uint32_t Limitations::kMaxPoolMaxWindowSize; +constexpr uint32_t Limitations::kCopyMaxGrouping; +constexpr uint32_t Limitations::kTransposeMaxSize; +constexpr uint32_t Limitations::kMaxLayersCountGNA1_0; +constexpr uint32_t Limitations::kMaxLayersCountGNA2_0; +constexpr uint32_t Limitations::kMaxLayersCountGNA3_X; +constexpr uint32_t Limitations::kBytesPerSplitElement; +constexpr uint32_t Limitations::kBytesPerCropElement; +constexpr uint32_t Limitations::kMemoryPageSize; + +thread_local std::shared_ptr Limitations::k_instance{nullptr}; + +Limitations::Limitations(const DeviceVersion& target) { + m_use_only_16bit_conv_weights = (target == DeviceVersion::GNA1_0 || target == DeviceVersion::GNA2_0 || + target == DeviceVersion::GNA3_0 || target == DeviceVersion::GNA3_1); + + m_mem_alignment = get_memory_alignment_bytes(target); + m_cnn_validator = cnn2d::AbstractValidator::Create(target); +} + +void Limitations::init(const DeviceVersion& compile_target) { + k_instance = std::shared_ptr(new Limitations(compile_target)); +} + +bool Limitations::is_transpose_2d(const std::vector& shape) { + return std::count_if(std::begin(shape), std::end(shape), [](size_t dim) { + return dim != 1; + }) == 2; +} + +bool Limitations::is_transpose_supported(const 
std::vector& shape) { + if (!is_transpose_2d(shape)) + return false; + auto shape_no_1 = shape; + shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end()); + size_t min, max; + std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]); + return min <= 8 && max % 8 == 0 && max >= 8 && max <= kTransposeMaxSize; +} + +size_t Limitations::get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input) { + auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims())); + return total_size / kBufferMaxSize + 1; +} + +size_t Limitations::get_memory_alignment_bytes(const DeviceVersion& target) const { + static const std::unordered_map mem_alignment_map{{DeviceVersion::GNA1_0, 64}, + {DeviceVersion::GNA2_0, 64}, + {DeviceVersion::GNA3_0, 64}, + {DeviceVersion::GNA3_1, 64}, + {DeviceVersion::GNA3_5, 64}, + {DeviceVersion::GNAEmbedded3_5, 64}, + {DeviceVersion::GNA3_6, 16}, + {DeviceVersion::GNA4_0, 16}}; + + return common::GetValueForKey(target, mem_alignment_map); +} + +bool SupportedElementTypes::IsParameterTypeSupported(ov::element::Type elem_type, bool is_exception_allowed) { + if (supported_parameter_types.count(elem_type) == 0) { + if (is_exception_allowed) { + THROW_GNA_EXCEPTION << "The plugin does not support input precision with " << elem_type.get_type_name() + << " format. Supported precisions " << supported_parameter_types << "\n"; + } + return false; + } + return true; +} + +const std::set SupportedElementTypes::supported_constant_types = {ov::element::i8, + ov::element::u8, + ov::element::i16, + ov::element::u16, + ov::element::i32, + ov::element::f32, + ov::element::f64}; + +bool SupportedElementTypes::IsConstantTypeSupported(ov::element::Type elem_type, bool is_exception_allowed) { + if (supported_constant_types.count(elem_type) == 0) { + if (is_exception_allowed) { + THROW_GNA_EXCEPTION << "The plugin does not support constant precision with " << elem_type.get_type_name() + << " format. 
Supported precisions " << supported_constant_types << "\n"; + } + return false; + } + return true; +} + +bool Limitations::is_transpose_supported(const std::shared_ptr& node) { + OPENVINO_ASSERT(node, "Transpose node is empty!"); + const ov::Shape squeezed_shape = graph_utils::squeeze_shape(node->get_input_shape(0)); + const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]); + const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]); + + // GNA transpose limitations: + // - supports 2d transposes only + // - smaller dimension should be less or equal to 8 + // - bigger dimension should be a multiple of Limitations::kNoOfInputsDivisor + if (squeezed_shape.size() == 2 && min_input_dim <= 8 && ALIGN(max_input_dim, kNoOfInputsDivisor) == max_input_dim) { + return true; + } + return false; +} + +bool Limitations::is_conv_supported(const std::shared_ptr& conv_ie, + const InferenceEngine::Precision gna_precision, + bool is_exception_allowed) { + OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!"); + size_t batch_size = conv_ie->input_value(0).get_shape()[0]; + if (batch_size != 1) { + if (is_exception_allowed) { + THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() + + ", type: " + conv_ie->get_type_name() + ", and batch size(" + + std::to_string(batch_size) + ") != 1 not supported"; + } + return false; + } + auto check_dilation = [&](size_t filter_dilation_height, size_t filter_stride_width) -> bool { + cnn2d::RangeLimit2D dilation_limit{{kConvDilationHeight, kConvDilationHeight, "dilation height"}, + {kConvDilationWidth, kConvDilationWidth, "dilation width"}}; + std::string error = dilation_limit.GetErrorOrEmpty(filter_dilation_height, filter_stride_width); + return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed, + error, + conv_ie->get_friendly_name(), + conv_ie->get_type_name()); + }; + auto input_shape = conv_ie->input_value(0).get_shape(); + auto filter_shape = conv_ie->input_value(1).get_shape(); + if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) || + (4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) { + pass::helper::ConvData conv_data; + pass::helper::GetConvData(conv_ie, conv_data); + if (gna_convolution_layer::isMappableFrom2DTo1D(conv_data.input_height, + conv_data.input_width, + conv_data.input_channel_count, + conv_data.filter_height, + conv_data.filter_width, + conv_data.filter_stride_height, + conv_data.filter_stride_width)) { + return check_dilation(conv_data.filter_dilation_height, conv_data.filter_dilation_width); + } + + if (m_cnn_validator) { + return m_cnn_validator->ValidateCnn2D(conv_ie->get_friendly_name(), + conv_data.input_height, + conv_data.input_width, + conv_data.input_channel_count, + conv_data.filter_height, + conv_data.filter_width, + conv_data.filter_channel_count, + conv_data.filter_stride_height, + conv_data.filter_stride_width, + conv_data.filter_dilation_height, + conv_data.filter_dilation_width, + OvGnaTypeIntFromBytes(gna_precision.size()), + is_exception_allowed); + } + } + return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]); +} + +bool Limitations::is_pooling_supported(const std::shared_ptr max_pool, + bool is_exception_allowed) { + OPENVINO_ASSERT(max_pool, "MaxPool node is empty!"); + auto kernels = max_pool->get_kernel(); + if (2 == kernels.size() && kernels[0] > 1 && kernels[1] > 1) { + if (m_cnn_validator) { + auto strides = max_pool->get_strides(); + return 
m_cnn_validator->ValidatePooling2D(max_pool->get_friendly_name(), + kernels[0], + kernels[1], + strides[0], + strides[1], + is_exception_allowed); + } + } + return true; +} + +bool Limitations::is_fc_supported(const std::shared_ptr& fully_connected, + bool is_exception_allowed) { + OPENVINO_ASSERT(fully_connected, "FullyConnected node is empty!"); + size_t output_batch_size = fully_connected->get_output_shape(0)[0]; + if (output_batch_size > 8) { + if (is_exception_allowed) { + THROW_GNA_EXCEPTION << "topology with layer: " + fully_connected->get_friendly_name() + + ", type: " + fully_connected->get_type_name() + ", and batch size(" + + std::to_string(output_batch_size) + ") not supported"; + } + return false; + } + return true; +} + +bool Limitations::is_split_supported(const std::shared_ptr& node, bool is_exception_allowed) { + OPENVINO_ASSERT(node, "Split node is empty!"); + bool is_aligned = true; + for (size_t i = 0; i < node->get_output_size(); i++) { + is_aligned &= ov::intel_gna::graph_utils::is_aligned_split(node, i); + } + return is_aligned; +} + +bool Limitations::is_op_supported(const std::shared_ptr& node, + const InferenceEngine::Precision gna_precision, + bool is_exception_allowed) { + if (ov::op::util::is_parameter(node)) { + return SupportedElementTypes::IsParameterTypeSupported(node->get_element_type(), is_exception_allowed); + } else if (ov::op::util::is_constant(node)) { + return SupportedElementTypes::IsConstantTypeSupported(node->get_element_type(), is_exception_allowed); + } else if (auto conv_ie = std::dynamic_pointer_cast(node)) { + return is_conv_supported(conv_ie, gna_precision, is_exception_allowed); + } else if (auto fully_connected = std::dynamic_pointer_cast(node)) { + return is_fc_supported(fully_connected, is_exception_allowed); + } else if (ov::intel_gna::graph_utils::is_pooling(node)) { + return is_pooling_supported(std::dynamic_pointer_cast(node), is_exception_allowed); + } else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) || + ov::intel_gna::graph_utils::is_eltwise_add(node) || ov::intel_gna::graph_utils::is_eltwise_mul(node) || + ov::intel_gna::graph_utils::is_crop_affined(node) || + ov::intel_gna::graph_utils::is_activation(node.get()) || + ov::intel_gna::graph_utils::is_gna_precision_agnostic( + node) || // check concat/split are aligned when transformations will be moved to ngraph + (std::dynamic_pointer_cast(node) != nullptr) || + (std::dynamic_pointer_cast(node) != nullptr) || + (std::dynamic_pointer_cast(node) != nullptr) || + (std::dynamic_pointer_cast(node) != nullptr)) { + return true; + } else if (ov::intel_gna::graph_utils::is_gna_precision_agnostic(node)) { + if ((std::dynamic_pointer_cast(node) != nullptr) || + (std::dynamic_pointer_cast(node) != nullptr)) { + return is_split_supported(node, is_exception_allowed); + } + // TODO check concat are aligned when transformation will be moved to ngraph + return true; + } + return false; +} + +void Limitations::check_all_ops_supported(const std::shared_ptr& model, + const InferenceEngine::Precision gna_precision) { + std::stringstream error; + // Walk through the transformed model + for (auto& op : model->get_ops()) { + if (!is_op_supported(op, gna_precision, true)) { + error << "The plugin does not support layer " << op->get_friendly_name() << " (type " << op->get_type_name() + << ")!" 
<< std::endl; + } + } + if (!error.str().empty()) { + THROW_GNA_EXCEPTION << error.str(); + } +} + +bool Limitations::use_only_16bit_convolution_weights() const { + return m_use_only_16bit_conv_weights; +} + IE_SUPPRESS_DEPRECATED_START -static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) { +bool Limitations::validate_concat_axis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) { LayerInfo info(layer); auto concat_layer = info.as(); IE_ASSERT(concat_layer); @@ -747,7 +957,8 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st // when all transformations are migrated to ngraph bool is_not_trivial_concat = false; - // Concatentaion of consts and input parameters only is supported, even if first dimentsion of input parameter > + // Concatentaion of consts and input parameters only is supported, even if first dimentsion of input + // parameter > // 1 bool concat_all_const_or_inputs = false; @@ -846,7 +1057,7 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st return true; } -bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concat_layer) { +bool Limitations::validate_conv_concat_axis(const InferenceEngine::ConcatLayer* concat_layer) { IE_ASSERT(concat_layer); auto dims_size = concat_layer->insData[0].lock()->getDims().size(); @@ -898,7 +1109,7 @@ bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concat_layer) { return true; } -bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage) { +bool Limitations::are_layers_supported(InferenceEngine::CNNNetwork& network, std::string& errMessage) { IE_SUPPRESS_DEPRECATED_START InferenceEngine::InputsDataMap inputs = network.getInputsInfo(); std::unordered_set allLayers; @@ -909,7 +1120,7 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe // If there are no inputs start search from an output startLayer = getCreatorLayer(outputs.begin()->second).lock(); } else { - SupportedElementTypes::is_parameter_type_supported( + SupportedElementTypes::IsParameterTypeSupported( InferenceEngine::details::convertPrecision(inputs.begin()->second->getPrecision()), true); @@ -944,7 +1155,7 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe check_result = false; } } else if (info.isConcat()) { - if (!ValidateConcatAxis(layer, errMessage)) { + if (!validate_concat_axis(layer, errMessage)) { THROW_GNA_EXCEPTION << errMessage; } } diff --git a/src/plugins/intel_gna/src/backend/gna_limitations.hpp b/src/plugins/intel_gna/src/backend/gna_limitations.hpp index 07a99d8dc4b..e4846d844f8 100644 --- a/src/plugins/intel_gna/src/backend/gna_limitations.hpp +++ b/src/plugins/intel_gna/src/backend/gna_limitations.hpp @@ -9,6 +9,8 @@ #include #include +#include +#include #include "common/gna_target.hpp" #include "common/misc_utils.hpp" @@ -23,158 +25,19 @@ namespace ov { namespace intel_gna { namespace limitations { -constexpr uint32_t bufferMaxSize = 65528; - -constexpr uint32_t convMinFiltersNum = 4; -constexpr uint32_t convMaxFiltersNum = 65532; -constexpr uint32_t convDilationHeight = 1; -constexpr uint32_t convDilationWidth = 1; -constexpr uint32_t convFiltersNumDivider = 4; -constexpr uint32_t convFilterSizeDivider = 8; -constexpr uint32_t convFilterMaxSize = 768; -constexpr uint32_t convEachKernelByteAlignment = 16; -constexpr uint32_t inputByteAlignment = 64; -constexpr uint32_t noOfInputsDivisor = 8; -constexpr uint32_t 
noOfInputsLowPrecDivisor = 16; - -constexpr uint32_t affineMaxBatchSize = 8; - -constexpr uint32_t maxPoolMaxWindowSize = 6; -constexpr uint32_t copyMaxGrouping = 8; -constexpr uint32_t transposeMaxSize = 65528; - -// TODO In the future there should be created class/struct representing all limitations for specific device versions. -constexpr uint32_t kMaxLayersCountGNA1_0 = 1023; -constexpr uint32_t kMaxLayersCountGNA2_0 = 4096; -constexpr uint32_t kMaxLayersCountGNA3_X = 8192; - -// Currently split layer only supports 2 bytes in int16 and int8 mode. -// In fp32 mode this is not necessary but is useful for testing -constexpr uint32_t bytesPerSplitElement = 2; - -// Currently crop layer only supports 2 bytes in int16 and int8 mode. -// In fp32 mode this is not necessary but is useful for testing -constexpr uint32_t bytesPerCropElement = 2; - -constexpr uint32_t kMemoryPageSize = 4096; - -inline bool isCropAffinedOffset(size_t numberOfElements) { - const auto cropOffset = numberOfElements * bytesPerCropElement; - return (ALIGN64(cropOffset) != cropOffset); -} - -inline bool IsTranspose2d(const std::vector& shape) { - return std::count_if(std::begin(shape), std::end(shape), [](size_t dim) { - return dim != 1; - }) == 2; -} - -inline bool IsTransposeSupported(const std::vector& shape) { - if (!IsTranspose2d(shape)) - return false; - auto shape_no_1 = shape; - shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end()); - size_t min, max; - std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]); - return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize; -} - -size_t getMemoryAlignmentBytes(target::DeviceVersion target); - -class SupportedElementTypes { -public: - static bool is_parameter_type_supported(ov::element::Type type, bool is_exception_allowed = false); - static bool is_constant_type_supported(ov::element::Type type, bool is_exception_allowed = false); - -private: - static const std::set supported_parameter_types; - static const std::set supported_constant_types; -}; - -/** - * @brief Validates if transpose is supported by GNA - * @param node transpose - * @return true if supported - */ -bool is_transpose_supported(const std::shared_ptr& node); - -/** - * @brief Validates if legacy convolution is supported by GNA - * @param conv_ie convolution - * @param effective_compile_target GNA compile targets - * @param gna_precision GNA inference precision - * @param is_exception_allowed flag specifies whether exception is allowed - * @return true if supported - */ -bool is_conv_supported(const std::shared_ptr& conv_ie, - const target::DeviceVersion& effective_compile_target, - const InferenceEngine::Precision gna_precision, - bool is_exception_allowed = false); -/** - * @brief Validates if max pooling is supported by GNA - * @param max_pool max pooling - * @param effective_compile_target GNA compile targets - * @param supported_types list of supported types - * @param is_exception_allowed flag specifies whether exception is allowed - * @return true if precision is found in supported - */ -bool is_pooling_supported(const std::shared_ptr max_pool, - const target::DeviceVersion& effective_compile_target, - bool is_exception_allowed = false); - -/** - * @brief Validates if fully connected is supported by GNA - * @param fully_connected fully connected - * @param is_exception_allowed flag specifies whether exception is allowed - * @return true if supported - */ -bool is_fc_supported(const std::shared_ptr& fully_connected, - bool is_exception_allowed 
= false); - -/** - * @brief Validates if split is supported by GNA - * @param node split - * @param is_exception_allowed flag specifies whether exception is allowed - * @return true if supported - */ -bool is_split_supported(const std::shared_ptr& node, bool is_exception_allowed = false); - -/** - * @brief Validates if operation is supported by GNA - * @param node operation - * @param gna_compile_target GNA compile target - * @param gna_precision GNA inference precision - * @param is_exception_allowed flag specifies whether exception is allowed - * @return true if supported - */ -bool is_op_supported(const std::shared_ptr& node, - const target::DeviceVersion& effective_compile_target, - const InferenceEngine::Precision gna_precision, - bool is_exception_allowed = false); - -/** - * @brief Check if all operations are supported by GNA - * @param model ngraph model - * @param gna_compile_target GNA compile target - * @param gna_precision GNA inference precision - */ -void check_all_ops_supported(const std::shared_ptr& model, - const target::DeviceVersion& effective_compile_target, - const InferenceEngine::Precision gna_precision); - namespace cnn2d { struct IsEqualToLimit { uint32_t compared_value; std::string what; - bool isValid(const uint32_t val) const; + bool IsValid(const uint32_t val) const; std::string GetErrorOrEmpty(const uint32_t val) const; }; struct IsLessThanLimit { uint32_t compared_value; std::string what; - bool isValid(const uint32_t val) const; + bool IsValid(const uint32_t val) const; std::string GetErrorOrEmpty(const uint32_t val) const; }; @@ -182,28 +45,28 @@ struct RangeLimit { uint32_t min; uint32_t max; std::string what; - bool isValid(const uint32_t val) const; + bool IsValid(const uint32_t val) const; std::string GetErrorOrEmpty(const uint32_t val) const; }; struct RangeLimit2D { RangeLimit hLimit; RangeLimit wLimit; - bool isValid(const uint32_t h, const uint32_t w) const; + bool IsValid(const uint32_t h, const uint32_t w) const; std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w) const; }; struct RangeMultipleLimit : public RangeLimit { uint32_t multiplier; RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn); - bool isValid(const uint32_t val) const; + bool IsValid(const uint32_t val) const; std::string GetErrorOrEmpty(const uint32_t val) const; }; struct RectLimit { uint32_t maxVectorHeight; uint32_t maxVectorWidth; - bool isValid(const uint32_t h, const uint32_t w) const; + bool IsValid(const uint32_t h, const uint32_t w) const; std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const; }; @@ -211,14 +74,14 @@ struct VectorOrSquareLimit { uint32_t maxSquare; uint32_t maxVectorHeight; uint32_t maxVectorWidth; - bool isValid(const uint32_t h, const uint32_t w) const; + bool IsValid(const uint32_t h, const uint32_t w) const; std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const; }; struct RectLimitByChannels { std::vector> limitPerChannel; RectLimit GetByChannels(const uint32_t channels) const; - bool isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const; + bool IsValid(const uint32_t h, const uint32_t w, const uint32_t channels) const; std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, const uint32_t channels, std::string what) const; }; @@ -226,7 +89,7 @@ struct RectLimitByChannelsAndPrecision { RectLimitByChannels limit_for_int8; RectLimitByChannels limit_for_int16; RectLimitByChannels GetByPrecision(const OvGnaType precision) const; - bool 
isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const; + bool IsValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const; std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, const OvGnaType precision, @@ -291,177 +154,168 @@ public: OvGnaType inPrecision, bool exception = true) const = 0; - static std::unique_ptr Create(const target::DeviceVersion& target); + static std::shared_ptr Create(const target::DeviceVersion& target); }; -class Validator_30 : public AbstractValidator { - static const RangeLimit2D kInputHWLimit; - static const RangeMultipleLimit kInputChannelsNumberLimit; - - static const RangeMultipleLimit kKernelNumberLimit; - static const RectLimitByChannelsAndPrecision kKernelLimit; - static const RangeLimit2D kDilationLimit; - - static const VectorOrSquareLimit kPoolingWindowLimit; - -public: - Validator_30() = default; - - bool ValidateCnn2D(const std::string& name, - const uint32_t inHeight, - const uint32_t inWidth, - const uint32_t inChannels, - const uint32_t kH, - const uint32_t kW, - const uint32_t kN, - const uint32_t strideH, - const uint32_t strideW, - const uint32_t dilationH, - const uint32_t dilationW, - OvGnaType inPrecision, - bool exception = true) const override; - - bool ValidatePooling2D(const std::string& name, - const uint32_t windowH, - const uint32_t windowW, - const uint32_t strideH, - const uint32_t strideW, - bool exception = true) const override; - - bool ValidateInputPadding(const std::string& name, - const uint32_t pad_h_begin, - const uint32_t pad_h_end, - const uint32_t pad_w_begin, - const uint32_t pad_w_end, - const uint32_t kernel_h, - const uint32_t kernel_w, - const bool throwOnError = true) const override; - - bool ShouldUseOnlyConv2DGnaIface() const override; - - bool ValidateCnn1D(const std::string& name, - const uint32_t inHeight, - const uint32_t inWidth, - const uint32_t inChannels, - const uint32_t kH, - const uint32_t kW, - const uint32_t kN, - const uint32_t strideH, - const uint32_t strideW, - const uint32_t dilationH, - const uint32_t dilationW, - OvGnaType inPrecision, - bool exception = true) const override; -}; - -class Validator_35 : public AbstractValidator { - struct CnnLimits { - const RangeLimit2D kInputHWLimit; - const RangeLimit kInputChannelsNumberLimit1B; - const RangeLimit kInputChannelsNumberLimit2B; - const RangeLimit kKernelNumberLimit; - const RangeLimit2D kKerneHWlLimit1B; - const RangeLimit2D kKerneHWlLimit2B; - const RangeLimit2D kStrideHWLimit1B; - const RangeLimit2D kStrideHWLimit2B; - const RangeLimit2D kDilationLimit; - const RangeLimit2D kPoolingWindowHWLimit; - const RangeLimit2D kPoolingStrideHWLimit; - }; - - static const CnnLimits kCnn2DLimits; - static const CnnLimits kCnn1DLimits; - - std::string ValidateCnn(const CnnLimits& limits, - const std::string& name, - const uint32_t inHeight, - const uint32_t inWidth, - const uint32_t inChannels, - const uint32_t kH, - const uint32_t kW, - const uint32_t kN, - const uint32_t strideH, - const uint32_t strideW, - const uint32_t dilationH, - const uint32_t dilationW, - OvGnaType inPrecision) const; - - std::string ValidatePooling(const CnnLimits& limits, - const std::string& name, - const uint32_t windowH, - const uint32_t windowW, - const uint32_t strideH, - const uint32_t strideW) const; - -public: - Validator_35() = default; - - bool ValidateCnn2D(const std::string& name, - const uint32_t inHeight, - const uint32_t inWidth, - const uint32_t inChannels, - 
const uint32_t kH, - const uint32_t kW, - const uint32_t kN, - const uint32_t strideH, - const uint32_t strideW, - const uint32_t dilationH, - const uint32_t dilationW, - OvGnaType inPrecision, - bool exception = true) const override; - - bool ValidatePooling2D(const std::string& name, - const uint32_t windowH, - const uint32_t windowW, - const uint32_t strideH, - const uint32_t strideW, - bool exception = true) const override; - - bool ValidateInputPadding(const std::string& name, - const uint32_t pad_h_begin, - const uint32_t pad_h_end, - const uint32_t pad_w_begin, - const uint32_t pad_w_end, - const uint32_t kernel_h, - const uint32_t kernel_w, - const bool throwOnError = true) const override; - - bool ShouldUseOnlyConv2DGnaIface() const override; - - bool ValidateCnn1D(const std::string& name, - const uint32_t inHeight, - const uint32_t inWidth, - const uint32_t inChannels, - const uint32_t kH, - const uint32_t kW, - const uint32_t kN, - const uint32_t strideH, - const uint32_t strideW, - const uint32_t dilationH, - const uint32_t dilationW, - OvGnaType inPrecision, - bool exception = true) const override; -}; - -bool UseOnly16BitConvolutionWeights(const target::DeviceVersion& compile_target); - } // namespace cnn2d -bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage); +class Limitations { +public: + /** + * @brief Create instance of the Limitations class. Due to Limitations being a singleton, multiple instances of the + * plugin with different compilation targets cannot exist at the same time + * @param compile_target GNA compile target + */ + static void init(const target::DeviceVersion& compile_target); -inline size_t GetMinBatchToFitInBuffer(InferenceEngine::DataPtr input) { - auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims())); - return total_size / bufferMaxSize + 1; + /** + * @brief Returns the instance of Limitations object. 
Requires an Init call before the first usage + */ + static inline std::shared_ptr get_instance(); + + static bool is_transpose_2d(const std::vector& shape); + static bool is_transpose_supported(const std::vector& shape); + static size_t get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input); + + /** + * @brief Validates if concat layer axis is supported by GNA + * @param layer concat layer + * @return true if concat layer axis is valid + */ + IE_SUPPRESS_DEPRECATED_START + static bool validate_conv_concat_axis(const InferenceEngine::ConcatLayer* concatLayer); + static bool are_layers_supported(InferenceEngine::CNNNetwork& network, std::string& errMessage); + IE_SUPPRESS_DEPRECATED_END + + /** + * @brief Validates if fully connected is supported by GNA + * @param fully_connected fully connected + * @param is_exception_allowed flag specifies whether exception is allowed + * @return true if supported + */ + static bool is_fc_supported(const std::shared_ptr& fully_connected, + bool is_exception_allowed = false); + /** + * @brief Validates if split is supported by GNA + * @param node split + * @param is_exception_allowed flag specifies whether exception is allowed + * @return true if supported + */ + static bool is_split_supported(const std::shared_ptr& node, bool is_exception_allowed = false); + /** + * @brief Validates if transpose is supported by GNA + * @param node transpose + * @return true if supported + */ + static bool is_transpose_supported(const std::shared_ptr& node); + /** + * @brief Validates if legacy convolution is supported by GNA + * @param conv_ie convolution + * @param gna_precision GNA inference precision + * @param is_exception_allowed flag specifies whether exception is allowed + * @return true if supported + */ + bool is_conv_supported(const std::shared_ptr& conv_ie, + const InferenceEngine::Precision gna_precision, + bool is_exception_allowed = false); + /** + * @brief Validates if max pooling is supported by GNA + * @param max_pool max pooling + * @param is_exception_allowed flag specifies whether exception is allowed + * @return true if precision is found in supported + */ + bool is_pooling_supported(const std::shared_ptr max_pool, + bool is_exception_allowed = false); + + /** + * @brief Validates if operation is supported by GNA + * @param node operation + * @param gna_precision GNA inference precision + * @param is_exception_allowed flag specifies whether exception is allowed + * @return true if supported + */ + bool is_op_supported(const std::shared_ptr& node, + const InferenceEngine::Precision gna_precision, + bool is_exception_allowed = false); + + /** + * @brief Check if all operations are supported by GNA + * @param model ngraph model + * @param gna_precision GNA inference precision + */ + void check_all_ops_supported(const std::shared_ptr& model, + const InferenceEngine::Precision gna_precision); + + bool use_only_16bit_convolution_weights() const; + bool is_crop_affined_offset(size_t numberOfElements) const; + size_t get_memory_alignment() const; + std::shared_ptr get_cnn_validator() const; + + constexpr static uint32_t kBufferMaxSize = 65528; + constexpr static uint32_t kConvMinFiltersNum = 4; + constexpr static uint32_t kConvMaxFiltersNum = 65532; + constexpr static uint32_t kConvDilationHeight = 1; + constexpr static uint32_t kConvDilationWidth = 1; + constexpr static uint32_t kConvFiltersNumDivider = 4; + constexpr static uint32_t kConvFilterSizeDivider = 8; + constexpr static uint32_t kConvFilterMaxSize = 768; + constexpr static uint32_t 
kConvEachKernelByteAlignment = 16; + constexpr static uint32_t kInputByteAlignment = 64; + constexpr static uint32_t kNoOfInputsDivisor = 8; + constexpr static uint32_t kNoOfInputsLowPrecDivisor = 16; + constexpr static uint32_t kAffineMaxBatchSize = 8; + constexpr static uint32_t kMaxPoolMaxWindowSize = 6; + constexpr static uint32_t kCopyMaxGrouping = 8; + constexpr static uint32_t kTransposeMaxSize = 65528; + constexpr static uint32_t kMaxLayersCountGNA1_0 = 1023; + constexpr static uint32_t kMaxLayersCountGNA2_0 = 4096; + constexpr static uint32_t kMaxLayersCountGNA3_X = 8192; + + // Currently split layer only supports 2 bytes in int16 and int8 mode. + // In fp32 mode this is not necessary but is useful for testing + constexpr static uint32_t kBytesPerSplitElement = 2; + + // Currently crop layer only supports 2 bytes in int16 and int8 mode. + // In fp32 mode this is not necessary but is useful for testing + constexpr static uint32_t kBytesPerCropElement = 2; + constexpr static uint32_t kMemoryPageSize = 4096; + +private: + Limitations(const target::DeviceVersion& target); + Limitations(const Limitations&) = delete; + Limitations& operator=(const Limitations&) = delete; + + size_t get_memory_alignment_bytes(const target::DeviceVersion& target) const; + + IE_SUPPRESS_DEPRECATED_START + static bool validate_concat_axis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage); + IE_SUPPRESS_DEPRECATED_END + + bool m_use_only_16bit_conv_weights = false; + size_t m_mem_alignment = 0; + std::shared_ptr m_cnn_validator; + static thread_local std::shared_ptr k_instance; +}; + +inline std::shared_ptr Limitations::get_instance() { + if (!k_instance) { + THROW_GNA_EXCEPTION << "Limitations instance is not initialized.\n"; + } + return k_instance; } -/** - * @brief Validates if concat layer axis is supported by GNA - * @param layer concat layer - * @return true if concat layer axis is valid - */ -IE_SUPPRESS_DEPRECATED_START -bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concatLayer); -IE_SUPPRESS_DEPRECATED_END +inline bool Limitations::is_crop_affined_offset(size_t numberOfElements) const { + const auto cropOffset = numberOfElements * kBytesPerCropElement; + return (ALIGN64(cropOffset) != cropOffset); +} + +inline size_t Limitations::get_memory_alignment() const { + return m_mem_alignment; +} + +inline std::shared_ptr Limitations::get_cnn_validator() const { + return m_cnn_validator; +} } // namespace limitations } // namespace intel_gna diff --git a/src/plugins/intel_gna/src/common/graph_utils.hpp b/src/plugins/intel_gna/src/common/graph_utils.hpp index 5bc41cf7cd4..62e4aad80fe 100644 --- a/src/plugins/intel_gna/src/common/graph_utils.hpp +++ b/src/plugins/intel_gna/src/common/graph_utils.hpp @@ -84,7 +84,7 @@ inline bool is_aligned_split(const std::shared_ptr input_op, size_ std::dynamic_pointer_cast(input_op)) { for (size_t index = 0; index < input_op_out_index; index++) { size_t outputSize = ngraph::shape_size(input_op->get_output_shape(index)); - offset += outputSize * limitations::bytesPerSplitElement; + offset += outputSize * limitations::Limitations::kBytesPerSplitElement; } } return (offset == ALIGN64(offset)); @@ -93,7 +93,7 @@ inline bool is_aligned_split(const std::shared_ptr input_op, size_ inline bool is_crop_affined(std::shared_ptr node) { auto crop = std::dynamic_pointer_cast(node); if (crop != nullptr && !crop->offset.empty()) { - return limitations::isCropAffinedOffset(crop->offset.back()); + return 
diff --git a/src/plugins/intel_gna/src/frontend/layer_quantizer.cpp b/src/plugins/intel_gna/src/frontend/layer_quantizer.cpp
index b63db46d973..884d7941def 100644
--- a/src/plugins/intel_gna/src/frontend/layer_quantizer.cpp
+++ b/src/plugins/intel_gna/src/frontend/layer_quantizer.cpp
@@ -11,6 +11,7 @@
 namespace ov {
 namespace intel_gna {
+using namespace limitations;
 namespace frontend {
 
 template
@@ -352,7 +353,7 @@ InferenceEngine::Precision GetWeightsPrecision(const LayerInfo& layer_info,
                                                const QuantizedLayerParams& quant_layer_params,
                                                const Config& gna_config) {
     if (((layer_info.isConvolution() || layer_info.isConvolutionFilter()) &&
-         limitations::cnn2d::UseOnly16BitConvolutionWeights(gna_config.target->get_effective_compile_target())) ||
+         Limitations::get_instance()->use_only_16bit_convolution_weights()) ||
         layer_info.isScaleShift()) {
         return InferenceEngine::Precision::I16;
     }
diff --git a/src/plugins/intel_gna/src/gna_device.cpp b/src/plugins/intel_gna/src/gna_device.cpp
index 6fa38adaf56..e367c6af499 100644
--- a/src/plugins/intel_gna/src/gna_device.cpp
+++ b/src/plugins/intel_gna/src/gna_device.cpp
@@ -38,8 +38,7 @@ GNADeviceHelper::GNADeviceHelper(std::shared_ptr targetIn, bool isPerfor
     : target(targetIn),
       nGnaDeviceIndex{selectGnaDevice()},
       useDeviceEmbeddedExport(deviceEmbedded),
-      isPerformanceMeasuring(isPerformanceMeasuring),
-      m_mem_alignment(limitations::getMemoryAlignmentBytes(targetIn->get_effective_compile_target())) {
+      isPerformanceMeasuring(isPerformanceMeasuring) {
     per_request_diagnostics = log::get_log_level() >= ov::log::Level::TRACE;
     per_model_diagnostics = log::get_log_level() >= ov::log::Level::DEBUG;
     open();
@@ -573,7 +572,7 @@ uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
     switch (target->get_effective_execution_target()) {
     case DeviceVersion::GNA1_0:
     case DeviceVersion::GNA2_0:
-        return kMaxLayersCountGNA2_0;
+        return Limitations::kMaxLayersCountGNA2_0;
     case DeviceVersion::GNA3_0:
     case DeviceVersion::GNA3_1:
     case DeviceVersion::GNA3_5:
@@ -581,7 +580,7 @@ uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
     case DeviceVersion::GNA3_6:
     case DeviceVersion::GNA4_0:
     default:
-        return kMaxLayersCountGNA3_X;
+        return Limitations::kMaxLayersCountGNA3_X;
     }
 }
 } // namespace intel_gna
diff --git a/src/plugins/intel_gna/src/gna_device.hpp b/src/plugins/intel_gna/src/gna_device.hpp
index 1f6d12c0a35..d52b4cee3ce 100644
--- a/src/plugins/intel_gna/src/gna_device.hpp
+++ b/src/plugins/intel_gna/src/gna_device.hpp
@@ -67,7 +67,6 @@ class GNADeviceHelper : public GNADevice {
     uint64_t debugLogIndexRequestWait = 0;
     static constexpr const char* kDumpExt = ".bin";
    static constexpr const char* kDumpDelimiter = ".";
-    const size_t m_mem_alignment;
 
 public:
     explicit GNADeviceHelper(std::shared_ptr target = std::make_shared(),
@@ -128,10 +127,6 @@ public:
         return allAllocations;
     }
 
-    size_t getMemAlignment() const {
-        return m_mem_alignment;
-    }
-
     /**
      * @see GNADevice::createModel()
      */
diff --git a/src/plugins/intel_gna/src/gna_graph_compiler.cpp b/src/plugins/intel_gna/src/gna_graph_compiler.cpp
index f6867f278ba..ae0876035d7 100644
--- a/src/plugins/intel_gna/src/gna_graph_compiler.cpp
+++ b/src/plugins/intel_gna/src/gna_graph_compiler.cpp
@@ -49,6 +49,7 @@ namespace intel_gna {
 using namespace frontend;
 using namespace common;
 using namespace memory;
+using namespace limitations;
 
 static bool CheckIFLastComponentIsPrecededByConv2D(const backend::DnnComponents::storage_type& 
components, bool verify_with_pooling = true) { @@ -81,20 +82,22 @@ static uint32_t count_conv2D_input_width_for_expected_output_width(uint32_t expe return (expected_ouput_width - 1) * stride_width - 2 * padding_width + kernel_width; }; -GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config) : gna_config(gna_config) {} +GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config, + std::shared_ptr dnn_ptr, + std::shared_ptr inputs_ptr, + std::shared_ptr cnn2d_validator_ptr, + std::shared_ptr gna_mem_ptr) + : gna_config(gna_config) { + dnn = std::move(dnn_ptr); + inputs_ptr_ = std::move(inputs_ptr); + m_cnn2d_validator = std::move(cnn2d_validator_ptr); + gnamem = std::move(gna_mem_ptr); +} void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr gnaMemPtr) { this->gnamem = std::move(gnaMemPtr); } -void GNAGraphCompiler::setDNNPtr(std::shared_ptr dnnPtr) { - this->dnn = std::move(dnnPtr); -} - -void GNAGraphCompiler::setInputsPtr(std::shared_ptr inputsPtr) { - this->inputs_ptr_ = std::move(inputsPtr); -} - intel_dnn_component_t* GNAGraphCompiler::find_first_unused_input(InferenceEngine::CNNLayerPtr current) { if (current->insData.empty()) return nullptr; @@ -228,13 +231,8 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) split_connection.emplace(id, layerInfoItem); } -void GNAGraphCompiler::SetValidatorTarget(const target::DeviceVersion& target) { - auto temp = limitations::cnn2d::AbstractValidator::Create(target); - cnn2dValidator.reset(temp.release()); -} - bool GNAGraphCompiler::ShouldUseOnlyConv2DGnaIface() const { - return cnn2dValidator && cnn2dValidator->ShouldUseOnlyConv2DGnaIface(); + return m_cnn2d_validator && m_cnn2d_validator->ShouldUseOnlyConv2DGnaIface(); } void GNAGraphCompiler::ValidateCnn2D(const std::string& name, @@ -249,23 +247,23 @@ void GNAGraphCompiler::ValidateCnn2D(const std::string& name, const uint32_t dilH, const uint32_t dilW, OvGnaType inPrecision) const { - if (cnn2dValidator) { - if (cnn2dValidator->ValidateCnn1D(name, - inHeight, - inWidth, - inChannels, - kH, - kW, - kN, - strideH, - strideW, - dilH, - dilW, - inPrecision, - false)) { + if (m_cnn2d_validator) { + if (m_cnn2d_validator->ValidateCnn1D(name, + inHeight, + inWidth, + inChannels, + kH, + kW, + kN, + strideH, + strideW, + dilH, + dilW, + inPrecision, + false)) { return; } - cnn2dValidator + m_cnn2d_validator ->ValidateCnn2D(name, inHeight, inWidth, inChannels, kH, kW, kN, strideH, strideW, dilH, dilW, inPrecision); } else { THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << name; @@ -277,8 +275,8 @@ void GNAGraphCompiler::ValidatePooling2D(const std::string& name, const uint32_t windowW, const uint32_t strideH, const uint32_t strideW) const { - if (cnn2dValidator) { - cnn2dValidator->ValidatePooling2D(name, windowH, windowW, strideH, strideW); + if (m_cnn2d_validator) { + m_cnn2d_validator->ValidatePooling2D(name, windowH, windowW, strideH, strideW); } else { THROW_GNA_EXCEPTION << "No Pooling2D validator found for layer " << name; } @@ -684,17 +682,17 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP // TODO add function // printConvolution2DLayer(convolution); - if (!cnn2dValidator) { + if (!m_cnn2d_validator) { THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << convolution.name; } - cnn2dValidator->ValidateInputPadding(convolution.name, - convolution._padding_y, - convolution._pads_end_y, - convolution._padding_x, - convolution._pads_end_x, - convolution._kernel_y, - convolution._kernel_x); + 
m_cnn2d_validator->ValidateInputPadding(convolution.name, + convolution._padding_y, + convolution._pads_end_y, + convolution._padding_x, + convolution._pads_end_x, + convolution._kernel_y, + convolution._kernel_x); // Check if kernel width needs to be extended to stride width. const auto effective_kernel_width = std::max(convolution._kernel_x, convolution._stride_x); @@ -713,7 +711,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP // have to pad input to let last kernel meets it's corresponding input const auto num_inputs = in_batch * effective_input_width * in_height * in_channels; - uint32_t num_input_padding = ALIGN(num_inputs, limitations::noOfInputsDivisor) - num_inputs; + uint32_t num_input_padding = ALIGN(num_inputs, Limitations::kNoOfInputsDivisor) - num_inputs; const uint32_t filter_n = convolution._out_depth; @@ -813,7 +811,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP // Kernel is extended only for 1D case which allows to add 0-s at the end of the kernel. const auto kernel_pad = - ALIGN(effective_single_kernel_size, limitations::convEachKernelByteAlignment) - effective_single_kernel_size; + ALIGN(effective_single_kernel_size, Limitations::kConvEachKernelByteAlignment) - effective_single_kernel_size; for (uint32_t k = 0; k < convolution._out_depth; k++) { uint8_t* ptr_filt_current = convolution._weights->cbuffer().as() + k * single_kernel_size; auto transposed_part = transposeMatrix(ptr_filt_current, convolution_precision, in_channels, kernelHW); @@ -846,14 +844,15 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { auto input = layer->insData[0].lock(); auto outputs = *layer->outData.begin(); - auto reshaped_dims = Get2DReshapedData(input, limitations::GetMinBatchToFitInBuffer(input), 8)->getDims(); - const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor - : limitations::noOfInputsDivisor; + auto reshaped_dims = Get2DReshapedData(input, Limitations::get_min_batch_to_fit_in_buffer(input), 8)->getDims(); + const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision + ? 
Limitations::kNoOfInputsLowPrecDivisor + : Limitations::kNoOfInputsDivisor; uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_columns_in = reshaped_dims[0]; uint32_t num_rows_out = num_rows_in; uint32_t num_columns_out = num_columns_in; - uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; + uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in; size_t num_data_bytes_out = num_columns_out * (num_rows_out + num_padding) * outputs->getPrecision().size(); size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * input->getPrecision().size(); @@ -1097,7 +1096,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) { auto inputs = layer->insData.begin()->lock(); auto outputs = *layer->outData.begin(); - auto reshaped_dims = Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims(); + auto reshaped_dims = Get2DReshapedData(inputs, Limitations::get_min_batch_to_fit_in_buffer(inputs), 8)->getDims(); uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_columns_in = reshaped_dims[0]; uint32_t num_rows_out = num_rows_in; @@ -1159,7 +1158,7 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) { } // Concat axis validation - if (!limitations::ValidateConvConcatAxis(concatLayer)) { + if (!Limitations::validate_conv_concat_axis(concatLayer)) { std::ostringstream in_dims_oss; auto in_dims = concatLayer->insData[0].lock()->getDims(); std::copy(in_dims.begin(), in_dims.end(), std::ostream_iterator(in_dims_oss, ",")); @@ -1270,10 +1269,10 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) { uint32_t num_columns_in = 1; uint32_t num_rows_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())); - const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision - ? limitations::noOfInputsLowPrecDivisor - : limitations::noOfInputsDivisor; - uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; + const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision + ? Limitations::kNoOfInputsLowPrecDivisor + : Limitations::kNoOfInputsDivisor; + uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in; void* ptr_inputs = nullptr; void* ptr_outputs = nullptr; @@ -1303,7 +1302,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) { InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * 4; size_t num_data_bytes_in = - num_columns_in * ALIGN(num_rows_in, noOfInputsDivisor) * inputs->getPrecision().size(); + num_columns_in * ALIGN(num_rows_in, num_of_inputs_divisor) * inputs->getPrecision().size(); connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0); connectOutput(layer, ptr_outputs, num_data_bytes_out); @@ -1326,8 +1325,9 @@ void GNAGraphCompiler::SlicePrimitive(InferenceEngine::CNNLayerPtr layer) { void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { auto& eltwise = dynamic_cast(*layer.get()); auto quantized = InferenceEngine::getInjectedData(layer); - const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor - : limitations::noOfInputsDivisor; + const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision + ? 
Limitations::kNoOfInputsLowPrecDivisor + : Limitations::kNoOfInputsDivisor; // for eltwise sum/sub in 16-bit precision one input should be 4 bytes and one 2 bytes - detecting that below // the names of variables are left for clarity although not always reflecting the real precision/size @@ -1409,7 +1409,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { uint32_t num_columns_in = 1; uint32_t num_rows_out = num_rows_in; uint32_t num_columns_out = num_columns_in; - uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; + uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in; void* ptr_inputs = nullptr; void* ptr_outputs = nullptr; @@ -1518,7 +1518,6 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) { auto outputs = *layer->outData.begin(); auto input1_precision = quantized ? Precision(Precision::I16) : input_1->getPrecision(); auto input2_precision = quantized ? Precision(Precision::I16) : input_2->getPrecision(); - uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor; auto in_dims = input_1->getDims(); auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front(); @@ -1527,7 +1526,7 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) { const auto out_dims = outputs->getDims(); const auto out_dims_size = ngraph::shape_size(out_dims); uint32_t num_rows_out = InferenceEngine::GetDimFromBack(out_dims, 1); - uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; + uint32_t num_padding = ALIGN(num_rows_in, Limitations::kNoOfInputsDivisor) - num_rows_in; // Gemm gets two inputs void* ptr_input_1 = nullptr; // the first input @@ -1578,7 +1577,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool auto outputs = *layer->outData.begin(); const auto out_dims = outputs->getDims(); Precision inputPrecision; - uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor; + uint32_t num_of_inputs_divisor = Limitations::kNoOfInputsDivisor; if (!quantized) { inputPrecision = inputs->getPrecision(); @@ -1586,11 +1585,11 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool inputPrecision = Precision(Precision::I16); } else { inputPrecision = Precision(Precision::I8); - noOfInputsDivisor = limitations::noOfInputsLowPrecDivisor; + num_of_inputs_divisor = Limitations::kNoOfInputsLowPrecDivisor; } auto input_data = HasTo2DReshapeData(layer) - ? Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8) + ? Get2DReshapedData(inputs, Limitations::get_min_batch_to_fit_in_buffer(inputs), 8) : inputs; auto in_dims = input_data->getDims(); auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front(); @@ -1598,7 +1597,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool uint32_t num_columns_in = batch_size; uint32_t num_rows_out = isDiag ? num_rows_in : InferenceEngine::GetDimFromBack(out_dims, 1); uint32_t num_columns_out = num_columns_in; - uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; + uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in; uint32_t num_padding_out = isDiag ? num_padding : 0; void* ptr_inputs = nullptr; @@ -1803,12 +1802,13 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l auto outputs = *layer->outData.begin(); auto inputs = layer->insData.begin()->lock(); - const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? 
limitations::noOfInputsLowPrecDivisor - : limitations::noOfInputsDivisor; + const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision + ? Limitations::kNoOfInputsLowPrecDivisor + : Limitations::kNoOfInputsDivisor; uint32_t num_columns_in = GetDimFromBack(inputs->getDims(), 2); uint32_t num_rows_out = GetDimFromBack(outputs->getDims(), 1); uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out; - uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; + uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in; auto numRowsPadded = filterLayer->GetParamAsInt("num_rows_padded"); // number of rows we handled by inserting copy layer @@ -1877,7 +1877,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l false); size_t num_data_bytes_out = num_rows_out * num_columns_in * outputs->getPrecision().size(); - size_t num_data_bytes_in = num_columns_in * ALIGN(num_rows_in, noOfInputsDivisor) * inputs->getPrecision().size(); + size_t num_data_bytes_in = + num_columns_in * ALIGN(num_rows_in, num_of_inputs_divisor) * inputs->getPrecision().size(); connectInput(layer, ptr_inputs, num_data_bytes_in, num_rows_copied * inputs->getPrecision().size(), 0); connectOutput(layer, ptr_outputs, num_data_bytes_out); @@ -1940,8 +1941,8 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l auto outputs = *layer->outData.begin(); auto inputs = layer->insData.begin()->lock(); - const auto noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor - : limitations::noOfInputsDivisor; + const auto num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision ? Limitations::kNoOfInputsLowPrecDivisor + : Limitations::kNoOfInputsDivisor; const uint32_t orginalInputSize = InferenceEngine::details::product(std::next(inputs->getDims().begin()), inputs->getDims().end()); const uint32_t orginalOutputSize = @@ -1956,7 +1957,7 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l const auto filterWidth = filterLayer->_kernel_x; const auto minOutputsPerFilter = ALIGN(orginalOutputSize, numberOfFilters) / numberOfFilters; const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth; - const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, noOfInputsDivisor); + const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, num_of_inputs_divisor); auto numOutputs = gna_convolution_layer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride); @@ -2278,14 +2279,15 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) { << std::min(squeezedInputOrder[0], squeezedInputOrder[1]) << " > 8)"; } - const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor - : limitations::noOfInputsDivisor; + const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision + ? 
Limitations::kNoOfInputsLowPrecDivisor + : Limitations::kNoOfInputsDivisor; // now this can be run on GNA if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case - if (ALIGN(squeezedInputOrder[1], noOfInputsDivisor) != squeezedInputOrder[1]) { + if (ALIGN(squeezedInputOrder[1], num_of_inputs_divisor) != squeezedInputOrder[1]) { THROW_GNA_LAYER_EXCEPTION(layer) - << "unsupported permute (row size not a multiple of " << noOfInputsDivisor << ")"; + << "unsupported permute (row size not a multiple of " << num_of_inputs_divisor << ")"; } else { auto& currentComponent = dnnComponents.addComponent(layer->name, "interleave"); dnn->InitInterleaveComponent(currentComponent, @@ -2299,9 +2301,9 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) { } } else { // deinterleave case - if (ALIGN(squeezedInputOrder[0], noOfInputsDivisor) != squeezedInputOrder[0]) { + if (ALIGN(squeezedInputOrder[0], num_of_inputs_divisor) != squeezedInputOrder[0]) { THROW_GNA_LAYER_EXCEPTION(layer) - << "[GNA plugin] unsupported permute (column size not a multiple of " << noOfInputsDivisor << ")"; + << "[GNA plugin] unsupported permute (column size not a multiple of " << num_of_inputs_divisor << ")"; } else { auto& currentComponent = dnnComponents.addComponent(layer->name, "deinterleave"); dnn->InitDeinterleaveComponent(currentComponent, @@ -2317,7 +2319,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) { size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())), - noOfInputsDivisor) * + num_of_inputs_divisor) * outputs->getPrecision().size(); size_t num_data_bytes_in = squeezedInputOrder[0] * squeezedInputOrder[1] * inputs->getPrecision().size(); @@ -2610,12 +2612,12 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // if request for allocation less that realTensorInput - we need to extend request auto minInput = inputs_ptr_->at(prevLayer->name).get_required_size(); if (num_data_bytes_in < minInput) { - const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision - ? limitations::noOfInputsLowPrecDivisor - : limitations::noOfInputsDivisor; + const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision + ? 
Limitations::kNoOfInputsLowPrecDivisor + : Limitations::kNoOfInputsDivisor; log::debug() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" - << ALIGN(minInput, noOfInputsDivisor); - num_data_bytes_in = ALIGN(minInput, noOfInputsDivisor); + << ALIGN(minInput, num_of_inputs_divisor); + num_data_bytes_in = ALIGN(minInput, num_of_inputs_divisor); } // real allocation pointer will be kept in ptr not in ptr_inputs_global diff --git a/src/plugins/intel_gna/src/gna_graph_compiler.hpp b/src/plugins/intel_gna/src/gna_graph_compiler.hpp index 6f6f78962bf..b67a786bfff 100644 --- a/src/plugins/intel_gna/src/gna_graph_compiler.hpp +++ b/src/plugins/intel_gna/src/gna_graph_compiler.hpp @@ -54,20 +54,22 @@ private: uint32_t num_rows, uint32_t num_cols); - std::unique_ptr cnn2dValidator; - bool ShouldUseOnlyConv2DGnaIface() const; + std::shared_ptr m_cnn2d_validator; + public: backend::DnnComponents dnnComponents; MemoryConnection memory_connection; ConcatConnection concat_connection; ConstConnections const_connections; - GNAGraphCompiler(const Config& gna_config); + GNAGraphCompiler(const Config& gna_config, + std::shared_ptr dnn_ptr, + std::shared_ptr inputs_ptr, + std::shared_ptr cnn2d_validator, + std::shared_ptr gna_mem_ptr); void setGNAMemoryPtr(std::shared_ptr gnaMemPtr); - void setDNNPtr(std::shared_ptr dnnPtr); - void setInputsPtr(std::shared_ptr inputsPtr); void fillMemoryConnections(std::unordered_map>& memoryPairs); @@ -93,8 +95,6 @@ public: const uint32_t strideH, const uint32_t strideW) const; - void SetValidatorTarget(const target::DeviceVersion& target); - /** * Connects either memory output, or generic output to a layer * @param layer - layer pointer diff --git a/src/plugins/intel_gna/src/gna_plugin.cpp b/src/plugins/intel_gna/src/gna_plugin.cpp index 552f66fd1d5..f6c514cd945 100644 --- a/src/plugins/intel_gna/src/gna_plugin.cpp +++ b/src/plugins/intel_gna/src/gna_plugin.cpp @@ -29,6 +29,7 @@ #include #include "backend/am_intel_dnn.hpp" +#include "backend/gna_limitations.hpp" #include "common/gna_target.hpp" #include "frontend/model_quantizer.hpp" #include "frontend/scale_factor_calc.hpp" @@ -55,6 +56,7 @@ #include "scale_factor_helper.hpp" #include "serial/gna_model_serial.hpp" +using namespace ov::intel_gna::limitations; using namespace ov::intel_gna::graph_utils; inline uint32_t ToByteSize(const Gna2DataType type) { @@ -357,17 +359,23 @@ void GNAPlugin::PrePostProcess(InferenceEngine::Blob::Ptr input_blob, } } -GNAPlugin::GNAPlugin() : graphCompiler(config) { +GNAPlugin::GNAPlugin() { Init(); UpdateFieldsFromConfig(); InitGNADevice(); + Limitations::init(config.target->get_effective_compile_target()); + InitGNAMemory(); + InitGraphCompiler(); } -GNAPlugin::GNAPlugin(const std::map& configMap) : graphCompiler(config) { +GNAPlugin::GNAPlugin(const std::map& configMap) { Init(); SetConfig(configMap); log::set_log_level(gnaFlags->log_level); InitGNADevice(); + Limitations::init(config.target->get_effective_compile_target()); + InitGNAMemory(); + InitGraphCompiler(); } void GNAPlugin::Init() { @@ -376,27 +384,36 @@ void GNAPlugin::Init() { gnaFlags = std::make_shared(GNAFlags()); inputs_ptr_ = std::make_shared(GnaInputs()); outputs_ = GnaOutputs(); - - graphCompiler.setDNNPtr(dnn); - graphCompiler.setInputsPtr(inputs_ptr_); - requestWorkerPool_ = std::make_shared(); } void GNAPlugin::InitGNADevice() { OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNADevice"); - if (gnaFlags->sw_fp32) { - gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); - } else { + + if 
(!gnaFlags->sw_fp32) { gnadevice = std::make_shared(config.target, gnaFlags->performance_counting, !config.embedded_export_path.empty()); - - gnamem = std::make_shared(memory::GNAAllocator(gnadevice), - gnadevice->getMemAlignment(), - limitations::kMemoryPageSize); } - graphCompiler.setGNAMemoryPtr(gnamem); +} + +void GNAPlugin::InitGNAMemory() { + OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNAMemory"); + + if (gnaFlags->sw_fp32) { + gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); + } else { + gnamem = std::make_shared(memory::GNAAllocator(gnadevice), + Limitations::get_instance()->get_memory_alignment(), + Limitations::kMemoryPageSize); + } +} + +void GNAPlugin::InitGraphCompiler() { + OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGraphCompiler"); + + m_graph_compiler = std::make_shared( + GNAGraphCompiler(config, dnn, inputs_ptr_, Limitations::get_instance()->get_cnn_validator(), gnamem)); } void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network) { @@ -428,8 +445,7 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network GNAFakeQuantizeLayer fqLayer(next_layer); auto inputRange = fqLayer.getInputRange(); auto outputRange = fqLayer.getOutputRange(); - if (inputRange.second.size() != 1 || inputRange.second.size() != 1 || outputRange.second.size() != 1 || - outputRange.second.size() != 1) { + if (inputRange.second.size() != 1 || outputRange.second.size() != 1) { THROW_GNA_LAYER_EXCEPTION(next_layer) << "unsupported, per-channel quantization for input layer : " << input.second->name(); } @@ -552,12 +568,12 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN }; // probing gna_primitives - auto irLayerAvatar = std::find_if(graphCompiler.dnnComponents.components.begin(), - graphCompiler.dnnComponents.components.end(), + auto irLayerAvatar = std::find_if(m_graph_compiler->dnnComponents.components.begin(), + m_graph_compiler->dnnComponents.components.end(), [&layer](const backend::DnnComponents::storage_type::value_type& value) { return value.name == layer->name; }); - if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) { + if (irLayerAvatar != m_graph_compiler->dnnComponents.components.end()) { initOutput(irLayerAvatar->dnnComponent.orientation_out, irLayerAvatar->dnnComponent.num_bytes_per_output, irLayerAvatar->dnnComponent.num_rows_out, @@ -567,8 +583,8 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN // probing concatInfo if (LayerInfo(layer).isConcat()) { - auto concatConnection = graphCompiler.concat_connection.find(layer->name); - if (concatConnection != graphCompiler.concat_connection.end()) { + auto concatConnection = m_graph_compiler->concat_connection.find(layer->name); + if (concatConnection != m_graph_compiler->concat_connection.end()) { auto precision = layer->outData.front()->getPrecision().size(); initOutput(kDnnInterleavedOrientation, precision, @@ -581,8 +597,8 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN // probing a constant info, for constant trivial networks support if (LayerInfo(layer).isConst()) { auto const_blob = layer->blobs["custom"]; - auto constConnection = graphCompiler.const_connections.find(layer->name); - if (constConnection != graphCompiler.const_connections.end()) { + auto constConnection = m_graph_compiler->const_connections.find(layer->name); + if (constConnection != m_graph_compiler->const_connections.end()) { initOutput(kDnnInterleavedOrientation, 
layer->outData.front()->getPrecision().size(), const_blob->size(), @@ -696,16 +712,13 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) { _network_name = _network.getName(); std::shared_ptr convertedNetwork; - const auto effectiveCompileTarget = config.target->get_effective_compile_target(); - graphCompiler.SetValidatorTarget(effectiveCompileTarget); - auto transformer = TransformationsPipeline(config); if (_network.getFunction()) { CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network); auto model = clonedNetwork.getFunction(); transformer.apply(model, &m_input_output_subgraphs); - limitations::check_all_ops_supported(model, effectiveCompileTarget, config.gnaPrecision); + Limitations::get_instance()->check_all_ops_supported(model, config.gnaPrecision); convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(model, clonedNetwork); } IE_SUPPRESS_DEPRECATED_START @@ -717,7 +730,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) { // Check the network std::string error; - if (!limitations::AreLayersSupported(network, error)) { + if (!Limitations::are_layers_supported(network, error)) { THROW_GNA_EXCEPTION << error.c_str(); } @@ -805,17 +818,17 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) { memoryPairs[id][generic->GetParamAsInt("index")] = layer; continue; } else if (layerInfo.isConcat()) { - graphCompiler.fillConcatConnections(layer); + m_graph_compiler->fillConcatConnections(layer); } else if (layerInfo.isSplit() || layerInfo.isSlice()) { - graphCompiler.fillSplitConnections(layer); + m_graph_compiler->fillSplitConnections(layer); } sortedNoMem.push_back(layer); } // fill in extra storage with memory layers - graphCompiler.fillMemoryConnections(memoryPairs); + m_graph_compiler->fillMemoryConnections(memoryPairs); - if (!graphCompiler.memory_connection.empty() && gnaFlags->num_requests != 1) { + if (!m_graph_compiler->memory_connection.empty() && gnaFlags->num_requests != 1) { gnaFlags->num_requests = 1; } @@ -837,17 +850,17 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) { // Creating Layer primitives for (auto& layer : sortedNoMem) { - graphCompiler.CreateLayerPrimitive(layer); + m_graph_compiler->CreateLayerPrimitive(layer); } for (auto& inputLayer : inputLayers) { auto layerInfo = LayerInfo(inputLayer); if (layerInfo.isInput() && 0 == inputs_ptr_->at(inputLayer->name).get_allocated_size()) { - graphCompiler.connectOutput(inputLayer, &inputs_ptr_->at(inputLayer->name).ptrs.front(), 0); + m_graph_compiler->connectOutput(inputLayer, &inputs_ptr_->at(inputLayer->name).ptrs.front(), 0); } } - if (graphCompiler.dnnComponents.components.empty()) { + if (m_graph_compiler->dnnComponents.components.empty()) { log::warning() << "No GNA primitives created based on topology. 
This might indicate trivial topology\n"; trivialTopology = true; } @@ -861,7 +874,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) { // Memory layers are not dnnComponents hence we need to make switch with identity layer if (outLayer->type == "Memory") { // traverse memory connection to find corresponding output_memory - for (auto&& memConnection : graphCompiler.memory_connection) { + for (auto&& memConnection : m_graph_compiler->memory_connection) { if (memConnection.second.getInput()->name == outLayer->name) { // if connection is found, replace memory input layer with memory output layer outLayer = memConnection.second.getOutput(); @@ -909,11 +922,11 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) { dnn->Init(gnamem.get(), gnaFlags->sw_fp32 ? kDnnFloat : kDnnInt, 1); // TODO: this copy is unneeded; in fact, we can directly create gna structs from list - auto execOrder = graphCompiler.dnnComponents.getExecutionOrder(); + auto execOrder = m_graph_compiler->dnnComponents.getExecutionOrder(); dnn->component.insert(dnn->component.begin(), execOrder.begin(), execOrder.end()); // in fp32 mode last PWL cannot be computed without that - if (!graphCompiler.dnnComponents.components.empty()) { + if (!m_graph_compiler->dnnComponents.components.empty()) { dnn->InitActiveList(NULL); } @@ -965,7 +978,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) { for (auto& inputLayer : inputLayers) { if (LayerInfo(inputLayer).isInput()) { ov::intel_gna::helpers::updateModelInputOrientationWithoutConvolution(*inputLayer, - graphCompiler.dnnComponents, + m_graph_compiler->dnnComponents, *inputs_ptr_); } } @@ -976,7 +989,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) { if (outLayer && LayerInfo(outLayer).isOutput()) { ov::intel_gna::helpers::updateModelOutputOrientation(outPort.first, outLayer->name, - graphCompiler.dnnComponents, + m_graph_compiler->dnnComponents, outputs_); } } @@ -1101,7 +1114,7 @@ void GNAPlugin::DumpXNNToFile() const { uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap& inputs, InferenceEngine::BlobMap& result) { auto freeWorker = requestWorkerPool_->findFreeModelWorker(); if (freeWorker == nullptr) { - if (!graphCompiler.memory_connection.empty()) { + if (!m_graph_compiler->memory_connection.empty()) { Wait(requestWorkerPool_->firstWorker().representingIndex()); freeWorker = requestWorkerPool_->findFreeModelWorker(); if (freeWorker == nullptr) { @@ -1412,7 +1425,7 @@ RequestStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) { } void GNAPlugin::Reset() { - graphCompiler.Reset(); + m_graph_compiler->Reset(); } bool GNAPlugin::Infer(const InferenceEngine::Blob& input, InferenceEngine::Blob& output) { @@ -1479,9 +1492,9 @@ Blob::Ptr GNAPlugin::GetInputBlob(const std::string& name, InferenceEngine::Prec } std::vector GNAPlugin::QueryState() { - if (memoryStates.size() != graphCompiler.memory_connection.size()) { + if (memoryStates.size() != m_graph_compiler->memory_connection.size()) { memoryStates.clear(); - for (auto& connection : graphCompiler.memory_connection) { + for (auto& connection : m_graph_compiler->memory_connection) { auto state = std::make_shared(connection.first, std::make_shared(connection.second)); @@ -1575,7 +1588,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i GNAMemoryLayer memoryLayer(nullptr, nullptr, gnaFlags->sw_fp32 ? 
4 : 2); std::string name; std::tie(memoryLayer.gna_ptr, memoryLayer.reserved_size, name, memoryLayer.scale_factor) = memory; - graphCompiler.memory_connection.emplace_back(make_pair(name, memoryLayer)); + m_graph_compiler->memory_connection.emplace_back(make_pair(name, memoryLayer)); } // TODO update documenation to allow exporting tlv with importing cep only for sue creek @@ -1607,7 +1620,7 @@ void GNAPlugin::Export(std::ostream& outStream) { .SetInputRotation(transpose_inputs_info) .SetOutputRotation(transpose_outputs_info); - for (auto&& memoryConnection : graphCompiler.memory_connection) { + for (auto&& memoryConnection : m_graph_compiler->memory_connection) { auto state = std::make_shared(memoryConnection.first, std::make_shared(memoryConnection.second)); @@ -1691,7 +1704,6 @@ InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork( Config qn_config(config); qn_config.UpdateFromMap(config_map); - const auto effectiveCompileTarget = qn_config.target->get_effective_compile_target(); auto model = network.getFunction(); if (model) { auto supported = GetSupportedNodes( @@ -1700,7 +1712,8 @@ InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork( TransformationsPipeline(qn_config).apply(model); }, [&](const std::shared_ptr& op) { - return limitations::is_op_supported(op, effectiveCompileTarget, qn_config.gnaPrecision); + const auto res = Limitations::get_instance()->is_op_supported(op, qn_config.gnaPrecision); + return res; }); for (auto&& op_name : supported) { res.supportedLayersMap.emplace(op_name, GetName()); diff --git a/src/plugins/intel_gna/src/gna_plugin.hpp b/src/plugins/intel_gna/src/gna_plugin.hpp index 817f94fd1d6..587e72f5575 100644 --- a/src/plugins/intel_gna/src/gna_plugin.hpp +++ b/src/plugins/intel_gna/src/gna_plugin.hpp @@ -47,8 +47,7 @@ protected: std::shared_ptr gnamem; std::shared_ptr inputs_ptr_; GnaOutputs outputs_; - - GNAGraphCompiler graphCompiler; + std::shared_ptr m_graph_compiler; uint32_t activeLayerIndex = 0xffffffff; // TODO: transpose_inputs_info and transpose_outputs_info should be moved to GNAModelSerial class when ngraph @@ -189,6 +188,8 @@ protected: void Init(); void InitGNADevice(); + void InitGNAMemory(); + void InitGraphCompiler(); void DumpXNNToFile() const; /** diff --git a/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp b/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp index 1aa5efdcebc..93b049297ab 100644 --- a/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp +++ b/src/plugins/intel_gna/src/gna_transformations_pipeline.cpp @@ -83,11 +83,9 @@ void TransformationsPipeline::apply(const std::shared_ptr& model, manager.register_pass(); manager.register_pass(); manager.register_pass(); - manager.register_pass(effective_compile_target, - config.gnaPrecision); - manager.register_pass(effective_compile_target, - config.gnaPrecision); - manager.register_pass(effective_compile_target, config.gnaPrecision); + manager.register_pass(config.gnaPrecision); + manager.register_pass(config.gnaPrecision); + manager.register_pass(config.gnaPrecision); if (!has_convolution) { manager.register_pass(); manager.register_pass(); diff --git a/src/plugins/intel_gna/src/layers/gna_layer_info.hpp b/src/plugins/intel_gna/src/layers/gna_layer_info.hpp index b3d2ccdd799..f44f7d4e2e8 100644 --- a/src/plugins/intel_gna/src/layers/gna_layer_info.hpp +++ b/src/plugins/intel_gna/src/layers/gna_layer_info.hpp @@ -385,7 +385,7 @@ public: auto cropLayer = dynamic_cast(layer); if (cropLayer != nullptr && !cropLayer->offset.empty()) { const auto 
crop_params = GetCropParams(cropLayer); - return limitations::isCropAffinedOffset(crop_params.start_offset); + return limitations::Limitations::get_instance()->is_crop_affined_offset(crop_params.start_offset); } return false; } diff --git a/src/plugins/intel_gna/src/layers/gna_split_layer.hpp b/src/plugins/intel_gna/src/layers/gna_split_layer.hpp index a6285fe480d..33468fe8b32 100644 --- a/src/plugins/intel_gna/src/layers/gna_split_layer.hpp +++ b/src/plugins/intel_gna/src/layers/gna_split_layer.hpp @@ -50,7 +50,7 @@ public: // @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size inline std::vector GetAlignedSplitSizes(uint32_t totalSize, uint32_t maxSplitSize, - uint32_t alignment = limitations::inputByteAlignment) { + uint32_t alignment = limitations::Limitations::kInputByteAlignment) { std::vector splitSizes; uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment); uint32_t usedSize = 0; @@ -73,7 +73,7 @@ inline std::pair> AlignedSplitSizesPerAxis(Infere IE_ASSERT(firstValuableDim != std::end(dims)); auto splittedElementsSize = *firstValuableDim; auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim); - auto alignment = limitations::inputByteAlignment; + auto alignment = limitations::Limitations::kInputByteAlignment; // Split output size should be multiple by 64 to avoid align filters insertion, // but we need to check if our input size to split exceeds 64; if not we can always @@ -85,9 +85,10 @@ inline std::pair> AlignedSplitSizesPerAxis(Infere return {splittedDimIx, splitSizes}; } } - splitSizes = GetAlignedSplitSizes(splittedElementsSize, - limitations::bufferMaxSize * splittedElementsSize / totalElementsSize, - alignment); + splitSizes = + GetAlignedSplitSizes(splittedElementsSize, + limitations::Limitations::kBufferMaxSize * splittedElementsSize / totalElementsSize, + alignment); return {splittedDimIx, splitSizes}; } diff --git a/src/plugins/intel_gna/src/log/dump.cpp b/src/plugins/intel_gna/src/log/dump.cpp index a5c86a04076..b1044661160 100644 --- a/src/plugins/intel_gna/src/log/dump.cpp +++ b/src/plugins/intel_gna/src/log/dump.cpp @@ -15,11 +15,14 @@ #include #include +#include "backend/gna_limitations.hpp" #include "gna2-model-api.h" #include "gna2_model_helper.hpp" #include "gna_device.hpp" #include "log.hpp" +using namespace ov::intel_gna::limitations; + namespace ov { namespace intel_gna { namespace dump { @@ -486,8 +489,9 @@ void DumpGna2Model(const Gna2Model& gnaModel, } dumpFile << "\tOperand " << j << " (" << GetOperandName(operation.Type, j) << ")" << " type: " << GetOperandType(operand.Type) << " shape: " << GetSimpleString(operand.Shape) - << " tag: " << foundName << " offset: " << offset - << " size: " << Gna2RoundUpTo64(GetGnaShapeSize(operand.Shape, GetTypeByteSize(operand.Type))) + << " tag: " << foundName << " offset: " << offset << " size: " + << Gna2RoundUp(GetGnaShapeSize(operand.Shape, GetTypeByteSize(operand.Type)), + Limitations::get_instance()->get_memory_alignment()) << " data: " << operand.Data << " baseAlloc: " << foundPtr << " layout: "; DumpCharArray(dumpFile, operand.Layout, GNA2_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS); diff --git a/src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp b/src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp index 5a7ebcc067a..91ed705286c 100644 --- a/src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp +++ b/src/plugins/intel_gna/src/optimizer/gna_pass_manager.cpp @@ -50,6 +50,7 @@ using namespace 
InferenceEngine::details; using namespace ov::intel_gna::frontend; using namespace ov::intel_gna::common; using namespace ov::intel_gna::pre_post_processing; +using namespace ov::intel_gna::limitations; namespace ov { namespace intel_gna { @@ -149,11 +150,12 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer, return LayerInfo(ptr).isNonValuesChangable(); }); IE_ASSERT(inputLayer != nullptr); - size_t weightsSize = - LayerInfo(prevLayer).has32BOutput() - ? nextLayer->outData[0]->getDims().back() - : Get2DReshapedData(nextLayer->outData[0], limitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8) - ->getDims()[1]; + size_t weightsSize = LayerInfo(prevLayer).has32BOutput() + ? nextLayer->outData[0]->getDims().back() + : Get2DReshapedData(nextLayer->outData[0], + Limitations::get_min_batch_to_fit_in_buffer(nextLayer->outData[0]), + 8) + ->getDims()[1]; std::vector weightsValues(weightsSize, fillValue); IE_ASSERT(diagLayer != nullptr); diagLayer->_weights = make_shared_blob(TensorDesc(nextLayer->outData[0]->getTensorDesc().getPrecision(), @@ -1531,19 +1533,19 @@ void InsertSplitAligningFilterPass::run() { // encodes offset to beginning of split layer input filterLayer->params["offset"] = - std::to_string(aligned64_offset / limitations::bytesPerSplitElement); + std::to_string(aligned64_offset / Limitations::kBytesPerSplitElement); auto dims = splitOutput->getTensorDesc().getDims(); if (dims.size() > 3) { THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size(); } const auto offsetOfUnalignment = - (currentOffset - aligned64_offset) / limitations::bytesPerSplitElement; + (currentOffset - aligned64_offset) / Limitations::kBytesPerSplitElement; // TODO consider to use a different number of filters do decrese the number of trailing zeros // (additionalPaddingOfFilter) - const auto numberOfFilters = limitations::convMinFiltersNum; + const auto numberOfFilters = Limitations::kConvMinFiltersNum; const auto filterSize = - ALIGN(offsetOfUnalignment + numberOfFilters, limitations::convFilterSizeDivider); + ALIGN(offsetOfUnalignment + numberOfFilters, Limitations::kConvFilterSizeDivider); // filterWeights: numberOfFilters X (offsetOfUnalignment + additionalPaddingOfFilter + // numberOfFilters) offsetOfUnalignment - the leading zeros in the filter @@ -1598,7 +1600,7 @@ void InsertSplitAligningFilterPass::run() { } // search data that starts from unaligned location - currentOffset += outputSize * limitations::bytesPerSplitElement; + currentOffset += outputSize * Limitations::kBytesPerSplitElement; splitOutIndex++; } } @@ -1636,7 +1638,7 @@ void EltwiseSplitOverChannelsPass::run() { auto oData = l->outData.front(); auto oDims = oData->getDims(); auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims)); - if (totalElementsSize <= limitations::bufferMaxSize) { + if (totalElementsSize <= Limitations::kBufferMaxSize) { continue; } auto splitSizesPerAxis = AlignedSplitSizesPerAxis(oDims); @@ -1747,9 +1749,10 @@ void SubstituteScaleShiftBroadCastPass::run() { if (was_reshaped) { dataDims = reshaped_data[insData->getName()]; } else { - dataDims = HasTo2DReshapeData(l) - ? Get2DReshapedData(insData, limitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() - : insData->getDims(); + dataDims = + HasTo2DReshapeData(l) + ? 
Get2DReshapedData(insData, Limitations::get_min_batch_to_fit_in_buffer(insData), 8)->getDims() + : insData->getDims(); } if (dataDims.size() <= 2) { diff --git a/src/plugins/intel_gna/src/runtime/cnn.cpp b/src/plugins/intel_gna/src/runtime/cnn.cpp index ba3ea09a3ba..0a382b869bd 100644 --- a/src/plugins/intel_gna/src/runtime/cnn.cpp +++ b/src/plugins/intel_gna/src/runtime/cnn.cpp @@ -17,6 +17,7 @@ #include "log/debug.hpp" using namespace ov::intel_gna::gna_convolution_layer; +using namespace ov::intel_gna::limitations; void CNNFilter32(intel_dnn_component_t* component) { auto filters = reinterpret_cast(component->op.conv1D.ptr_filters); @@ -306,7 +307,7 @@ void CNN2DFilter32(intel_dnn_component_t* component) { } } // kernel padded to 16B = 4 * sizeof(float) - kernelIndex += ALIGN(kh * kw * kc, ov::intel_gna::limitations::convEachKernelByteAlignment / sizeof(float)); + kernelIndex += ALIGN(kh * kw * kc, Limitations::kConvEachKernelByteAlignment / sizeof(float)); } } diff --git a/src/plugins/intel_gna/src/transformations/convert_matmul_to_pointwise_convolution.cpp b/src/plugins/intel_gna/src/transformations/convert_matmul_to_pointwise_convolution.cpp index 1dcf7241590..2e1cbcb4f97 100644 --- a/src/plugins/intel_gna/src/transformations/convert_matmul_to_pointwise_convolution.cpp +++ b/src/plugins/intel_gna/src/transformations/convert_matmul_to_pointwise_convolution.cpp @@ -15,6 +15,7 @@ using namespace ov::intel_gna; using namespace ov::intel_gna::pass; +using namespace ov::intel_gna::limitations; static bool BiasValidation(const ngraph::Output& output) { auto bias_output_shape = output.get_node()->get_output_shape(0); @@ -49,9 +50,9 @@ static std::tuple VerifyAndGetConvParams( const uint32_t width = input1_shape.front(); const uint32_t in_channels = input2_shape.back(); const uint32_t out_channels = input2_shape.front(); - if (input1_shape.front() <= limitations::affineMaxBatchSize || - out_channels % limitations::convFiltersNumDivider != 0 || out_channels > limitations::convMaxFiltersNum || - in_channels > limitations::convFilterMaxSize) { + if (input1_shape.front() <= Limitations::kAffineMaxBatchSize || + out_channels % Limitations::kConvFiltersNumDivider != 0 || out_channels > Limitations::kConvMaxFiltersNum || + in_channels > Limitations::kConvFilterMaxSize) { return std::make_tuple(false, 0, 0, 0); } diff --git a/src/plugins/intel_gna/src/transformations/decompose_2d_convolution.cpp b/src/plugins/intel_gna/src/transformations/decompose_2d_convolution.cpp index a4be3baa726..baa38c1a51f 100644 --- a/src/plugins/intel_gna/src/transformations/decompose_2d_convolution.cpp +++ b/src/plugins/intel_gna/src/transformations/decompose_2d_convolution.cpp @@ -20,6 +20,7 @@ namespace ov { namespace intel_gna { using namespace target; +using namespace limitations; namespace pass { using namespace helper; @@ -55,7 +56,7 @@ static bool VerifyAndGetConvData(std::shared_ptr co size_t filter_height = filters.get_shape()[2]; size_t filter_width = filters.get_shape()[3]; - if (filter_width > limitations::copyMaxGrouping || filter_height > limitations::copyMaxGrouping) { + if (filter_width > Limitations::kCopyMaxGrouping || filter_height > Limitations::kCopyMaxGrouping) { return false; } @@ -76,7 +77,7 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptrget_auto_pad() != ngraph::op::PadType::EXPLICIT || max_pool->get_pads_begin() != ngraph::Shape({0, 0}) || max_pool->get_pads_end() != ngraph::Shape({0, 0}))) || pool_filter.size() != 2 || pool_strides.size() != 2 || pool_filter[0] > 1 || 
pool_strides[0] > 1 || - pool_filter[0] > limitations::maxPoolMaxWindowSize) + pool_filter[0] > Limitations::kMaxPoolMaxWindowSize) return false; graph_data.pool_size_width = pool_filter[1]; @@ -84,40 +85,39 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptrget_cnn_validator(); + if (!cnn2dValidatorPtr) { return false; } - const auto& cnn2dValidator = *cnn2dValidatorPtr; - const auto cnnIsValid = cnn2dValidator.ValidateCnn2D(graph_data.conv->get_friendly_name(), - conv_data.input_height, - conv_data.input_width, - conv_data.input_channel_count, - conv_data.filter_height, - conv_data.filter_width, - conv_data.filter_channel_count, - conv_data.filter_stride_height, - conv_data.filter_stride_width, - conv_data.filter_dilation_height, - conv_data.filter_dilation_width, - OvGnaTypeIntFromBytes(gnaPrecision.size()), - false); + const auto cnnIsValid = cnn2dValidatorPtr->ValidateCnn2D(graph_data.conv->get_friendly_name(), + conv_data.input_height, + conv_data.input_width, + conv_data.input_channel_count, + conv_data.filter_height, + conv_data.filter_width, + conv_data.filter_channel_count, + conv_data.filter_stride_height, + conv_data.filter_stride_width, + conv_data.filter_dilation_height, + conv_data.filter_dilation_width, + OvGnaTypeIntFromBytes(gnaPrecision.size()), + false); if (!cnnIsValid) { return false; } if (!graph_data.max_pool) { return true; } - const auto poolingValid = cnn2dValidator.ValidatePooling2D(graph_data.conv->get_friendly_name(), - graph_data.max_pool->get_kernel()[0], - graph_data.max_pool->get_kernel()[1], - graph_data.max_pool->get_strides()[0], - graph_data.max_pool->get_strides()[1], - false); + const auto poolingValid = cnn2dValidatorPtr->ValidatePooling2D(graph_data.conv->get_friendly_name(), + graph_data.max_pool->get_kernel()[0], + graph_data.max_pool->get_kernel()[1], + graph_data.max_pool->get_strides()[0], + graph_data.max_pool->get_strides()[1], + false); return poolingValid; } @@ -126,7 +126,7 @@ static size_t CalculateConvCount(const ConvData& conv_data) { size_t conv_count = 1; size_t total_factorized_conv_channel_count = (conv_data.input_channel_count * conv_data.filter_height * conv_data.filter_width); - while (total_factorized_conv_channel_count / conv_count > limitations::convFilterMaxSize || + while (total_factorized_conv_channel_count / conv_count > Limitations::kConvFilterMaxSize || total_factorized_conv_channel_count % conv_count != 0 || conv_data.filter_channel_count % conv_count != 0) conv_count++; @@ -139,7 +139,7 @@ static bool ShouldDecompose(GraphData& graph_data, const ConvData& conv_data) { // Concat (copy) layer limitation allows to split up to a certain limit // Currently we are able to split only convolutions without pooling in horizontal dimension - if (graph_data.conv_count > limitations::copyMaxGrouping || + if (graph_data.conv_count > Limitations::kCopyMaxGrouping || ((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1)) return false; @@ -561,8 +561,7 @@ static void Decompose(const GraphData& graph_data, ConvData& conv_data) { conv_result->set_friendly_name(conv_result_name); } -static bool Convert(const DeviceVersion& compile_target, - const InferenceEngine::Precision& gnaPrecision, +static bool Convert(const InferenceEngine::Precision& gnaPrecision, std::shared_ptr leading_transpose, std::shared_ptr fq_filters, std::shared_ptr conv, @@ -598,7 +597,7 @@ static bool Convert(const DeviceVersion& compile_target, return false; // If compile target is GNA 3.0 and the 
convolution is supported on it, then skip decomposition - if (GNA30SupportedConv(compile_target, gnaPrecision, graph_data, conv_data)) + if (GNA30SupportedConv(gnaPrecision, graph_data, conv_data)) return false; // We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) @@ -618,7 +617,7 @@ static bool Convert(const DeviceVersion& compile_target, return true; } -Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const InferenceEngine::Precision& gnaPrecision) { +Decompose2DConv::Decompose2DConv(const InferenceEngine::Precision& gnaPrecision) { MATCHER_SCOPE(Decompose2DConv); auto const_input = ngraph::pattern::wrap_type(); @@ -735,8 +734,7 @@ Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const Infe } } - return Convert(compile_target, - gnaPrecision, + return Convert(gnaPrecision, pattern_map.at(leading_transpose).get_node_shared_ptr(), fq_filters_node, pattern_map.at(conv).get_node_shared_ptr(), @@ -755,8 +753,7 @@ Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const Infe this->register_matcher(m, callback); } -Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const DeviceVersion& compile_target, - const InferenceEngine::Precision& gnaPrecision) { +Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const InferenceEngine::Precision& gnaPrecision) { MATCHER_SCOPE(Decompose2DConvTransposedWithBias); auto const_input_i64 = @@ -781,8 +778,7 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const Devic pattern_map.at(bias).get_node_shared_ptr()))) return false; - return Convert(compile_target, - gnaPrecision, + return Convert(gnaPrecision, pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, pattern_map.at(conv).get_node_shared_ptr(), @@ -802,7 +798,6 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const Devic } Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF( - const DeviceVersion& compile_target, const InferenceEngine::Precision& gnaPrecision) { MATCHER_SCOPE(Decompose2DConvTransposedWithBiasAF); @@ -836,8 +831,7 @@ Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF( pattern_map.at(bias).get_node_shared_ptr()))) return false; - return Convert(compile_target, - gnaPrecision, + return Convert(gnaPrecision, pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, pattern_map.at(conv).get_node_shared_ptr(), diff --git a/src/plugins/intel_gna/src/transformations/decompose_2d_convolution.hpp b/src/plugins/intel_gna/src/transformations/decompose_2d_convolution.hpp index 78d14f73630..1aeb9070295 100644 --- a/src/plugins/intel_gna/src/transformations/decompose_2d_convolution.hpp +++ b/src/plugins/intel_gna/src/transformations/decompose_2d_convolution.hpp @@ -35,7 +35,7 @@ namespace pass { class Decompose2DConv : public ngraph::pass::MatcherPass { public: OPENVINO_RTTI("Decompose2DConv", "0"); - Decompose2DConv(const target::DeviceVersion& compile_target, const InferenceEngine::Precision& gnaPrecision); + Decompose2DConv(const InferenceEngine::Precision& gnaPrecision); }; /** @@ -56,8 +56,7 @@ public: class Decompose2DConvTransposedWithBias : public ngraph::pass::MatcherPass { public: OPENVINO_RTTI("Decompose2DConvTransposedWithBias", "0"); - Decompose2DConvTransposedWithBias(const target::DeviceVersion& compile_target, - const InferenceEngine::Precision& gnaPrecision); + Decompose2DConvTransposedWithBias(const InferenceEngine::Precision& gnaPrecision); }; /** @@ -80,8 
+79,7 @@ public: class Decompose2DConvTransposedWithBiasAF : public ngraph::pass::MatcherPass { public: OPENVINO_RTTI("Decompose2DConvTransposedWithBiasAF", "0"); - Decompose2DConvTransposedWithBiasAF(const target::DeviceVersion& compile_target, - const InferenceEngine::Precision& gnaPrecision); + Decompose2DConvTransposedWithBiasAF(const InferenceEngine::Precision& gnaPrecision); }; } // namespace pass diff --git a/src/plugins/intel_gna/src/transformations/decompose_mvn.cpp b/src/plugins/intel_gna/src/transformations/decompose_mvn.cpp index 6b1d088f240..bb3be6a2654 100644 --- a/src/plugins/intel_gna/src/transformations/decompose_mvn.cpp +++ b/src/plugins/intel_gna/src/transformations/decompose_mvn.cpp @@ -13,6 +13,7 @@ #include "backend/gna_limitations.hpp" using namespace ngraph; +using namespace ov::intel_gna::limitations; namespace ov { namespace intel_gna { @@ -81,7 +82,7 @@ static bool GetVerifiedMVNData(const std::shared_ptr mvn, MVNData& // Check if average must be split mvn_data.num_parts = 1; - while (mvn_data.W / mvn_data.num_parts > limitations::convFilterMaxSize) { + while (mvn_data.W / mvn_data.num_parts > Limitations::kConvFilterMaxSize) { mvn_data.num_parts *= 2; } diff --git a/src/plugins/intel_gna/src/transformations/handle_transposes_around_matmul.cpp b/src/plugins/intel_gna/src/transformations/handle_transposes_around_matmul.cpp index 249386ebc1c..4d4e364dae0 100644 --- a/src/plugins/intel_gna/src/transformations/handle_transposes_around_matmul.cpp +++ b/src/plugins/intel_gna/src/transformations/handle_transposes_around_matmul.cpp @@ -16,6 +16,7 @@ #include "backend/gna_limitations.hpp" using namespace ov::intel_gna::pass; +using namespace ov::intel_gna::limitations; namespace { @@ -160,7 +161,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() { } if (prev_node) { - if (limitations::IsTransposeSupported(prev_node->get_output_shape(0))) { + if (Limitations::is_transpose_supported(prev_node->get_output_shape(0))) { InsertTranspose(prev_node, matmul_node->get_friendly_name(), true); } } @@ -170,7 +171,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() { auto iter = pattern_map.find(fq); if (iter != pattern_map.end() || (iter = pattern_map.find(constant)) != pattern_map.end()) { auto prev_node = iter->second.get_node_shared_ptr(); - if (limitations::IsTranspose2d(prev_node->get_output_shape(0))) { + if (Limitations::is_transpose_2d(prev_node->get_output_shape(0))) { InsertTranspose(prev_node, prev_node->get_friendly_name(), true); } } @@ -187,7 +188,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() { } if (prev_node) { - if (limitations::IsTransposeSupported(prev_node->get_output_shape(0))) { + if (Limitations::is_transpose_supported(prev_node->get_output_shape(0))) { InsertTranspose(prev_node, matmul_node->get_friendly_name(), true); } } @@ -243,7 +244,7 @@ HandleTransposeAfterMatMul::HandleTransposeAfterMatMul() { ReplaceTransposeWithReshape(transpose_it->second.get_node_shared_ptr()); } else { auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr(); - if (!limitations::IsTransposeSupported(reshape_node->get_input_shape(0))) + if (!Limitations::is_transpose_supported(reshape_node->get_input_shape(0))) return false; auto iter = pattern_map.find(act); if (iter == pattern_map.end() && (iter = pattern_map.find(fq2)) == pattern_map.end() && diff --git a/src/plugins/intel_gna/src/transformations/remove_in_out_processing.cpp b/src/plugins/intel_gna/src/transformations/remove_in_out_processing.cpp index a8413dafc5a..9fadcbc28fd 
100644 --- a/src/plugins/intel_gna/src/transformations/remove_in_out_processing.cpp +++ b/src/plugins/intel_gna/src/transformations/remove_in_out_processing.cpp @@ -4,6 +4,7 @@ #include "transformations/remove_in_out_processing.hpp" +#include "backend/gna_limitations.hpp" #include "common/graph_utils.hpp" #include "openvino/cc/pass/itt.hpp" #include "openvino/opsets/opset1.hpp" @@ -17,6 +18,7 @@ using namespace ov::opset10; using namespace ov::intel_gna::pass; +using namespace ov::intel_gna::limitations; namespace { @@ -29,7 +31,7 @@ inline bool is_preprocessing_layer_not_supported(std::shared_ptr& laye // Verify that transpose layer cannot be executed on GNA if (std::dynamic_pointer_cast(layer)) { - return !limitations::is_transpose_supported(layer); + return !Limitations::is_transpose_supported(layer); } return false; diff --git a/src/plugins/intel_gna/src/transformations/split_convolution_with_large_buffer_size.cpp b/src/plugins/intel_gna/src/transformations/split_convolution_with_large_buffer_size.cpp index 4d4cd44fb3d..64a26489232 100644 --- a/src/plugins/intel_gna/src/transformations/split_convolution_with_large_buffer_size.cpp +++ b/src/plugins/intel_gna/src/transformations/split_convolution_with_large_buffer_size.cpp @@ -14,6 +14,8 @@ #include "layers/gna_convolution_layer.hpp" #include "layers/gna_split_layer.hpp" +using namespace ov::intel_gna::limitations; + namespace ov { namespace intel_gna { namespace pass { @@ -56,13 +58,13 @@ static bool Convert(std::shared_ptr conv, std::end(conv->get_input_shape(0)), size_t(1), std::multiplies()); - if (input_size <= limitations::bufferMaxSize) { + if (input_size <= Limitations::kBufferMaxSize) { return false; } auto& input = conv->get_input_shape(0); uint32_t width = input.back(); uint32_t in_channels = input.at(1); - auto split_sizes = GetAlignedSplitSizes(width, limitations::bufferMaxSize / in_channels); + auto split_sizes = GetAlignedSplitSizes(width, Limitations::kBufferMaxSize / in_channels); IE_ASSERT(split_sizes.size() > 1); std::vector split_sizes_casted(split_sizes.size()); std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) { diff --git a/src/plugins/intel_gna/src/transformations/split_eltwise.cpp b/src/plugins/intel_gna/src/transformations/split_eltwise.cpp index 6b13c07ddc2..d62c0fcba9e 100644 --- a/src/plugins/intel_gna/src/transformations/split_eltwise.cpp +++ b/src/plugins/intel_gna/src/transformations/split_eltwise.cpp @@ -15,6 +15,8 @@ #include "legacy/ngraph_ops/eltwise.hpp" #include "log/log.hpp" +using namespace ov::intel_gna::limitations; + namespace ov { namespace intel_gna { namespace pass { @@ -25,7 +27,7 @@ inline bool is_eltwise_has_to_be_splitted(const ngraph::Output& no return false; auto o_dims = eltwise->get_output_shape(0); auto total_elem_size = std::accumulate(std::begin(o_dims), std::end(o_dims), 1, std::multiplies()); - return (total_elem_size > limitations::bufferMaxSize); + return (total_elem_size > Limitations::kBufferMaxSize); } static std::shared_ptr split_input( diff --git a/src/plugins/intel_gna/src/transformations/split_eltwise.hpp b/src/plugins/intel_gna/src/transformations/split_eltwise.hpp index 9cae21b6c6b..ca8f5fdc88f 100644 --- a/src/plugins/intel_gna/src/transformations/split_eltwise.hpp +++ b/src/plugins/intel_gna/src/transformations/split_eltwise.hpp @@ -11,7 +11,7 @@ namespace intel_gna { namespace pass { /** - * @brief Split over channels for Eltwise to avoid GNA-HW bufferMaxSize limitation per eltwise + * @brief Split over 
channels for Eltwise to avoid GNA-HW kBufferMaxSize limitation per eltwise */ class SplitEltwise : public ov::pass::MatcherPass { public: diff --git a/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/I8_quantisation_test.cpp b/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/I8_quantisation_test.cpp index 840e34ffd41..cdd882c107a 100644 --- a/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/I8_quantisation_test.cpp +++ b/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/I8_quantisation_test.cpp @@ -2,21 +2,25 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include -#include -#include "frontend/model_quantizer.hpp" -#include "frontend/layer_quantizer.hpp" -#include "gna_matcher.hpp" + #include +#include +#include + +#include "backend/gna_limitations.hpp" +#include "frontend/layer_quantizer.hpp" +#include "frontend/model_quantizer.hpp" +#include "gna_matcher.hpp" using namespace InferenceEngine; +using namespace ov::intel_gna::limitations; using namespace ov::intel_gna::frontend; using namespace GNATestIRs; class I8QuantisationTest : public GNATest<> { - protected: - InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) { +protected: + InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) { auto newLayer = InferenceEngine::injectData(lp); Config gna_config; gna_config.gnaPrecision = InferenceEngine::Precision::I16; @@ -26,7 +30,8 @@ class I8QuantisationTest : public GNATest<> { return newLayer; }; - InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model, float scale_factor) const { + InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model, + float scale_factor) const { auto scale_factors = std::vector({scale_factor}); GnaInputs inputs; @@ -41,31 +46,31 @@ class I8QuantisationTest : public GNATest<> { auto transformer = ov::intel_gna::TransformationsPipeline(gna_config); - return ModelQuantizer(transformer).quantize( - model, - inputs); + return ModelQuantizer(transformer).quantize(model, inputs); } - void SetUp() override {} + void SetUp() override { + Limitations::init(target::DeviceVersion::Default); + } }; // TODO: add test for FC weights after quantization -TEST_F(I8QuantisationTest, canQuantizeFCLayer){ - +TEST_F(I8QuantisationTest, canQuantizeFCLayer) { auto fc = std::make_shared(LayerParams{"name", "type", Precision::FP32}); fc->_out_num = 9; - auto weights = make_shared_blob({ Precision::FP32, {1, 1}, Layout::NC }); + auto weights = make_shared_blob({Precision::FP32, {1, 1}, Layout::NC}); fc->_weights = weights; - fc->_biases = make_shared_blob({ Precision::FP32, {1, 1}, Layout::NC }); + fc->_biases = make_shared_blob({Precision::FP32, {1, 1}, Layout::NC}); fc->_weights->allocate(); fc->_biases->allocate(); - std::shared_ptr outData = std::make_shared("data", TensorDesc(Precision::FP32, SizeVector({ 1, 1 }), Layout::NC)); + std::shared_ptr outData = + std::make_shared("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC)); fc->outData.push_back(outData); fc->insData.push_back(outData); // actual quantisation algorithm is involved - for (auto && w : *weights) { - w = MAX_OUT_MULTIPLIER * MAX_VAL_1B_WEIGHT; + for (auto&& w : *weights) { + w = MAX_OUT_MULTIPLIER * MAX_VAL_1B_WEIGHT; } fillWeights(fc->_biases); @@ -73,17 +78,16 @@ TEST_F(I8QuantisationTest, canQuantizeFCLayer){ ASSERT_NO_THROW(quantize(fc)); } -TEST_F(I8QuantisationTest, canQuantizeActivation){ - - auto sigmoid = std::make_shared(LayerParams{"name", "type", 
Precision::FP32}); +TEST_F(I8QuantisationTest, canQuantizeActivation) { + auto sigmoid = std::make_shared(LayerParams{"name", "type", Precision::FP32}); sigmoid->params["value"] = 2; sigmoid->type = "Activation"; ASSERT_NO_THROW(quantize(sigmoid)); } -TEST_F(I8QuantisationTest, inputPrecisionIs16Bits){ - auto weights = make_shared_blob({ Precision::U8, {440}, C }); +TEST_F(I8QuantisationTest, inputPrecisionIs16Bits) { + auto weights = make_shared_blob({Precision::U8, {440}, C}); weights->allocate(); fillWeights(weights); @@ -92,13 +96,15 @@ TEST_F(I8QuantisationTest, inputPrecisionIs16Bits){ auto newNet = quantize_single_input_model(network, 1000); InputsDataMap inputs = newNet.getInputsInfo(); - auto inputLayer = getCreatorLayer(getInputTo(inputs.begin()->second->getInputData()).begin()->second->insData.front().lock()).lock(); + auto inputLayer = + getCreatorLayer(getInputTo(inputs.begin()->second->getInputData()).begin()->second->insData.front().lock()) + .lock(); ASSERT_EQ(inputLayer->precision, Precision::I16); } -TEST_F(I8QuantisationTest, FCDimensionIs1){ - auto weights = make_shared_blob({ Precision::U8, {440}, C }); +TEST_F(I8QuantisationTest, FCDimensionIs1) { + auto weights = make_shared_blob({Precision::U8, {440}, C}); weights->allocate(); fillWeights(weights); @@ -108,8 +114,8 @@ TEST_F(I8QuantisationTest, FCDimensionIs1){ ASSERT_NO_THROW(quantize_single_input_model(network, 1000)); } -TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits){ - auto weights = make_shared_blob({ Precision::U8, {440}, C }); +TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits) { + auto weights = make_shared_blob({Precision::U8, {440}, C}); weights->allocate(); fillWeights(weights); @@ -124,7 +130,7 @@ TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits){ } TEST_F(I8QuantisationTest, fp16tofp32_on_fullyConnected_model) { - auto weights = make_shared_blob({ Precision::U8, {220}, Layout::C }); + auto weights = make_shared_blob({Precision::U8, {220}, Layout::C}); weights->allocate(); fillWeights(weights); @@ -135,7 +141,7 @@ TEST_F(I8QuantisationTest, fp16tofp32_on_fullyConnected_model) { } TEST_F(I8QuantisationTest, LSTMCell_quantize) { - auto weights = make_shared_blob({ Precision::U8, {33664}, C }); + auto weights = make_shared_blob({Precision::U8, {33664}, C}); weights->allocate(); fillWeights(weights); @@ -146,7 +152,7 @@ TEST_F(I8QuantisationTest, LSTMCell_quantize) { } TEST_F(I8QuantisationTest, LSTMCell_unaligned_quantize) { - auto weights = make_shared_blob({ Precision::U8, {3480}, C }); + auto weights = make_shared_blob({Precision::U8, {3480}, C}); weights->allocate(); fillWeights(weights); @@ -157,7 +163,7 @@ TEST_F(I8QuantisationTest, LSTMCell_unaligned_quantize) { } TEST_F(I8QuantisationTest, TI_quantize) { - auto weights = make_shared_blob({ Precision::U8, {249748}, C }); + auto weights = make_shared_blob({Precision::U8, {249748}, C}); weights->allocate(); fillWeights(weights); diff --git a/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/i16_quantisation_test.cpp b/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/i16_quantisation_test.cpp index 6a633c843a2..31ba21b6dfe 100644 --- a/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/i16_quantisation_test.cpp +++ b/src/plugins/intel_gna/tests/deprecated/unit/engines/gna/i16_quantisation_test.cpp @@ -2,23 +2,27 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include -#include -#include "backend/gna_types.hpp" -#include "frontend/model_quantizer.hpp" -#include "frontend/layer_quantizer.hpp" -#include 
"gna_matcher.hpp" + #include +#include +#include + +#include "backend/gna_limitations.hpp" +#include "backend/gna_types.hpp" +#include "frontend/layer_quantizer.hpp" +#include "frontend/model_quantizer.hpp" +#include "gna_matcher.hpp" #include "ngraph_functions/builders.hpp" using namespace InferenceEngine; +using namespace ov::intel_gna::limitations; using namespace ov::intel_gna::frontend; using namespace GNATestIRs; class I16QuantisationTest : public GNATest<> { - protected: - InferenceEngine::CNNLayerPtr quantize (InferenceEngine::CNNLayerPtr lp) { +protected: + InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) { auto newLayer = InferenceEngine::injectData(lp); Config gna_config; gna_config.gnaPrecision = InferenceEngine::Precision::I16; @@ -28,7 +32,8 @@ class I16QuantisationTest : public GNATest<> { return newLayer; }; - InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model, float scale_factor) const { + InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model, + float scale_factor) const { auto scale_factors = std::vector({scale_factor}); GnaInputs inputs; @@ -43,28 +48,27 @@ class I16QuantisationTest : public GNATest<> { auto transformer = ov::intel_gna::TransformationsPipeline(gna_config); - return ModelQuantizer(transformer).quantize( - model, - inputs); + return ModelQuantizer(transformer).quantize(model, inputs); } - void SetUp() override { + void SetUp() override { + Limitations::init(target::DeviceVersion::Default); } - }; template -T setWeights(T blob) { +T setWeights(T blob) { blob->allocate(); - // actual quantisation algorithm is involved - we need to provide weights that will be quantized with scale factor of 1 - for (auto && w : *blob) { + // actual quantisation algorithm is involved - we need to provide weights that will be quantized with scale factor + // of 1 + for (auto&& w : *blob) { w = MAX_VAL_2B_WEIGHT; } return blob; } template <> -TBlob::Ptr setWeights(TBlob::Ptr blob) { +TBlob::Ptr setWeights(TBlob::Ptr blob) { blob->allocate(); auto buf = blob->buffer(); auto ptr = buf.as(); @@ -75,36 +79,34 @@ TBlob::Ptr setWeights(TBlob::Ptr blob) { return blob; } - // TODO: add test for FC weights after quantization -TEST_F(I16QuantisationTest, canQuantizeFCLayer){ - +TEST_F(I16QuantisationTest, canQuantizeFCLayer) { auto fc = std::make_shared(LayerParams{"name", "type", Precision::FP32}); fc->_out_num = 9; - fc->_weights = setWeights(make_shared_blob({ Precision::FP32, {1, 1}, Layout::NC })); + fc->_weights = setWeights(make_shared_blob({Precision::FP32, {1, 1}, Layout::NC})); fillWeights(fc->_weights); - fc->_biases = make_shared_blob({ Precision::FP32, {1, 1}, Layout::NC }); + fc->_biases = make_shared_blob({Precision::FP32, {1, 1}, Layout::NC}); fc->_biases->allocate(); fillWeights(fc->_biases); - std::shared_ptr outData = std::make_shared("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC)); + std::shared_ptr outData = + std::make_shared("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC)); fc->outData.push_back(outData); fc->insData.push_back(outData); ASSERT_NO_THROW(quantize(fc)); } -TEST_F(I16QuantisationTest, canQuantizeActivation){ - - auto sigmoid = std::make_shared(LayerParams{"name", "type", Precision::FP32}); +TEST_F(I16QuantisationTest, canQuantizeActivation) { + auto sigmoid = std::make_shared(LayerParams{"name", "type", Precision::FP32}); sigmoid->params["value"] = 2; sigmoid->type = "Activation"; 
ASSERT_NO_THROW(quantize(sigmoid)); } -TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits){ - auto weights = make_shared_blob({ Precision::U8, {440}, C }); +TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits) { + auto weights = make_shared_blob({Precision::U8, {440}, C}); weights->allocate(); fillWeights(weights); @@ -118,10 +120,9 @@ TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits){ ASSERT_EQ(affineDataPtr->getTensorDesc().getPrecision(), Precision::I32); } - TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) { - auto weights = setWeights(make_shared_blob({ Precision::U8, {440}, C })); - //std::fill_n(weights->buffer().as(), weights->byteSize()/sizeof(float), 0); + auto weights = setWeights(make_shared_blob({Precision::U8, {440}, C})); + // std::fill_n(weights->buffer().as(), weights->byteSize()/sizeof(float), 0); Core ie; auto network = ie.ReadNetwork(affineToMemoryModel(), weights); @@ -129,13 +130,13 @@ TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) { ASSERT_NO_THROW(quantize_single_input_model(network, 1000)); } -TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){ +TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect) { const float inputScaleFactorTest = 1000; const float weightValueTest = 100; - auto weights = make_shared_blob({ Precision::U8, {440}, C }); + auto weights = make_shared_blob({Precision::U8, {440}, C}); weights->allocate(); - fillWeights(weights, { weightValueTest }); + fillWeights(weights, {weightValueTest}); Core ie; auto network = ie.ReadNetwork(Fc2DOutputModel(), weights); @@ -153,51 +154,70 @@ TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){ TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion) { assert_that() .onInferModel(Fc2DOutputModel()) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_without().pwl_inserted_into_nnet(); + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_without() + .pwl_inserted_into_nnet(); } TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion_ProfilingEnabled) { assert_that() .onInferModel(Fc2DOutputModel()) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_without().pwl_inserted_into_nnet().profiling_counters(); + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_without() + .pwl_inserted_into_nnet() + .profiling_counters(); } TEST_F(I16QuantisationTest, OnlyAffineWithNanScaleFactorFails) { - gna() - .onInferModel(Fc2DOutputModel()) - .withNanScaleFactor() - .propagate_forward().throws(); + gna().onInferModel(Fc2DOutputModel()).withNanScaleFactor().propagate_forward().throws(); } TEST_F(I16QuantisationTest, OnlyAffineWithInfScaleFactorFails) { - gna() - .onInferModel(Fc2DOutputModel()) - .withInfScaleFactor() - .propagate_forward().throws(); + gna().onInferModel(Fc2DOutputModel()).withInfScaleFactor().propagate_forward().throws(); } TEST_F(I16QuantisationTest, AffineToMemoryWillResultInActivationInsertion) { assert_that() .onInferModel(affineToMemoryModel()) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().pwl_inserted_into_nnet(); + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .pwl_inserted_into_nnet(); } 
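
For reference, the SetUp() change above captures the central refactor carried by this patch: the namespace-level constants and free functions of ov::intel_gna::limitations are replaced by a Limitations singleton that has to be initialized for a compile target before it is queried. Below is a minimal sketch of that calling pattern, assembled only from calls visible in this diff; it is not part of the patch itself, the wrapper function name is illustrative, and GNA3_5 is used purely as an example target.

#include "backend/gna_limitations.hpp"

using namespace ov::intel_gna::limitations;
using ov::intel_gna::target::DeviceVersion;

void init_and_query_limitations_sketch() {
    // Initialize the singleton for the chosen compile target before any query
    // (the unit tests in this patch use DeviceVersion::Default in SetUp()).
    Limitations::init(DeviceVersion::GNA3_5);

    // Instance queries replace the former free functions such as getMemoryAlignmentBytes().
    const size_t alignment = Limitations::get_instance()->get_memory_alignment();  // 64 for GNA3_5
    const auto cnn_validator = Limitations::get_instance()->get_cnn_validator();

    // Hardware limits are now class-scoped constants (kBufferMaxSize, kConvFilterMaxSize, ...).
    const bool within_buffer = alignment <= Limitations::kBufferMaxSize;
    (void)within_buffer;
    (void)cnn_validator;
}
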
TEST_F(I16QuantisationTest, EltwiseToMemoryWithNoOutputActivationInsertion) { - assert_that().inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .onInferModel(eltwiseToMemoryModelNoOutput(), [](CNNNetwork & net){ - net.addOutput("Eltwise_8"); - }).gna().propagate_forward().called_with().pwl_inserted_into_nnet(); + assert_that() + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .onInferModel(eltwiseToMemoryModelNoOutput(), + [](CNNNetwork& net) { + net.addOutput("Eltwise_8"); + }) + .gna() + .propagate_forward() + .called_with() + .pwl_inserted_into_nnet(); } TEST_F(I16QuantisationTest, EltwiseToMemory_ActivationInsertion) { - assert_that().onInferModel(eltwiseToMemoryModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet(); + assert_that() + .onInferModel(eltwiseToMemoryModel()) + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .inNotCompactMode() + .gna() + .propagate_forward() + .called_with() + .pwl_inserted_into_nnet(); } - TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInsertion) { auto input_params = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 20}); const auto axis_node = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1}); @@ -205,41 +225,73 @@ TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInserti auto tanh = std::make_shared(split->outputs()[0]); auto add = std::make_shared(split->outputs()[1], tanh); auto result = std::make_shared(add); - auto function = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); - assert_that().onInferNgraphModel(function) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().diagonal_inserted_into_nnet(); + auto function = + std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); + assert_that() + .onInferNgraphModel(function) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .diagonal_inserted_into_nnet(); } TEST_F(I16QuantisationTest, SliceFollowedBy2FCsAnd2Eltwises_AlignedFilterInsertion) { - assert_that().onInferModel(twoFCWithPaddingAfterSliceModel()) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().diagonal_inserted_into_nnet(); + assert_that() + .onInferModel(twoFCWithPaddingAfterSliceModel()) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .diagonal_inserted_into_nnet(); } // ToDo requires implementation of aligning filter for concat inputs and improvement of // qunatization/scaling algorithm for concat TEST_F(I16QuantisationTest, DISABLED_DoubleConcatPropageteForwardWithSuccess_AlignedFilterInsertion) { - assert_that().onInferModel(doubleConcatModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet(); + assert_that() + .onInferModel(doubleConcatModel()) + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .inNotCompactMode() + .gna() + .propagate_forward() + .called_with() + .diagonal_inserted_into_nnet(); } TEST_F(I16QuantisationTest, EltwiseSumm_onlyOneIdentityInsertion) { - assert_that().onInferModel(eltwiseSummModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - 
.inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once(); + assert_that() + .onInferModel(eltwiseSummModel()) + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .inNotCompactMode() + .gna() + .propagate_forward() + .called_with() + .pwl_inserted_into_nnet() + .once(); } - TEST_F(I16QuantisationTest, canDetectLeakyRelu) { - assert_that().onInferModel(TFLeakyReluModel()) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().pwl_inserted_into_nnet(); + assert_that() + .onInferModel(TFLeakyReluModel()) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .pwl_inserted_into_nnet(); } TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) { - assert_that().onInferModel(maxpoolAfterRelu()) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with() + assert_that() + .onInferModel(maxpoolAfterRelu()) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() .convolution_inserted_into_nnet() .And() .pwl_inserted_into_nnet() @@ -248,28 +300,53 @@ TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) { } TEST_F(I16QuantisationTest, EltwiseMull_willInsertTwoIdentities) { - assert_that().onInferModel(eltwiseMulModel()) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice(); + assert_that() + .onInferModel(eltwiseMulModel()) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .pwl_inserted_into_nnet() + .twice(); } TEST_F(I16QuantisationTest, multiple_inputs_supported) { std::string configKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_"); - assert_that().onInferModel(two_inputs_to_affine()) - .inNotCompactMode().withGNAConfig(configKey + std::to_string(0), 1.0f) - .withGNAConfig(configKey + std::to_string(1), 2.0f).gna().propagate_forward() - .called_with().pwl_inserted_into_nnet().once(); + assert_that() + .onInferModel(two_inputs_to_affine()) + .inNotCompactMode() + .withGNAConfig(configKey + std::to_string(0), 1.0f) + .withGNAConfig(configKey + std::to_string(1), 2.0f) + .gna() + .propagate_forward() + .called_with() + .pwl_inserted_into_nnet() + .once(); } TEST_F(I16QuantisationTest, DISABLED_multiple_inputs_into_concat_supported) { - assert_that().onInferModel(two_inputs_to_concat()) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f).gna().propagate_forward().called_with().pwl_inserted_into_nnet().once(); + assert_that() + .onInferModel(two_inputs_to_concat()) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .pwl_inserted_into_nnet() + .once(); } TEST_F(I16QuantisationTest, ScaleShift_Affine_WillResultInIdentityInsertion) { - assert_that().onInferModel(scaleShiftAffineModel()) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().pwl_inserted_into_nnet().once(); + assert_that() + .onInferModel(scaleShiftAffineModel()) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .pwl_inserted_into_nnet() + .once(); } TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) { @@ -277,10 +354,17 
@@ TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) { auto clamp = std::make_shared(input_params, -50, 50); auto tanh = std::make_shared(clamp); auto result = std::make_shared(tanh); - auto function = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); - assert_that().onInferNgraphModel(function) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice(); + auto function = + std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); + assert_that() + .onInferNgraphModel(function) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .diagonal_inserted_into_nnet() + .twice(); } TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInTwoDiagonalsInsertion) { @@ -296,76 +380,127 @@ TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInTwoDiago auto result = std::make_shared(add); mem_w->add_control_dependency(mem_r); result->add_control_dependency(mem_w); - auto function = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); - assert_that().onInferNgraphModel(function) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice(); + auto function = + std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); + assert_that() + .onInferNgraphModel(function) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .diagonal_inserted_into_nnet() + .twice(); } TEST_F(I16QuantisationTest, AffineWith2AffineOutputs_ResultInOnlyOneIdentityInsertion) { // one Identity activation from first FC, and one Identity activation for eltwise - assert_that().onInferModel(AffineWith2AffineOutputsModel()) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice(); + assert_that() + .onInferModel(AffineWith2AffineOutputsModel()) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .pwl_inserted_into_nnet() + .twice(); } TEST_F(I16QuantisationTest, ScaleShiftWithBroadcast_ResultInDiagonalInsertion) { - - auto & affineWeights = storage>(); + auto& affineWeights = storage>(); affineWeights = { - 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, - 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, - 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, - 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, - 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, + 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288, + 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192, + 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, }; - assert_that().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f).onInferModel(ScaleShift3DModel()) - .withWeigthsPattern({1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f}) - .inNotCompactMode().gna().propagate_forward().called_with().called_with().affine_weights_eq(affineWeights); + assert_that() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .onInferModel(ScaleShift3DModel()) + .withWeigthsPattern({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 
7.0f, 8.0f}) + .inNotCompactMode() + .gna() + .propagate_forward() + .called_with() + .called_with() + .affine_weights_eq(affineWeights); } TEST_F(I16QuantisationTest, MemoryAfterConcat_ResultInCopyInsertion) { - assert_that().onInferModel(MemoryAfterConcatModel()).inNotCompactMode().gna().propagate_forward(). - called_with().copy_inserted_into_nnet(); + assert_that() + .onInferModel(MemoryAfterConcatModel()) + .inNotCompactMode() + .gna() + .propagate_forward() + .called_with() + .copy_inserted_into_nnet(); } TEST_F(I16QuantisationTest, MemoryAndConcatAfterOneNode_ResultInCopyInsertion) { - assert_that().onInferModel(MemoryAndConcatAfterOneNode()).inNotCompactMode().gna().propagate_forward(). - called_with().copy_inserted_into_nnet(); + assert_that() + .onInferModel(MemoryAndConcatAfterOneNode()) + .inNotCompactMode() + .gna() + .propagate_forward() + .called_with() + .copy_inserted_into_nnet(); } TEST_F(I16QuantisationTest, DISABLED_permutationOfWeightsBetweenConvAndAffine) { - auto & affineWeights = storage>(); + auto& affineWeights = storage>(); // least likely that width and height both are multiple of 7 auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}; // here weights are transpozed - save().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern) - .inNotCompactMode().from().propagate_forward().affine_weights_transpozed({128, 61}).to(affineWeights); + save() + .onInferModel(affineAfterConvNoPermute()) + .withWeigthsPattern(weigthsPattern) + .inNotCompactMode() + .from() + .propagate_forward() + .affine_weights_transpozed({128, 61}) + .to(affineWeights); // here weights shouldn't be transposed - assert_that().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern) - .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_eq(affineWeights); + assert_that() + .onInferModel(affineAfterConvWithPermute()) + .withWeigthsPattern(weigthsPattern) + .inNotCompactMode() + .gna() + .propagate_forward() + .called_with() + .affine_weights_eq(affineWeights); } TEST_F(I16QuantisationTest, DISABLED_noPermutationOfWeightsBetweenConvAndAffineIfPermuteLayerWithCorrectArgs) { - auto & affineWeights = storage>(); + auto& affineWeights = storage>(); // least likely that width and height both are multiple of 7 auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}; - save().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern) - .inNotCompactMode().from().propagate_forward().affine_weights().to(affineWeights); + save() + .onInferModel(affineAfterConvWithPermute()) + .withWeigthsPattern(weigthsPattern) + .inNotCompactMode() + .from() + .propagate_forward() + .affine_weights() + .to(affineWeights); - assert_that().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern) - .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_transposed(affineWeights, {128, 61}); + assert_that() + .onInferModel(affineAfterConvNoPermute()) + .withWeigthsPattern(weigthsPattern) + .inNotCompactMode() + .gna() + .propagate_forward() + .called_with() + .affine_weights_transposed(affineWeights, {128, 61}); } TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) { - auto weights = make_shared_blob({ Precision::U8, {220}, Layout::C }); + auto weights = make_shared_blob({Precision::U8, {220}, Layout::C}); weights->allocate(); fillWeights(weights); @@ -375,7 +510,8 @@ TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) { quantize_single_input_model(network, 1000); } 
-TEST_F(I16QuantisationTest, MultipleActivationsAfterAffineWithIdentityActivation_MultipleDiagonalLayersWithActivaitons) { +TEST_F(I16QuantisationTest, + MultipleActivationsAfterAffineWithIdentityActivation_MultipleDiagonalLayersWithActivaitons) { auto input_params = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 10}); const auto constant = ngraph::opset8::Constant::create(ngraph::element::f32, ngraph::Shape{10, 10}, {1}); auto matmul1 = std::make_shared(input_params, constant); @@ -386,11 +522,17 @@ TEST_F(I16QuantisationTest, MultipleActivationsAfterAffineWithIdentityActivation auto mul = std::make_shared(sigmoid, relu); auto add2 = std::make_shared(add, mul); auto result = std::make_shared(add); - auto function = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); + auto function = + std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); // identiy came from automatic insertion due to - assert_that().onInferNgraphModel(function) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActSigmoid, kActRelu, kActIdentity, kActIdentity}); + assert_that() + .onInferNgraphModel(function) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .pwls_inserted_into_nnet({kActSigmoid, kActRelu, kActIdentity, kActIdentity}); } TEST_F(I16QuantisationTest, MultipleActivationsAfterAffine_ResultInMultipleDiagonalLayersWithActivaitons) { @@ -401,24 +543,36 @@ TEST_F(I16QuantisationTest, MultipleActivationsAfterAffine_ResultInMultipleDiago auto relu = std::make_shared(matmul); auto mul = std::make_shared(sigmoid, relu); auto result = std::make_shared(mul); - auto function = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); + auto function = + std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); // extra identity inserted for affine - assert_that().onInferNgraphModel(function) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with() - // 1 diag for second activation, 1 for eltwise - .pwls_inserted_into_nnet({kActRelu, kActSigmoid}).diagonal_inserted_into_nnet().times(3); + assert_that() + .onInferNgraphModel(function) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + // 1 diag for second activation, 1 for eltwise + .pwls_inserted_into_nnet({kActRelu, kActSigmoid}) + .diagonal_inserted_into_nnet() + .times(3); } // TODO: build a regression test on top of it using real quantisation accuracy checking TEST_F(I16QuantisationTest, ConcatWithConstInputPropagatedForward) { - assert_that().onInferModel(concatModelWithConstLayer()) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity}); + assert_that() + .onInferModel(concatModelWithConstLayer()) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .pwls_inserted_into_nnet({kActIdentity}); } TEST_F(I16QuantisationTest, LSTMCell_quantize) { - auto weights = make_shared_blob({ Precision::U8, {33664}, C }); + auto weights = make_shared_blob({Precision::U8, {33664}, C}); weights->allocate(); fillWeights(weights); @@ -429,7 +583,7 @@ TEST_F(I16QuantisationTest, 
LSTMCell_quantize) { } TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) { - auto weights = make_shared_blob({ Precision::U8, {3480}, C }); + auto weights = make_shared_blob({Precision::U8, {3480}, C}); weights->allocate(); fillWeights(weights); @@ -440,15 +594,27 @@ TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) { } TEST_F(I16QuantisationTest, EltwisetWithConstInputPropagatedForward) { - assert_that().onInferModel(eltwiseSumModelWithConstLayer()) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().diagonal_inserted_into_nnet(); + assert_that() + .onInferModel(eltwiseSumModelWithConstLayer()) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .diagonal_inserted_into_nnet(); } TEST_F(I16QuantisationTest, PowerWithScaleFactorPropagateForward) { - assert_that().onInferModel(PowerWithScaleFactor1()) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity}).And().diagonal_inserted_into_nnet(); + assert_that() + .onInferModel(PowerWithScaleFactor1()) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .pwls_inserted_into_nnet({kActIdentity}) + .And() + .diagonal_inserted_into_nnet(); } TEST_F(I16QuantisationTest, ConcatWithDifferentInputScaleFactorsPropagateForward) { @@ -459,14 +625,20 @@ TEST_F(I16QuantisationTest, ConcatWithDifferentInputScaleFactorsPropagateForward auto tanh = std::make_shared(split->outputs()[1]); auto concat = std::make_shared(ngraph::OutputVector{sigmoid, tanh}, 1); auto result = std::make_shared(concat); - auto function = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); - assert_that().onInferNgraphModel(function) - .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity}); + auto function = + std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); + assert_that() + .onInferNgraphModel(function) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called_with() + .pwls_inserted_into_nnet({kActIdentity}); } TEST_F(I16QuantisationTest, TI_quantize) { - auto weights = make_shared_blob({ Precision::U8, {249748}, C }); + auto weights = make_shared_blob({Precision::U8, {249748}, C}); weights->allocate(); fillWeights(weights); @@ -477,40 +649,52 @@ TEST_F(I16QuantisationTest, TI_quantize) { } TEST_F(I16QuantisationTest, TI_PropagateForward) { - auto input_params = std::make_shared(ngraph::element::f32, ngraph::Shape{ 1, 10 }); - auto mul = std::make_shared(input_params, - std::make_shared(ngraph::element::f32, ngraph::Shape{ 1, 10 })); - auto add = std::make_shared(mul, - std::make_shared(ngraph::element::f32, ngraph::Shape{ 1, 10 })); - auto reshape = std::make_shared(add, - std::make_shared(ngraph::element::i64, ngraph::Shape{ 3 }, std::vector{ 1, 1, 10 }), false); + auto input_params = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 10}); + auto mul = std::make_shared( + input_params, + std::make_shared(ngraph::element::f32, ngraph::Shape{1, 10})); + auto add = std::make_shared( + mul, + std::make_shared(ngraph::element::f32, ngraph::Shape{1, 10})); + auto reshape = std::make_shared( + add, + std::make_shared(ngraph::element::i64, 
ngraph::Shape{3}, std::vector{1, 1, 10}), + false); auto reshape_shape = reshape->output(0).get_shape(); const size_t batch_size = 1; const size_t hiddenSize = 10; - auto H_init = ngraph::builder::makeConstant(ngraph::element::f32, { batch_size, hiddenSize }, {}, true); - auto C_init = ngraph::builder::makeConstant(ngraph::element::f32, { batch_size, hiddenSize }, {}, true); + auto H_init = ngraph::builder::makeConstant(ngraph::element::f32, {batch_size, hiddenSize}, {}, true); + auto C_init = ngraph::builder::makeConstant(ngraph::element::f32, {batch_size, hiddenSize}, {}, true); - auto H_t = std::make_shared(ngraph::element::f32, ngraph::Shape{ batch_size, hiddenSize }); - auto C_t = std::make_shared(ngraph::element::f32, ngraph::Shape{ batch_size, hiddenSize }); + auto H_t = std::make_shared(ngraph::element::f32, ngraph::Shape{batch_size, hiddenSize}); + auto C_t = std::make_shared(ngraph::element::f32, ngraph::Shape{batch_size, hiddenSize}); - //Body - auto X = std::make_shared(ngraph::element::f32, ngraph::Shape{ batch_size, 1, reshape_shape[2] }); - auto weightsNode = ngraph::builder::makeConstant(ngraph::element::f32, { 4 * hiddenSize, reshape_shape[2] }, {}, true); - auto reccurrenceWeightsNode = ngraph::builder::makeConstant(ngraph::element::f32, { 4 * hiddenSize, hiddenSize }, {}, true); + // Body + auto X = std::make_shared(ngraph::element::f32, + ngraph::Shape{batch_size, 1, reshape_shape[2]}); + auto weightsNode = + ngraph::builder::makeConstant(ngraph::element::f32, {4 * hiddenSize, reshape_shape[2]}, {}, true); + auto reccurrenceWeightsNode = + ngraph::builder::makeConstant(ngraph::element::f32, {4 * hiddenSize, hiddenSize}, {}, true); // lstm - auto constantX = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{ 2 }, { batch_size, reshape_shape[2] }); - auto lstm1 = std::make_shared(std::make_shared(X, constantX, false), - H_t, C_t, - weightsNode, reccurrenceWeightsNode, hiddenSize); + auto constantX = + ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {batch_size, reshape_shape[2]}); + auto lstm1 = + std::make_shared(std::make_shared(X, constantX, false), + H_t, + C_t, + weightsNode, + reccurrenceWeightsNode, + hiddenSize); auto H_o = lstm1->output(0); auto C_o = lstm1->output(1); - auto body = std::make_shared( - ngraph::OutputVector{ H_o, C_o }, ngraph::ParameterVector{ X, H_t, C_t }); + auto body = + std::make_shared(ngraph::OutputVector{H_o, C_o}, ngraph::ParameterVector{X, H_t, C_t}); auto tensor_iterator = std::make_shared(); tensor_iterator->set_body(body); @@ -522,19 +706,32 @@ TEST_F(I16QuantisationTest, TI_PropagateForward) { auto out0 = tensor_iterator->get_iter_value(H_o, -1); const size_t output_size = 12; - auto fc = ngraph::builder::makeFullyConnected(out0, ngraph::element::f32, output_size, true, { hiddenSize, output_size }, { 1 }, { 1 }); + auto fc = ngraph::builder::makeFullyConnected(out0, + ngraph::element::f32, + output_size, + true, + {hiddenSize, output_size}, + {1}, + {1}); auto result = std::make_shared(fc); - auto function = std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); - assert_that().onInferNgraphModel(function).withWeigthsPattern({0.1f}) - .inNotCompactMode().gna().propagate_forward() - .called_with().pwls_inserted_into_nnet({kActIdentity}); + auto function = + std::make_shared(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); + assert_that() + .onInferNgraphModel(function) + .withWeigthsPattern({0.1f}) + .inNotCompactMode() + .gna() + 
.propagate_forward() + .called_with() + .pwls_inserted_into_nnet({kActIdentity}); } TEST_F(I16QuantisationTest, SplitToConcatWith2Inputs1360NotAlignedNoFC) { - assert_that().onInferModel(SplitToConcatWith2Inputs1360NotAlignedNoFC()) - .inNotCompactMode() - .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) - .gna() - .propagate_forward() - .called(); + assert_that() + .onInferModel(SplitToConcatWith2Inputs1360NotAlignedNoFC()) + .inNotCompactMode() + .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) + .gna() + .propagate_forward() + .called(); } diff --git a/src/plugins/intel_gna/tests/unit/backend/gna_limitations_test.cpp b/src/plugins/intel_gna/tests/unit/backend/gna_limitations_test.cpp index 37a3d7e1e70..4ce23c8de29 100644 --- a/src/plugins/intel_gna/tests/unit/backend/gna_limitations_test.cpp +++ b/src/plugins/intel_gna/tests/unit/backend/gna_limitations_test.cpp @@ -282,10 +282,12 @@ struct Validatecnn2dParams { class GNAcnn2dValidatorTest : public ::testing::TestWithParam { protected: void SetUp() override { - validator = cnn2d::AbstractValidator::Create(GetParam().target); - ASSERT_TRUE(validator != nullptr); + Limitations::init(GetParam().target); + validator = Limitations::get_instance()->get_cnn_validator(); + ASSERT_TRUE(validator); } - std::unique_ptr validator; + + std::shared_ptr validator; }; class GNAcnn2dValidatorTestPadding : public GNAcnn2dValidatorTest { diff --git a/src/plugins/intel_gna/tests/unit/gna_executable_network_metrics_test.cpp b/src/plugins/intel_gna/tests/unit/gna_executable_network_metrics_test.cpp index 6bd7875a964..a849ba9e4e9 100644 --- a/src/plugins/intel_gna/tests/unit/gna_executable_network_metrics_test.cpp +++ b/src/plugins/intel_gna/tests/unit/gna_executable_network_metrics_test.cpp @@ -18,7 +18,7 @@ class GNAPluginForNetworkMetricsTest : public GNAPlugin { public: GNAPluginForNetworkMetricsTest(const std::map& configMap) : GNAPlugin(configMap) { gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); - graphCompiler.setGNAMemoryPtr(gnamem); + m_graph_compiler->setGNAMemoryPtr(gnamem); gnadevice.reset(); } }; diff --git a/src/plugins/intel_gna/tests/unit/gna_extra_pwl_segments_tests.cpp b/src/plugins/intel_gna/tests/unit/gna_extra_pwl_segments_tests.cpp index 8c352e69cdf..b05ba43b623 100644 --- a/src/plugins/intel_gna/tests/unit/gna_extra_pwl_segments_tests.cpp +++ b/src/plugins/intel_gna/tests/unit/gna_extra_pwl_segments_tests.cpp @@ -81,11 +81,11 @@ class GNAPluginForPWLExtraSegmentsTest : public GNAPlugin { public: GNAPluginForPWLExtraSegmentsTest(const std::map& config) : GNAPlugin(config) { gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); - graphCompiler.setGNAMemoryPtr(gnamem); + m_graph_compiler->setGNAMemoryPtr(gnamem); gnadevice.reset(); } void Test(const size_t expected_segments) { - for (const auto& component : graphCompiler.dnnComponents.components) { + for (const auto& component : m_graph_compiler->dnnComponents.components) { if (component.dnnComponent.operation == kDnnPiecewiselinearOp) { EXPECT_EQ(expected_segments, component.dnnComponent.op.pwl.num_segments); } diff --git a/src/plugins/intel_gna/tests/unit/gna_get_aligned_split_sizes.cpp b/src/plugins/intel_gna/tests/unit/gna_get_aligned_split_sizes.cpp index ffc22b28cef..4a7a0dce948 100644 --- a/src/plugins/intel_gna/tests/unit/gna_get_aligned_split_sizes.cpp +++ b/src/plugins/intel_gna/tests/unit/gna_get_aligned_split_sizes.cpp @@ -58,7 +58,7 @@ TEST(CheckSplitSupported, CheckVariadicSplitSupported) { 
ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({split_lengths.size()}), split_lengths)); - ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result); + ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result); } } @@ -86,7 +86,7 @@ TEST(CheckSplitSupported, CheckSplitSupported) { std::make_shared(ngraph::element::f32, input_shape), ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}), num_splits); - ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result); + ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result); } } } // namespace diff --git a/src/plugins/intel_gna/tests/unit/gna_hw_precision_test.cpp b/src/plugins/intel_gna/tests/unit/gna_hw_precision_test.cpp index cd7d27997b4..49956874998 100644 --- a/src/plugins/intel_gna/tests/unit/gna_hw_precision_test.cpp +++ b/src/plugins/intel_gna/tests/unit/gna_hw_precision_test.cpp @@ -17,7 +17,7 @@ class GNAPluginForPrecisionTest : public GNAPlugin { public: GNAPluginForPrecisionTest(const std::map& configMap) : GNAPlugin(configMap) { gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); - graphCompiler.setGNAMemoryPtr(gnamem); + m_graph_compiler->setGNAMemoryPtr(gnamem); gnadevice.reset(); } std::vector get_components() { diff --git a/src/plugins/intel_gna/tests/unit/gna_memory_alignment.cpp b/src/plugins/intel_gna/tests/unit/gna_memory_alignment.cpp index 7530644a1f5..0275e0fd425 100644 --- a/src/plugins/intel_gna/tests/unit/gna_memory_alignment.cpp +++ b/src/plugins/intel_gna/tests/unit/gna_memory_alignment.cpp @@ -45,9 +45,9 @@ public: GNAPluginForMemoryAlignmentTest(const std::map& configMap) : GNAPlugin(configMap) { if (gnadevice) { gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}, - gnadevice->getMemAlignment(), - limitations::kMemoryPageSize)); - graphCompiler.setGNAMemoryPtr(gnamem); + Limitations::get_instance()->get_memory_alignment(), + Limitations::kMemoryPageSize)); + m_graph_compiler->setGNAMemoryPtr(gnamem); gnadevice.reset(); } } @@ -149,16 +149,14 @@ INSTANTIATE_TEST_SUITE_P(MemoryAlignment_GNA_4_0, class MemoryAlignmentTest : public ::testing::Test {}; -TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_ExpectExceptionWhenTargetIsUnset) { - EXPECT_ANY_THROW(getMemoryAlignmentBytes(DeviceVersion::NotSet)); -} - -TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect64ByteAlignmentWhenTargetIsGNA3_0) { - EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_0), 64); +TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect64ByteAlignmentWhenTargetIsGNA3_5) { + Limitations::init(DeviceVersion::GNA3_5); + EXPECT_EQ(Limitations::get_instance()->get_memory_alignment(), 64); } TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect16ByteAlignmentWhenTargetIsGNA3_6) { - EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_6), 16); + Limitations::init(DeviceVersion::GNA3_6); + EXPECT_EQ(Limitations::get_instance()->get_memory_alignment(), 16); } } // namespace testing diff --git a/src/plugins/intel_gna/tests/unit/gna_memory_compact_test.cpp b/src/plugins/intel_gna/tests/unit/gna_memory_compact_test.cpp index 75437fffba0..b59d5287b6d 100644 --- a/src/plugins/intel_gna/tests/unit/gna_memory_compact_test.cpp +++ b/src/plugins/intel_gna/tests/unit/gna_memory_compact_test.cpp @@ -297,7 +297,7 @@ public: GNAPluginTested() : GNAPlugin() { gnamem_t = std::make_shared(); gnamem = gnamem_t; - graphCompiler.setGNAMemoryPtr(gnamem); + 
m_graph_compiler->setGNAMemoryPtr(gnamem); gnadevice.reset(); } void Test() { diff --git a/src/plugins/intel_gna/tests/unit/transformations/gna_decompose_2d_convolution.cpp b/src/plugins/intel_gna/tests/unit/transformations/gna_decompose_2d_convolution.cpp index 26d9ec4f575..a10220035f6 100644 --- a/src/plugins/intel_gna/tests/unit/transformations/gna_decompose_2d_convolution.cpp +++ b/src/plugins/intel_gna/tests/unit/transformations/gna_decompose_2d_convolution.cpp @@ -15,6 +15,7 @@ #include "common_test_utils/ngraph_test_utils.hpp" #include "transformations/decompose_2d_convolution.hpp" +using namespace ov::intel_gna::limitations; namespace testing { namespace { @@ -312,6 +313,8 @@ void Decompose2DConvTestInvalidFixture::SetUp() { std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) = params; + Limitations::init(ov::intel_gna::target::DeviceVersion::Default); + function = get_initial_function(fq, model, input_shape, @@ -342,6 +345,7 @@ class Decompose2DConvTestFixture : public CommonTestUtils::TestsCommon, public ::testing::WithParamInterface { public: void SetUp() override; + std::shared_ptr get_reference(const bool& fq, const modelType& model, const ngraph::PartialShape& input_shape, @@ -365,6 +369,8 @@ void Decompose2DConvTestFixture::SetUp() { std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) = params; + Limitations::init(ov::intel_gna::target::DeviceVersion::Default); + function = get_initial_function(fq, model, input_shape, @@ -779,7 +785,7 @@ static size_t CalculateConvCount(const ConvParams& conv_params) { size_t conv_count = 1; size_t total_factorized_conv_channel_count = (conv_params.input_channel_count * conv_params.filter_height * conv_params.filter_width); - while (total_factorized_conv_channel_count / conv_count > ov::intel_gna::limitations::convFilterMaxSize || + while (total_factorized_conv_channel_count / conv_count > Limitations::kConvFilterMaxSize || total_factorized_conv_channel_count % conv_count != 0 || conv_params.filter_channel_count % conv_count != 0) conv_count++; @@ -792,7 +798,7 @@ static bool ShouldDecompose(GraphData& graph_data, const ConvParams& conv_params // Concat (copy) layer limitation allows to split up to a certain limit // Currently we are able to split only convolutions without pooling in horizontal dimension - if (graph_data.conv_count > ov::intel_gna::limitations::copyMaxGrouping || + if (graph_data.conv_count > Limitations::kCopyMaxGrouping || ((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1)) return false; @@ -884,18 +890,13 @@ void execute_test(modelType model, case modelType::TranspConvBcastAddMaxPoolTransp: case modelType::TranspConvBcastAddActTransp: case modelType::TranspConvBcastAddMaxPoolActTransp: - manager.register_pass(ov::intel_gna::target::DeviceVersion::Default, - gnaPrecision); + manager.register_pass(gnaPrecision); break; case modelType::TranspConvTranspBcastAdd: - manager.register_pass( - ov::intel_gna::target::DeviceVersion::Default, - gnaPrecision); + manager.register_pass(gnaPrecision); break; case modelType::TranspConvTranspBcastAddAct: - manager.register_pass( - ov::intel_gna::target::DeviceVersion::Default, - gnaPrecision); + manager.register_pass(gnaPrecision); break; } diff --git a/src/plugins/intel_gna/tests/unit/transformations/gna_decompose_mvn.cpp b/src/plugins/intel_gna/tests/unit/transformations/gna_decompose_mvn.cpp index 
6823606a464..2f01a3882de 100644 --- a/src/plugins/intel_gna/tests/unit/transformations/gna_decompose_mvn.cpp +++ b/src/plugins/intel_gna/tests/unit/transformations/gna_decompose_mvn.cpp @@ -13,6 +13,8 @@ #include "transformations/decompose_mvn.hpp" #include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp" +using namespace ov::intel_gna::limitations; + namespace decomposeMVN { typedef std::tuple getReferenceFunction(const ngraph::Shape& inpu mvn_data.normalize_variance = normalize_variance; mvn_data.num_parts = 1; - while (mvn_data.W / mvn_data.num_parts > ov::intel_gna::limitations::convFilterMaxSize) { + while (mvn_data.W / mvn_data.num_parts > Limitations::kConvFilterMaxSize) { mvn_data.num_parts *= 2; } diff --git a/src/plugins/intel_gna/tests/unit/transformations/gna_insert_copy_layer.cpp b/src/plugins/intel_gna/tests/unit/transformations/gna_insert_copy_layer.cpp index 962ca17f68d..c315d7ac11d 100644 --- a/src/plugins/intel_gna/tests/unit/transformations/gna_insert_copy_layer.cpp +++ b/src/plugins/intel_gna/tests/unit/transformations/gna_insert_copy_layer.cpp @@ -11,6 +11,7 @@ #include #include +#include "backend/gna_limitations.hpp" #include "common_test_utils/ngraph_test_utils.hpp" #include "ngraph_functions/builders.hpp" #include "ops/copy.hpp" @@ -54,10 +55,10 @@ void InsertCopyLayerTest::Validate() { void InsertCopyLayerTest::SetUp() { std::tie(m_axis, m_inputs_num) = this->GetParam(); + ov::intel_gna::limitations::Limitations::init(ov::intel_gna::target::DeviceVersion::Default); } void InsertCopyLayerTest::Run() { - SetUp(); Validate(); } @@ -176,6 +177,11 @@ public: } }; +void RunPasses(ngraph::pass::Manager& m, std::shared_ptr func) { + ov::intel_gna::limitations::Limitations::init(ov::intel_gna::target::DeviceVersion::Default); + m.run_passes(func); +} + // [Parameter] [Parameter] // \ / => | // [Concat] [Copy] @@ -211,7 +217,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -263,7 +269,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -324,7 +330,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMultiNFLConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -382,7 +388,7 @@ TEST(TransformationTests, InsertCopyLayerMultiConstConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -442,7 +448,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -510,7 +516,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerNFLConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -573,7 +579,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -633,7 +639,7 @@ TEST(TransformationTests, 
InsertCopyLayerMultiParamConcatMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -705,7 +711,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -776,7 +782,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -851,7 +857,7 @@ TEST(TransformationTests, InsertCopyLayerCropMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -918,7 +924,7 @@ TEST(TransformationTests, InsertCopyLayerCropNFLMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -987,7 +993,7 @@ TEST(TransformationTests, InsertCopyLayerConcatMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -1060,7 +1066,7 @@ TEST(TransformationTests, InsertCopyLayerConcatNFLMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -1122,7 +1128,7 @@ TEST(TransformationTests, InsertCopyLayerSplitMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -1189,7 +1195,7 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLMemoryTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -1244,7 +1250,7 @@ TEST(TransformationTests, InsertCopyLayerCropConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -1289,7 +1295,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -1338,7 +1344,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoSubgraphsTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -1385,7 +1391,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoResultsTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -1442,7 +1448,7 @@ TEST(TransformationTests, InsertCopyLayerNFLBranchTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -1499,7 +1505,7 @@ TEST(TransformationTests, InsertCopyLayerNFLvsFLSubgraphTestt) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func)); @@ -1550,7 +1556,7 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLConcatTest) { ngraph::pass::Manager m; m.register_pass(); m.register_pass(); - 
m.run_passes(func); + RunPasses(m, func); ASSERT_NO_THROW(check_rt_info(func));
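
The RunPasses() helper introduced above exists because the GNA transformation passes now read the Limitations singleton instead of receiving a compile_target constructor argument, so a test must initialize it before ngraph::pass::Manager::run_passes() is invoked. The following is a minimal sketch of driving one of the updated passes under that contract; it assumes the plugin's usual include paths, takes ngraph::Function as the model type, and uses an illustrative helper name, while the Decompose2DConv constructor signature and the Limitations calls are taken from this diff.

#include <memory>

#include <ie_precision.hpp>
#include <ngraph/pass/manager.hpp>

#include "backend/gna_limitations.hpp"
#include "transformations/decompose_2d_convolution.hpp"

using namespace ov::intel_gna;

void run_decompose_pass_sketch(std::shared_ptr<ngraph::Function> model,
                               const InferenceEngine::Precision& gnaPrecision) {
    // The pass no longer takes a DeviceVersion; it queries Limitations internally,
    // so the singleton must be initialized before the pass pipeline runs.
    limitations::Limitations::init(target::DeviceVersion::Default);

    ngraph::pass::Manager manager;
    manager.register_pass<pass::Decompose2DConv>(gnaPrecision);  // was (compile_target, gnaPrecision)
    manager.run_passes(model);
}
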