[GNA]: QueryNetwork support (#13639)

Review comments

Refactor is_op_supported and apply review comments

Fix split checks

Remove split checks

Adjust 2dconv check

Rebase fixes, refactoring, applying comments
This commit is contained in:
Nadezhda Ageeva 2023-02-14 15:17:07 +04:00 committed by GitHub
parent c62be51cc1
commit 4e5f79b4ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 748 additions and 293 deletions
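For context: this change lets clients ask the GNA plugin which operations of a model it can run before compiling it. A minimal usage sketch, assuming an OpenVINO build with the GNA plugin and a placeholder model path (not part of this commit):

#include <ie_core.hpp>

#include <iostream>

int main() {
    InferenceEngine::Core core;
    // "model.xml" is a placeholder path to an IR model
    auto network = core.ReadNetwork("model.xml");
    // Ask the GNA plugin which layers it can execute; layers missing from the
    // map can be offloaded to another device (e.g. via HETERO).
    auto res = core.QueryNetwork(network, "GNA", {});
    for (const auto& entry : res.supportedLayersMap) {
        std::cout << entry.first << " -> " << entry.second << std::endl;
    }
    return 0;
}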

View File

@ -15,12 +15,215 @@
#include "gna/gna_config.hpp"
#include "gna_graph_tools.hpp"
#include "gna_lib_ver_selector.hpp"
#include "ie_ngraph_utils.hpp"
#include "log/log.hpp"
#include "ops/util/util.hpp"
namespace ov {
namespace intel_gna {
using namespace common;
namespace limitations {
namespace {
std::ostream& operator<<(std::ostream& os, const std::set<ov::element::Type>& t) {
for (auto it = t.begin(); it != t.end(); ++it) {
if (it != t.begin()) {
os << ", " << *it;
} else {
os << *it;
}
}
return os;
}
} // namespace
const std::set<ov::element::Type> SupportedElementTypes::supported_parameter_types = {ov::element::u8,
ov::element::i16,
ov::element::f32};
bool SupportedElementTypes::is_parameter_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_parameter_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support input precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_parameter_types << "\n";
}
return false;
}
return true;
}
const std::set<ov::element::Type> SupportedElementTypes::supported_constant_types = {ov::element::i8,
ov::element::u8,
ov::element::i16,
ov::element::u16,
ov::element::i32,
ov::element::f32,
ov::element::f64};
bool SupportedElementTypes::is_constant_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_constant_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support constant precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_constant_types << "\n";
}
return false;
}
return true;
}
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const ov::intel_gna::common::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!");
size_t batch_size = conv_ie->input_value(0).get_shape()[0];
if (batch_size != 1) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() +
", type: " + conv_ie->get_type_name() + ", and batch size(" +
std::to_string(batch_size) + ") != 1 not supported";
}
return false;
}
auto check_dilation = [&](size_t filter_dilation_height, size_t filter_dilation_width) -> bool {
cnn2d::RangeLimit2D dilation_limit{{convDilationHeight, convDilationHeight, "dilation height"},
{convDilationWidth, convDilationWidth, "dilation width"}};
std::string error = dilation_limit.GetErrorOrEmpty(filter_dilation_height, filter_dilation_width);
return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed,
error,
conv_ie->get_friendly_name(),
conv_ie->get_type_name());
};
auto input_shape = conv_ie->input_value(0).get_shape();
auto filter_shape = conv_ie->input_value(1).get_shape();
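// The convolution is treated as 2D only when both spatial dimensions of the kernel or of the input
// are greater than 1; otherwise only the dilation limits are checked below.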
if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) ||
(4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) {
pass::helper::ConvData conv_data;
pass::helper::GetConvData(conv_ie, conv_data);
if (gna_convolution_layer::isMappableFrom2DTo1D(conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_stride_height,
conv_data.filter_stride_width)) {
return check_dilation(conv_data.filter_dilation_height, conv_data.filter_dilation_width);
}
const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target);
if (cnn2dValidatorPtr) {
return cnn2dValidatorPtr->ValidateCnn2D(conv_ie->get_friendly_name(),
conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_channel_count,
conv_data.filter_stride_height,
conv_data.filter_stride_width,
conv_data.filter_dilation_height,
conv_data.filter_dilation_width,
OvGnaTypeIntFromBytes(gna_precision.size()),
is_exception_allowed);
}
}
return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]);
}
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
const ov::intel_gna::common::DeviceVersion& effective_compile_target,
bool is_exception_allowed) {
OPENVINO_ASSERT(max_pool, "MaxPool node is empty!");
auto kernels = max_pool->get_kernel();
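// Only 2D pooling (both kernel dimensions > 1) requires target-specific validation; otherwise it is accepted.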
if (2 == kernels.size() && kernels[0] > 1 && kernels[1] > 1) {
const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target);
if (cnn2dValidatorPtr) {
auto strides = max_pool->get_strides();
return cnn2dValidatorPtr->ValidatePooling2D(max_pool->get_friendly_name(),
kernels[0],
kernels[1],
strides[0],
strides[1],
is_exception_allowed);
}
}
return true;
}
bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected, bool is_exception_allowed) {
OPENVINO_ASSERT(fully_connected, "FullyConnected node is empty!");
size_t output_batch_size = fully_connected->get_output_shape(0)[0];
if (output_batch_size > 8) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + fully_connected->get_friendly_name() +
", type: " + fully_connected->get_type_name() + ", and batch size(" +
std::to_string(output_batch_size) + ") not supported";
}
return false;
}
return true;
}
bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed) {
OPENVINO_ASSERT(node, "Split node is empty!");
bool is_aligned = true;
for (size_t i = 0; i < node->get_output_size(); i++) {
is_aligned &= ov::intel_gna::ngraph_util::is_aligned_split(node, i);
}
return is_aligned;
}
bool is_op_supported(const std::shared_ptr<ov::Node>& node,
const ov::intel_gna::common::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
if (ov::op::util::is_parameter(node)) {
return SupportedElementTypes::is_parameter_type_supported(node->get_element_type(), is_exception_allowed);
} else if (ov::op::util::is_constant(node)) {
return SupportedElementTypes::is_constant_type_supported(node->get_element_type(), is_exception_allowed);
} else if (auto conv_ie = std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
return is_conv_supported(conv_ie, effective_compile_target, gna_precision, is_exception_allowed);
} else if (auto fully_connected = std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node)) {
return is_fc_supported(fully_connected, is_exception_allowed);
} else if (ov::intel_gna::ngraph_util::is_pooling(node)) {
return is_pooling_supported(std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node),
effective_compile_target,
is_exception_allowed);
} else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) ||
ov::intel_gna::ngraph_util::is_eltwise_add(node) || ov::intel_gna::ngraph_util::is_eltwise_mul(node) ||
ov::intel_gna::ngraph_util::is_crop_affined(node) ||
ov::intel_gna::ngraph_util::is_activation(node.get()) ||
ov::intel_gna::ngraph_util::is_gna_precision_agnostic(
node) ||  // check that concat/split are aligned when the transformations are moved to ngraph
(std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::PowerIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr)) {
return true;
} else if (ov::intel_gna::ngraph_util::is_gna_precision_agnostic(node)) {
if ((std::dynamic_pointer_cast<ngraph::opset9::Split>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::VariadicSplit>(node) != nullptr)) {
return is_split_supported(node, is_exception_allowed);
}
// TODO: check that concat is aligned when the transformation is moved to ngraph
return true;
}
return false;
}
void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const ov::intel_gna::common::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision) {
std::stringstream error;
// Walk through the transformed model
for (auto& op : model->get_ops()) {
if (!is_op_supported(op, effective_compile_target, gna_precision, true)) {
error << "The plugin does not support layer " << op->get_friendly_name() << " (type " << op->get_type_name()
<< ")!" << std::endl;
}
}
if (!error.str().empty()) {
THROW_GNA_EXCEPTION << error.str();
}
}
namespace cnn2d {
bool IsEqualToLimit::isValid(const uint32_t val) const {
@ -655,16 +858,9 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
// If there are no inputs start search from an output
startLayer = getCreatorLayer(outputs.begin()->second).lock();
} else {
auto network_input_precision = inputs.begin()->second->getPrecision();
if (network_input_precision != InferenceEngine::Precision::FP32 &&
network_input_precision != InferenceEngine::Precision::I16 &&
network_input_precision != InferenceEngine::Precision::U8) {
errMessage = "The plugin does not support input precision with " +
std::string(network_input_precision.name()) +
" format. Supported input precisions FP32, I16, U8\n";
return false;
}
SupportedElementTypes::is_parameter_type_supported(
InferenceEngine::details::convertPrecision(inputs.begin()->second->getPrecision()),
true);
auto& secondLayers = getInputTo(inputs.begin()->second->getInputData());
if (secondLayers.empty()) {

View File

@ -13,6 +13,10 @@
#include "common/gna_target.hpp"
#include "dnn_types.hpp"
#include "gna_lib_ver_selector.hpp"
#include "legacy/ngraph_ops/convolution_ie.hpp"
#include "legacy/ngraph_ops/fully_connected.hpp"
#include "ngraph/opsets/opset7.hpp"
#include "ngraph/opsets/opset9.hpp"
namespace ov {
namespace intel_gna {
@ -72,6 +76,80 @@ inline bool IsTransposeSupported(const std::vector<size_t>& shape) {
return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize;
}
class SupportedElementTypes {
public:
static bool is_parameter_type_supported(ov::element::Type type, bool is_exception_allowed = false);
static bool is_constant_type_supported(ov::element::Type type, bool is_exception_allowed = false);
private:
static const std::set<ov::element::Type> supported_parameter_types;
static const std::set<ov::element::Type> supported_constant_types;
};
/**
* @brief Validates if legacy convolution is supported by GNA
* @param conv_ie convolution
* @param effective_compile_target GNA compile target
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const ov::intel_gna::common::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Validates if max pooling is supported by GNA
* @param max_pool max pooling
* @param effective_compile_target GNA compile target
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
const ov::intel_gna::common::DeviceVersion& effective_compile_target,
bool is_exception_allowed = false);
/**
* @brief Validates if fully connected is supported by GNA
* @param fully_connected fully connected
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
bool is_exception_allowed = false);
/**
* @brief Validates if split is supported by GNA
* @param node split
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed = false);
/**
* @brief Validates if operation is supported by GNA
* @param node operation
* @param effective_compile_target GNA compile target
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_op_supported(const std::shared_ptr<ov::Node>& node,
const ov::intel_gna::common::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Check if all operations are supported by GNA
* @param model ngraph model
* @param effective_compile_target GNA compile target
* @param gna_precision GNA inference precision
*/
void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const ov::intel_gna::common::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision);
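Taken together, these declarations are meant to be used as a transform-then-validate sequence. The sketch below mirrors how GNAPlugin::LoadNetwork drives them later in this diff; config, effectiveCompileTarget and model are names from that code and are assumed here:

// Sketch only: relies on the plugin's internal headers and configuration objects.
auto transformer = ov::intel_gna::TransformationsPipeline(config, effectiveCompileTarget);
transformer.apply(model);
// Throws a single aggregated error message if any remaining operation is unsupported.
ov::intel_gna::limitations::check_all_ops_supported(model, effectiveCompileTarget, config.gnaPrecision);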
namespace cnn2d {
struct IsEqualToLimit {
@ -147,12 +225,13 @@ struct RectLimitByChannelsAndPrecision {
class AbstractValidator {
protected:
static void ThrowIfNotEmpty(const std::string& prefix, const std::string& error);
public:
static bool ValidationSuccesful(const bool throwOnError,
const std::string& error,
const std::string& operation,
const std::string& type);
public:
virtual ~AbstractValidator() = default;
virtual bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,

View File

@ -15,6 +15,7 @@
#include "gna_graph_tools.hpp"
#include "gna_itt.hpp"
#include "gna_plugin_config.hpp"
#include "gna_transformations_pipeline.hpp"
#include "layer_quantizer.hpp"
#include "scale_factor_calc.hpp"
#include "weights_converter.hpp"
@ -27,17 +28,11 @@ namespace frontend {
* Quantize entire network
*/
class ModelQuantizer {
const Config& gna_config;
const bool fake_quantized;
ov::intel_gna::TransformationsPipeline& gna_transformer;
public:
ModelQuantizer(const Config& gna_config, const bool fake_quantized)
: gna_config(gna_config),
fake_quantized(fake_quantized) {}
template <class PreQuantisationCb>
InferenceEngine::CNNNetwork quantize(const InferenceEngine::CNNNetwork& model,
const PreQuantisationCb& cb,
const GnaInputs& inputs) const {
ModelQuantizer(ov::intel_gna::TransformationsPipeline& transformer) : gna_transformer(transformer) {}
InferenceEngine::CNNNetwork quantize(const InferenceEngine::CNNNetwork& model, const GnaInputs& inputs) const {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ModelQuantizer::quantize");
auto visitor = [&](InferenceEngine::CNNLayerPtr layer_ptr) {
auto new_layer = InferenceEngine::injectData<QuantizedLayerParams>(layer_ptr);
@ -46,12 +41,12 @@ public:
};
InferenceEngine::CNNNetwork copied_net = InferenceEngine::CNNNetCopy(model);
cb(copied_net, true, gna_config.gnaFlags.input_low_precision);
gna_transformer.apply_legacy(copied_net, true);
copied_net = InferenceEngine::CNNNetCopy(copied_net, visitor);
// Allow client code to access copied topology, to avoid copies if user would like to chain quantisation with
// another preprocessing
cb(copied_net, false, gna_config.gnaFlags.input_low_precision);
gna_transformer.apply_legacy(copied_net, false);
auto sorted_new_net = InferenceEngine::details::CNNNetSortTopologically(copied_net);
log::debug() << "Sorted layers: " << std::endl;
@ -67,7 +62,7 @@ public:
// Propagate scale factor and quantize layers
propagateScaleFactor(sorted_new_net);
frontend::LayerQuantizer lq(gna_config);
frontend::LayerQuantizer lq(gna_transformer.config);
for (auto&& layer : sorted_new_net) {
lq.quantize(*layer);
@ -78,7 +73,7 @@ public:
private:
void propagateScaleFactor(std::vector<InferenceEngine::CNNLayerPtr>& net) const {
ScaleFactorCalculator sf(net, gna_config, fake_quantized);
ScaleFactorCalculator sf(net, gna_transformer.config, gna_transformer.is_fake_quantized());
uint32_t inf_loop_count = 0;
std::vector<std::string> inf_loop_pattern;
std::vector<std::string> inf_loop_history;

View File

@ -385,11 +385,6 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
THROW_GNA_LAYER_EXCEPTION(layer) << "with batch size not equals 1 is not supported";
}
if (convolution._dilation_x != 1 || convolution._dilation_y != 1) {
// TODO: Issue 24839
THROW_GNA_LAYER_EXCEPTION(layer) << "with dilation is not supported on GNA";
}
if (convolution._kernel_x > in_width * in_height) {
THROW_GNA_LAYER_EXCEPTION(layer) << "Kernel dimensions X (" << convolution._kernel_x << ")"
<< " is bigger than total input dimensions WxH (" << in_width << "x"

View File

@ -10,7 +10,6 @@
#include <gna2-common-api.h>
#include <gna2-model-api.h>
#include <ie_common.h>
#include <legacy/net_pass.h>
#include <algorithm>
#include <cstdlib>
@ -19,26 +18,11 @@
#include <layers/gna_fake_quantize_layer.hpp>
#include <legacy/convert_function_to_cnn_network.hpp>
#include <legacy/graph_tools.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pass/manager.hpp>
#include <string>
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/common_optimizations/fq_mul_fusion.hpp>
#include <transformations/common_optimizations/fq_reshape_fusion.hpp>
#include <transformations/common_optimizations/pull_transpose_through_fq.hpp>
#include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/transpose_sinking.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
#include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
#include <transformations/utils/utils.hpp>
#include <unordered_map>
#include <unordered_set>
#include <utility>
@ -58,10 +42,10 @@
#include "gna_model_serial.hpp"
#include "gna_plugin_config.hpp"
#include "gna_tensor_tools.hpp"
#include "gna_transformations_pipeline.hpp"
#include "layers/gna_layer_type.hpp"
#include "log/log.hpp"
#include "memory/gna_memory_state.hpp"
#include "optimizer/gna_pass_manager.hpp"
#include "orientation_helper.hpp"
#include "preprocessing.hpp"
#include "request/model_wrapper_factory.hpp"
@ -69,37 +53,8 @@
#include "request/worker_pool_impl.hpp"
#include "runtime/gna_float_runtime.hpp"
#include "scale_factor_helper.hpp"
#include "transformations/broadcast_const.hpp"
#include "transformations/common_optimizations/concat_reduce_fusion.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include "transformations/convert_dwsc_to_scaleshifts.hpp"
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
#include "transformations/convert_padded_to_valid_convolution.hpp"
#include "transformations/convert_precision.hpp"
#include "transformations/decompose_2d_convolution.hpp"
#include "transformations/decompose_mvn.hpp"
#include "transformations/disable_decompression_convert_constant_folding.hpp"
#include "transformations/handle_transposes_around_matmul.hpp"
#include "transformations/insert_copy_layer.hpp"
#include "transformations/insert_identity_layer.hpp"
#include "transformations/insert_reshape_around_matmul.hpp"
#include "transformations/insert_transpose_after_convolution_or_pooling.hpp"
#include "transformations/markup_fusable_transpose.hpp"
#include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
#include "transformations/op_conversions/gru_cell_decomposition.hpp"
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
#include "transformations/op_conversions/softsign_decomposition.hpp"
#include "transformations/pwl_approximation.hpp"
#include "transformations/remove_converts.hpp"
#include "transformations/remove_extra_reshapes.hpp"
#include "transformations/remove_single_input_concat.hpp"
#include "transformations/reorder_activation_and_pooling.hpp"
#include "transformations/split_convolution_with_large_buffer_size.hpp"
#include "transformations/split_eltwise.hpp"
#include "transformations/substitute_softsign.hpp"
#include "transformations/swap_input_matmul_gna.hpp"
#include "transformations/unfuse_reshape_and_transpose.hpp"
using namespace ov::intel_gna::ngraph_util;
inline uint32_t ToByteSize(const Gna2DataType type) {
switch (type) {
@ -706,130 +661,20 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
const auto effectiveCompileTarget = config.target->get_effective_compile_target();
graphCompiler.SetValidatorTarget(effectiveCompileTarget);
bool isNgraphPassesUsed = false;
bool fake_quantized = false;
auto transformer = TransformationsPipeline(config, effectiveCompileTarget);
if (_network.getFunction()) {
CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network);
const auto& graph = clonedNetwork.getFunction();
ngraph::pass::Manager manager;
manager.register_pass<ov::pass::InitNodeInfo>();
fake_quantized = ov::op::util::has_op_with_type<ngraph::opset7::FakeQuantize>(graph);
// In OV API 2.0(IRv10) default convertion to fp32 (inputs, outputs and weights) is disabled
// and we need to run the ConvertPrecision transformation to support old networks.
manager.register_pass<ov::pass::ConvertPrecision>(
precisions_array{{ngraph::element::f16, ngraph::element::f32}});
manager.register_pass<ov::pass::ConvertMVN1ToMVN6>();
manager.register_pass<ov::intel_gna::pass::DecomposeMVN>();
manager.register_pass<ov::pass::CommonOptimizations>();
manager.register_pass<ov::intel_gna::pass::RemoveInputConvert>();
manager.register_pass<ov::intel_gna::pass::RemoveOutputConvert>();
manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
manager.register_pass<ov::pass::GRUCellDecomposition>();
manager.register_pass<ov::pass::LSTMCellDecomposition>();
manager.register_pass<ov::intel_gna::pass::ConvertDWSCToScaleShifts>();
manager.register_pass<ov::intel_gna::pass::ConvertPaddedToValidConv>();
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(effectiveCompileTarget,
config.gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(effectiveCompileTarget,
config.gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(effectiveCompileTarget, config.gnaPrecision);
// TODO enable this transformation for networks with convolutions
if (!ov::op::util::has_op_with_type<ngraph::opset7::Convolution>(graph)) {
manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithFqToPointWiseConvolution>();
manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithBiasToPointWiseConvolution>();
manager.register_pass<ov::intel_gna::pass::ConvertMatmulToPointWiseConvolution>();
}
manager.register_pass<ov::intel_gna::pass::SplitConvolutionWithFq>();
manager.register_pass<ov::intel_gna::pass::SplitConvolutionWithBias>();
manager.register_pass<ov::intel_gna::pass::SplitConvolution>();
manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmulWithTranspose>();
manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmulWithFq>();
manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmulWithAdd>();
manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmul>();
manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithTrailingTranspose>();
manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithAct>();
manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithFq>();
manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithBias>();
manager.register_pass<ov::intel_gna::pass::SwapInputMatMul>();
manager.register_pass<ov::intel_gna::pass::HandleTransposesAroundMatMul>();
manager.register_pass<ov::intel_gna::pass::InsertTransposeAfterConvOrPool>();
manager.register_pass<ov::intel_gna::pass::Unfuse2dto4dReshapeAndTranspose>();
manager.register_pass<ov::intel_gna::pass::Unfuse4dto2dReshapeAndTranspose>();
manager.register_pass<ov::intel_gna::pass::RemoveExtraReshapes>();
manager.register_pass<ov::intel_gna::pass::ReorderActivationAndPooling>();
manager.register_pass<ov::intel_gna::pass::RemoveSingleInputConcat>();
manager.register_pass<ov::intel_gna::pass::SubstituteSoftsign>();
manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeLayerToBeEliminated>();
manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
manager.register_pass<ov::intel_gna::pass::MarkupFusableTranspose>();
manager.register_pass<ov::intel_gna::pass::RemoveExtraReshapes>();
/*
Put BroadcastAddMultiplyConst here after ConvertOpSet..() transformations since there are conficts with them.
ngraph::pass::ConvertOpSet1ToLegacy -> ngraph::pass::BiasFusions ->
ngraph::pass::ConvAddFusion, ngraph::pass::ConvMultiplyFusion
That transormations fuse bias into convolution and recognizes const node as [1, C, 1, 1].
TODO: move that transformation just beyond RemoveSingleInputConcat pass after removing ConvertOpSet1ToLegacy
transormations
*/
manager.register_pass<ov::intel_gna::pass::BroadcastAddMultiplyConst>();
/*
SplitEltwise has dependency on BroadcastAddMultiplyConst for case when spliting of Constant
input is doing
*/
manager.register_pass<ov::intel_gna::pass::SplitEltwise>();
/* The following transformations perform insertion of Identity layer in 3 steps:
1. Mark inputs with rt_info attribute where precision change from i32 to i16/i8 is happened
2. Insert Identity after operation which have consumers marked with precision change
3. Cleanup appropriate attribute from rt_info
*/
manager.register_pass<ov::intel_gna::pass::MarkIdentityCandidates>(config.gnaFlags.input_low_precision);
manager.register_pass<ov::intel_gna::pass::InsertIdentity>();
manager.register_pass<ov::intel_gna::pass::IdentityCandidatesCleanup>();
// Breaks fusing of layers before result
manager.register_pass<ov::intel_gna::pass::BreakFusingOfOutputLayers>();
if (!config.gnaFlags.sw_fp32 && !config.gnaFlags.uniformPwlDesign) {
manager.register_pass<ov::intel_gna::pass::PWLApproximationWithFq>(config.gnaFlags.pwlMaxErrorPercent);
manager.register_pass<ov::intel_gna::pass::PWLApproximation>(config.gnaFlags.pwlMaxErrorPercent);
}
manager.register_pass<ov::pass::UnrollTensorIterator>();
manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
manager.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
manager.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
const auto& pass_config = manager.get_pass_config();
// Allowing FP16 Converts to be folded and FP16 constants to upgrade to FP32 data type
pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
pass_config->disable<ov::pass::DisableDecompressionConvertConstantFolding>();
pass_config->disable<ov::pass::FakeQuantizeMulFusion>();
pass_config->disable<ov::pass::FakeQuantizeReshapeFusion>();
pass_config->disable<ov::pass::PullTransposeThroughFQUp>();
pass_config->disable<ov::pass::ReluFakeQuantizeFusion>();
// Consider to enable after per-channel quantization on FakeQuantize layer is supported in GNAPlugin, see issue
// 52034
pass_config->disable<ov::pass::AddFakeQuantizeFusion>();
// TransposeReduction can be enabled when Transpose-Conv-Transpose patterns will be handled in ngraph
// transformations
pass_config->disable<ov::pass::TransposeReduction>();
// Operations Max and Min aren't supported
pass_config->disable<ov::pass::ConcatReduceFusion>();
// pass_config->disable<ov::pass::SoftSignDecomposition>();
manager.run_passes(graph);
convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(graph, clonedNetwork);
isNgraphPassesUsed = true;
auto model = clonedNetwork.getFunction();
transformer.apply(model);
limitations::check_all_ops_supported(model, effectiveCompileTarget, config.gnaPrecision);
convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(model, clonedNetwork);
}
IE_SUPPRESS_DEPRECATED_START
InferenceEngine::CNNNetwork network = convertedNetwork ? InferenceEngine::CNNNetwork{convertedNetwork} : _network;
IE_SUPPRESS_DEPRECATED_END
NetPass::ConvertPrecision(network, Precision::I64, Precision::I32);
NetPass::ConvertPrecision(network, Precision::U64, Precision::I32);
NetPass::ConvertPrecision(network, Precision::U32, Precision::I32);
transformer.convert_precision_legacy(network);
// Check the network
std::string error;
@ -849,7 +694,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// Set Scale Factors for inputs according to configuration.
ov::intel_gna::helpers::ApplyInputScaleFactors(*inputs_ptr_, config);
if (fake_quantized) {
if (transformer.is_fake_quantized()) {
UpdateInputScaleFromNetwork(network);
}
@ -857,56 +702,6 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
FillInputsAndOutputsTranspositionInfo(network);
}
// network optimisation phases
int passIdx = 0;
auto run_passes = [&](const CNNNetwork& network, bool runBeforeCopy, bool lowPrecision) {
auto passes = make_shared<PassManager>(PassManagerSettings{runBeforeCopy, lowPrecision}, network);
passes->registerPass<RemoveConstPass>();
if (!isNgraphPassesUsed) {
passes->registerPass<UnrollTIPass>();
passes->registerPass<RemoveConstPass>();
passes->registerPass<UnrollLSTMCellPass>();
passes->registerPass<RemoveSingleInputConcatPass>();
passes->registerPass<BroadcastConstPass>();
passes->registerPass<SubstituteScaleShiftBroadCastPass>();
}
if (fake_quantized)
passes->registerPass<SubstituteSoftSignPass>();
// fake quantisation aware passes
passes->registerPass<FuseFQIntoWeightsPass>();
passes->registerPass<MoveFakeQuantizeLayerIntoQuantParamsPass>();
passes->registerPass<TransposeWeightsFromNCHWToNHWCPass>();
passes->registerPass<SubstitutePReluPass>();
if (!isNgraphPassesUsed) {
passes->registerPass<ReorderMaxPoolPass>();
passes->registerPass<EltwiseSplitOverChannelsPass>();
}
passes->registerPass<InsertSplitAligningFilterPass>();
if (!isNgraphPassesUsed) {
passes->registerPass<InsertCopyLayerPass>();
}
passes->registerPass<FlattenTrivialConcatPass>();
passes->registerPass<InsertConcatAligningFilterPass>();
passes->registerPass<ReorderConcatInputsPass>();
passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
// Keep legacy inserting of Identity layer here
// because concat and split aliging passes are not moved to ngraph yet
passes->registerPass<InsertIdentityLayerPass>();
passes->registerPass<BreakFusingOfOutputLayersPass>();
passes->registerPass<InsertDiagonalLayerPass>();
passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
passes->registerPass<ForbidActivationFusingPass>();
passes->registerPass<FuseMultipleIdentitiesPass>();
passIdx = passes->run(passIdx);
};
InferenceEngine::CNNNetwork newNet;
if (gnaFlags->sw_fp32) {
@ -916,11 +711,11 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
};
newNet = InferenceEngine::CNNNetCopy(network, visitor);
// to run all passes need to have two calls to pass manager
run_passes(newNet, true, gnaFlags->input_low_precision);
run_passes(newNet, false, gnaFlags->input_low_precision);
transformer.apply_legacy(newNet, true);
transformer.apply_legacy(newNet, false);
} else {
ov::intel_gna::frontend::ModelQuantizer modelQuantizer(config, fake_quantized);
newNet = modelQuantizer.quantize(network, run_passes, *inputs_ptr_);
ov::intel_gna::frontend::ModelQuantizer modelQuantizer(transformer);
newNet = modelQuantizer.quantize(network, *inputs_ptr_);
}
auto inputLayers = CNNNetGetAllInputLayers(newNet);
@ -1822,12 +1617,29 @@ std::vector<std::shared_ptr<const ov::Node>> GNAPlugin::GetOutputs() {
return results;
}
InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork(const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config) const {
InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork(
const InferenceEngine::CNNNetwork& network,
const std::map<std::string, std::string>& config_map) const {
InferenceEngine::QueryNetworkResult res;
if (network.getFunction()) {
IE_THROW(NotImplemented) << " ngraph::Function is not supported natively";
Config qn_config(config);
qn_config.UpdateFromMap(config_map);
const auto effectiveCompileTarget = qn_config.target->get_effective_compile_target();
auto model = network.getFunction();
if (model) {
auto supported = GetSupportedNodes(
model,
[&](std::shared_ptr<ov::Model>& model) {
TransformationsPipeline(qn_config, effectiveCompileTarget).apply(model);
},
[&](const std::shared_ptr<ngraph::Node>& op) {
return limitations::is_op_supported(op, effectiveCompileTarget, qn_config.gnaPrecision);
});
for (auto&& op_name : supported) {
res.supportedLayersMap.emplace(op_name, GetName());
}
return res;
}
std::unordered_set<CNNLayer*> allLayers;
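The ngraph branch above feeds GetSupportedNodes with two callbacks: the full TransformationsPipeline and the per-operation limitations::is_op_supported predicate. This is what heterogeneous fallback builds on; a hedged client-side sketch (the fallback order is an assumption, not part of this commit):

InferenceEngine::Core core;
auto network = core.ReadNetwork("model.xml");  // placeholder path
// Layers that GNA's QueryNetwork reports as unsupported fall back to CPU.
auto exec_network = core.LoadNetwork(network, "HETERO:GNA,CPU");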

View File

@ -0,0 +1,244 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "gna_transformations_pipeline.hpp"
#include "gna_itt.hpp"
#include "legacy/net_pass.h"
#include "legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp"
#include "ngraph/opsets/opset7.hpp"
#include "openvino/pass/manager.hpp"
#include "optimizer/gna_pass_manager.hpp"
#include "transformations/broadcast_const.hpp"
#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/common_optimizations.hpp"
#include "transformations/common_optimizations/concat_reduce_fusion.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include "transformations/common_optimizations/fq_mul_fusion.hpp"
#include "transformations/common_optimizations/fq_reshape_fusion.hpp"
#include "transformations/common_optimizations/pull_transpose_through_fq.hpp"
#include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/transpose_sinking.hpp"
#include "transformations/control_flow/unroll_tensor_iterator.hpp"
#include "transformations/convert_dwsc_to_scaleshifts.hpp"
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
#include "transformations/convert_padded_to_valid_convolution.hpp"
#include "transformations/convert_precision.hpp"
#include "transformations/decompose_2d_convolution.hpp"
#include "transformations/decompose_mvn.hpp"
#include "transformations/disable_decompression_convert_constant_folding.hpp"
#include "transformations/handle_transposes_around_matmul.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/insert_copy_layer.hpp"
#include "transformations/insert_identity_layer.hpp"
#include "transformations/insert_reshape_around_matmul.hpp"
#include "transformations/insert_transpose_after_convolution_or_pooling.hpp"
#include "transformations/markup_fusable_transpose.hpp"
#include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
#include "transformations/op_conversions/gru_cell_decomposition.hpp"
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
#include "transformations/op_conversions/softsign_decomposition.hpp"
#include "transformations/opset_conversions/convert_opset2_to_opset1.hpp"
#include "transformations/opset_conversions/convert_opset3_to_opset2.hpp"
#include "transformations/pwl_approximation.hpp"
#include "transformations/remove_converts.hpp"
#include "transformations/remove_extra_reshapes.hpp"
#include "transformations/remove_single_input_concat.hpp"
#include "transformations/reorder_activation_and_pooling.hpp"
#include "transformations/split_convolution_with_large_buffer_size.hpp"
#include "transformations/split_eltwise.hpp"
#include "transformations/substitute_softsign.hpp"
#include "transformations/swap_input_matmul_gna.hpp"
#include "transformations/unfuse_reshape_and_transpose.hpp"
#include "transformations/utils/utils.hpp"
namespace ov {
namespace intel_gna {
void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model) {
OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "TransformationsPipeline::apply");
fake_quantized = ov::op::util::has_op_with_type<ngraph::op::FakeQuantize>(model);
ov::pass::Manager manager;
manager.register_pass<ov::pass::InitNodeInfo>();
// In OV API 2.0 (IRv10) the default conversion to fp32 (inputs, outputs and weights) is disabled,
// so we need to run the ConvertPrecision transformation to support old networks.
manager.register_pass<ov::pass::ConvertPrecision>(precisions_array{{ngraph::element::f16, ngraph::element::f32}});
manager.register_pass<ov::pass::ConvertMVN1ToMVN6>();
manager.register_pass<ov::intel_gna::pass::DecomposeMVN>();
manager.register_pass<ov::pass::CommonOptimizations>();
manager.register_pass<ov::intel_gna::pass::RemoveInputConvert>();
manager.register_pass<ov::intel_gna::pass::RemoveOutputConvert>();
manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
manager.register_pass<ov::pass::GRUCellDecomposition>();
manager.register_pass<ov::pass::LSTMCellDecomposition>();
manager.register_pass<ov::intel_gna::pass::ConvertDWSCToScaleShifts>();
manager.register_pass<ov::intel_gna::pass::ConvertPaddedToValidConv>();
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(effective_compile_target,
config.gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(effective_compile_target,
config.gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(effective_compile_target, config.gnaPrecision);
// TODO enable this transformation for networks with convolutions
if (!ov::op::util::has_op_with_type<ngraph::opset7::Convolution>(model)) {
manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithFqToPointWiseConvolution>();
manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithBiasToPointWiseConvolution>();
manager.register_pass<ov::intel_gna::pass::ConvertMatmulToPointWiseConvolution>();
}
manager.register_pass<ov::intel_gna::pass::SplitConvolutionWithFq>();
manager.register_pass<ov::intel_gna::pass::SplitConvolutionWithBias>();
manager.register_pass<ov::intel_gna::pass::SplitConvolution>();
manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmulWithTranspose>();
manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmulWithFq>();
manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmulWithAdd>();
manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmul>();
manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithTrailingTranspose>();
manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithAct>();
manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithFq>();
manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithBias>();
manager.register_pass<ov::intel_gna::pass::SwapInputMatMul>();
manager.register_pass<ov::intel_gna::pass::HandleTransposesAroundMatMul>();
manager.register_pass<ov::intel_gna::pass::InsertTransposeAfterConvOrPool>();
manager.register_pass<ov::intel_gna::pass::Unfuse2dto4dReshapeAndTranspose>();
manager.register_pass<ov::intel_gna::pass::Unfuse4dto2dReshapeAndTranspose>();
manager.register_pass<ov::intel_gna::pass::RemoveExtraReshapes>();
manager.register_pass<ov::intel_gna::pass::ReorderActivationAndPooling>();
manager.register_pass<ov::intel_gna::pass::RemoveSingleInputConcat>();
manager.register_pass<ov::intel_gna::pass::SubstituteSoftsign>();
manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeLayerToBeEliminated>();
manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
manager.register_pass<ov::intel_gna::pass::MarkupFusableTranspose>();
manager.register_pass<ov::intel_gna::pass::RemoveExtraReshapes>();
/*
Put BroadcastAddMultiplyConst here after the ConvertOpSet..() transformations since there are conflicts with them.
ngraph::pass::ConvertOpSet1ToLegacy -> ngraph::pass::BiasFusions ->
ngraph::pass::ConvAddFusion, ngraph::pass::ConvMultiplyFusion
Those transformations fuse the bias into the convolution and recognize the const node as [1, C, 1, 1].
TODO: move this transformation just beyond the RemoveSingleInputConcat pass after removing the ConvertOpSet1ToLegacy
transformations
*/
manager.register_pass<ov::intel_gna::pass::BroadcastAddMultiplyConst>();
/*
SplitEltwise depends on BroadcastAddMultiplyConst for the case when a Constant input is split
*/
manager.register_pass<ov::intel_gna::pass::SplitEltwise>();
/* The following transformations insert an Identity layer in 3 steps:
1. Mark inputs with an rt_info attribute where a precision change from i32 to i16/i8 happens
2. Insert Identity after operations whose consumers are marked with the precision change
3. Clean up the attribute from rt_info
*/
manager.register_pass<ov::intel_gna::pass::MarkIdentityCandidates>(config.gnaFlags.input_low_precision);
manager.register_pass<ov::intel_gna::pass::InsertIdentity>();
manager.register_pass<ov::intel_gna::pass::IdentityCandidatesCleanup>();
// Breaks fusing of layers before result
manager.register_pass<ov::intel_gna::pass::BreakFusingOfOutputLayers>();
if (!config.gnaFlags.sw_fp32 && !config.gnaFlags.uniformPwlDesign) {
manager.register_pass<ov::intel_gna::pass::PWLApproximationWithFq>(config.gnaFlags.pwlMaxErrorPercent);
manager.register_pass<ov::intel_gna::pass::PWLApproximation>(config.gnaFlags.pwlMaxErrorPercent);
}
manager.register_pass<ov::pass::UnrollTensorIterator>();
manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
manager.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
manager.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
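// With the ngraph pipeline, the i64/u64/u32 -> i32 conversion happens here; the legacy
// NetPass::ConvertPrecision calls in convert_precision_legacy() only run when this pipeline was not used.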
manager.register_pass<ov::pass::ConvertPrecision>(precisions_array{{ov::element::i64, ov::element::i32},
{ov::element::u64, ov::element::i32},
{ov::element::u32, ov::element::i32}});
const auto& pass_config = manager.get_pass_config();
// Allowing FP16 Converts to be folded and FP16 constants to upgrade to FP32 data type
pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
pass_config->disable<ov::pass::DisableDecompressionConvertConstantFolding>();
pass_config->disable<ov::pass::FakeQuantizeMulFusion>();
pass_config->disable<ov::pass::FakeQuantizeReshapeFusion>();
pass_config->disable<ov::pass::PullTransposeThroughFQUp>();
pass_config->disable<ov::pass::ReluFakeQuantizeFusion>();
// Consider enabling after per-channel quantization on the FakeQuantize layer is supported in GNAPlugin, see issue
// 52034
pass_config->disable<ov::pass::AddFakeQuantizeFusion>();
// TransposeReduction can be enabled once Transpose-Conv-Transpose patterns are handled in ngraph
// transformations
pass_config->disable<ov::pass::TransposeReduction>();
// Operations Max and Min aren't supported
pass_config->disable<ov::pass::ConcatReduceFusion>();
manager.run_passes(model);
is_ngraph_passes_used = true;
}
IE_SUPPRESS_DEPRECATED_START
void TransformationsPipeline::apply_legacy(const InferenceEngine::CNNNetwork& network, bool runBeforeCopy) {
OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "TransformationsPipeline::apply_legacy");
auto passes =
std::make_shared<PassManager>(PassManagerSettings{runBeforeCopy, config.gnaFlags.input_low_precision}, network);
passes->registerPass<RemoveConstPass>();
if (!is_ngraph_passes_used) {
passes->registerPass<UnrollTIPass>();
passes->registerPass<RemoveConstPass>();
passes->registerPass<UnrollLSTMCellPass>();
passes->registerPass<RemoveSingleInputConcatPass>();
passes->registerPass<BroadcastConstPass>();
passes->registerPass<SubstituteScaleShiftBroadCastPass>();
}
if (fake_quantized)
passes->registerPass<SubstituteSoftSignPass>();
// fake quantisation aware passes
passes->registerPass<FuseFQIntoWeightsPass>();
passes->registerPass<MoveFakeQuantizeLayerIntoQuantParamsPass>();
passes->registerPass<TransposeWeightsFromNCHWToNHWCPass>();
passes->registerPass<SubstitutePReluPass>();
if (!is_ngraph_passes_used) {
passes->registerPass<ReorderMaxPoolPass>();
passes->registerPass<EltwiseSplitOverChannelsPass>();
}
passes->registerPass<InsertSplitAligningFilterPass>();
if (!is_ngraph_passes_used) {
passes->registerPass<InsertCopyLayerPass>();
}
passes->registerPass<FlattenTrivialConcatPass>();
passes->registerPass<InsertConcatAligningFilterPass>();
passes->registerPass<ReorderConcatInputsPass>();
passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
// Keep the legacy insertion of the Identity layer here
// because the concat and split aligning passes are not moved to ngraph yet
passes->registerPass<InsertIdentityLayerPass>();
passes->registerPass<BreakFusingOfOutputLayersPass>();
passes->registerPass<InsertDiagonalLayerPass>();
passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
passes->registerPass<ForbidActivationFusingPass>();
passes->registerPass<FuseMultipleIdentitiesPass>();
legacy_pass_index = passes->run(legacy_pass_index);
}
void TransformationsPipeline::convert_precision_legacy(InferenceEngine::CNNNetwork& network) {
if (!is_ngraph_passes_used) {
InferenceEngine::NetPass::ConvertPrecision(network,
InferenceEngine::Precision::I64,
InferenceEngine::Precision::I32);
InferenceEngine::NetPass::ConvertPrecision(network,
InferenceEngine::Precision::U64,
InferenceEngine::Precision::I32);
InferenceEngine::NetPass::ConvertPrecision(network,
InferenceEngine::Precision::U32,
InferenceEngine::Precision::I32);
}
}
IE_SUPPRESS_DEPRECATED_END
} // namespace intel_gna
} // namespace ov

View File

@ -0,0 +1,41 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include "cpp/ie_cnn_network.h"
#include "gna_plugin_config.hpp"
#include "openvino/core/model.hpp"
namespace ov {
namespace intel_gna {
class TransformationsPipeline {
public:
explicit TransformationsPipeline(const Config& config,
const ov::intel_gna::common::DeviceVersion& effective_compile_target =
ov::intel_gna::common::DeviceVersion::NotSet)
: config(config),
effective_compile_target(effective_compile_target) {}
void apply(const std::shared_ptr<ov::Model>& model);
IE_SUPPRESS_DEPRECATED_START
void apply_legacy(const InferenceEngine::CNNNetwork& network, bool runBeforeCopy);
void convert_precision_legacy(InferenceEngine::CNNNetwork& network);
IE_SUPPRESS_DEPRECATED_END
bool is_fake_quantized() {
return fake_quantized;
};
const ov::intel_gna::Config& config;
private:
bool is_ngraph_passes_used = false;
bool fake_quantized = false;
int legacy_pass_index = 0;
ov::intel_gna::common::DeviceVersion effective_compile_target;
};
} // namespace intel_gna
} // namespace ov
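The pipeline exposes three entry points used elsewhere in this commit: apply() for the ngraph model, apply_legacy() for the two-phase legacy pass-manager run, and convert_precision_legacy() for the old I64/U64/U32 to I32 conversion. A condensed sketch of the call order in GNAPlugin::LoadNetwork, with variable names taken from that code and assumed here:

// Sketch only; see gna_plugin.cpp in this commit for the full context.
auto transformer = TransformationsPipeline(config, effectiveCompileTarget);
if (model) {
    transformer.apply(model);  // ngraph path: run the new pass manager
}
transformer.convert_precision_legacy(network);  // no-op when the ngraph passes ran
if (gnaFlags->sw_fp32) {
    transformer.apply_legacy(newNet, true);   // before the network copy
    transformer.apply_legacy(newNet, false);  // after the network copy
} else {
    // ModelQuantizer calls apply_legacy() internally around its own network copy
    newNet = frontend::ModelQuantizer(transformer).quantize(network, *inputs_ptr_);
}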

View File

@ -3,26 +3,29 @@
//
#pragma once
#include <legacy/ngraph_ops/convolution_ie.hpp>
#include <legacy/ngraph_ops/crop_ie.hpp>
#include <legacy/ngraph_ops/eltwise.hpp>
#include <legacy/ngraph_ops/fully_connected.hpp>
#include <legacy/ngraph_ops/power.hpp>
#include <legacy/ngraph_ops/relu_ie.hpp>
#include <legacy/ngraph_ops/scaleshift.hpp>
#include <memory>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/opsets/opset8.hpp>
#include <ngraph/opsets/opset9.hpp>
#include <transformations/rt_info/gna_transpose_fusable.hpp>
#include <transformations/utils/utils.hpp>
#include <vector>
#include "backend/gna_limitations.hpp"
#include "gna_plugin_config.hpp"
#include "layers/gna_convolution_layer.hpp"
#include "layers/gna_permute.hpp"
#include "legacy/ngraph_ops/convolution_ie.hpp"
#include "legacy/ngraph_ops/crop_ie.hpp"
#include "legacy/ngraph_ops/eltwise.hpp"
#include "legacy/ngraph_ops/fully_connected.hpp"
#include "legacy/ngraph_ops/power.hpp"
#include "legacy/ngraph_ops/relu_ie.hpp"
#include "legacy/ngraph_ops/scaleshift.hpp"
#include "ngraph/opsets/opset7.hpp"
#include "ngraph/opsets/opset8.hpp"
#include "ngraph/opsets/opset9.hpp"
#include "ops/copy.hpp"
#include "ops/identity.hpp"
#include "ops/pwl.hpp"
#include "transformations/rt_info/gna_transpose_fusable.hpp"
#include "transformations/utils/transformation_helper.hpp"
#include "transformations/utils/utils.hpp"
namespace ov {
namespace intel_gna {
@ -200,7 +203,7 @@ inline bool is_Tbit_fq(const std::shared_ptr<ngraph::Node>& node) {
if (!fq_node)
return false;
auto levels = fq_node->get_levels();
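// Accept both 2^N - 1 and 2^N quantization levels as a T-bit FakeQuantize.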
return std::numeric_limits<T>::max() == levels;
return (std::numeric_limits<T>::max() == levels) || (std::numeric_limits<T>::max() == levels - 1);
}
inline bool is_32bit_fq(const std::shared_ptr<ngraph::Node>& node) {

View File

@ -13,7 +13,32 @@ namespace intel_gna {
namespace pass {
namespace helper {
void GetConvData(std::shared_ptr<ngraph::op::ConvolutionIE> conv, ConvData& conv_data) {
OPENVINO_ASSERT(conv);
conv_data.output_height = conv->get_output_shape(0)[2];
conv_data.output_width = conv->get_output_shape(0)[3];
conv_data.input_channel_count = conv->input_value(0).get_shape()[1];
conv_data.input_height = conv->input_value(0).get_shape()[2];
conv_data.input_width = conv->input_value(0).get_shape()[3];
conv_data.filter_count = conv->input_value(1).get_shape()[0];
conv_data.filter_channel_count = conv->input_value(1).get_shape()[1];
conv_data.filter_height = conv->input_value(1).get_shape()[2];
conv_data.filter_width = conv->input_value(1).get_shape()[3];
conv_data.filter_dilation_height = conv->get_dilations()[0];
conv_data.filter_dilation_width = conv->get_dilations()[1];
conv_data.filter_stride_height = conv->get_strides()[0];
conv_data.filter_stride_width = conv->get_strides()[1];
conv_data.output_channel_count = conv_data.filter_count;
conv_data.pads_begin_height = conv->get_pads_begin()[0];
conv_data.pads_begin_width = conv->get_pads_begin()[1];
conv_data.pads_end_height = conv->get_pads_end()[0];
conv_data.pads_end_width = conv->get_pads_end()[1];
conv_data.padding_type = conv->get_auto_pad();
conv_data.element_type = conv->get_element_type();
}
void GetConvData(std::shared_ptr<ngraph::opset7::Convolution> conv, ConvData& conv_data) {
OPENVINO_ASSERT(conv);
conv_data.output_height = conv->get_output_shape(0)[2];
conv_data.output_width = conv->get_output_shape(0)[3];
conv_data.input_channel_count = conv->input_value(0).get_shape()[1];

View File

@ -4,6 +4,7 @@
#pragma once
#include <legacy/ngraph_ops/convolution_ie.hpp>
#include <ngraph/opsets/opset7.hpp>
namespace ov {
@ -42,6 +43,14 @@ struct ConvData {
*/
void GetConvData(std::shared_ptr<ngraph::opset7::Convolution> conv, ConvData& conv_data);
/**
* @brief gets all legacy convolution related data into a struct for further processing
* @param conv legacy convolution node to get data of
* @param conv_data convolution data structure to put data into
* @return void
*/
void GetConvData(std::shared_ptr<ngraph::op::ConvolutionIE> conv, ConvData& conv_data);
/**
* @brief ngraph matcher predicate fusing existing predicates for consumers count and rank of a layer
* @param expected_count expected consumers count for of node

View File

@ -39,9 +39,10 @@ class I8QuantisationTest : public GNATest<> {
gna_config.gnaPrecision = InferenceEngine::Precision::I16;
gna_config.gnaFlags.input_low_precision = false;
return ModelQuantizer(gna_config, false).quantize(
auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);
return ModelQuantizer(transformer).quantize(
model,
[](const InferenceEngine::CNNNetwork&, bool run_before_copy, bool low_precision) {},
inputs);
}
@ -100,7 +101,7 @@ TEST_F(I8QuantisationTest, FCDimensionIs1){
auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C });
weights->allocate();
fillWeights(weights);
Core ie;
auto network = ie.ReadNetwork(FCOnlyModel(), weights);
@ -111,7 +112,7 @@ TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits){
auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C });
weights->allocate();
fillWeights(weights);
Core ie;
auto network = ie.ReadNetwork(Fc2DOutputModel(), weights);
@ -126,7 +127,7 @@ TEST_F(I8QuantisationTest, fp16tofp32_on_fullyConnected_model) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {220}, Layout::C });
weights->allocate();
fillWeights(weights);
Core ie;
auto network = ie.ReadNetwork(FCOnlyModelFP16(), weights);
@ -137,7 +138,7 @@ TEST_F(I8QuantisationTest, LSTMCell_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {33664}, C });
weights->allocate();
fillWeights(weights);
Core ie;
auto network = ie.ReadNetwork(LSTMCellOnlyModel(), weights);
@ -148,7 +149,7 @@ TEST_F(I8QuantisationTest, LSTMCell_unaligned_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {3480}, C });
weights->allocate();
fillWeights(weights);
Core ie;
auto network = ie.ReadNetwork(LSTMCellOnlyModelUnaligned(), weights);
@ -159,7 +160,7 @@ TEST_F(I8QuantisationTest, TI_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {249748}, C });
weights->allocate();
fillWeights(weights);
Core ie;
auto network = ie.ReadNetwork(TIModelWithLSTMCell2(), weights);

View File

@ -41,10 +41,11 @@ protected:
gna_config.gnaPrecision = InferenceEngine::Precision::I16;
gna_config.gnaFlags.input_low_precision = false;
return ModelQuantizer(gna_config, false)
auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);
return ModelQuantizer(transformer)
.quantize(
model,
[](InferenceEngine::CNNNetwork&, bool run_before_copy, bool inputs_int8_precision) {},
inputs);
}

View File

@ -20,7 +20,7 @@ class I16QuantisationTest : public GNATest<> {
protected:
InferenceEngine::CNNLayerPtr quantize (InferenceEngine::CNNLayerPtr lp) {
auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
Config gna_config;
Config gna_config;
gna_config.gnaPrecision = InferenceEngine::Precision::I16;
gna_config.gnaFlags.input_low_precision = false;
LayerQuantizer lq(gna_config);
@ -41,9 +41,10 @@ class I16QuantisationTest : public GNATest<> {
gna_config.gnaPrecision = InferenceEngine::Precision::I16;
gna_config.gnaFlags.input_low_precision = false;
return ModelQuantizer(gna_config, false).quantize(
auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);
return ModelQuantizer(transformer).quantize(
model,
[](const InferenceEngine::CNNNetwork&, bool run_before_copy, bool low_precision) {},
inputs);
}
@ -367,7 +368,7 @@ TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {220}, Layout::C });
weights->allocate();
fillWeights(weights);
Core ie;
auto network = ie.ReadNetwork(FCOnlyModelFP16(), weights);
@ -431,7 +432,7 @@ TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {3480}, C });
weights->allocate();
fillWeights(weights);
Core ie;
auto network = ie.ReadNetwork(LSTMCellOnlyModelUnaligned(), weights);
@ -468,7 +469,7 @@ TEST_F(I16QuantisationTest, TI_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {249748}, C });
weights->allocate();
fillWeights(weights);
Core ie;
auto network = ie.ReadNetwork(TIModelWithLSTMCell2(), weights);

View File

@ -209,9 +209,8 @@ GNA_NEG_INSTANTIATE(InputW, Fine, InvalidInputW, "Unsupported input width", GNA_
GNA_NEG_INSTANTIATE(InputC, Fine, InvalidInputC, "Unsupported number of input channels", GNA_3_0)
GNA_NEG_INSTANTIATE(Padding, InvalidPadding, Fine, "Unsupported convolution input padding", GNA_3_0)
GNA_NEG_INSTANTIATE(Stride, InvalidStride, Fine, "Unsupported convolution stride shape", GNA_3_0)
GNA_NEG_INSTANTIATE(Dilation, InvalidDilation, Fine, "dilation is not supported on GNA", GNA_3_0)
GNA_NEG_INSTANTIATE(Dilation35, InvalidDilation, Fine, "dilation is not supported on GNA", GNA_3_5)
GNA_NEG_INSTANTIATE(Dilation, InvalidDilation, Fine, "Unsupported dilation", GNA_3_0)
GNA_NEG_INSTANTIATE(Dilation35, InvalidDilation, Fine, "Unsupported dilation", GNA_3_5)
GNA_NEG_INSTANTIATE(PaddingSize, InvalidPaddingSize, Fine, "Unsupported convolution input padding", GNA_3_0)
GNA_NEG_INSTANTIATE(PaddingSize35, InvalidPaddingSize, Fine, "Unsupported convolution input padding", GNA_3_5)
} // namespace

View File

@ -24,8 +24,6 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*(EltwiseLayerTest).*eltwiseOpType=Prod.*secondaryInputType=PARAMETER.*opType=SCALAR.*)",
// TODO: Issue: 34348
R"(.*IEClassGetAvailableDevices.*)",
// TODO: Issue 32923
R"(.*IEClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK.*)",
// TODO: Issue 39358
R"(.*unaligned.*MultipleConcatTest.*)",
R"(.*ActivationConcatsEltwise.*CS=35.*)",
@ -33,8 +31,6 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.8\).*)",
R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.16\).*)",
R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.32\).*)",
// TODO: Issue: 29577
R"(.*CoreThreadingTests.smoke_QueryNetwork.*)",
// TODO: Issue: 46416
R"(.*InferRequestVariableStateTest.inferreq_smoke_VariableState_2infers*.*)",
// TODO: Issue 24839
@ -70,12 +66,13 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*OVExecutableNetworkBaseTest.*CanGetInputsInfoAndCheck.*)",
R"(.*OVExecutableNetworkBaseTest.*getOutputsFromSplitFunctionWithSeveralOutputs.*)",
R"(.*OVExecutableNetworkBaseTest.*canLoadNetworkFromMemory.*)",
R"(.*OVClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK.*GetMetricNoThrow.*)",
R"(.*(OVClass|IEClass)HeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK.*GetMetricNoThrow.*)",
R"(.*LoadNetwork*.*LoadNetwork(HETEROWithDeviceIDNoThrow|WithBigDeviceID|WithInvalidDeviceID)*.*)",
R"(.*QueryNetwork*.*QueryNetwork(HETEROWithDeviceIDNoThrow|WithBigDeviceID|WithInvalidDeviceID)*.*)",
R"(.*LoadNetworkTest.*QueryNetwork(MULTIWithHETERO|HETEROWithMULTI)NoThrow_V10.*)",
R"(.*Behavior.*OVExecutableNetworkBaseTest.*get(Inputs|Outputs)FromFunctionWithSeveral(Inputs|Outputs).*)",
// TODO: temporary disabled. Need to be enabled when PR 9282 is merged
R"(.*OVExecGraphImportExportTest.*readFromV10IR.*)",
// TODO: Issue: 29577
R"(.*QueryNetwork.*)",
// Issue connected with OV2.0
R"(.*EltwiseLayerTest.*NetType=f16.*)",
// TODO: Issue: 69639
@ -91,12 +88,10 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*CompileModelCacheTestBase.*(SplitConvConcat|KSOFunction).*)",
R"(.*CompileModelCacheTestBase.*(SingleConv|NestedSplitConvConcat).*)",
R"(.*CompileModelCacheTestBase.*(Bias|ReadConcatSplitAssign).*)",
R"(.*OVClassLoadNetworkTest.*LoadNetwork.*)",
// does not work due to GNA 3.0 convolution and other primitives limitations, partially can be resolved by
// switching GNA library to GNA3.5
R"(.*CachingSupportCase.*LoadNet.*(Bias|Split|Concat|KSO|SingleConv).*)",
R"(.*CachingSupportCase.*LoadNet.*(ConvPoolRelu|TIwithLSTMcell1)_f32_batch2.*)",
R"(.*IEClassLoadNetworkTest.*LoadNetwork(HETERO|MULTI|WithDeviceIDNoThrow|WithInvalidDeviceIDThrows).*)",
R"(.*smoke_Multi_BehaviorTests.*)",
};
}

View File

@ -8,6 +8,8 @@
// to suppress deprecated definition errors
#define IMPLEMENT_INFERENCE_ENGINE_PLUGIN
#include "layers/gna_split_layer.hpp"
#include "ngraph/opsets/opset9.hpp"
#include "ops/util/util.hpp"
namespace {
@ -31,4 +33,61 @@ TEST(GetAlignedSplitSizesTest, testAlignedSplitSizes) {
}
}
using VariadicSplitParameters = std::tuple<ov::Shape, // input size
uint32_t, // axis
std::vector<int32_t>, // split lengths
bool // supported
>;
const std::vector<VariadicSplitParameters> variadic_split_data = {
VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{192, 192, 320, 320}, true},
VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{640, 192, 192}, true},
VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{500, 24, 500}, false},
VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{700, 300, 24}, false},
};
TEST(CheckSplitSupported, CheckVariadicSplitSupported) {
ov::Shape input_shape;
uint32_t axis;
std::vector<int32_t> split_lengths;
bool result;
for (const auto& item : variadic_split_data) {
std::tie(input_shape, axis, split_lengths, result) = item;
auto split = std::make_shared<ngraph::opset9::VariadicSplit>(
std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape),
ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({1}), {axis}),
ngraph::opset9::Constant::create(ngraph::element::i64,
ngraph::Shape({split_lengths.size()}),
split_lengths));
ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result);
}
}
using SplitParameters = std::tuple<ov::Shape, // input size
uint32_t, // axis
uint32_t, // num_splits
bool // supported
>;
const std::vector<SplitParameters> split_data = {
SplitParameters{ov::Shape{1024}, 0, 4, true},
SplitParameters{ov::Shape{1, 1024}, 1, 16, true},
SplitParameters{ov::Shape{1024}, 0, 64, false},
SplitParameters{ov::Shape{1, 1024}, 1, 256, false},
};
TEST(CheckSplitSupported, CheckSplitSupported) {
ov::Shape input_shape;
uint32_t axis;
uint32_t num_splits;
bool result;
for (const auto& item : split_data) {
std::tie(input_shape, axis, num_splits, result) = item;
auto split = std::make_shared<ngraph::opset9::Split>(
std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape),
ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}),
num_splits);
ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result);
}
}
} // namespace