[GNA]: QueryNetwork support (#13639)
Review comments
Refactor is_op_supported and apply review comments
Fix split checks
Remove split checks
Adjust 2dconv check
Rebase fixes, refactoring, applying comments
parent c62be51cc1
commit 4e5f79b4ea
@@ -15,12 +15,215 @@
#include "gna/gna_config.hpp"
#include "gna_graph_tools.hpp"
#include "gna_lib_ver_selector.hpp"
#include "ie_ngraph_utils.hpp"
#include "log/log.hpp"
#include "ops/util/util.hpp"

namespace ov {
namespace intel_gna {
using namespace common;
namespace limitations {
namespace {
std::ostream& operator<<(std::ostream& os, const std::set<ov::element::Type>& t) {
    for (auto it = t.begin(); it != t.end(); ++it) {
        if (it != t.begin()) {
            os << ", " << *it;
        } else {
            os << *it;
        }
    }
    return os;
}
}  // namespace

const std::set<ov::element::Type> SupportedElementTypes::supported_parameter_types = {ov::element::u8,
                                                                                      ov::element::i16,
                                                                                      ov::element::f32};

bool SupportedElementTypes::is_parameter_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
    if (supported_parameter_types.count(elem_type) == 0) {
        if (is_exception_allowed) {
            THROW_GNA_EXCEPTION << "The plugin does not support input precision with " << elem_type.get_type_name()
                                << " format. Supported precisions " << supported_parameter_types << "\n";
        }
        return false;
    }
    return true;
}

const std::set<ov::element::Type> SupportedElementTypes::supported_constant_types = {ov::element::i8,
                                                                                     ov::element::u8,
                                                                                     ov::element::i16,
                                                                                     ov::element::u16,
                                                                                     ov::element::i32,
                                                                                     ov::element::f32,
                                                                                     ov::element::f64};

bool SupportedElementTypes::is_constant_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
    if (supported_constant_types.count(elem_type) == 0) {
        if (is_exception_allowed) {
            THROW_GNA_EXCEPTION << "The plugin does not support constant precision with " << elem_type.get_type_name()
                                << " format. Supported precisions " << supported_constant_types << "\n";
        }
        return false;
    }
    return true;
}

bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
                       const ov::intel_gna::common::DeviceVersion& effective_compile_target,
                       const InferenceEngine::Precision gna_precision,
                       bool is_exception_allowed) {
    OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!");
    size_t batch_size = conv_ie->input_value(0).get_shape()[0];
    if (batch_size != 1) {
        if (is_exception_allowed) {
            THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() +
                                       ", type: " + conv_ie->get_type_name() + ", and batch size(" +
                                       std::to_string(batch_size) + ") != 1 not supported";
        }
        return false;
    }
    auto check_dilation = [&](size_t filter_dilation_height, size_t filter_stride_width) -> bool {
        cnn2d::RangeLimit2D dilation_limit{{convDilationHeight, convDilationHeight, "dilation height"},
                                           {convDilationWidth, convDilationWidth, "dilation width"}};
        std::string error = dilation_limit.GetErrorOrEmpty(filter_dilation_height, filter_stride_width);
        return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed,
                                                             error,
                                                             conv_ie->get_friendly_name(),
                                                             conv_ie->get_type_name());
    };
    auto input_shape = conv_ie->input_value(0).get_shape();
    auto filter_shape = conv_ie->input_value(1).get_shape();
    if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) ||
        (4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) {
        pass::helper::ConvData conv_data;
        pass::helper::GetConvData(conv_ie, conv_data);
        if (gna_convolution_layer::isMappableFrom2DTo1D(conv_data.input_height,
                                                        conv_data.input_width,
                                                        conv_data.input_channel_count,
                                                        conv_data.filter_height,
                                                        conv_data.filter_width,
                                                        conv_data.filter_stride_height,
                                                        conv_data.filter_stride_width)) {
            return check_dilation(conv_data.filter_dilation_height, conv_data.filter_dilation_width);
        }
        const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target);
        if (cnn2dValidatorPtr) {
            return cnn2dValidatorPtr->ValidateCnn2D(conv_ie->get_friendly_name(),
                                                    conv_data.input_height,
                                                    conv_data.input_width,
                                                    conv_data.input_channel_count,
                                                    conv_data.filter_height,
                                                    conv_data.filter_width,
                                                    conv_data.filter_channel_count,
                                                    conv_data.filter_stride_height,
                                                    conv_data.filter_stride_width,
                                                    conv_data.filter_dilation_height,
                                                    conv_data.filter_dilation_width,
                                                    OvGnaTypeIntFromBytes(gna_precision.size()),
                                                    is_exception_allowed);
        }
    }
    return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]);
}

bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
                          const ov::intel_gna::common::DeviceVersion& effective_compile_target,
                          bool is_exception_allowed) {
    OPENVINO_ASSERT(max_pool, "MaxPool node is empty!");
    auto kernels = max_pool->get_kernel();
    if (2 == kernels.size() && kernels[0] > 1 && kernels[1] > 1) {
        const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target);
        if (cnn2dValidatorPtr) {
            auto strides = max_pool->get_strides();
            return cnn2dValidatorPtr->ValidatePooling2D(max_pool->get_friendly_name(),
                                                        kernels[0],
                                                        kernels[1],
                                                        strides[0],
                                                        strides[1],
                                                        is_exception_allowed);
        }
    }
    return true;
}

bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected, bool is_exception_allowed) {
    OPENVINO_ASSERT(fully_connected, "FullyConnected node is empty!");
    size_t output_batch_size = fully_connected->get_output_shape(0)[0];
    if (output_batch_size > 8) {
        if (is_exception_allowed) {
            THROW_GNA_EXCEPTION << "topology with layer: " + fully_connected->get_friendly_name() +
                                       ", type: " + fully_connected->get_type_name() + ", and batch size(" +
                                       std::to_string(output_batch_size) + ") not supported";
        }
        return false;
    }
    return true;
}

bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed) {
    OPENVINO_ASSERT(node, "Split node is empty!");
    bool is_aligned = true;
    for (size_t i = 0; i < node->get_output_size(); i++) {
        is_aligned &= ov::intel_gna::ngraph_util::is_aligned_split(node, i);
    }
    return is_aligned;
}

bool is_op_supported(const std::shared_ptr<ov::Node>& node,
                     const ov::intel_gna::common::DeviceVersion& effective_compile_target,
                     const InferenceEngine::Precision gna_precision,
                     bool is_exception_allowed) {
    if (ov::op::util::is_parameter(node)) {
        return SupportedElementTypes::is_parameter_type_supported(node->get_element_type(), is_exception_allowed);
    } else if (ov::op::util::is_constant(node)) {
        return SupportedElementTypes::is_constant_type_supported(node->get_element_type(), is_exception_allowed);
    } else if (auto conv_ie = std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
        return is_conv_supported(conv_ie, effective_compile_target, gna_precision, is_exception_allowed);
    } else if (auto fully_connected = std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node)) {
        return is_fc_supported(fully_connected, is_exception_allowed);
    } else if (ov::intel_gna::ngraph_util::is_pooling(node)) {
        return is_pooling_supported(std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node),
                                    effective_compile_target,
                                    is_exception_allowed);
    } else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) ||
               ov::intel_gna::ngraph_util::is_eltwise_add(node) || ov::intel_gna::ngraph_util::is_eltwise_mul(node) ||
               ov::intel_gna::ngraph_util::is_crop_affined(node) ||
               ov::intel_gna::ngraph_util::is_activation(node.get()) ||
               ov::intel_gna::ngraph_util::is_gna_precision_agnostic(
                   node) ||  // check concat/split are aligned when transformations will be moved to ngraph
               (std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(node) != nullptr) ||
               (std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) != nullptr) ||
               (std::dynamic_pointer_cast<ngraph::op::PowerIE>(node) != nullptr) ||
               (std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr)) {
        return true;
    } else if (ov::intel_gna::ngraph_util::is_gna_precision_agnostic(node)) {
        if ((std::dynamic_pointer_cast<ngraph::opset9::Split>(node) != nullptr) ||
            (std::dynamic_pointer_cast<ngraph::opset9::VariadicSplit>(node) != nullptr)) {
            return is_split_supported(node, is_exception_allowed);
        }
        // TODO: check that concat is aligned when the transformation is moved to ngraph
        return true;
    }
    return false;
}

void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
                             const ov::intel_gna::common::DeviceVersion& effective_compile_target,
                             const InferenceEngine::Precision gna_precision) {
    std::stringstream error;
    // Walk through the transformed model
    for (auto& op : model->get_ops()) {
        if (!is_op_supported(op, effective_compile_target, gna_precision, true)) {
            error << "The plugin does not support layer " << op->get_friendly_name() << " (type " << op->get_type_name()
                  << ")!" << std::endl;
        }
    }
    if (!error.str().empty()) {
        THROW_GNA_EXCEPTION << error.str();
    }
}
namespace cnn2d {

bool IsEqualToLimit::isValid(const uint32_t val) const {
@@ -655,16 +858,9 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
        // If there are no inputs start search from an output
        startLayer = getCreatorLayer(outputs.begin()->second).lock();
    } else {
        auto network_input_precision = inputs.begin()->second->getPrecision();

        if (network_input_precision != InferenceEngine::Precision::FP32 &&
            network_input_precision != InferenceEngine::Precision::I16 &&
            network_input_precision != InferenceEngine::Precision::U8) {
            errMessage = "The plugin does not support input precision with " +
                         std::string(network_input_precision.name()) +
                         " format. Supported input precisions FP32, I16, U8\n";
            return false;
        }
        SupportedElementTypes::is_parameter_type_supported(
            InferenceEngine::details::convertPrecision(inputs.begin()->second->getPrecision()),
            true);

        auto& secondLayers = getInputTo(inputs.begin()->second->getInputData());
        if (secondLayers.empty()) {
@@ -13,6 +13,10 @@
#include "common/gna_target.hpp"
#include "dnn_types.hpp"
#include "gna_lib_ver_selector.hpp"
#include "legacy/ngraph_ops/convolution_ie.hpp"
#include "legacy/ngraph_ops/fully_connected.hpp"
#include "ngraph/opsets/opset7.hpp"
#include "ngraph/opsets/opset9.hpp"

namespace ov {
namespace intel_gna {
@@ -72,6 +76,80 @@ inline bool IsTransposeSupported(const std::vector<size_t>& shape) {
    return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize;
}

class SupportedElementTypes {
public:
    static bool is_parameter_type_supported(ov::element::Type type, bool is_exception_allowed = false);
    static bool is_constant_type_supported(ov::element::Type type, bool is_exception_allowed = false);

private:
    static const std::set<ov::element::Type> supported_parameter_types;
    static const std::set<ov::element::Type> supported_constant_types;
};

/**
 * @brief Validates if legacy convolution is supported by GNA
 * @param conv_ie convolution
 * @param effective_compile_target GNA compile target
 * @param gna_precision GNA inference precision
 * @param is_exception_allowed flag specifies whether exception is allowed
 * @return true if supported
 */
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
                       const ov::intel_gna::common::DeviceVersion& effective_compile_target,
                       const InferenceEngine::Precision gna_precision,
                       bool is_exception_allowed = false);
/**
 * @brief Validates if max pooling is supported by GNA
 * @param max_pool max pooling
 * @param effective_compile_target GNA compile target
 * @param is_exception_allowed flag specifies whether exception is allowed
 * @return true if supported
 */
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
                          const ov::intel_gna::common::DeviceVersion& effective_compile_target,
                          bool is_exception_allowed = false);

/**
 * @brief Validates if fully connected is supported by GNA
 * @param fully_connected fully connected
 * @param is_exception_allowed flag specifies whether exception is allowed
 * @return true if supported
 */
bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
                     bool is_exception_allowed = false);

/**
 * @brief Validates if split is supported by GNA
 * @param node split
 * @param is_exception_allowed flag specifies whether exception is allowed
 * @return true if supported
 */
bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed = false);

/**
 * @brief Validates if operation is supported by GNA
 * @param node operation
 * @param effective_compile_target GNA compile target
 * @param gna_precision GNA inference precision
 * @param is_exception_allowed flag specifies whether exception is allowed
 * @return true if supported
 */
bool is_op_supported(const std::shared_ptr<ov::Node>& node,
                     const ov::intel_gna::common::DeviceVersion& effective_compile_target,
                     const InferenceEngine::Precision gna_precision,
                     bool is_exception_allowed = false);

/**
 * @brief Check if all operations are supported by GNA
 * @param model ngraph model
 * @param effective_compile_target GNA compile target
 * @param gna_precision GNA inference precision
 */
void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
                             const ov::intel_gna::common::DeviceVersion& effective_compile_target,
                             const InferenceEngine::Precision gna_precision);
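For orientation, a minimal sketch of how these helpers are meant to compose (the function names, device version, and precision below are illustrative placeholders, not values fixed by this change):

#include "backend/gna_limitations.hpp"

using namespace ov::intel_gna;

// Throwing mode, as used by LoadNetwork: fail fast on the first unsupported op.
void validate_for_load(const std::shared_ptr<ov::Model>& model,
                       const common::DeviceVersion& target,
                       const InferenceEngine::Precision precision) {
    limitations::check_all_ops_supported(model, target, precision);
}

// Non-throwing mode, as used by QueryNetwork: classify ops one by one.
bool probe(const std::shared_ptr<ov::Node>& op,
           const common::DeviceVersion& target,
           const InferenceEngine::Precision precision) {
    return limitations::is_op_supported(op, target, precision, /*is_exception_allowed=*/false);
}
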
namespace cnn2d {

struct IsEqualToLimit {
@@ -147,12 +225,13 @@ struct RectLimitByChannelsAndPrecision {
class AbstractValidator {
protected:
    static void ThrowIfNotEmpty(const std::string& prefix, const std::string& error);

public:
    static bool ValidationSuccesful(const bool throwOnError,
                                    const std::string& error,
                                    const std::string& operation,
                                    const std::string& type);

public:
    virtual ~AbstractValidator() = default;
    virtual bool ValidateCnn2D(const std::string& name,
                               const uint32_t inHeight,
@@ -15,6 +15,7 @@
#include "gna_graph_tools.hpp"
#include "gna_itt.hpp"
#include "gna_plugin_config.hpp"
#include "gna_transformations_pipeline.hpp"
#include "layer_quantizer.hpp"
#include "scale_factor_calc.hpp"
#include "weights_converter.hpp"
@@ -27,17 +28,11 @@ namespace frontend {
 * Quantize entire network
 */
class ModelQuantizer {
    const Config& gna_config;
    const bool fake_quantized;
    ov::intel_gna::TransformationsPipeline& gna_transformer;

public:
    ModelQuantizer(const Config& gna_config, const bool fake_quantized)
        : gna_config(gna_config),
          fake_quantized(fake_quantized) {}
    template <class PreQuantisationCb>
    InferenceEngine::CNNNetwork quantize(const InferenceEngine::CNNNetwork& model,
                                         const PreQuantisationCb& cb,
                                         const GnaInputs& inputs) const {
    ModelQuantizer(ov::intel_gna::TransformationsPipeline& transformer) : gna_transformer(transformer) {}
    InferenceEngine::CNNNetwork quantize(const InferenceEngine::CNNNetwork& model, const GnaInputs& inputs) const {
        OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ModelQuantizer::quantize");
        auto visitor = [&](InferenceEngine::CNNLayerPtr layer_ptr) {
            auto new_layer = InferenceEngine::injectData<QuantizedLayerParams>(layer_ptr);
@@ -46,12 +41,12 @@ public:
        };

        InferenceEngine::CNNNetwork copied_net = InferenceEngine::CNNNetCopy(model);
        cb(copied_net, true, gna_config.gnaFlags.input_low_precision);
        gna_transformer.apply_legacy(copied_net, true);
        copied_net = InferenceEngine::CNNNetCopy(copied_net, visitor);

        // Allow client code to access the copied topology, to avoid copies if the user would like to chain
        // quantisation with other preprocessing
        cb(copied_net, false, gna_config.gnaFlags.input_low_precision);
        gna_transformer.apply_legacy(copied_net, false);

        auto sorted_new_net = InferenceEngine::details::CNNNetSortTopologically(copied_net);
        log::debug() << "Sorted layers: " << std::endl;
@@ -67,7 +62,7 @@ public:

        // Propagate scale factor and quantize layers
        propagateScaleFactor(sorted_new_net);
        frontend::LayerQuantizer lq(gna_config);
        frontend::LayerQuantizer lq(gna_transformer.config);

        for (auto&& layer : sorted_new_net) {
            lq.quantize(*layer);
@@ -78,7 +73,7 @@ public:

private:
    void propagateScaleFactor(std::vector<InferenceEngine::CNNLayerPtr>& net) const {
        ScaleFactorCalculator sf(net, gna_config, fake_quantized);
        ScaleFactorCalculator sf(net, gna_transformer.config, gna_transformer.is_fake_quantized());
        uint32_t inf_loop_count = 0;
        std::vector<std::string> inf_loop_pattern;
        std::vector<std::string> inf_loop_history;
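In short, the quantizer no longer receives a pre-quantisation pass callback; it drives the legacy passes itself through the shared TransformationsPipeline. A sketch of the new call shape (mirroring the plugin and test changes later in this diff; gna_config, network, and inputs are assumed to be prepared by the caller):

ov::intel_gna::TransformationsPipeline transformer(gna_config);
ov::intel_gna::frontend::ModelQuantizer quantizer(transformer);
InferenceEngine::CNNNetwork quantized_net = quantizer.quantize(network, inputs);
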
@@ -385,11 +385,6 @@ void GNAGraphCompiler::ConvolutionPrimitive(InferenceEngine::CNNLayerPtr layer)
        THROW_GNA_LAYER_EXCEPTION(layer) << "with batch size not equals 1 is not supported";
    }

    if (convolution._dilation_x != 1 || convolution._dilation_y != 1) {
        // TODO: Issue 24839
        THROW_GNA_LAYER_EXCEPTION(layer) << "with dilation is not supported on GNA";
    }

    if (convolution._kernel_x > in_width * in_height) {
        THROW_GNA_LAYER_EXCEPTION(layer) << "Kernel dimensions X (" << convolution._kernel_x << ")"
                                         << " is bigger than total input dimensions WxH (" << in_width << "x"
@@ -10,7 +10,6 @@
#include <gna2-common-api.h>
#include <gna2-model-api.h>
#include <ie_common.h>
#include <legacy/net_pass.h>

#include <algorithm>
#include <cstdlib>
@@ -19,26 +18,11 @@
#include <layers/gna_fake_quantize_layer.hpp>
#include <legacy/convert_function_to_cnn_network.hpp>
#include <legacy/graph_tools.hpp>
#include <legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pass/manager.hpp>
#include <string>
#include <transformations/common_optimizations/add_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/common_optimizations.hpp>
#include <transformations/common_optimizations/fq_mul_fusion.hpp>
#include <transformations/common_optimizations/fq_reshape_fusion.hpp>
#include <transformations/common_optimizations/pull_transpose_through_fq.hpp>
#include <transformations/common_optimizations/relu_fake_quantize_fusion.hpp>
#include <transformations/common_optimizations/transpose_sinking.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
#include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
#include <transformations/utils/utils.hpp>
#include <unordered_map>
#include <unordered_set>
#include <utility>
@@ -58,10 +42,10 @@
#include "gna_model_serial.hpp"
#include "gna_plugin_config.hpp"
#include "gna_tensor_tools.hpp"
#include "gna_transformations_pipeline.hpp"
#include "layers/gna_layer_type.hpp"
#include "log/log.hpp"
#include "memory/gna_memory_state.hpp"
#include "optimizer/gna_pass_manager.hpp"
#include "orientation_helper.hpp"
#include "preprocessing.hpp"
#include "request/model_wrapper_factory.hpp"
@@ -69,37 +53,8 @@
#include "request/worker_pool_impl.hpp"
#include "runtime/gna_float_runtime.hpp"
#include "scale_factor_helper.hpp"
#include "transformations/broadcast_const.hpp"
#include "transformations/common_optimizations/concat_reduce_fusion.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include "transformations/convert_dwsc_to_scaleshifts.hpp"
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
#include "transformations/convert_padded_to_valid_convolution.hpp"
#include "transformations/convert_precision.hpp"
#include "transformations/decompose_2d_convolution.hpp"
#include "transformations/decompose_mvn.hpp"
#include "transformations/disable_decompression_convert_constant_folding.hpp"
#include "transformations/handle_transposes_around_matmul.hpp"
#include "transformations/insert_copy_layer.hpp"
#include "transformations/insert_identity_layer.hpp"
#include "transformations/insert_reshape_around_matmul.hpp"
#include "transformations/insert_transpose_after_convolution_or_pooling.hpp"
#include "transformations/markup_fusable_transpose.hpp"
#include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
#include "transformations/op_conversions/gru_cell_decomposition.hpp"
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
#include "transformations/op_conversions/softsign_decomposition.hpp"
#include "transformations/pwl_approximation.hpp"
#include "transformations/remove_converts.hpp"
#include "transformations/remove_extra_reshapes.hpp"
#include "transformations/remove_single_input_concat.hpp"
#include "transformations/reorder_activation_and_pooling.hpp"
#include "transformations/split_convolution_with_large_buffer_size.hpp"
#include "transformations/split_eltwise.hpp"
#include "transformations/substitute_softsign.hpp"
#include "transformations/swap_input_matmul_gna.hpp"
#include "transformations/unfuse_reshape_and_transpose.hpp"

using namespace ov::intel_gna::ngraph_util;

inline uint32_t ToByteSize(const Gna2DataType type) {
    switch (type) {
@@ -706,130 +661,20 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
    const auto effectiveCompileTarget = config.target->get_effective_compile_target();
    graphCompiler.SetValidatorTarget(effectiveCompileTarget);

    bool isNgraphPassesUsed = false;
    bool fake_quantized = false;
    auto transformer = TransformationsPipeline(config, effectiveCompileTarget);

    if (_network.getFunction()) {
        CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network);
        const auto& graph = clonedNetwork.getFunction();
        ngraph::pass::Manager manager;
        manager.register_pass<ov::pass::InitNodeInfo>();

        fake_quantized = ov::op::util::has_op_with_type<ngraph::opset7::FakeQuantize>(graph);
        // In OV API 2.0 (IRv10) the default conversion to fp32 (inputs, outputs and weights) is disabled,
        // and we need to run the ConvertPrecision transformation to support old networks.
        manager.register_pass<ov::pass::ConvertPrecision>(
            precisions_array{{ngraph::element::f16, ngraph::element::f32}});
        manager.register_pass<ov::pass::ConvertMVN1ToMVN6>();
        manager.register_pass<ov::intel_gna::pass::DecomposeMVN>();
        manager.register_pass<ov::pass::CommonOptimizations>();
        manager.register_pass<ov::intel_gna::pass::RemoveInputConvert>();
        manager.register_pass<ov::intel_gna::pass::RemoveOutputConvert>();
        manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
        manager.register_pass<ov::pass::GRUCellDecomposition>();
        manager.register_pass<ov::pass::LSTMCellDecomposition>();
        manager.register_pass<ov::intel_gna::pass::ConvertDWSCToScaleShifts>();
        manager.register_pass<ov::intel_gna::pass::ConvertPaddedToValidConv>();
        manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(effectiveCompileTarget,
                                                                                        config.gnaPrecision);
        manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(effectiveCompileTarget,
                                                                                      config.gnaPrecision);
        manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(effectiveCompileTarget, config.gnaPrecision);
        // TODO: enable this transformation for networks with convolutions
        if (!ov::op::util::has_op_with_type<ngraph::opset7::Convolution>(graph)) {
            manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithFqToPointWiseConvolution>();
            manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithBiasToPointWiseConvolution>();
            manager.register_pass<ov::intel_gna::pass::ConvertMatmulToPointWiseConvolution>();
        }
        manager.register_pass<ov::intel_gna::pass::SplitConvolutionWithFq>();
        manager.register_pass<ov::intel_gna::pass::SplitConvolutionWithBias>();
        manager.register_pass<ov::intel_gna::pass::SplitConvolution>();
        manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmulWithTranspose>();
        manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmulWithFq>();
        manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmulWithAdd>();
        manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmul>();
        manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithTrailingTranspose>();
        manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithAct>();
        manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithFq>();
        manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithBias>();
        manager.register_pass<ov::intel_gna::pass::SwapInputMatMul>();
        manager.register_pass<ov::intel_gna::pass::HandleTransposesAroundMatMul>();
        manager.register_pass<ov::intel_gna::pass::InsertTransposeAfterConvOrPool>();
        manager.register_pass<ov::intel_gna::pass::Unfuse2dto4dReshapeAndTranspose>();
        manager.register_pass<ov::intel_gna::pass::Unfuse4dto2dReshapeAndTranspose>();
        manager.register_pass<ov::intel_gna::pass::RemoveExtraReshapes>();
        manager.register_pass<ov::intel_gna::pass::ReorderActivationAndPooling>();
        manager.register_pass<ov::intel_gna::pass::RemoveSingleInputConcat>();
        manager.register_pass<ov::intel_gna::pass::SubstituteSoftsign>();
        manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeLayerToBeEliminated>();
        manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
        manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
        manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
        manager.register_pass<ov::intel_gna::pass::MarkupFusableTranspose>();
        manager.register_pass<ov::intel_gna::pass::RemoveExtraReshapes>();
        /*
           Put BroadcastAddMultiplyConst here after the ConvertOpSet..() transformations since there are conflicts
           with them:
           ngraph::pass::ConvertOpSet1ToLegacy -> ngraph::pass::BiasFusions ->
           ngraph::pass::ConvAddFusion, ngraph::pass::ConvMultiplyFusion
           Those transformations fuse bias into convolution and recognize the const node as [1, C, 1, 1].
           TODO: move this transformation just beyond the RemoveSingleInputConcat pass after removing the
           ConvertOpSet1ToLegacy transformations
        */
        manager.register_pass<ov::intel_gna::pass::BroadcastAddMultiplyConst>();
        /*
           SplitEltwise depends on BroadcastAddMultiplyConst for the case when splitting of a Constant
           input is done
        */
        manager.register_pass<ov::intel_gna::pass::SplitEltwise>();
        /* The following transformations perform insertion of the Identity layer in 3 steps:
           1. Mark inputs with an rt_info attribute where a precision change from i32 to i16/i8 happens
           2. Insert Identity after operations which have consumers marked with the precision change
           3. Clean up the appropriate attribute from rt_info
        */
        manager.register_pass<ov::intel_gna::pass::MarkIdentityCandidates>(config.gnaFlags.input_low_precision);
        manager.register_pass<ov::intel_gna::pass::InsertIdentity>();
        manager.register_pass<ov::intel_gna::pass::IdentityCandidatesCleanup>();
        // Breaks fusing of layers before result
        manager.register_pass<ov::intel_gna::pass::BreakFusingOfOutputLayers>();
        if (!config.gnaFlags.sw_fp32 && !config.gnaFlags.uniformPwlDesign) {
            manager.register_pass<ov::intel_gna::pass::PWLApproximationWithFq>(config.gnaFlags.pwlMaxErrorPercent);
            manager.register_pass<ov::intel_gna::pass::PWLApproximation>(config.gnaFlags.pwlMaxErrorPercent);
        }
        manager.register_pass<ov::pass::UnrollTensorIterator>();
        manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
        manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
        manager.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
        manager.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
        const auto& pass_config = manager.get_pass_config();

        // Allow FP16 Converts to be folded and FP16 constants to be upgraded to the FP32 data type
        pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
        pass_config->disable<ov::pass::DisableDecompressionConvertConstantFolding>();

        pass_config->disable<ov::pass::FakeQuantizeMulFusion>();
        pass_config->disable<ov::pass::FakeQuantizeReshapeFusion>();
        pass_config->disable<ov::pass::PullTransposeThroughFQUp>();
        pass_config->disable<ov::pass::ReluFakeQuantizeFusion>();
        // Consider enabling after per-channel quantization on the FakeQuantize layer is supported in GNAPlugin, see
        // issue 52034
        pass_config->disable<ov::pass::AddFakeQuantizeFusion>();
        // TransposeReduction can be enabled when Transpose-Conv-Transpose patterns are handled in ngraph
        // transformations
        pass_config->disable<ov::pass::TransposeReduction>();
        // Operations Max and Min aren't supported
        pass_config->disable<ov::pass::ConcatReduceFusion>();
        // pass_config->disable<ov::pass::SoftSignDecomposition>();
        manager.run_passes(graph);
        convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(graph, clonedNetwork);
        isNgraphPassesUsed = true;
        auto model = clonedNetwork.getFunction();
        transformer.apply(model);
        limitations::check_all_ops_supported(model, effectiveCompileTarget, config.gnaPrecision);
        convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(model, clonedNetwork);
    }
    IE_SUPPRESS_DEPRECATED_START
    InferenceEngine::CNNNetwork network = convertedNetwork ? InferenceEngine::CNNNetwork{convertedNetwork} : _network;
    IE_SUPPRESS_DEPRECATED_END

    NetPass::ConvertPrecision(network, Precision::I64, Precision::I32);
    NetPass::ConvertPrecision(network, Precision::U64, Precision::I32);
    NetPass::ConvertPrecision(network, Precision::U32, Precision::I32);
    transformer.convert_precision_legacy(network);

    // Check the network
    std::string error;
@@ -849,7 +694,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
    // Set Scale Factors for inputs according to configuration.
    ov::intel_gna::helpers::ApplyInputScaleFactors(*inputs_ptr_, config);

    if (fake_quantized) {
    if (transformer.is_fake_quantized()) {
        UpdateInputScaleFromNetwork(network);
    }

@@ -857,56 +702,6 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
        FillInputsAndOutputsTranspositionInfo(network);
    }

    // network optimisation phases
    int passIdx = 0;
    auto run_passes = [&](const CNNNetwork& network, bool runBeforeCopy, bool lowPrecision) {
        auto passes = make_shared<PassManager>(PassManagerSettings{runBeforeCopy, lowPrecision}, network);
        passes->registerPass<RemoveConstPass>();
        if (!isNgraphPassesUsed) {
            passes->registerPass<UnrollTIPass>();
            passes->registerPass<RemoveConstPass>();
            passes->registerPass<UnrollLSTMCellPass>();
            passes->registerPass<RemoveSingleInputConcatPass>();
            passes->registerPass<BroadcastConstPass>();
            passes->registerPass<SubstituteScaleShiftBroadCastPass>();
        }

        if (fake_quantized)
            passes->registerPass<SubstituteSoftSignPass>();

        // fake quantisation aware passes
        passes->registerPass<FuseFQIntoWeightsPass>();
        passes->registerPass<MoveFakeQuantizeLayerIntoQuantParamsPass>();

        passes->registerPass<TransposeWeightsFromNCHWToNHWCPass>();

        passes->registerPass<SubstitutePReluPass>();

        if (!isNgraphPassesUsed) {
            passes->registerPass<ReorderMaxPoolPass>();
            passes->registerPass<EltwiseSplitOverChannelsPass>();
        }

        passes->registerPass<InsertSplitAligningFilterPass>();

        if (!isNgraphPassesUsed) {
            passes->registerPass<InsertCopyLayerPass>();
        }
        passes->registerPass<FlattenTrivialConcatPass>();
        passes->registerPass<InsertConcatAligningFilterPass>();
        passes->registerPass<ReorderConcatInputsPass>();
        passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
        // Keep legacy inserting of the Identity layer here,
        // because concat and split aligning passes are not moved to ngraph yet
        passes->registerPass<InsertIdentityLayerPass>();
        passes->registerPass<BreakFusingOfOutputLayersPass>();
        passes->registerPass<InsertDiagonalLayerPass>();
        passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
        passes->registerPass<ForbidActivationFusingPass>();
        passes->registerPass<FuseMultipleIdentitiesPass>();
        passIdx = passes->run(passIdx);
    };

    InferenceEngine::CNNNetwork newNet;

    if (gnaFlags->sw_fp32) {
@@ -916,11 +711,11 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
        };
        newNet = InferenceEngine::CNNNetCopy(network, visitor);
        // running all passes requires two calls to the pass manager
        run_passes(newNet, true, gnaFlags->input_low_precision);
        run_passes(newNet, false, gnaFlags->input_low_precision);
        transformer.apply_legacy(newNet, true);
        transformer.apply_legacy(newNet, false);
    } else {
        ov::intel_gna::frontend::ModelQuantizer modelQuantizer(config, fake_quantized);
        newNet = modelQuantizer.quantize(network, run_passes, *inputs_ptr_);
        ov::intel_gna::frontend::ModelQuantizer modelQuantizer(transformer);
        newNet = modelQuantizer.quantize(network, *inputs_ptr_);
    }

    auto inputLayers = CNNNetGetAllInputLayers(newNet);
@@ -1822,12 +1617,29 @@ std::vector<std::shared_ptr<const ov::Node>> GNAPlugin::GetOutputs() {
    return results;
}

InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork(const InferenceEngine::CNNNetwork& network,
                                                            const std::map<std::string, std::string>& config) const {
InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork(
    const InferenceEngine::CNNNetwork& network,
    const std::map<std::string, std::string>& config_map) const {
    InferenceEngine::QueryNetworkResult res;

    if (network.getFunction()) {
        IE_THROW(NotImplemented) << " ngraph::Function is not supported natively";
    Config qn_config(config);
    qn_config.UpdateFromMap(config_map);

    const auto effectiveCompileTarget = qn_config.target->get_effective_compile_target();
    auto model = network.getFunction();
    if (model) {
        auto supported = GetSupportedNodes(
            model,
            [&](std::shared_ptr<ov::Model>& model) {
                TransformationsPipeline(qn_config, effectiveCompileTarget).apply(model);
            },
            [&](const std::shared_ptr<ngraph::Node>& op) {
                return limitations::is_op_supported(op, effectiveCompileTarget, qn_config.gnaPrecision);
            });
        for (auto&& op_name : supported) {
            res.supportedLayersMap.emplace(op_name, GetName());
        }
        return res;
    }

    std::unordered_set<CNNLayer*> allLayers;
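With this change the GNA plugin answers QueryNetwork by running the same transformation pipeline and per-op checks used at load time, instead of rejecting ngraph functions. A minimal client-side sketch using the standard Inference Engine API (the model paths are placeholders):

#include <ie_core.hpp>
#include <iostream>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml", "model.bin");
    // Returns the set of layers the GNA plugin can execute, keyed by layer name.
    auto res = core.QueryNetwork(network, "GNA", {});
    for (const auto& entry : res.supportedLayersMap) {
        std::cout << entry.first << " -> " << entry.second << std::endl;
    }
    return 0;
}
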
src/plugins/intel_gna/src/gna_transformations_pipeline.cpp (new file, 244 lines)
@@ -0,0 +1,244 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "gna_transformations_pipeline.hpp"

#include "gna_itt.hpp"
#include "legacy/net_pass.h"
#include "legacy/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.hpp"
#include "ngraph/opsets/opset7.hpp"
#include "openvino/pass/manager.hpp"
#include "optimizer/gna_pass_manager.hpp"
#include "transformations/broadcast_const.hpp"
#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/common_optimizations.hpp"
#include "transformations/common_optimizations/concat_reduce_fusion.hpp"
#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp"
#include "transformations/common_optimizations/fq_mul_fusion.hpp"
#include "transformations/common_optimizations/fq_reshape_fusion.hpp"
#include "transformations/common_optimizations/pull_transpose_through_fq.hpp"
#include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/transpose_sinking.hpp"
#include "transformations/control_flow/unroll_tensor_iterator.hpp"
#include "transformations/convert_dwsc_to_scaleshifts.hpp"
#include "transformations/convert_matmul_to_pointwise_convolution.hpp"
#include "transformations/convert_padded_to_valid_convolution.hpp"
#include "transformations/convert_precision.hpp"
#include "transformations/decompose_2d_convolution.hpp"
#include "transformations/decompose_mvn.hpp"
#include "transformations/disable_decompression_convert_constant_folding.hpp"
#include "transformations/handle_transposes_around_matmul.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/insert_copy_layer.hpp"
#include "transformations/insert_identity_layer.hpp"
#include "transformations/insert_reshape_around_matmul.hpp"
#include "transformations/insert_transpose_after_convolution_or_pooling.hpp"
#include "transformations/markup_fusable_transpose.hpp"
#include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
#include "transformations/op_conversions/gru_cell_decomposition.hpp"
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
#include "transformations/op_conversions/softsign_decomposition.hpp"
#include "transformations/opset_conversions/convert_opset2_to_opset1.hpp"
#include "transformations/opset_conversions/convert_opset3_to_opset2.hpp"
#include "transformations/pwl_approximation.hpp"
#include "transformations/remove_converts.hpp"
#include "transformations/remove_extra_reshapes.hpp"
#include "transformations/remove_single_input_concat.hpp"
#include "transformations/reorder_activation_and_pooling.hpp"
#include "transformations/split_convolution_with_large_buffer_size.hpp"
#include "transformations/split_eltwise.hpp"
#include "transformations/substitute_softsign.hpp"
#include "transformations/swap_input_matmul_gna.hpp"
#include "transformations/unfuse_reshape_and_transpose.hpp"
#include "transformations/utils/utils.hpp"

namespace ov {
namespace intel_gna {

void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model) {
    OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "TransformationsPipeline::apply");

    fake_quantized = ov::op::util::has_op_with_type<ngraph::op::FakeQuantize>(model);

    ov::pass::Manager manager;
    manager.register_pass<ov::pass::InitNodeInfo>();

    // In OV API 2.0 (IRv10) the default conversion to fp32 (inputs, outputs and weights) is disabled,
    // and we need to run the ConvertPrecision transformation to support old networks.
    manager.register_pass<ov::pass::ConvertPrecision>(precisions_array{{ngraph::element::f16, ngraph::element::f32}});
    manager.register_pass<ov::pass::ConvertMVN1ToMVN6>();
    manager.register_pass<ov::intel_gna::pass::DecomposeMVN>();
    manager.register_pass<ov::pass::CommonOptimizations>();
    manager.register_pass<ov::intel_gna::pass::RemoveInputConvert>();
    manager.register_pass<ov::intel_gna::pass::RemoveOutputConvert>();
    manager.register_pass<ov::pass::ConvertSequenceToTensorIterator>();
    manager.register_pass<ov::pass::GRUCellDecomposition>();
    manager.register_pass<ov::pass::LSTMCellDecomposition>();
    manager.register_pass<ov::intel_gna::pass::ConvertDWSCToScaleShifts>();
    manager.register_pass<ov::intel_gna::pass::ConvertPaddedToValidConv>();
    manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(effective_compile_target,
                                                                                    config.gnaPrecision);
    manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(effective_compile_target,
                                                                                  config.gnaPrecision);
    manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(effective_compile_target, config.gnaPrecision);
    // TODO: enable this transformation for networks with convolutions
    if (!ov::op::util::has_op_with_type<ngraph::opset7::Convolution>(model)) {
        manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithFqToPointWiseConvolution>();
        manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithBiasToPointWiseConvolution>();
        manager.register_pass<ov::intel_gna::pass::ConvertMatmulToPointWiseConvolution>();
    }
    manager.register_pass<ov::intel_gna::pass::SplitConvolutionWithFq>();
    manager.register_pass<ov::intel_gna::pass::SplitConvolutionWithBias>();
    manager.register_pass<ov::intel_gna::pass::SplitConvolution>();
    manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmulWithTranspose>();
    manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmulWithFq>();
    manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmulWithAdd>();
    manager.register_pass<ov::intel_gna::pass::InsertReshapeAroundMatmul>();
    manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithTrailingTranspose>();
    manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithAct>();
    manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithFq>();
    manager.register_pass<ov::intel_gna::pass::SwapInputMatMulWithBias>();
    manager.register_pass<ov::intel_gna::pass::SwapInputMatMul>();
    manager.register_pass<ov::intel_gna::pass::HandleTransposesAroundMatMul>();
    manager.register_pass<ov::intel_gna::pass::InsertTransposeAfterConvOrPool>();
    manager.register_pass<ov::intel_gna::pass::Unfuse2dto4dReshapeAndTranspose>();
    manager.register_pass<ov::intel_gna::pass::Unfuse4dto2dReshapeAndTranspose>();
    manager.register_pass<ov::intel_gna::pass::RemoveExtraReshapes>();
    manager.register_pass<ov::intel_gna::pass::ReorderActivationAndPooling>();
    manager.register_pass<ov::intel_gna::pass::RemoveSingleInputConcat>();
    manager.register_pass<ov::intel_gna::pass::SubstituteSoftsign>();
    manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeLayerToBeEliminated>();
    manager.register_pass<ov::pass::ConvertOpSet3ToOpSet2>();
    manager.register_pass<ov::pass::ConvertOpSet2ToOpSet1>();
    manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
    manager.register_pass<ov::intel_gna::pass::MarkupFusableTranspose>();
    manager.register_pass<ov::intel_gna::pass::RemoveExtraReshapes>();
    /*
       Put BroadcastAddMultiplyConst here after the ConvertOpSet..() transformations since there are conflicts
       with them:
       ngraph::pass::ConvertOpSet1ToLegacy -> ngraph::pass::BiasFusions ->
       ngraph::pass::ConvAddFusion, ngraph::pass::ConvMultiplyFusion
       Those transformations fuse bias into convolution and recognize the const node as [1, C, 1, 1].
       TODO: move this transformation just beyond the RemoveSingleInputConcat pass after removing the
       ConvertOpSet1ToLegacy transformations
    */
    manager.register_pass<ov::intel_gna::pass::BroadcastAddMultiplyConst>();
    /*
       SplitEltwise depends on BroadcastAddMultiplyConst for the case when splitting of a Constant
       input is done
    */
    manager.register_pass<ov::intel_gna::pass::SplitEltwise>();
    /* The following transformations perform insertion of the Identity layer in 3 steps:
       1. Mark inputs with an rt_info attribute where a precision change from i32 to i16/i8 happens
       2. Insert Identity after operations which have consumers marked with the precision change
       3. Clean up the appropriate attribute from rt_info
    */
    manager.register_pass<ov::intel_gna::pass::MarkIdentityCandidates>(config.gnaFlags.input_low_precision);
    manager.register_pass<ov::intel_gna::pass::InsertIdentity>();
    manager.register_pass<ov::intel_gna::pass::IdentityCandidatesCleanup>();
    // Breaks fusing of layers before result
    manager.register_pass<ov::intel_gna::pass::BreakFusingOfOutputLayers>();
    if (!config.gnaFlags.sw_fp32 && !config.gnaFlags.uniformPwlDesign) {
        manager.register_pass<ov::intel_gna::pass::PWLApproximationWithFq>(config.gnaFlags.pwlMaxErrorPercent);
        manager.register_pass<ov::intel_gna::pass::PWLApproximation>(config.gnaFlags.pwlMaxErrorPercent);
    }
    manager.register_pass<ov::pass::UnrollTensorIterator>();
    manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
    manager.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
    manager.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
    manager.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
    manager.register_pass<ov::pass::ConvertPrecision>(precisions_array{{ov::element::i64, ov::element::i32},
                                                                       {ov::element::u64, ov::element::i32},
                                                                       {ov::element::u32, ov::element::i32}});
    const auto& pass_config = manager.get_pass_config();

    // Allow FP16 Converts to be folded and FP16 constants to be upgraded to the FP32 data type
    pass_config->disable<ov::pass::ConvertCompressedOnlyToLegacy>();
    pass_config->disable<ov::pass::DisableDecompressionConvertConstantFolding>();

    pass_config->disable<ov::pass::FakeQuantizeMulFusion>();
    pass_config->disable<ov::pass::FakeQuantizeReshapeFusion>();
    pass_config->disable<ov::pass::PullTransposeThroughFQUp>();
    pass_config->disable<ov::pass::ReluFakeQuantizeFusion>();
    // Consider enabling after per-channel quantization on the FakeQuantize layer is supported in GNAPlugin, see
    // issue 52034
    pass_config->disable<ov::pass::AddFakeQuantizeFusion>();
    // TransposeReduction can be enabled when Transpose-Conv-Transpose patterns are handled in ngraph
    // transformations
    pass_config->disable<ov::pass::TransposeReduction>();
    // Operations Max and Min aren't supported
    pass_config->disable<ov::pass::ConcatReduceFusion>();

    manager.run_passes(model);

    is_ngraph_passes_used = true;
}

IE_SUPPRESS_DEPRECATED_START
void TransformationsPipeline::apply_legacy(const InferenceEngine::CNNNetwork& network, bool runBeforeCopy) {
    OV_ITT_SCOPED_TASK(itt::domains::GNAPlugin, "TransformationsPipeline::apply_legacy");
    auto passes =
        std::make_shared<PassManager>(PassManagerSettings{runBeforeCopy, config.gnaFlags.input_low_precision}, network);
    passes->registerPass<RemoveConstPass>();
    if (!is_ngraph_passes_used) {
        passes->registerPass<UnrollTIPass>();
        passes->registerPass<RemoveConstPass>();
        passes->registerPass<UnrollLSTMCellPass>();
        passes->registerPass<RemoveSingleInputConcatPass>();
        passes->registerPass<BroadcastConstPass>();
        passes->registerPass<SubstituteScaleShiftBroadCastPass>();
    }

    if (fake_quantized)
        passes->registerPass<SubstituteSoftSignPass>();

    // fake quantisation aware passes
    passes->registerPass<FuseFQIntoWeightsPass>();
    passes->registerPass<MoveFakeQuantizeLayerIntoQuantParamsPass>();

    passes->registerPass<TransposeWeightsFromNCHWToNHWCPass>();

    passes->registerPass<SubstitutePReluPass>();

    if (!is_ngraph_passes_used) {
        passes->registerPass<ReorderMaxPoolPass>();
        passes->registerPass<EltwiseSplitOverChannelsPass>();
    }

    passes->registerPass<InsertSplitAligningFilterPass>();

    if (!is_ngraph_passes_used) {
        passes->registerPass<InsertCopyLayerPass>();
    }
    passes->registerPass<FlattenTrivialConcatPass>();
    passes->registerPass<InsertConcatAligningFilterPass>();
    passes->registerPass<ReorderConcatInputsPass>();
    passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
    // Keep legacy inserting of the Identity layer here,
    // because concat and split aligning passes are not moved to ngraph yet
    passes->registerPass<InsertIdentityLayerPass>();
    passes->registerPass<BreakFusingOfOutputLayersPass>();
    passes->registerPass<InsertDiagonalLayerPass>();
    passes->registerPass<HandleMultipleActivationsForTheLayerPass>();
    passes->registerPass<ForbidActivationFusingPass>();
    passes->registerPass<FuseMultipleIdentitiesPass>();
    legacy_pass_index = passes->run(legacy_pass_index);
}

void TransformationsPipeline::convert_precision_legacy(InferenceEngine::CNNNetwork& network) {
    if (!is_ngraph_passes_used) {
        InferenceEngine::NetPass::ConvertPrecision(network,
                                                   InferenceEngine::Precision::I64,
                                                   InferenceEngine::Precision::I32);
        InferenceEngine::NetPass::ConvertPrecision(network,
                                                   InferenceEngine::Precision::U64,
                                                   InferenceEngine::Precision::I32);
        InferenceEngine::NetPass::ConvertPrecision(network,
                                                   InferenceEngine::Precision::U32,
                                                   InferenceEngine::Precision::I32);
    }
}
IE_SUPPRESS_DEPRECATED_END
}  // namespace intel_gna
}  // namespace ov
src/plugins/intel_gna/src/gna_transformations_pipeline.hpp (new file, 41 lines)
@@ -0,0 +1,41 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>

#include "cpp/ie_cnn_network.h"
#include "gna_plugin_config.hpp"
#include "openvino/core/model.hpp"

namespace ov {
namespace intel_gna {

class TransformationsPipeline {
public:
    explicit TransformationsPipeline(const Config& config,
                                     const ov::intel_gna::common::DeviceVersion& effective_compile_target =
                                         ov::intel_gna::common::DeviceVersion::NotSet)
        : config(config),
          effective_compile_target(effective_compile_target) {}
    void apply(const std::shared_ptr<ov::Model>& model);
    IE_SUPPRESS_DEPRECATED_START
    void apply_legacy(const InferenceEngine::CNNNetwork& network, bool runBeforeCopy);
    void convert_precision_legacy(InferenceEngine::CNNNetwork& network);
    IE_SUPPRESS_DEPRECATED_END
    bool is_fake_quantized() {
        return fake_quantized;
    }
    const ov::intel_gna::Config& config;

private:
    bool is_ngraph_passes_used = false;
    bool fake_quantized = false;
    int legacy_pass_index = 0;
    ov::intel_gna::common::DeviceVersion effective_compile_target;
};

}  // namespace intel_gna
}  // namespace ov
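A quick sketch of how the pipeline object is intended to be driven (compare the LoadNetwork, ModelQuantizer, and QueryNetwork call sites above; config, model, and network are assumed to be prepared by the caller):

ov::intel_gna::TransformationsPipeline transformer(config, effective_compile_target);

// ngraph path: run once on the ov::Model, then query the FakeQuantize state.
transformer.apply(model);
bool fq = transformer.is_fake_quantized();

// Legacy CNNNetwork path: the two-phase pass run formerly done via run_passes().
transformer.apply_legacy(network, /*runBeforeCopy=*/true);
transformer.apply_legacy(network, /*runBeforeCopy=*/false);
transformer.convert_precision_legacy(network);
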
@@ -3,26 +3,29 @@
//

#pragma once
#include <legacy/ngraph_ops/convolution_ie.hpp>
#include <legacy/ngraph_ops/crop_ie.hpp>
#include <legacy/ngraph_ops/eltwise.hpp>
#include <legacy/ngraph_ops/fully_connected.hpp>
#include <legacy/ngraph_ops/power.hpp>
#include <legacy/ngraph_ops/relu_ie.hpp>
#include <legacy/ngraph_ops/scaleshift.hpp>
#include <memory>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/opsets/opset8.hpp>
#include <ngraph/opsets/opset9.hpp>
#include <transformations/rt_info/gna_transpose_fusable.hpp>
#include <transformations/utils/utils.hpp>
#include <vector>

#include "backend/gna_limitations.hpp"
#include "gna_plugin_config.hpp"
#include "layers/gna_convolution_layer.hpp"
#include "layers/gna_permute.hpp"
#include "legacy/ngraph_ops/convolution_ie.hpp"
#include "legacy/ngraph_ops/crop_ie.hpp"
#include "legacy/ngraph_ops/eltwise.hpp"
#include "legacy/ngraph_ops/fully_connected.hpp"
#include "legacy/ngraph_ops/power.hpp"
#include "legacy/ngraph_ops/relu_ie.hpp"
#include "legacy/ngraph_ops/scaleshift.hpp"
#include "ngraph/opsets/opset7.hpp"
#include "ngraph/opsets/opset8.hpp"
#include "ngraph/opsets/opset9.hpp"
#include "ops/copy.hpp"
#include "ops/identity.hpp"
#include "ops/pwl.hpp"
#include "transformations/rt_info/gna_transpose_fusable.hpp"
#include "transformations/utils/transformation_helper.hpp"
#include "transformations/utils/utils.hpp"

namespace ov {
namespace intel_gna {
@@ -200,7 +203,7 @@ inline bool is_Tbit_fq(const std::shared_ptr<ngraph::Node>& node) {
    if (!fq_node)
        return false;
    auto levels = fq_node->get_levels();
    return std::numeric_limits<T>::max() == levels;
    // Accept the levels count expressed either as max(T), i.e. 2^N - 1, or as max(T) + 1, i.e. 2^N.
    return (std::numeric_limits<T>::max() == levels) || (std::numeric_limits<T>::max() == levels - 1);
}

inline bool is_32bit_fq(const std::shared_ptr<ngraph::Node>& node) {
@ -13,7 +13,32 @@ namespace intel_gna {
namespace pass {
namespace helper {

void GetConvData(std::shared_ptr<ngraph::op::ConvolutionIE> conv, ConvData& conv_data) {
    OPENVINO_ASSERT(conv);
    conv_data.output_height = conv->get_output_shape(0)[2];
    conv_data.output_width = conv->get_output_shape(0)[3];
    conv_data.input_channel_count = conv->input_value(0).get_shape()[1];
    conv_data.input_height = conv->input_value(0).get_shape()[2];
    conv_data.input_width = conv->input_value(0).get_shape()[3];
    conv_data.filter_count = conv->input_value(1).get_shape()[0];
    conv_data.filter_channel_count = conv->input_value(1).get_shape()[1];
    conv_data.filter_height = conv->input_value(1).get_shape()[2];
    conv_data.filter_width = conv->input_value(1).get_shape()[3];
    conv_data.filter_dilation_height = conv->get_dilations()[0];
    conv_data.filter_dilation_width = conv->get_dilations()[1];
    conv_data.filter_stride_height = conv->get_strides()[0];
    conv_data.filter_stride_width = conv->get_strides()[1];
    conv_data.output_channel_count = conv_data.filter_count;
    conv_data.pads_begin_height = conv->get_pads_begin()[0];
    conv_data.pads_begin_width = conv->get_pads_begin()[1];
    conv_data.pads_end_height = conv->get_pads_end()[0];
    conv_data.pads_end_width = conv->get_pads_end()[1];
    conv_data.padding_type = conv->get_auto_pad();
    conv_data.element_type = conv->get_element_type();
}

void GetConvData(std::shared_ptr<ngraph::opset7::Convolution> conv, ConvData& conv_data) {
    OPENVINO_ASSERT(conv);
    conv_data.output_height = conv->get_output_shape(0)[2];
    conv_data.output_width = conv->get_output_shape(0)[3];
    conv_data.input_channel_count = conv->input_value(0).get_shape()[1];
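A minimal sketch of calling the opset7 overload; shapes and values are illustrative, not from the commit:

// Build a dummy 4 x 8 x 3 x 3 convolution over a 1 x 8 x 16 x 32 NCHW input
// and extract its geometry into ConvData.
auto input = std::make_shared<ngraph::opset7::Parameter>(ngraph::element::f32,
                                                         ngraph::Shape{1, 8, 16, 32});
auto filters = ngraph::opset7::Constant::create(ngraph::element::f32,
                                                ngraph::Shape{4, 8, 3, 3},
                                                std::vector<float>(4 * 8 * 3 * 3, 0.0f));
auto conv = std::make_shared<ngraph::opset7::Convolution>(input,
                                                          filters,
                                                          ngraph::Strides{1, 1},
                                                          ngraph::CoordinateDiff{0, 0},
                                                          ngraph::CoordinateDiff{0, 0},
                                                          ngraph::Strides{1, 1});
ov::intel_gna::pass::helper::ConvData conv_data;
ov::intel_gna::pass::helper::GetConvData(conv, conv_data);
// conv_data.filter_count == 4, conv_data.input_channel_count == 8,
// conv_data.output_height == 14, conv_data.output_width == 30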
@ -4,6 +4,7 @@

#pragma once

#include <legacy/ngraph_ops/convolution_ie.hpp>
#include <ngraph/opsets/opset7.hpp>

namespace ov {
@ -42,6 +43,14 @@ struct ConvData {
 */
void GetConvData(std::shared_ptr<ngraph::opset7::Convolution> conv, ConvData& conv_data);

/**
 * @brief gets all legacy convolution related data into a struct for further processing
 * @param conv legacy convolution node to get data of
 * @param conv_data convolution data structure to put data into
 * @return void
 */
void GetConvData(std::shared_ptr<ngraph::op::ConvolutionIE> conv, ConvData& conv_data);

/**
 * @brief ngraph matcher predicate fusing existing predicates for consumers count and rank of a layer
 * @param expected_count expected consumers count for the node
@ -39,9 +39,10 @@ class I8QuantisationTest : public GNATest<> {
        gna_config.gnaPrecision = InferenceEngine::Precision::I16;
        gna_config.gnaFlags.input_low_precision = false;

        return ModelQuantizer(gna_config, false).quantize(
        auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);

        return ModelQuantizer(transformer).quantize(
            model,
            [](const InferenceEngine::CNNNetwork&, bool run_before_copy, bool low_precision) {},
            inputs);
    }
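The same rewiring pattern recurs in the tests below: ModelQuantizer no longer takes a Config plus a boolean flag but a ready TransformationsPipeline. A sketch of the new setup (model, callback and inputs as in the surrounding tests; ModelQuantizer's interface is inferred from this diff):

ov::intel_gna::Config gna_config;
gna_config.gnaPrecision = InferenceEngine::Precision::I16;
gna_config.gnaFlags.input_low_precision = false;

// the boolean flag previously passed alongside the Config is gone;
// the pipeline exposes is_fake_quantized() instead
auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);
auto quantized = ModelQuantizer(transformer).quantize(model, callback, inputs);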
@ -100,7 +101,7 @@ TEST_F(I8QuantisationTest, FCDimensionIs1){
    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {440}, C });
    weights->allocate();
    fillWeights(weights);

    Core ie;
    auto network = ie.ReadNetwork(FCOnlyModel(), weights);

@ -111,7 +112,7 @@ TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits){
    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {440}, C });
    weights->allocate();
    fillWeights(weights);

    Core ie;
    auto network = ie.ReadNetwork(Fc2DOutputModel(), weights);

@ -126,7 +127,7 @@ TEST_F(I8QuantisationTest, fp16tofp32_on_fullyConnected_model) {
    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {220}, Layout::C });
    weights->allocate();
    fillWeights(weights);

    Core ie;
    auto network = ie.ReadNetwork(FCOnlyModelFP16(), weights);

@ -137,7 +138,7 @@ TEST_F(I8QuantisationTest, LSTMCell_quantize) {
    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {33664}, C });
    weights->allocate();
    fillWeights(weights);

    Core ie;
    auto network = ie.ReadNetwork(LSTMCellOnlyModel(), weights);

@ -148,7 +149,7 @@ TEST_F(I8QuantisationTest, LSTMCell_unaligned_quantize) {
    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {3480}, C });
    weights->allocate();
    fillWeights(weights);

    Core ie;
    auto network = ie.ReadNetwork(LSTMCellOnlyModelUnaligned(), weights);

@ -159,7 +160,7 @@ TEST_F(I8QuantisationTest, TI_quantize) {
    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {249748}, C });
    weights->allocate();
    fillWeights(weights);

    Core ie;
    auto network = ie.ReadNetwork(TIModelWithLSTMCell2(), weights);
@ -41,10 +41,11 @@ protected:
        gna_config.gnaPrecision = InferenceEngine::Precision::I16;
        gna_config.gnaFlags.input_low_precision = false;

        return ModelQuantizer(gna_config, false)
        auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);

        return ModelQuantizer(transformer)
            .quantize(
                model,
                [](InferenceEngine::CNNNetwork&, bool run_before_copy, bool inputs_int8_precision) {},
                inputs);
    }
@ -20,7 +20,7 @@ class I16QuantisationTest : public GNATest<> {
protected:
    InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) {
        auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
        Config gna_config;
        gna_config.gnaPrecision = InferenceEngine::Precision::I16;
        gna_config.gnaFlags.input_low_precision = false;
        LayerQuantizer lq(gna_config);
@ -41,9 +41,10 @@ class I16QuantisationTest : public GNATest<> {
        gna_config.gnaPrecision = InferenceEngine::Precision::I16;
        gna_config.gnaFlags.input_low_precision = false;

        return ModelQuantizer(gna_config, false).quantize(
        auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);

        return ModelQuantizer(transformer).quantize(
            model,
            [](const InferenceEngine::CNNNetwork&, bool run_before_copy, bool low_precision) {},
            inputs);
    }
@ -367,7 +368,7 @@ TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) {
    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {220}, Layout::C });
    weights->allocate();
    fillWeights(weights);

    Core ie;
    auto network = ie.ReadNetwork(FCOnlyModelFP16(), weights);

@ -431,7 +432,7 @@ TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) {
    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {3480}, C });
    weights->allocate();
    fillWeights(weights);

    Core ie;
    auto network = ie.ReadNetwork(LSTMCellOnlyModelUnaligned(), weights);

@ -468,7 +469,7 @@ TEST_F(I16QuantisationTest, TI_quantize) {
    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {249748}, C });
    weights->allocate();
    fillWeights(weights);

    Core ie;
    auto network = ie.ReadNetwork(TIModelWithLSTMCell2(), weights);
@ -209,9 +209,8 @@ GNA_NEG_INSTANTIATE(InputW, Fine, InvalidInputW, "Unsupported input width", GNA_
GNA_NEG_INSTANTIATE(InputC, Fine, InvalidInputC, "Unsupported number of input channels", GNA_3_0)
GNA_NEG_INSTANTIATE(Padding, InvalidPadding, Fine, "Unsupported convolution input padding", GNA_3_0)
GNA_NEG_INSTANTIATE(Stride, InvalidStride, Fine, "Unsupported convolution stride shape", GNA_3_0)
GNA_NEG_INSTANTIATE(Dilation, InvalidDilation, Fine, "dilation is not supported on GNA", GNA_3_0)
GNA_NEG_INSTANTIATE(Dilation35, InvalidDilation, Fine, "dilation is not supported on GNA", GNA_3_5)
GNA_NEG_INSTANTIATE(Dilation, InvalidDilation, Fine, "Unsupported dilation", GNA_3_0)
GNA_NEG_INSTANTIATE(Dilation35, InvalidDilation, Fine, "Unsupported dilation", GNA_3_5)
GNA_NEG_INSTANTIATE(PaddingSize, InvalidPaddingSize, Fine, "Unsupported convolution input padding", GNA_3_0)
GNA_NEG_INSTANTIATE(PaddingSize35, InvalidPaddingSize, Fine, "Unsupported convolution input padding", GNA_3_5)

} // namespace
@ -24,8 +24,6 @@ std::vector<std::string> disabledTestPatterns() {
        R"(.*(EltwiseLayerTest).*eltwiseOpType=Prod.*secondaryInputType=PARAMETER.*opType=SCALAR.*)",
        // TODO: Issue: 34348
        R"(.*IEClassGetAvailableDevices.*)",
        // TODO: Issue 32923
        R"(.*IEClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK.*)",
        // TODO: Issue 39358
        R"(.*unaligned.*MultipleConcatTest.*)",
        R"(.*ActivationConcatsEltwise.*CS=35.*)",
@ -33,8 +31,6 @@ std::vector<std::string> disabledTestPatterns() {
        R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.8\).*)",
        R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.16\).*)",
        R"(.*ConcatMultiInput.CompareWithRefConstOnly.*IS=\(1.32\).*)",
        // TODO: Issue: 29577
        R"(.*CoreThreadingTests.smoke_QueryNetwork.*)",
        // TODO: Issue: 46416
        R"(.*InferRequestVariableStateTest.inferreq_smoke_VariableState_2infers*.*)",
        // TODO: Issue 24839
@ -70,12 +66,13 @@ std::vector<std::string> disabledTestPatterns() {
        R"(.*OVExecutableNetworkBaseTest.*CanGetInputsInfoAndCheck.*)",
        R"(.*OVExecutableNetworkBaseTest.*getOutputsFromSplitFunctionWithSeveralOutputs.*)",
        R"(.*OVExecutableNetworkBaseTest.*canLoadNetworkFromMemory.*)",
        R"(.*OVClassHeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK.*GetMetricNoThrow.*)",
        R"(.*(OVClass|IEClass)HeteroExecutableNetworkGetMetricTest_TARGET_FALLBACK.*GetMetricNoThrow.*)",
        R"(.*LoadNetwork*.*LoadNetwork(HETEROWithDeviceIDNoThrow|WithBigDeviceID|WithInvalidDeviceID)*.*)",
        R"(.*QueryNetwork*.*QueryNetwork(HETEROWithDeviceIDNoThrow|WithBigDeviceID|WithInvalidDeviceID)*.*)",
        R"(.*LoadNetworkTest.*QueryNetwork(MULTIWithHETERO|HETEROWithMULTI)NoThrow_V10.*)",
        R"(.*Behavior.*OVExecutableNetworkBaseTest.*get(Inputs|Outputs)FromFunctionWithSeveral(Inputs|Outputs).*)",
        // TODO: temporarily disabled; needs to be enabled when PR 9282 is merged
        R"(.*OVExecGraphImportExportTest.*readFromV10IR.*)",
        // TODO: Issue: 29577
        R"(.*QueryNetwork.*)",
        // Issue connected with OV2.0
        R"(.*EltwiseLayerTest.*NetType=f16.*)",
        // TODO: Issue: 69639
@ -91,12 +88,10 @@ std::vector<std::string> disabledTestPatterns() {
        R"(.*CompileModelCacheTestBase.*(SplitConvConcat|KSOFunction).*)",
        R"(.*CompileModelCacheTestBase.*(SingleConv|NestedSplitConvConcat).*)",
        R"(.*CompileModelCacheTestBase.*(Bias|ReadConcatSplitAssign).*)",
        R"(.*OVClassLoadNetworkTest.*LoadNetwork.*)",
        // does not work due to GNA 3.0 convolution and other primitives limitations, partially can be resolved by
        // switching GNA library to GNA3.5
        R"(.*CachingSupportCase.*LoadNet.*(Bias|Split|Concat|KSO|SingleConv).*)",
        R"(.*CachingSupportCase.*LoadNet.*(ConvPoolRelu|TIwithLSTMcell1)_f32_batch2.*)",
        R"(.*IEClassLoadNetworkTest.*LoadNetwork(HETERO|MULTI|WithDeviceIDNoThrow|WithInvalidDeviceIDThrows).*)",
        R"(.*smoke_Multi_BehaviorTests.*)",
    };
}
@ -8,6 +8,8 @@
// to suppress deprecated definition errors
#define IMPLEMENT_INFERENCE_ENGINE_PLUGIN
#include "layers/gna_split_layer.hpp"
#include "ngraph/opsets/opset9.hpp"
#include "ops/util/util.hpp"

namespace {

@ -31,4 +33,61 @@ TEST(GetAlignedSplitSizesTest, testAlignedSplitSizes) {
    }
}

using VariadicSplitParameters = std::tuple<ov::Shape,            // input size
                                           uint32_t,             // axis
                                           std::vector<int32_t>, // split lengths
                                           bool                  // supported
                                           >;

const std::vector<VariadicSplitParameters> variadic_split_data = {
    VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{192, 192, 320, 320}, true},
    VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{640, 192, 192}, true},
    VariadicSplitParameters{ov::Shape{1024}, 0, std::vector<int32_t>{500, 24, 500}, false},
    VariadicSplitParameters{ov::Shape{1, 1024}, 1, std::vector<int32_t>{700, 300, 24}, false},
};

TEST(CheckSplitSupported, CheckVariadicSplitSupported) {
    ov::Shape input_shape;
    uint32_t axis;
    std::vector<int32_t> split_lengths;
    bool result;
    for (const auto& item : variadic_split_data) {
        std::tie(input_shape, axis, split_lengths, result) = item;
        auto split = std::make_shared<ngraph::opset9::VariadicSplit>(
            std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape),
            ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({1}), {axis}),
            ngraph::opset9::Constant::create(ngraph::element::i64,
                                             ngraph::Shape({split_lengths.size()}),
                                             split_lengths));
        ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result);
    }
}

using SplitParameters = std::tuple<ov::Shape, // input size
                                   uint32_t,  // axis
                                   uint32_t,  // num_splits
                                   bool       // supported
                                   >;

const std::vector<SplitParameters> split_data = {
    SplitParameters{ov::Shape{1024}, 0, 4, true},
    SplitParameters{ov::Shape{1, 1024}, 1, 16, true},
    SplitParameters{ov::Shape{1024}, 0, 64, false},
    SplitParameters{ov::Shape{1, 1024}, 1, 256, false},
};

TEST(CheckSplitSupported, CheckSplitSupported) {
    ov::Shape input_shape;
    uint32_t axis;
    uint32_t num_splits;
    bool result;
    for (const auto& item : split_data) {
        std::tie(input_shape, axis, num_splits, result) = item;
        auto split = std::make_shared<ngraph::opset9::Split>(
            std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape),
            ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}),
            num_splits);
        ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result);
    }
}
} // namespace
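Outside the parameterized loop, the new limitation check can also be exercised directly; a minimal sketch, with shapes chosen to mirror one accepted case above:

// A 1 x 1024 tensor split into 16 equal parts along axis 1 is accepted,
// matching the SplitParameters{ov::Shape{1, 1024}, 1, 16, true} entry.
auto param = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, ov::Shape{1, 1024});
auto axis = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});
auto split = std::make_shared<ngraph::opset9::Split>(param, axis, 16);
bool supported = ov::intel_gna::limitations::is_split_supported(split, /*is_exception_allowed=*/false);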