[GNA] Limitations refactoring (#16957)
* Limitations refactoring
* fix CI builds/tests
* changes after review
* Move GraphCompiler initialization to constructor
* resolve conflicts after rebase
* update after review
* resolve problem with double initialization for Limitations
parent 3300543eac
commit cccbf7ce7e
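Summary of the pattern this commit introduces, sketched from the hunks below. Only Limitations::init() and the class-scoped constants are visible in this diff; the include path, the caller name, and the assumption that the k-prefixed constants keep the old values (the new header's initializers are not shown here) are all illustrative:

#include "backend/gna_limitations.hpp"  // assumed path of the refactored header

using namespace ov::intel_gna::limitations;
using ov::intel_gna::target::DeviceVersion;

void plugin_thread_setup() {  // hypothetical caller
    // One explicit per-thread initialization (k_instance is thread_local)
    // replaces the old free functions parameterized by DeviceVersion.
    Limitations::init(DeviceVersion::GNA3_5);

    // Loose constants became k-prefixed class members, e.g.
    // limitations::convMinFiltersNum -> Limitations::kConvMinFiltersNum.
    static_assert(Limitations::kConvMinFiltersNum == 4, "value assumed unchanged by the rename");
}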
@@ -48,6 +48,8 @@
 using ov::intel_gna::gna_convolution_layer::outputFromConv;
 using ov::intel_gna::gna_convolution_layer::outputFromPooling;
 
+using namespace ov::intel_gna::limitations;
+
 namespace ov {
 namespace intel_gna {
 namespace backend {
@@ -180,8 +182,8 @@ void AMIntelDNN::InitConvolutional1DComponentPrivate(intel_dnn_component_t& comp
         THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << num_columns_in
                             << ") is not a multiply by 8";
     }
-    if (num_filters < limitations::convMinFiltersNum || num_filters > limitations::convMaxFiltersNum ||
-        num_filters % limitations::convFiltersNumDivider != 0) {
+    if (num_filters < Limitations::kConvMinFiltersNum || num_filters > Limitations::kConvMaxFiltersNum ||
+        num_filters % Limitations::kConvFiltersNumDivider != 0) {
         THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << num_filters;
     }
     auto max_number_of_out_elements = outputFromConv(num_columns_in, num_filter_coefficients, convStride);
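The renamed constants keep their old values (4, 65532, and 4, per the header lines removed later in this diff), so the filter check above is unchanged in substance. A standalone restatement of the rule for illustration; the free-function form and its name are hypothetical:

#include <cstdint>

// Restates the Convolutional1DComponent filter-count rule from the hunk above.
// Values mirror Limitations::kConvMinFiltersNum / kConvMaxFiltersNum /
// kConvFiltersNumDivider as defined in this patch.
bool is_valid_gna_filter_count(uint32_t num_filters) {
    const uint32_t kConvMinFiltersNum = 4;
    const uint32_t kConvMaxFiltersNum = 65532;
    const uint32_t kConvFiltersNumDivider = 4;
    return num_filters >= kConvMinFiltersNum && num_filters <= kConvMaxFiltersNum &&
           num_filters % kConvFiltersNumDivider == 0;
}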
@@ -37,267 +37,62 @@ namespace intel_gna {
 using namespace target;
 namespace limitations {
 
+class SupportedElementTypes {
+public:
+    static bool IsParameterTypeSupported(ov::element::Type type, bool is_exception_allowed = false);
+    static bool IsConstantTypeSupported(ov::element::Type type, bool is_exception_allowed = false);
+
+private:
+    static const std::set<ov::element::Type> supported_parameter_types;
+    static const std::set<ov::element::Type> supported_constant_types;
+};
+
 const std::set<ov::element::Type> SupportedElementTypes::supported_parameter_types = {ov::element::u8,
                                                                                       ov::element::i16,
                                                                                       ov::element::f32};
 
-size_t getMemoryAlignmentBytes(target::DeviceVersion target) {
-    static const std::unordered_map<target::DeviceVersion, size_t> mem_alignment_map{
-        {target::DeviceVersion::GNA1_0, 64},
-        {target::DeviceVersion::GNA2_0, 64},
-        {target::DeviceVersion::GNA3_0, 64},
-        {target::DeviceVersion::GNA3_1, 64},
-        {target::DeviceVersion::GNA3_5, 64},
-        {target::DeviceVersion::GNAEmbedded3_5, 64},
-        {target::DeviceVersion::GNA3_6, 16},
-        {target::DeviceVersion::GNA4_0, 16}};
-
-    return common::GetValueForKey<target::DeviceVersion, size_t>(target, mem_alignment_map);
-}
-
-bool SupportedElementTypes::is_parameter_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
-    if (supported_parameter_types.count(elem_type) == 0) {
-        if (is_exception_allowed) {
-            THROW_GNA_EXCEPTION << "The plugin does not support input precision with " << elem_type.get_type_name()
-                                << " format. Supported precisions " << supported_parameter_types << "\n";
-        }
-        return false;
-    }
-    return true;
-}
-
-const std::set<ov::element::Type> SupportedElementTypes::supported_constant_types = {ov::element::i8,
-                                                                                     ov::element::u8,
-                                                                                     ov::element::i16,
-                                                                                     ov::element::u16,
-                                                                                     ov::element::i32,
-                                                                                     ov::element::f32,
-                                                                                     ov::element::f64};
-
-bool SupportedElementTypes::is_constant_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
-    if (supported_constant_types.count(elem_type) == 0) {
-        if (is_exception_allowed) {
-            THROW_GNA_EXCEPTION << "The plugin does not support constant precision with " << elem_type.get_type_name()
-                                << " format. Supported precisions " << supported_constant_types << "\n";
-        }
-        return false;
-    }
-    return true;
-}
-
-bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node) {
-    OPENVINO_ASSERT(node, "Transpose node is empty!");
-    const ov::Shape squeezed_shape = graph_utils::squeeze_shape(node->get_input_shape(0));
-    const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]);
-    const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]);
-
-    // GNA transpose limitations:
-    // - supports 2d transposes only
-    // - smaller dimension should be less or equal to 8
-    // - bigger dimension should be a multiple of limitations::noOfInputsDivisor
-    if (squeezed_shape.size() == 2 && min_input_dim <= 8 &&
-        ALIGN(max_input_dim, limitations::noOfInputsDivisor) == max_input_dim) {
-        return true;
-    }
-    return false;
-}
-
-bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
-                       const DeviceVersion& effective_compile_target,
-                       const InferenceEngine::Precision gna_precision,
-                       bool is_exception_allowed) {
-    OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!");
-    size_t batch_size = conv_ie->input_value(0).get_shape()[0];
-    if (batch_size != 1) {
-        if (is_exception_allowed) {
-            THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() +
-                                       ", type: " + conv_ie->get_type_name() + ", and batch size(" +
-                                       std::to_string(batch_size) + ") != 1 not supported";
-        }
-        return false;
-    }
-    auto check_dilation = [&](size_t filter_dilation_height, size_t filter_stride_width) -> bool {
-        cnn2d::RangeLimit2D dilation_limit{{convDilationHeight, convDilationHeight, "dilation height"},
-                                           {convDilationWidth, convDilationWidth, "dilation width"}};
-        std::string error = dilation_limit.GetErrorOrEmpty(filter_dilation_height, filter_stride_width);
-        return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed,
-                                                             error,
-                                                             conv_ie->get_friendly_name(),
-                                                             conv_ie->get_type_name());
-    };
-    auto input_shape = conv_ie->input_value(0).get_shape();
-    auto filter_shape = conv_ie->input_value(1).get_shape();
-    if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) ||
-        (4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) {
-        pass::helper::ConvData conv_data;
-        pass::helper::GetConvData(conv_ie, conv_data);
-        if (gna_convolution_layer::isMappableFrom2DTo1D(conv_data.input_height,
-                                                        conv_data.input_width,
-                                                        conv_data.input_channel_count,
-                                                        conv_data.filter_height,
-                                                        conv_data.filter_width,
-                                                        conv_data.filter_stride_height,
-                                                        conv_data.filter_stride_width)) {
-            return check_dilation(conv_data.filter_dilation_height, conv_data.filter_dilation_width);
-        }
-        const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target);
-        if (cnn2dValidatorPtr) {
-            return cnn2dValidatorPtr->ValidateCnn2D(conv_ie->get_friendly_name(),
-                                                    conv_data.input_height,
-                                                    conv_data.input_width,
-                                                    conv_data.input_channel_count,
-                                                    conv_data.filter_height,
-                                                    conv_data.filter_width,
-                                                    conv_data.filter_channel_count,
-                                                    conv_data.filter_stride_height,
-                                                    conv_data.filter_stride_width,
-                                                    conv_data.filter_dilation_height,
-                                                    conv_data.filter_dilation_width,
-                                                    OvGnaTypeIntFromBytes(gna_precision.size()),
-                                                    is_exception_allowed);
-        }
-    }
-    return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]);
-}
-
-bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
-                          const DeviceVersion& effective_compile_target,
-                          bool is_exception_allowed) {
-    OPENVINO_ASSERT(max_pool, "MaxPool node is empty!");
-    auto kernels = max_pool->get_kernel();
-    if (2 == kernels.size() && kernels[0] > 1 && kernels[1] > 1) {
-        const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target);
-        if (cnn2dValidatorPtr) {
-            auto strides = max_pool->get_strides();
-            return cnn2dValidatorPtr->ValidatePooling2D(max_pool->get_friendly_name(),
-                                                        kernels[0],
-                                                        kernels[1],
-                                                        strides[0],
-                                                        strides[1],
-                                                        is_exception_allowed);
-        }
-    }
-    return true;
-}
-
-bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected, bool is_exception_allowed) {
-    OPENVINO_ASSERT(fully_connected, "FullyConnected node is empty!");
-    size_t output_batch_size = fully_connected->get_output_shape(0)[0];
-    if (output_batch_size > 8) {
-        if (is_exception_allowed) {
-            THROW_GNA_EXCEPTION << "topology with layer: " + fully_connected->get_friendly_name() +
-                                       ", type: " + fully_connected->get_type_name() + ", and batch size(" +
-                                       std::to_string(output_batch_size) + ") not supported";
-        }
-        return false;
-    }
-    return true;
-}
-
-bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed) {
-    OPENVINO_ASSERT(node, "Split node is empty!");
-    bool is_aligned = true;
-    for (size_t i = 0; i < node->get_output_size(); i++) {
-        is_aligned &= ov::intel_gna::graph_utils::is_aligned_split(node, i);
-    }
-    return is_aligned;
-}
-
-bool is_op_supported(const std::shared_ptr<ov::Node>& node,
-                     const DeviceVersion& effective_compile_target,
-                     const InferenceEngine::Precision gna_precision,
-                     bool is_exception_allowed) {
-    if (ov::op::util::is_parameter(node)) {
-        return SupportedElementTypes::is_parameter_type_supported(node->get_element_type(), is_exception_allowed);
-    } else if (ov::op::util::is_constant(node)) {
-        return SupportedElementTypes::is_constant_type_supported(node->get_element_type(), is_exception_allowed);
-    } else if (auto conv_ie = std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
-        return is_conv_supported(conv_ie, effective_compile_target, gna_precision, is_exception_allowed);
-    } else if (auto fully_connected = std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node)) {
-        return is_fc_supported(fully_connected, is_exception_allowed);
-    } else if (ov::intel_gna::graph_utils::is_pooling(node)) {
-        return is_pooling_supported(std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node),
-                                    effective_compile_target,
-                                    is_exception_allowed);
-    } else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) ||
-               ov::intel_gna::graph_utils::is_eltwise_add(node) || ov::intel_gna::graph_utils::is_eltwise_mul(node) ||
-               ov::intel_gna::graph_utils::is_crop_affined(node) ||
-               ov::intel_gna::graph_utils::is_activation(node.get()) ||
-               ov::intel_gna::graph_utils::is_gna_precision_agnostic(
-                   node) ||  // check concat/split are aligned when transformations will be moved to ngraph
-               (std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(node) != nullptr) ||
-               (std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) != nullptr) ||
-               (std::dynamic_pointer_cast<ngraph::op::PowerIE>(node) != nullptr) ||
-               (std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr)) {
-        return true;
-    } else if (ov::intel_gna::graph_utils::is_gna_precision_agnostic(node)) {
-        if ((std::dynamic_pointer_cast<ngraph::opset9::Split>(node) != nullptr) ||
-            (std::dynamic_pointer_cast<ngraph::opset9::VariadicSplit>(node) != nullptr)) {
-            return is_split_supported(node, is_exception_allowed);
-        }
-        // TODO check concat are aligned when transformation will be moved to ngraph
-        return true;
-    }
-    return false;
-}
-
-void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
-                             const DeviceVersion& effective_compile_target,
-                             const InferenceEngine::Precision gna_precision) {
-    std::stringstream error;
-    // Walk through the transformed model
-    for (auto& op : model->get_ops()) {
-        if (!is_op_supported(op, effective_compile_target, gna_precision, true)) {
-            error << "The plugin does not support layer " << op->get_friendly_name() << " (type " << op->get_type_name()
-                  << ")!" << std::endl;
-        }
-    }
-    if (!error.str().empty()) {
-        THROW_GNA_EXCEPTION << error.str();
-    }
-}
 namespace cnn2d {
 
-bool IsEqualToLimit::isValid(const uint32_t val) const {
+bool IsEqualToLimit::IsValid(const uint32_t val) const {
     return val == compared_value;
 }
 
 std::string IsEqualToLimit::GetErrorOrEmpty(const uint32_t val) const {
     std::ostringstream out;
-    if (!isValid(val)) {
+    if (!IsValid(val)) {
         out << "Unsupported " << what << ", actual value: " << val << ", but should be equal to " << compared_value
             << "\n";
     }
     return out.str();
 }
 
-bool IsLessThanLimit ::isValid(const uint32_t val) const {
+bool IsLessThanLimit::IsValid(const uint32_t val) const {
     return val < compared_value;
 }
 
-std::string IsLessThanLimit ::GetErrorOrEmpty(const uint32_t val) const {
+std::string IsLessThanLimit::GetErrorOrEmpty(const uint32_t val) const {
     std::ostringstream out;
-    if (!isValid(val)) {
+    if (!IsValid(val)) {
         out << "Unsupported " << what << ", actual value: " << val << ", but should be less than " << compared_value
             << "\n";
     }
     return out.str();
 }
 
-bool RangeLimit::isValid(const uint32_t val) const {
+bool RangeLimit::IsValid(const uint32_t val) const {
     return val >= min && val <= max;
 }
 
 std::string RangeLimit::GetErrorOrEmpty(const uint32_t val) const {
     std::ostringstream out;
-    if (!isValid(val)) {
+    if (!IsValid(val)) {
         out << "Unsupported " << what << ", actual value: " << val << ", valid range [" << min << ", " << max << "]\n";
     }
     return out.str();
 }
 
-bool RangeLimit2D::isValid(const uint32_t h, const uint32_t w) const {
-    return hLimit.isValid(h) && wLimit.isValid(w);
+bool RangeLimit2D::IsValid(const uint32_t h, const uint32_t w) const {
+    return hLimit.IsValid(h) && wLimit.IsValid(w);
 }
 
 std::string RangeLimit2D::GetErrorOrEmpty(const uint32_t h, const uint32_t w) const {
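The transpose helper deleted above reappears later in this diff as Limitations::is_transpose_supported with the same rule. A standalone restatement for illustration (the function name is hypothetical, and ALIGN(x, 8) == x is assumed to be equivalent to x % 8 == 0):

#include <algorithm>
#include <cstddef>

// Restates the 2D transpose rule: smaller squeezed dimension <= 8,
// larger dimension a multiple of noOfInputsDivisor (8).
bool gna_transpose_ok(size_t dim_a, size_t dim_b) {
    const size_t kNoOfInputsDivisor = 8;
    const size_t smaller = std::min(dim_a, dim_b);
    const size_t larger = std::max(dim_a, dim_b);
    return smaller <= 8 && larger % kNoOfInputsDivisor == 0;
}
// gna_transpose_ok(8, 64) -> true; gna_transpose_ok(10, 64) -> false (10 > 8);
// gna_transpose_ok(4, 30) -> false (30 is not a multiple of 8).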
@@ -308,8 +103,8 @@ RangeMultipleLimit::RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn)
     : RangeLimit(rlIn),
       multiplier(multiplierIn) {}
 
-bool RangeMultipleLimit::isValid(const uint32_t val) const {
-    return RangeLimit::isValid(val) && (val % multiplier == 0);
+bool RangeMultipleLimit::IsValid(const uint32_t val) const {
+    return RangeLimit::IsValid(val) && (val % multiplier == 0);
 }
 
 std::string RangeMultipleLimit::GetErrorOrEmpty(const uint32_t val) const {
@@ -321,7 +116,7 @@ std::string RangeMultipleLimit::GetErrorOrEmpty(const uint32_t val) const {
     return e + out.str();
 }
 
-bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) const {
+bool VectorOrSquareLimit::IsValid(const uint32_t h, const uint32_t w) const {
     if (w == 1 && h >= 1 && h <= maxVectorHeight)
         return true;
     if (h == 1 && w >= 1 && w <= maxVectorWidth)
@@ -333,7 +128,7 @@ bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) const {
 
 std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
     std::ostringstream out;
-    if (!isValid(h, w)) {
+    if (!IsValid(h, w)) {
         out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w << ", only vertical vector up to "
             << maxVectorHeight << "x1, horizontal up to 1x" << maxVectorWidth << " or square up to " << maxSquare << "x"
             << maxSquare << " are valid\n";
@@ -341,7 +136,7 @@ std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_
     return out.str();
 }
 
-bool RectLimit::isValid(const uint32_t h, const uint32_t w) const {
+bool RectLimit::IsValid(const uint32_t h, const uint32_t w) const {
     if (h >= 1 && h <= maxVectorHeight && w >= 1 && w <= maxVectorWidth)
         return true;
     return false;
@@ -349,7 +144,7 @@ bool RectLimit::isValid(const uint32_t h, const uint32_t w) const {
 
 std::string RectLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
     std::ostringstream out;
-    if (!isValid(h, w)) {
+    if (!IsValid(h, w)) {
         out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w << ", only rectangular shapes up to "
             << maxVectorHeight << "x" << maxVectorWidth << " are valid\n";
     }
@@ -365,8 +160,8 @@ RectLimit RectLimitByChannels::GetByChannels(const uint32_t channels) const {
     return RectLimit{0, 0};
 }
 
-bool RectLimitByChannels::isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
-    return GetByChannels(channels).isValid(h, w);
+bool RectLimitByChannels::IsValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
+    return GetByChannels(channels).IsValid(h, w);
 }
 
 std::string RectLimitByChannels::GetErrorOrEmpty(const uint32_t h,
|
|||||||
return precision == OvGnaTypeInt8 ? limit_for_int8 : limit_for_int16;
|
return precision == OvGnaTypeInt8 ? limit_for_int8 : limit_for_int16;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RectLimitByChannelsAndPrecision::isValid(const uint32_t h,
|
bool RectLimitByChannelsAndPrecision::IsValid(const uint32_t h,
|
||||||
const uint32_t w,
|
const uint32_t w,
|
||||||
const OvGnaType precision,
|
const OvGnaType precision,
|
||||||
const uint32_t channels) const {
|
const uint32_t channels) const {
|
||||||
return GetByPrecision(precision).isValid(h, w, channels);
|
return GetByPrecision(precision).IsValid(h, w, channels);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h,
|
std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h,
|
||||||
@ -395,6 +190,66 @@ std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h,
|
|||||||
return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what);
|
return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class Validator_30 : public AbstractValidator {
|
||||||
|
static const RangeLimit2D kInputHWLimit;
|
||||||
|
static const RangeMultipleLimit kInputChannelsNumberLimit;
|
||||||
|
|
||||||
|
static const RangeMultipleLimit kKernelNumberLimit;
|
||||||
|
static const RectLimitByChannelsAndPrecision kKernelLimit;
|
||||||
|
static const RangeLimit2D kDilationLimit;
|
||||||
|
|
||||||
|
static const VectorOrSquareLimit kPoolingWindowLimit;
|
||||||
|
|
||||||
|
public:
|
||||||
|
Validator_30() = default;
|
||||||
|
|
||||||
|
bool ValidateCnn2D(const std::string& name,
|
||||||
|
const uint32_t inHeight,
|
||||||
|
const uint32_t inWidth,
|
||||||
|
const uint32_t inChannels,
|
||||||
|
const uint32_t kH,
|
||||||
|
const uint32_t kW,
|
||||||
|
const uint32_t kN,
|
||||||
|
const uint32_t strideH,
|
||||||
|
const uint32_t strideW,
|
||||||
|
const uint32_t dilationH,
|
||||||
|
const uint32_t dilationW,
|
||||||
|
OvGnaType inPrecision,
|
||||||
|
bool exception = true) const override;
|
||||||
|
|
||||||
|
bool ValidatePooling2D(const std::string& name,
|
||||||
|
const uint32_t windowH,
|
||||||
|
const uint32_t windowW,
|
||||||
|
const uint32_t strideH,
|
||||||
|
const uint32_t strideW,
|
||||||
|
bool exception = true) const override;
|
||||||
|
|
||||||
|
bool ValidateInputPadding(const std::string& name,
|
||||||
|
const uint32_t pad_h_begin,
|
||||||
|
const uint32_t pad_h_end,
|
||||||
|
const uint32_t pad_w_begin,
|
||||||
|
const uint32_t pad_w_end,
|
||||||
|
const uint32_t kernel_h,
|
||||||
|
const uint32_t kernel_w,
|
||||||
|
const bool throwOnError = true) const override;
|
||||||
|
|
||||||
|
bool ShouldUseOnlyConv2DGnaIface() const override;
|
||||||
|
|
||||||
|
bool ValidateCnn1D(const std::string& name,
|
||||||
|
const uint32_t inHeight,
|
||||||
|
const uint32_t inWidth,
|
||||||
|
const uint32_t inChannels,
|
||||||
|
const uint32_t kH,
|
||||||
|
const uint32_t kW,
|
||||||
|
const uint32_t kN,
|
||||||
|
const uint32_t strideH,
|
||||||
|
const uint32_t strideW,
|
||||||
|
const uint32_t dilationH,
|
||||||
|
const uint32_t dilationW,
|
||||||
|
OvGnaType inPrecision,
|
||||||
|
bool exception = true) const override;
|
||||||
|
};
|
||||||
|
|
||||||
const RangeLimit2D Validator_30::kInputHWLimit{{16, 384, "input height"}, {16, 240, "input width"}};
|
const RangeLimit2D Validator_30::kInputHWLimit{{16, 384, "input height"}, {16, 240, "input width"}};
|
||||||
const RangeMultipleLimit Validator_30::kInputChannelsNumberLimit{{8, 384, "number of input channels"}, 8};
|
const RangeMultipleLimit Validator_30::kInputChannelsNumberLimit{{8, 384, "number of input channels"}, 8};
|
||||||
|
|
||||||
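Given the semantics defined earlier in this file (RangeLimit::IsValid checks min <= val <= max, and RangeMultipleLimit additionally requires val % multiplier == 0), the GNA 3.0 input limits above can be restated as a small predicate. The standalone form and the names are illustrative:

#include <cstdint>

bool in_range(uint32_t v, uint32_t lo, uint32_t hi) { return v >= lo && v <= hi; }  // RangeLimit::IsValid

// Mirrors Validator_30::kInputHWLimit and kInputChannelsNumberLimit above.
bool gna30_input_ok(uint32_t h, uint32_t w, uint32_t c) {
    return in_range(h, 16, 384) && in_range(w, 16, 240) &&  // input height/width
           in_range(c, 8, 384) && c % 8 == 0;               // channels, multiple of 8
}
// gna30_input_ok(64, 120, 16) -> true
// gna30_input_ok(8, 120, 16)  -> false (height below 16)
// gna30_input_ok(64, 120, 12) -> false (12 is not a multiple of 8)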
@@ -404,8 +259,9 @@ const RectLimitByChannelsAndPrecision Validator_30::kKernelLimit{
     {{{48, {7, 7}}, {64, {7, 5}}, {80, {7, 4}}, {120, {7, 3}}, {384, {7, 1}}}},
 };
 
-const RangeLimit2D Validator_30::kDilationLimit{{convDilationHeight, convDilationHeight, "dilation height"},
-                                                {convDilationWidth, convDilationWidth, "dilation width"}};
+const RangeLimit2D Validator_30::kDilationLimit{
+    {Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"},
+    {Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}};
 
 bool Validator_30::ValidateCnn2D(const std::string& name,
                                  const uint32_t inHeight,
@@ -493,6 +349,95 @@ bool Validator_30::ShouldUseOnlyConv2DGnaIface() const {
     return false;
 }
 
+class Validator_35 : public AbstractValidator {
+    struct CnnLimits {
+        const RangeLimit2D kInputHWLimit;
+        const RangeLimit kInputChannelsNumberLimit1B;
+        const RangeLimit kInputChannelsNumberLimit2B;
+        const RangeLimit kKernelNumberLimit;
+        const RangeLimit2D kKerneHWlLimit1B;
+        const RangeLimit2D kKerneHWlLimit2B;
+        const RangeLimit2D kStrideHWLimit1B;
+        const RangeLimit2D kStrideHWLimit2B;
+        const RangeLimit2D kDilationLimit;
+        const RangeLimit2D kPoolingWindowHWLimit;
+        const RangeLimit2D kPoolingStrideHWLimit;
+    };
+
+    static const CnnLimits kCnn2DLimits;
+    static const CnnLimits kCnn1DLimits;
+
+    std::string ValidateCnn(const CnnLimits& limits,
+                            const std::string& name,
+                            const uint32_t inHeight,
+                            const uint32_t inWidth,
+                            const uint32_t inChannels,
+                            const uint32_t kH,
+                            const uint32_t kW,
+                            const uint32_t kN,
+                            const uint32_t strideH,
+                            const uint32_t strideW,
+                            const uint32_t dilationH,
+                            const uint32_t dilationW,
+                            OvGnaType inPrecision) const;
+
+    std::string ValidatePooling(const CnnLimits& limits,
+                                const std::string& name,
+                                const uint32_t windowH,
+                                const uint32_t windowW,
+                                const uint32_t strideH,
+                                const uint32_t strideW) const;
+
+public:
+    Validator_35() = default;
+
+    bool ValidateCnn2D(const std::string& name,
+                       const uint32_t inHeight,
+                       const uint32_t inWidth,
+                       const uint32_t inChannels,
+                       const uint32_t kH,
+                       const uint32_t kW,
+                       const uint32_t kN,
+                       const uint32_t strideH,
+                       const uint32_t strideW,
+                       const uint32_t dilationH,
+                       const uint32_t dilationW,
+                       OvGnaType inPrecision,
+                       bool exception = true) const override;
+
+    bool ValidatePooling2D(const std::string& name,
+                           const uint32_t windowH,
+                           const uint32_t windowW,
+                           const uint32_t strideH,
+                           const uint32_t strideW,
+                           bool exception = true) const override;
+
+    bool ValidateInputPadding(const std::string& name,
+                              const uint32_t pad_h_begin,
+                              const uint32_t pad_h_end,
+                              const uint32_t pad_w_begin,
+                              const uint32_t pad_w_end,
+                              const uint32_t kernel_h,
+                              const uint32_t kernel_w,
+                              const bool throwOnError = true) const override;
+
+    bool ShouldUseOnlyConv2DGnaIface() const override;
+
+    bool ValidateCnn1D(const std::string& name,
+                       const uint32_t inHeight,
+                       const uint32_t inWidth,
+                       const uint32_t inChannels,
+                       const uint32_t kH,
+                       const uint32_t kW,
+                       const uint32_t kN,
+                       const uint32_t strideH,
+                       const uint32_t strideW,
+                       const uint32_t dilationH,
+                       const uint32_t dilationW,
+                       OvGnaType inPrecision,
+                       bool exception = true) const override;
+};
+
 const Validator_35::CnnLimits Validator_35::kCnn2DLimits{
     {{1, 65535, "input height"}, {1, 65535, "input width"}},  // kInputHWLimit
     {1, 2048, "number of input channels"},  // kInputChannelsNumberLimit1B
@@ -502,8 +447,8 @@ const Validator_35::CnnLimits Validator_35::kCnn2DLimits{
     {{1, 255, "kernel height"}, {1, 256, "kernel width"}},  // kKerneHWlLimit2B
     {{1, 255, "convolution stride height"}, {1, 256, "convolution stride width"}},  // kStrideHWLimit1B
     {{1, 255, "convolution stride height"}, {1, 256, "convolution stride width"}},  // kStrideHWLimit2B
-    {{convDilationHeight, convDilationHeight, "dilation height"},  // kDilationLimit
-     {convDilationWidth, convDilationWidth, "dilation width"}},
+    {{Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"},  // kDilationLimit
+     {Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}},
     {{1, 255, "pooling window height"}, {1, 255, "pooling window width"}},  // kPoolingWindowHWLimit
     {{1, 255, "pooling stride height"}, {1, 255, "pooling stride width"}}  // kPoolingStrideHWLimit
 };
@@ -517,8 +462,8 @@ const Validator_35::CnnLimits Validator_35::kCnn1DLimits{
     {{1, 1, "kernel height"}, {1, 2048, "kernel width"}},  // kKerneHWlLimit2B
     {{1, 1, "convolution stride height"}, {1, 4096, "convolution stride width"}},  // kStrideHWLimit1B
     {{1, 1, "convolution stride height"}, {1, 2048, "convolution stride width"}},  // kStrideHWLimit2B
-    {{convDilationHeight, convDilationHeight, "dilation height"},  // kDilationLimit
-     {convDilationWidth, convDilationWidth, "dilation width"}},
+    {{Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"},  // kDilationLimit
+     {Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}},
     {{1, 1, "pooling window height"}, {1, 255, "pooling window width"}},  // kPoolingWindowHWLimit
     {{1, 1, "pooling stride height"}, {1, 255, "pooling stride width"}}  // kPoolingStrideHWLimit
 };
@@ -672,16 +617,16 @@ bool Validator_35::ShouldUseOnlyConv2DGnaIface() const {
     return true;
 }
 
-std::unique_ptr<AbstractValidator> AbstractValidator::Create(const DeviceVersion& target) {
+std::shared_ptr<AbstractValidator> AbstractValidator::Create(const DeviceVersion& target) {
     switch (target) {
     case DeviceVersion::GNA3_0:
     case DeviceVersion::GNA3_1:
-        return tools::make_unique<Validator_30>();
+        return std::make_shared<Validator_30>();
     case DeviceVersion::GNA3_5:
     case DeviceVersion::GNAEmbedded3_5:
    case DeviceVersion::GNA3_6:
    case DeviceVersion::GNA4_0:
-        return tools::make_unique<Validator_35>();
+        return std::make_shared<Validator_35>();
     default:
         return nullptr;
     }
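Create() now returns shared_ptr rather than unique_ptr, so the validator can be cached and shared (the Limitations instance added below stores it as m_cnn_validator) instead of being recreated at each call site. A hedged usage sketch using only signatures visible in this diff; the wrapper function is illustrative:

#include <memory>
#include <string>

using ov::intel_gna::target::DeviceVersion;
using namespace ov::intel_gna::limitations;

bool validate_pooling_example() {
    // The factory picks Validator_30 (GNA 3.0/3.1) or Validator_35 (GNA 3.5 and newer).
    std::shared_ptr<cnn2d::AbstractValidator> v = cnn2d::AbstractValidator::Create(DeviceVersion::GNA3_5);
    if (!v) {
        return false;  // nullptr for targets without a 2D CNN validator
    }
    // ValidatePooling2D(name, windowH, windowW, strideH, strideW, exception)
    return v->ValidatePooling2D("pool1", 3, 3, 1, 1, /*exception=*/false);
}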
@@ -705,15 +650,280 @@ bool AbstractValidator::ValidationSuccesful(const bool throwOnError,
     return error.empty();
 }
 
-bool UseOnly16BitConvolutionWeights(const DeviceVersion& compile_target) {
-    return compile_target == DeviceVersion::GNA1_0 || compile_target == DeviceVersion::GNA2_0 ||
-           compile_target == DeviceVersion::GNA3_0 || compile_target == DeviceVersion::GNA3_1;
-}
-
 }  // namespace cnn2d
 
+constexpr uint32_t Limitations::kBufferMaxSize;
+constexpr uint32_t Limitations::kConvMinFiltersNum;
+constexpr uint32_t Limitations::kConvMaxFiltersNum;
+constexpr uint32_t Limitations::kConvDilationHeight;
+constexpr uint32_t Limitations::kConvDilationWidth;
+constexpr uint32_t Limitations::kConvFiltersNumDivider;
+constexpr uint32_t Limitations::kConvFilterSizeDivider;
+constexpr uint32_t Limitations::kConvFilterMaxSize;
+constexpr uint32_t Limitations::kConvEachKernelByteAlignment;
+constexpr uint32_t Limitations::kInputByteAlignment;
+constexpr uint32_t Limitations::kNoOfInputsDivisor;
+constexpr uint32_t Limitations::kNoOfInputsLowPrecDivisor;
+constexpr uint32_t Limitations::kAffineMaxBatchSize;
+constexpr uint32_t Limitations::kMaxPoolMaxWindowSize;
+constexpr uint32_t Limitations::kCopyMaxGrouping;
+constexpr uint32_t Limitations::kTransposeMaxSize;
+constexpr uint32_t Limitations::kMaxLayersCountGNA1_0;
+constexpr uint32_t Limitations::kMaxLayersCountGNA2_0;
+constexpr uint32_t Limitations::kMaxLayersCountGNA3_X;
+constexpr uint32_t Limitations::kBytesPerSplitElement;
+constexpr uint32_t Limitations::kBytesPerCropElement;
+constexpr uint32_t Limitations::kMemoryPageSize;
+
+thread_local std::shared_ptr<Limitations> Limitations::k_instance{nullptr};
+
+Limitations::Limitations(const DeviceVersion& target) {
+    m_use_only_16bit_conv_weights = (target == DeviceVersion::GNA1_0 || target == DeviceVersion::GNA2_0 ||
+                                     target == DeviceVersion::GNA3_0 || target == DeviceVersion::GNA3_1);
+
+    m_mem_alignment = get_memory_alignment_bytes(target);
+    m_cnn_validator = cnn2d::AbstractValidator::Create(target);
+}
+
+void Limitations::init(const DeviceVersion& compile_target) {
+    k_instance = std::shared_ptr<Limitations>(new Limitations(compile_target));
+}
+
+bool Limitations::is_transpose_2d(const std::vector<size_t>& shape) {
+    return std::count_if(std::begin(shape), std::end(shape), [](size_t dim) {
+               return dim != 1;
+           }) == 2;
+}
+
+bool Limitations::is_transpose_supported(const std::vector<size_t>& shape) {
+    if (!is_transpose_2d(shape))
+        return false;
+    auto shape_no_1 = shape;
+    shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end());
+    size_t min, max;
+    std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]);
+    return min <= 8 && max % 8 == 0 && max >= 8 && max <= kTransposeMaxSize;
+}
+
+size_t Limitations::get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input) {
+    auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims()));
+    return total_size / kBufferMaxSize + 1;
+}
+
+size_t Limitations::get_memory_alignment_bytes(const DeviceVersion& target) const {
+    static const std::unordered_map<DeviceVersion, size_t> mem_alignment_map{{DeviceVersion::GNA1_0, 64},
+                                                                             {DeviceVersion::GNA2_0, 64},
+                                                                             {DeviceVersion::GNA3_0, 64},
+                                                                             {DeviceVersion::GNA3_1, 64},
+                                                                             {DeviceVersion::GNA3_5, 64},
+                                                                             {DeviceVersion::GNAEmbedded3_5, 64},
+                                                                             {DeviceVersion::GNA3_6, 16},
+                                                                             {DeviceVersion::GNA4_0, 16}};
+
+    return common::GetValueForKey<DeviceVersion, size_t>(target, mem_alignment_map);
+}
+
+bool SupportedElementTypes::IsParameterTypeSupported(ov::element::Type elem_type, bool is_exception_allowed) {
+    if (supported_parameter_types.count(elem_type) == 0) {
+        if (is_exception_allowed) {
+            THROW_GNA_EXCEPTION << "The plugin does not support input precision with " << elem_type.get_type_name()
+                                << " format. Supported precisions " << supported_parameter_types << "\n";
+        }
+        return false;
+    }
+    return true;
+}
+
+const std::set<ov::element::Type> SupportedElementTypes::supported_constant_types = {ov::element::i8,
+                                                                                     ov::element::u8,
+                                                                                     ov::element::i16,
+                                                                                     ov::element::u16,
+                                                                                     ov::element::i32,
+                                                                                     ov::element::f32,
+                                                                                     ov::element::f64};
+
+bool SupportedElementTypes::IsConstantTypeSupported(ov::element::Type elem_type, bool is_exception_allowed) {
+    if (supported_constant_types.count(elem_type) == 0) {
+        if (is_exception_allowed) {
+            THROW_GNA_EXCEPTION << "The plugin does not support constant precision with " << elem_type.get_type_name()
+                                << " format. Supported precisions " << supported_constant_types << "\n";
+        }
+        return false;
+    }
+    return true;
+}
+
+bool Limitations::is_transpose_supported(const std::shared_ptr<const ov::Node>& node) {
+    OPENVINO_ASSERT(node, "Transpose node is empty!");
+    const ov::Shape squeezed_shape = graph_utils::squeeze_shape(node->get_input_shape(0));
+    const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]);
+    const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]);
+
+    // GNA transpose limitations:
+    // - supports 2d transposes only
+    // - smaller dimension should be less or equal to 8
+    // - bigger dimension should be a multiple of Limitations::kNoOfInputsDivisor
+    if (squeezed_shape.size() == 2 && min_input_dim <= 8 && ALIGN(max_input_dim, kNoOfInputsDivisor) == max_input_dim) {
+        return true;
+    }
+    return false;
+}
+
+bool Limitations::is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
+                                    const InferenceEngine::Precision gna_precision,
+                                    bool is_exception_allowed) {
+    OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!");
+    size_t batch_size = conv_ie->input_value(0).get_shape()[0];
+    if (batch_size != 1) {
+        if (is_exception_allowed) {
+            THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() +
+                                       ", type: " + conv_ie->get_type_name() + ", and batch size(" +
+                                       std::to_string(batch_size) + ") != 1 not supported";
+        }
+        return false;
+    }
+    auto check_dilation = [&](size_t filter_dilation_height, size_t filter_stride_width) -> bool {
+        cnn2d::RangeLimit2D dilation_limit{{kConvDilationHeight, kConvDilationHeight, "dilation height"},
+                                           {kConvDilationWidth, kConvDilationWidth, "dilation width"}};
+        std::string error = dilation_limit.GetErrorOrEmpty(filter_dilation_height, filter_stride_width);
+        return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed,
+                                                             error,
+                                                             conv_ie->get_friendly_name(),
+                                                             conv_ie->get_type_name());
+    };
+    auto input_shape = conv_ie->input_value(0).get_shape();
+    auto filter_shape = conv_ie->input_value(1).get_shape();
+    if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) ||
+        (4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) {
+        pass::helper::ConvData conv_data;
+        pass::helper::GetConvData(conv_ie, conv_data);
+        if (gna_convolution_layer::isMappableFrom2DTo1D(conv_data.input_height,
+                                                        conv_data.input_width,
+                                                        conv_data.input_channel_count,
+                                                        conv_data.filter_height,
+                                                        conv_data.filter_width,
+                                                        conv_data.filter_stride_height,
+                                                        conv_data.filter_stride_width)) {
+            return check_dilation(conv_data.filter_dilation_height, conv_data.filter_dilation_width);
+        }
+
+        if (m_cnn_validator) {
+            return m_cnn_validator->ValidateCnn2D(conv_ie->get_friendly_name(),
+                                                  conv_data.input_height,
+                                                  conv_data.input_width,
+                                                  conv_data.input_channel_count,
+                                                  conv_data.filter_height,
+                                                  conv_data.filter_width,
+                                                  conv_data.filter_channel_count,
+                                                  conv_data.filter_stride_height,
+                                                  conv_data.filter_stride_width,
+                                                  conv_data.filter_dilation_height,
+                                                  conv_data.filter_dilation_width,
+                                                  OvGnaTypeIntFromBytes(gna_precision.size()),
+                                                  is_exception_allowed);
+        }
+    }
+    return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]);
+}
+
+bool Limitations::is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
+                                       bool is_exception_allowed) {
+    OPENVINO_ASSERT(max_pool, "MaxPool node is empty!");
+    auto kernels = max_pool->get_kernel();
+    if (2 == kernels.size() && kernels[0] > 1 && kernels[1] > 1) {
+        if (m_cnn_validator) {
+            auto strides = max_pool->get_strides();
+            return m_cnn_validator->ValidatePooling2D(max_pool->get_friendly_name(),
+                                                      kernels[0],
+                                                      kernels[1],
+                                                      strides[0],
+                                                      strides[1],
+                                                      is_exception_allowed);
+        }
+    }
+    return true;
+}
+
+bool Limitations::is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
+                                  bool is_exception_allowed) {
+    OPENVINO_ASSERT(fully_connected, "FullyConnected node is empty!");
+    size_t output_batch_size = fully_connected->get_output_shape(0)[0];
+    if (output_batch_size > 8) {
+        if (is_exception_allowed) {
+            THROW_GNA_EXCEPTION << "topology with layer: " + fully_connected->get_friendly_name() +
+                                       ", type: " + fully_connected->get_type_name() + ", and batch size(" +
+                                       std::to_string(output_batch_size) + ") not supported";
+        }
+        return false;
+    }
+    return true;
+}
+
+bool Limitations::is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed) {
+    OPENVINO_ASSERT(node, "Split node is empty!");
+    bool is_aligned = true;
+    for (size_t i = 0; i < node->get_output_size(); i++) {
+        is_aligned &= ov::intel_gna::graph_utils::is_aligned_split(node, i);
+    }
+    return is_aligned;
+}
+
+bool Limitations::is_op_supported(const std::shared_ptr<ov::Node>& node,
+                                  const InferenceEngine::Precision gna_precision,
+                                  bool is_exception_allowed) {
+    if (ov::op::util::is_parameter(node)) {
+        return SupportedElementTypes::IsParameterTypeSupported(node->get_element_type(), is_exception_allowed);
+    } else if (ov::op::util::is_constant(node)) {
+        return SupportedElementTypes::IsConstantTypeSupported(node->get_element_type(), is_exception_allowed);
+    } else if (auto conv_ie = std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
+        return is_conv_supported(conv_ie, gna_precision, is_exception_allowed);
+    } else if (auto fully_connected = std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node)) {
+        return is_fc_supported(fully_connected, is_exception_allowed);
+    } else if (ov::intel_gna::graph_utils::is_pooling(node)) {
+        return is_pooling_supported(std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node), is_exception_allowed);
+    } else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) ||
+               ov::intel_gna::graph_utils::is_eltwise_add(node) || ov::intel_gna::graph_utils::is_eltwise_mul(node) ||
+               ov::intel_gna::graph_utils::is_crop_affined(node) ||
+               ov::intel_gna::graph_utils::is_activation(node.get()) ||
+               ov::intel_gna::graph_utils::is_gna_precision_agnostic(
+                   node) ||  // check concat/split are aligned when transformations will be moved to ngraph
+               (std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(node) != nullptr) ||
+               (std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) != nullptr) ||
+               (std::dynamic_pointer_cast<ngraph::op::PowerIE>(node) != nullptr) ||
+               (std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr)) {
+        return true;
+    } else if (ov::intel_gna::graph_utils::is_gna_precision_agnostic(node)) {
+        if ((std::dynamic_pointer_cast<ngraph::opset9::Split>(node) != nullptr) ||
+            (std::dynamic_pointer_cast<ngraph::opset9::VariadicSplit>(node) != nullptr)) {
+            return is_split_supported(node, is_exception_allowed);
+        }
+        // TODO check concat are aligned when transformation will be moved to ngraph
+        return true;
+    }
+    return false;
+}
+
+void Limitations::check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
+                                          const InferenceEngine::Precision gna_precision) {
+    std::stringstream error;
+    // Walk through the transformed model
+    for (auto& op : model->get_ops()) {
+        if (!is_op_supported(op, gna_precision, true)) {
+            error << "The plugin does not support layer " << op->get_friendly_name() << " (type " << op->get_type_name()
+                  << ")!" << std::endl;
+        }
+    }
+    if (!error.str().empty()) {
+        THROW_GNA_EXCEPTION << error.str();
+    }
+}
+
+bool Limitations::use_only_16bit_convolution_weights() const {
+    return m_use_only_16bit_conv_weights;
+}
+
 IE_SUPPRESS_DEPRECATED_START
-static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) {
+bool Limitations::validate_concat_axis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) {
     LayerInfo info(layer);
     auto concat_layer = info.as<InferenceEngine::ConcatLayer*>();
     IE_ASSERT(concat_layer);
@@ -747,7 +957,8 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st
     // when all transformations are migrated to ngraph
     bool is_not_trivial_concat = false;
 
-    // Concatentaion of consts and input parameters only is supported, even if first dimentsion of input parameter >
+    // Concatentaion of consts and input parameters only is supported, even if first dimentsion of input
+    // parameter >
     // 1
     bool concat_all_const_or_inputs = false;
 
@@ -846,7 +1057,7 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st
     return true;
 }
 
-bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concat_layer) {
+bool Limitations::validate_conv_concat_axis(const InferenceEngine::ConcatLayer* concat_layer) {
     IE_ASSERT(concat_layer);
     auto dims_size = concat_layer->insData[0].lock()->getDims().size();
 
@@ -898,7 +1109,7 @@ bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concat_layer) {
     return true;
 }
 
-bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage) {
+bool Limitations::are_layers_supported(InferenceEngine::CNNNetwork& network, std::string& errMessage) {
     IE_SUPPRESS_DEPRECATED_START
     InferenceEngine::InputsDataMap inputs = network.getInputsInfo();
     std::unordered_set<InferenceEngine::CNNLayer*> allLayers;
@@ -909,7 +1120,7 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
         // If there are no inputs start search from an output
         startLayer = getCreatorLayer(outputs.begin()->second).lock();
     } else {
-        SupportedElementTypes::is_parameter_type_supported(
+        SupportedElementTypes::IsParameterTypeSupported(
             InferenceEngine::details::convertPrecision(inputs.begin()->second->getPrecision()),
             true);
 
@@ -944,7 +1155,7 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
                 check_result = false;
             }
         } else if (info.isConcat()) {
-            if (!ValidateConcatAxis(layer, errMessage)) {
+            if (!validate_concat_axis(layer, errMessage)) {
                 THROW_GNA_EXCEPTION << errMessage;
             }
         }
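The header hunks that follow add <memory> and <thread> for the shared_ptr members and the thread_local instance. Separately, the buffer-splitting arithmetic added earlier (Limitations::get_min_batch_to_fit_in_buffer) is easiest to see with numbers; the standalone function below is an illustrative restatement, not the plugin's API:

#include <cstdint>

// Restates Limitations::get_min_batch_to_fit_in_buffer: an input of
// total_size elements is split into total_size / kBufferMaxSize + 1 batches.
uint32_t min_batch_to_fit(uint32_t total_size) {
    const uint32_t kBufferMaxSize = 65528;  // Limitations::kBufferMaxSize
    return total_size / kBufferMaxSize + 1;
}
// Example: 200000 / 65528 = 3, so 200000 elements need 3 + 1 = 4 batches.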
@@ -9,6 +9,8 @@
 
 #include <cstdint>
 #include <ie_algorithm.hpp>
+#include <memory>
+#include <thread>
 
 #include "common/gna_target.hpp"
 #include "common/misc_utils.hpp"
@ -23,158 +25,19 @@ namespace ov {
|
|||||||
namespace intel_gna {
|
namespace intel_gna {
|
||||||
namespace limitations {
|
namespace limitations {
|
||||||
|
|
||||||
constexpr uint32_t bufferMaxSize = 65528;
|
|
||||||
|
|
||||||
constexpr uint32_t convMinFiltersNum = 4;
|
|
||||||
constexpr uint32_t convMaxFiltersNum = 65532;
|
|
||||||
constexpr uint32_t convDilationHeight = 1;
|
|
||||||
constexpr uint32_t convDilationWidth = 1;
|
|
||||||
constexpr uint32_t convFiltersNumDivider = 4;
|
|
||||||
constexpr uint32_t convFilterSizeDivider = 8;
|
|
||||||
constexpr uint32_t convFilterMaxSize = 768;
|
|
||||||
constexpr uint32_t convEachKernelByteAlignment = 16;
|
|
||||||
constexpr uint32_t inputByteAlignment = 64;
|
|
||||||
constexpr uint32_t noOfInputsDivisor = 8;
|
|
||||||
constexpr uint32_t noOfInputsLowPrecDivisor = 16;
|
|
||||||
|
|
||||||
constexpr uint32_t affineMaxBatchSize = 8;
|
|
||||||
|
|
||||||
constexpr uint32_t maxPoolMaxWindowSize = 6;
|
|
||||||
constexpr uint32_t copyMaxGrouping = 8;
|
|
||||||
constexpr uint32_t transposeMaxSize = 65528;
|
|
||||||
|
|
||||||
// TODO In the future there should be created class/struct representing all limitations for specific device versions.
|
|
||||||
constexpr uint32_t kMaxLayersCountGNA1_0 = 1023;
|
|
||||||
constexpr uint32_t kMaxLayersCountGNA2_0 = 4096;
|
|
||||||
constexpr uint32_t kMaxLayersCountGNA3_X = 8192;
|
|
||||||
|
|
||||||
// Currently split layer only supports 2 bytes in int16 and int8 mode.
|
|
||||||
// In fp32 mode this is not necessary but is useful for testing
|
|
||||||
constexpr uint32_t bytesPerSplitElement = 2;
|
|
||||||
|
|
||||||
// Currently crop layer only supports 2 bytes in int16 and int8 mode.
|
|
||||||
// In fp32 mode this is not necessary but is useful for testing
|
|
||||||
constexpr uint32_t bytesPerCropElement = 2;
|
|
||||||
|
|
||||||
constexpr uint32_t kMemoryPageSize = 4096;
|
|
||||||
|
|
||||||
inline bool isCropAffinedOffset(size_t numberOfElements) {
|
|
||||||
const auto cropOffset = numberOfElements * bytesPerCropElement;
|
|
||||||
return (ALIGN64(cropOffset) != cropOffset);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool IsTranspose2d(const std::vector<size_t>& shape) {
|
|
||||||
return std::count_if(std::begin(shape), std::end(shape), [](size_t dim) {
|
|
||||||
return dim != 1;
|
|
||||||
}) == 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool IsTransposeSupported(const std::vector<size_t>& shape) {
|
|
||||||
if (!IsTranspose2d(shape))
|
|
||||||
return false;
|
|
||||||
auto shape_no_1 = shape;
|
|
||||||
shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end());
|
|
||||||
size_t min, max;
|
|
||||||
std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]);
|
|
||||||
return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t getMemoryAlignmentBytes(target::DeviceVersion target);
|
|
||||||
|
|
||||||
-class SupportedElementTypes {
-public:
-    static bool is_parameter_type_supported(ov::element::Type type, bool is_exception_allowed = false);
-    static bool is_constant_type_supported(ov::element::Type type, bool is_exception_allowed = false);
-
-private:
-    static const std::set<ov::element::Type> supported_parameter_types;
-    static const std::set<ov::element::Type> supported_constant_types;
-};
-
-/**
- * @brief Validates if transpose is supported by GNA
- * @param node transpose
- * @return true if supported
- */
-bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node);
-
-/**
- * @brief Validates if legacy convolution is supported by GNA
- * @param conv_ie convolution
- * @param effective_compile_target GNA compile targets
- * @param gna_precision GNA inference precision
- * @param is_exception_allowed flag specifies whether exception is allowed
- * @return true if supported
- */
-bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
-                       const target::DeviceVersion& effective_compile_target,
-                       const InferenceEngine::Precision gna_precision,
-                       bool is_exception_allowed = false);
-/**
- * @brief Validates if max pooling is supported by GNA
- * @param max_pool max pooling
- * @param effective_compile_target GNA compile targets
- * @param supported_types list of supported types
- * @param is_exception_allowed flag specifies whether exception is allowed
- * @return true if precision is found in supported
- */
-bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
-                          const target::DeviceVersion& effective_compile_target,
-                          bool is_exception_allowed = false);
-
-/**
- * @brief Validates if fully connected is supported by GNA
- * @param fully_connected fully connected
- * @param is_exception_allowed flag specifies whether exception is allowed
- * @return true if supported
- */
-bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
-                     bool is_exception_allowed = false);
-
-/**
- * @brief Validates if split is supported by GNA
- * @param node split
- * @param is_exception_allowed flag specifies whether exception is allowed
- * @return true if supported
- */
-bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed = false);
-
-/**
- * @brief Validates if operation is supported by GNA
- * @param node operation
- * @param gna_compile_target GNA compile target
- * @param gna_precision GNA inference precision
- * @param is_exception_allowed flag specifies whether exception is allowed
- * @return true if supported
- */
-bool is_op_supported(const std::shared_ptr<ov::Node>& node,
-                     const target::DeviceVersion& effective_compile_target,
-                     const InferenceEngine::Precision gna_precision,
-                     bool is_exception_allowed = false);
-
-/**
- * @brief Check if all operations are supported by GNA
- * @param model ngraph model
- * @param gna_compile_target GNA compile target
- * @param gna_precision GNA inference precision
- */
-void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
-                             const target::DeviceVersion& effective_compile_target,
-                             const InferenceEngine::Precision gna_precision);

namespace cnn2d {

struct IsEqualToLimit {
    uint32_t compared_value;
    std::string what;
-    bool isValid(const uint32_t val) const;
+    bool IsValid(const uint32_t val) const;
    std::string GetErrorOrEmpty(const uint32_t val) const;
};

struct IsLessThanLimit {
    uint32_t compared_value;
    std::string what;
-    bool isValid(const uint32_t val) const;
+    bool IsValid(const uint32_t val) const;
    std::string GetErrorOrEmpty(const uint32_t val) const;
};

@ -182,28 +45,28 @@ struct RangeLimit {
    uint32_t min;
    uint32_t max;
    std::string what;
-    bool isValid(const uint32_t val) const;
+    bool IsValid(const uint32_t val) const;
    std::string GetErrorOrEmpty(const uint32_t val) const;
};

struct RangeLimit2D {
    RangeLimit hLimit;
    RangeLimit wLimit;
-    bool isValid(const uint32_t h, const uint32_t w) const;
+    bool IsValid(const uint32_t h, const uint32_t w) const;
    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w) const;
};

struct RangeMultipleLimit : public RangeLimit {
    uint32_t multiplier;
    RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn);
-    bool isValid(const uint32_t val) const;
+    bool IsValid(const uint32_t val) const;
    std::string GetErrorOrEmpty(const uint32_t val) const;
};

struct RectLimit {
    uint32_t maxVectorHeight;
    uint32_t maxVectorWidth;
-    bool isValid(const uint32_t h, const uint32_t w) const;
+    bool IsValid(const uint32_t h, const uint32_t w) const;
    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
};

@ -211,14 +74,14 @@ struct VectorOrSquareLimit {
    uint32_t maxSquare;
    uint32_t maxVectorHeight;
    uint32_t maxVectorWidth;
-    bool isValid(const uint32_t h, const uint32_t w) const;
+    bool IsValid(const uint32_t h, const uint32_t w) const;
    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
};

struct RectLimitByChannels {
    std::vector<std::pair<uint32_t, RectLimit>> limitPerChannel;
    RectLimit GetByChannels(const uint32_t channels) const;
-    bool isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
+    bool IsValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, const uint32_t channels, std::string what) const;
};

@ -226,7 +89,7 @@ struct RectLimitByChannelsAndPrecision {
    RectLimitByChannels limit_for_int8;
    RectLimitByChannels limit_for_int16;
    RectLimitByChannels GetByPrecision(const OvGnaType precision) const;
-    bool isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
+    bool IsValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
    std::string GetErrorOrEmpty(const uint32_t h,
                                const uint32_t w,
                                const OvGnaType precision,
@ -291,177 +154,168 @@ public:
                       OvGnaType inPrecision,
                       bool exception = true) const = 0;

-    static std::unique_ptr<AbstractValidator> Create(const target::DeviceVersion& target);
+    static std::shared_ptr<AbstractValidator> Create(const target::DeviceVersion& target);
};

-class Validator_30 : public AbstractValidator {
-    static const RangeLimit2D kInputHWLimit;
-    static const RangeMultipleLimit kInputChannelsNumberLimit;
-
-    static const RangeMultipleLimit kKernelNumberLimit;
-    static const RectLimitByChannelsAndPrecision kKernelLimit;
-    static const RangeLimit2D kDilationLimit;
-
-    static const VectorOrSquareLimit kPoolingWindowLimit;
-
-public:
-    Validator_30() = default;
-
-    bool ValidateCnn2D(const std::string& name,
-                       const uint32_t inHeight,
-                       const uint32_t inWidth,
-                       const uint32_t inChannels,
-                       const uint32_t kH,
-                       const uint32_t kW,
-                       const uint32_t kN,
-                       const uint32_t strideH,
-                       const uint32_t strideW,
-                       const uint32_t dilationH,
-                       const uint32_t dilationW,
-                       OvGnaType inPrecision,
-                       bool exception = true) const override;
-
-    bool ValidatePooling2D(const std::string& name,
-                           const uint32_t windowH,
-                           const uint32_t windowW,
-                           const uint32_t strideH,
-                           const uint32_t strideW,
-                           bool exception = true) const override;
-
-    bool ValidateInputPadding(const std::string& name,
-                              const uint32_t pad_h_begin,
-                              const uint32_t pad_h_end,
-                              const uint32_t pad_w_begin,
-                              const uint32_t pad_w_end,
-                              const uint32_t kernel_h,
-                              const uint32_t kernel_w,
-                              const bool throwOnError = true) const override;
-
-    bool ShouldUseOnlyConv2DGnaIface() const override;
-
-    bool ValidateCnn1D(const std::string& name,
-                       const uint32_t inHeight,
-                       const uint32_t inWidth,
-                       const uint32_t inChannels,
-                       const uint32_t kH,
-                       const uint32_t kW,
-                       const uint32_t kN,
-                       const uint32_t strideH,
-                       const uint32_t strideW,
-                       const uint32_t dilationH,
-                       const uint32_t dilationW,
-                       OvGnaType inPrecision,
-                       bool exception = true) const override;
-};
-
-class Validator_35 : public AbstractValidator {
-    struct CnnLimits {
-        const RangeLimit2D kInputHWLimit;
-        const RangeLimit kInputChannelsNumberLimit1B;
-        const RangeLimit kInputChannelsNumberLimit2B;
-        const RangeLimit kKernelNumberLimit;
-        const RangeLimit2D kKerneHWlLimit1B;
-        const RangeLimit2D kKerneHWlLimit2B;
-        const RangeLimit2D kStrideHWLimit1B;
-        const RangeLimit2D kStrideHWLimit2B;
-        const RangeLimit2D kDilationLimit;
-        const RangeLimit2D kPoolingWindowHWLimit;
-        const RangeLimit2D kPoolingStrideHWLimit;
-    };
-
-    static const CnnLimits kCnn2DLimits;
-    static const CnnLimits kCnn1DLimits;
-
-    std::string ValidateCnn(const CnnLimits& limits,
-                            const std::string& name,
-                            const uint32_t inHeight,
-                            const uint32_t inWidth,
-                            const uint32_t inChannels,
-                            const uint32_t kH,
-                            const uint32_t kW,
-                            const uint32_t kN,
-                            const uint32_t strideH,
-                            const uint32_t strideW,
-                            const uint32_t dilationH,
-                            const uint32_t dilationW,
-                            OvGnaType inPrecision) const;
-
-    std::string ValidatePooling(const CnnLimits& limits,
-                                const std::string& name,
-                                const uint32_t windowH,
-                                const uint32_t windowW,
-                                const uint32_t strideH,
-                                const uint32_t strideW) const;
-
-public:
-    Validator_35() = default;
-
-    bool ValidateCnn2D(const std::string& name,
-                       const uint32_t inHeight,
-                       const uint32_t inWidth,
-                       const uint32_t inChannels,
-                       const uint32_t kH,
-                       const uint32_t kW,
-                       const uint32_t kN,
-                       const uint32_t strideH,
-                       const uint32_t strideW,
-                       const uint32_t dilationH,
-                       const uint32_t dilationW,
-                       OvGnaType inPrecision,
-                       bool exception = true) const override;
-
-    bool ValidatePooling2D(const std::string& name,
-                           const uint32_t windowH,
-                           const uint32_t windowW,
-                           const uint32_t strideH,
-                           const uint32_t strideW,
-                           bool exception = true) const override;
-
-    bool ValidateInputPadding(const std::string& name,
-                              const uint32_t pad_h_begin,
-                              const uint32_t pad_h_end,
-                              const uint32_t pad_w_begin,
-                              const uint32_t pad_w_end,
-                              const uint32_t kernel_h,
-                              const uint32_t kernel_w,
-                              const bool throwOnError = true) const override;
-
-    bool ShouldUseOnlyConv2DGnaIface() const override;
-
-    bool ValidateCnn1D(const std::string& name,
-                       const uint32_t inHeight,
-                       const uint32_t inWidth,
-                       const uint32_t inChannels,
-                       const uint32_t kH,
-                       const uint32_t kW,
-                       const uint32_t kN,
-                       const uint32_t strideH,
-                       const uint32_t strideW,
-                       const uint32_t dilationH,
-                       const uint32_t dilationW,
-                       OvGnaType inPrecision,
-                       bool exception = true) const override;
-};
-
-bool UseOnly16BitConvolutionWeights(const target::DeviceVersion& compile_target);
-
} // namespace cnn2d

-bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
-
-inline size_t GetMinBatchToFitInBuffer(InferenceEngine::DataPtr input) {
-    auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims()));
-    return total_size / bufferMaxSize + 1;
-}
-
-/**
- * @brief Validates if concat layer axis is supported by GNA
- * @param layer concat layer
- * @return true if concat layer axis is valid
- */
-IE_SUPPRESS_DEPRECATED_START
-bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concatLayer);
-IE_SUPPRESS_DEPRECATED_END
+class Limitations {
+public:
+    /**
+     * @brief Create instance of the Limitations class. Due to Limitations being a singleton, multiple instances of the
+     * plugin with different compilation targets cannot exist at the same time
+     * @param compile_target GNA compile target
+     */
+    static void init(const target::DeviceVersion& compile_target);
+
+    /**
+     * @brief Returns the instance of Limitations object. Requires an Init call before the first usage
+     */
+    static inline std::shared_ptr<Limitations> get_instance();
+
+    static bool is_transpose_2d(const std::vector<size_t>& shape);
+    static bool is_transpose_supported(const std::vector<size_t>& shape);
+    static size_t get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input);
+
+    /**
+     * @brief Validates if concat layer axis is supported by GNA
+     * @param layer concat layer
+     * @return true if concat layer axis is valid
+     */
+    IE_SUPPRESS_DEPRECATED_START
+    static bool validate_conv_concat_axis(const InferenceEngine::ConcatLayer* concatLayer);
+    static bool are_layers_supported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
+    IE_SUPPRESS_DEPRECATED_END
+
+    /**
+     * @brief Validates if fully connected is supported by GNA
+     * @param fully_connected fully connected
+     * @param is_exception_allowed flag specifies whether exception is allowed
+     * @return true if supported
+     */
+    static bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
+                                bool is_exception_allowed = false);
+    /**
+     * @brief Validates if split is supported by GNA
+     * @param node split
+     * @param is_exception_allowed flag specifies whether exception is allowed
+     * @return true if supported
+     */
+    static bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed = false);
+    /**
+     * @brief Validates if transpose is supported by GNA
+     * @param node transpose
+     * @return true if supported
+     */
+    static bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node);
+    /**
+     * @brief Validates if legacy convolution is supported by GNA
+     * @param conv_ie convolution
+     * @param gna_precision GNA inference precision
+     * @param is_exception_allowed flag specifies whether exception is allowed
+     * @return true if supported
+     */
+    bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
+                           const InferenceEngine::Precision gna_precision,
+                           bool is_exception_allowed = false);
+    /**
+     * @brief Validates if max pooling is supported by GNA
+     * @param max_pool max pooling
+     * @param is_exception_allowed flag specifies whether exception is allowed
+     * @return true if precision is found in supported
+     */
+    bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
+                              bool is_exception_allowed = false);
+
+    /**
+     * @brief Validates if operation is supported by GNA
+     * @param node operation
+     * @param gna_precision GNA inference precision
+     * @param is_exception_allowed flag specifies whether exception is allowed
+     * @return true if supported
+     */
+    bool is_op_supported(const std::shared_ptr<ov::Node>& node,
+                         const InferenceEngine::Precision gna_precision,
+                         bool is_exception_allowed = false);
+
+    /**
+     * @brief Check if all operations are supported by GNA
+     * @param model ngraph model
+     * @param gna_precision GNA inference precision
+     */
+    void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
+                                 const InferenceEngine::Precision gna_precision);
+
+    bool use_only_16bit_convolution_weights() const;
+    bool is_crop_affined_offset(size_t numberOfElements) const;
+    size_t get_memory_alignment() const;
+    std::shared_ptr<cnn2d::AbstractValidator> get_cnn_validator() const;
+
+    constexpr static uint32_t kBufferMaxSize = 65528;
+    constexpr static uint32_t kConvMinFiltersNum = 4;
+    constexpr static uint32_t kConvMaxFiltersNum = 65532;
+    constexpr static uint32_t kConvDilationHeight = 1;
+    constexpr static uint32_t kConvDilationWidth = 1;
+    constexpr static uint32_t kConvFiltersNumDivider = 4;
+    constexpr static uint32_t kConvFilterSizeDivider = 8;
+    constexpr static uint32_t kConvFilterMaxSize = 768;
+    constexpr static uint32_t kConvEachKernelByteAlignment = 16;
+    constexpr static uint32_t kInputByteAlignment = 64;
+    constexpr static uint32_t kNoOfInputsDivisor = 8;
+    constexpr static uint32_t kNoOfInputsLowPrecDivisor = 16;
+    constexpr static uint32_t kAffineMaxBatchSize = 8;
+    constexpr static uint32_t kMaxPoolMaxWindowSize = 6;
+    constexpr static uint32_t kCopyMaxGrouping = 8;
+    constexpr static uint32_t kTransposeMaxSize = 65528;
+    constexpr static uint32_t kMaxLayersCountGNA1_0 = 1023;
+    constexpr static uint32_t kMaxLayersCountGNA2_0 = 4096;
+    constexpr static uint32_t kMaxLayersCountGNA3_X = 8192;
+
+    // Currently split layer only supports 2 bytes in int16 and int8 mode.
+    // In fp32 mode this is not necessary but is useful for testing
+    constexpr static uint32_t kBytesPerSplitElement = 2;
+
+    // Currently crop layer only supports 2 bytes in int16 and int8 mode.
+    // In fp32 mode this is not necessary but is useful for testing
+    constexpr static uint32_t kBytesPerCropElement = 2;
+    constexpr static uint32_t kMemoryPageSize = 4096;
+
+private:
+    Limitations(const target::DeviceVersion& target);
+    Limitations(const Limitations&) = delete;
+    Limitations& operator=(const Limitations&) = delete;
+
+    size_t get_memory_alignment_bytes(const target::DeviceVersion& target) const;
+
+    IE_SUPPRESS_DEPRECATED_START
+    static bool validate_concat_axis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage);
+    IE_SUPPRESS_DEPRECATED_END
+
+    bool m_use_only_16bit_conv_weights = false;
+    size_t m_mem_alignment = 0;
+    std::shared_ptr<cnn2d::AbstractValidator> m_cnn_validator;
+    static thread_local std::shared_ptr<Limitations> k_instance;
+};
+
+inline std::shared_ptr<Limitations> Limitations::get_instance() {
+    if (!k_instance) {
+        THROW_GNA_EXCEPTION << "Limitations instance is not initialized.\n";
+    }
+    return k_instance;
+}
+
+inline bool Limitations::is_crop_affined_offset(size_t numberOfElements) const {
+    const auto cropOffset = numberOfElements * kBytesPerCropElement;
+    return (ALIGN64(cropOffset) != cropOffset);
+}
+
+inline size_t Limitations::get_memory_alignment() const {
+    return m_mem_alignment;
+}
+
+inline std::shared_ptr<cnn2d::AbstractValidator> Limitations::get_cnn_validator() const {
+    return m_cnn_validator;
+}
+
} // namespace limitations
} // namespace intel_gna
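A minimal usage sketch of the new singleton API (the call site and the chosen device version are hypothetical; the init()/get_instance() contract is exactly as declared above):

// Hypothetical call site; init() must run before anything queries the limits.
using namespace ov::intel_gna::limitations;

Limitations::init(target::DeviceVersion::GNA3_5);    // one instance per compile target
auto limits = Limitations::get_instance();           // throws if init() was never called
size_t alignment = limits->get_memory_alignment();   // target-dependent, cached by the constructor
bool affined = limits->is_crop_affined_offset(100);  // 100 * kBytesPerCropElement = 200; ALIGN64(200) == 256 != 200 -> true

Since k_instance is declared thread_local, each thread that calls get_instance() needs init() to have run on that same thread.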
@ -84,7 +84,7 @@ inline bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
        std::dynamic_pointer_cast<ngraph::opset8::VariadicSplit>(input_op)) {
        for (size_t index = 0; index < input_op_out_index; index++) {
            size_t outputSize = ngraph::shape_size(input_op->get_output_shape(index));
-            offset += outputSize * limitations::bytesPerSplitElement;
+            offset += outputSize * limitations::Limitations::kBytesPerSplitElement;
        }
    }
    return (offset == ALIGN64(offset));
@ -93,7 +93,7 @@ inline bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
inline bool is_crop_affined(std::shared_ptr<ngraph::Node> node) {
    auto crop = std::dynamic_pointer_cast<ngraph::op::CropIE>(node);
    if (crop != nullptr && !crop->offset.empty()) {
-        return limitations::isCropAffinedOffset(crop->offset.back());
+        return limitations::Limitations::get_instance()->is_crop_affined_offset(crop->offset.back());
    }
    return false;
}
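The 64-byte rule these helpers encode is easy to check by hand; the element counts below are illustrative only:

// Hypothetical split whose first output holds 100 elements, 2 bytes each (kBytesPerSplitElement):
//   offset = 100 * 2 = 200 bytes; ALIGN64(200) = 256 != 200 -> not 64-byte aligned
// With 96 elements instead:
//   offset = 96 * 2 = 192 bytes;  ALIGN64(192) = 192        -> aligned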
@ -11,6 +11,7 @@

namespace ov {
namespace intel_gna {
+using namespace limitations;
namespace frontend {

template <class T>
@ -352,7 +353,7 @@ InferenceEngine::Precision GetWeightsPrecision(const LayerInfo& layer_info,
                                               const QuantizedLayerParams& quant_layer_params,
                                               const Config& gna_config) {
    if (((layer_info.isConvolution() || layer_info.isConvolutionFilter()) &&
-         limitations::cnn2d::UseOnly16BitConvolutionWeights(gna_config.target->get_effective_compile_target())) ||
+         Limitations::get_instance()->use_only_16bit_convolution_weights()) ||
        layer_info.isScaleShift()) {
        return InferenceEngine::Precision::I16;
    }
@ -38,8 +38,7 @@ GNADeviceHelper::GNADeviceHelper(std::shared_ptr<Target> targetIn, bool isPerfor
    : target(targetIn),
      nGnaDeviceIndex{selectGnaDevice()},
      useDeviceEmbeddedExport(deviceEmbedded),
-      isPerformanceMeasuring(isPerformanceMeasuring),
-      m_mem_alignment(limitations::getMemoryAlignmentBytes(targetIn->get_effective_compile_target())) {
+      isPerformanceMeasuring(isPerformanceMeasuring) {
    per_request_diagnostics = log::get_log_level() >= ov::log::Level::TRACE;
    per_model_diagnostics = log::get_log_level() >= ov::log::Level::DEBUG;
    open();
@ -573,7 +572,7 @@ uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
    switch (target->get_effective_execution_target()) {
    case DeviceVersion::GNA1_0:
    case DeviceVersion::GNA2_0:
-        return kMaxLayersCountGNA2_0;
+        return Limitations::kMaxLayersCountGNA2_0;
    case DeviceVersion::GNA3_0:
    case DeviceVersion::GNA3_1:
    case DeviceVersion::GNA3_5:
@ -581,7 +580,7 @@ uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
    case DeviceVersion::GNA3_6:
    case DeviceVersion::GNA4_0:
    default:
-        return kMaxLayersCountGNA3_X;
+        return Limitations::kMaxLayersCountGNA3_X;
    }
}
}  // namespace intel_gna
@ -67,7 +67,6 @@ class GNADeviceHelper : public GNADevice {
    uint64_t debugLogIndexRequestWait = 0;
    static constexpr const char* kDumpExt = ".bin";
    static constexpr const char* kDumpDelimiter = ".";
-    const size_t m_mem_alignment;

public:
    explicit GNADeviceHelper(std::shared_ptr<target::Target> target = std::make_shared<target::Target>(),
@ -128,10 +127,6 @@ public:
        return allAllocations;
    }

-    size_t getMemAlignment() const {
-        return m_mem_alignment;
-    }
-
    /**
     * @see GNADevice::createModel()
     */
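Code that previously asked the device helper for the alignment would now query the singleton instead; a hypothetical before/after:

// Before: size_t a = device_helper.getMemAlignment();   (getter removed above)
// After (hypothetical call site):
size_t a = limitations::Limitations::get_instance()->get_memory_alignment();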
@ -49,6 +49,7 @@ namespace intel_gna {
using namespace frontend;
using namespace common;
using namespace memory;
+using namespace limitations;

static bool CheckIFLastComponentIsPrecededByConv2D(const backend::DnnComponents::storage_type& components,
                                                   bool verify_with_pooling = true) {
@ -81,20 +82,22 @@ static uint32_t count_conv2D_input_width_for_expected_output_width(uint32_t expe
    return (expected_ouput_width - 1) * stride_width - 2 * padding_width + kernel_width;
};

-GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config) : gna_config(gna_config) {}
+GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config,
+                                   std::shared_ptr<backend::AMIntelDNN> dnn_ptr,
+                                   std::shared_ptr<GnaInputs> inputs_ptr,
+                                   std::shared_ptr<limitations::cnn2d::AbstractValidator> cnn2d_validator_ptr,
+                                   std::shared_ptr<gna_memory_type> gna_mem_ptr)
+    : gna_config(gna_config) {
+    dnn = std::move(dnn_ptr);
+    inputs_ptr_ = std::move(inputs_ptr);
+    m_cnn2d_validator = std::move(cnn2d_validator_ptr);
+    gnamem = std::move(gna_mem_ptr);
+}

void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr) {
    this->gnamem = std::move(gnaMemPtr);
}

-void GNAGraphCompiler::setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr) {
-    this->dnn = std::move(dnnPtr);
-}
-
-void GNAGraphCompiler::setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr) {
-    this->inputs_ptr_ = std::move(inputsPtr);
-}
-
intel_dnn_component_t* GNAGraphCompiler::find_first_unused_input(InferenceEngine::CNNLayerPtr current) {
    if (current->insData.empty())
        return nullptr;
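With the setDNNPtr/setInputsPtr setters gone, a caller now assembles the compiler's collaborators up front; a hypothetical wiring (the variable names are invented, and the validator comes from the Limitations singleton):

// Hypothetical construction site:
auto validator = limitations::Limitations::get_instance()->get_cnn_validator();
GNAGraphCompiler compiler(config,       // const Config&
                          dnn,          // std::shared_ptr<backend::AMIntelDNN>
                          inputs,       // std::shared_ptr<GnaInputs>
                          validator,    // std::shared_ptr<limitations::cnn2d::AbstractValidator>
                          gna_memory);  // std::shared_ptr<gna_memory_type>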
@ -228,13 +231,8 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
    split_connection.emplace(id, layerInfoItem);
}

-void GNAGraphCompiler::SetValidatorTarget(const target::DeviceVersion& target) {
-    auto temp = limitations::cnn2d::AbstractValidator::Create(target);
-    cnn2dValidator.reset(temp.release());
-}
-
bool GNAGraphCompiler::ShouldUseOnlyConv2DGnaIface() const {
-    return cnn2dValidator && cnn2dValidator->ShouldUseOnlyConv2DGnaIface();
+    return m_cnn2d_validator && m_cnn2d_validator->ShouldUseOnlyConv2DGnaIface();
}

void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
@ -249,8 +247,8 @@ void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
                                     const uint32_t dilH,
                                     const uint32_t dilW,
                                     OvGnaType inPrecision) const {
-    if (cnn2dValidator) {
-        if (cnn2dValidator->ValidateCnn1D(name,
+    if (m_cnn2d_validator) {
+        if (m_cnn2d_validator->ValidateCnn1D(name,
                                          inHeight,
                                          inWidth,
                                          inChannels,
@ -265,7 +263,7 @@ void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
                                          false)) {
            return;
        }
-        cnn2dValidator
+        m_cnn2d_validator
            ->ValidateCnn2D(name, inHeight, inWidth, inChannels, kH, kW, kN, strideH, strideW, dilH, dilW, inPrecision);
    } else {
        THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << name;
@ -277,8 +275,8 @@ void GNAGraphCompiler::ValidatePooling2D(const std::string& name,
                                         const uint32_t windowW,
                                         const uint32_t strideH,
                                         const uint32_t strideW) const {
-    if (cnn2dValidator) {
-        cnn2dValidator->ValidatePooling2D(name, windowH, windowW, strideH, strideW);
+    if (m_cnn2d_validator) {
+        m_cnn2d_validator->ValidatePooling2D(name, windowH, windowW, strideH, strideW);
    } else {
        THROW_GNA_EXCEPTION << "No Pooling2D validator found for layer " << name;
    }
@ -684,11 +682,11 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
    // TODO add function
    // printConvolution2DLayer(convolution);

-    if (!cnn2dValidator) {
+    if (!m_cnn2d_validator) {
        THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << convolution.name;
    }

-    cnn2dValidator->ValidateInputPadding(convolution.name,
+    m_cnn2d_validator->ValidateInputPadding(convolution.name,
                                         convolution._padding_y,
                                         convolution._pads_end_y,
                                         convolution._padding_x,
@ -713,7 +711,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
    // have to pad input to let last kernel meets it's corresponding input
    const auto num_inputs = in_batch * effective_input_width * in_height * in_channels;

-    uint32_t num_input_padding = ALIGN(num_inputs, limitations::noOfInputsDivisor) - num_inputs;
+    uint32_t num_input_padding = ALIGN(num_inputs, Limitations::kNoOfInputsDivisor) - num_inputs;

    const uint32_t filter_n = convolution._out_depth;

@ -813,7 +811,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP

    // Kernel is extended only for 1D case which allows to add 0-s at the end of the kernel.
    const auto kernel_pad =
-        ALIGN(effective_single_kernel_size, limitations::convEachKernelByteAlignment) - effective_single_kernel_size;
+        ALIGN(effective_single_kernel_size, Limitations::kConvEachKernelByteAlignment) - effective_single_kernel_size;
    for (uint32_t k = 0; k < convolution._out_depth; k++) {
        uint8_t* ptr_filt_current = convolution._weights->cbuffer().as<uint8_t*>() + k * single_kernel_size;
        auto transposed_part = transposeMatrix(ptr_filt_current, convolution_precision, in_channels, kernelHW);
@ -846,14 +844,15 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
    auto input = layer->insData[0].lock();

    auto outputs = *layer->outData.begin();
-    auto reshaped_dims = Get2DReshapedData(input, limitations::GetMinBatchToFitInBuffer(input), 8)->getDims();
-    const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor
-                                                                               : limitations::noOfInputsDivisor;
+    auto reshaped_dims = Get2DReshapedData(input, Limitations::get_min_batch_to_fit_in_buffer(input), 8)->getDims();
+    const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
+                                               ? Limitations::kNoOfInputsLowPrecDivisor
+                                               : Limitations::kNoOfInputsDivisor;
    uint32_t num_rows_in = reshaped_dims[1];
    uint32_t num_columns_in = reshaped_dims[0];
    uint32_t num_rows_out = num_rows_in;
    uint32_t num_columns_out = num_columns_in;
-    uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
+    uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;

    size_t num_data_bytes_out = num_columns_out * (num_rows_out + num_padding) * outputs->getPrecision().size();
    size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * input->getPrecision().size();
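The padding computed throughout these primitives is plain round-up-to-divisor arithmetic; a hypothetical worked example:

// num_rows_in = 18, default precision -> divisor = kNoOfInputsDivisor = 8:
//   num_padding = ALIGN(18, 8) - 18 = 24 - 18 = 6
// num_rows_in = 18, low precision     -> divisor = kNoOfInputsLowPrecDivisor = 16:
//   num_padding = ALIGN(18, 16) - 18 = 32 - 18 = 14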
@ -1097,7 +1096,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
    auto inputs = layer->insData.begin()->lock();
    auto outputs = *layer->outData.begin();

-    auto reshaped_dims = Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims();
+    auto reshaped_dims = Get2DReshapedData(inputs, Limitations::get_min_batch_to_fit_in_buffer(inputs), 8)->getDims();
    uint32_t num_rows_in = reshaped_dims[1];
    uint32_t num_columns_in = reshaped_dims[0];
    uint32_t num_rows_out = num_rows_in;
@ -1159,7 +1158,7 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
    }

    // Concat axis validation
-    if (!limitations::ValidateConvConcatAxis(concatLayer)) {
+    if (!Limitations::validate_conv_concat_axis(concatLayer)) {
        std::ostringstream in_dims_oss;
        auto in_dims = concatLayer->insData[0].lock()->getDims();
        std::copy(in_dims.begin(), in_dims.end(), std::ostream_iterator<size_t>(in_dims_oss, ","));
@ -1270,10 +1269,10 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
    uint32_t num_columns_in = 1;

    uint32_t num_rows_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()));
-    const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision
-                                           ? limitations::noOfInputsLowPrecDivisor
-                                           : limitations::noOfInputsDivisor;
-    uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
+    const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
+                                               ? Limitations::kNoOfInputsLowPrecDivisor
+                                               : Limitations::kNoOfInputsDivisor;
+    uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;

    void* ptr_inputs = nullptr;
    void* ptr_outputs = nullptr;
@ -1303,7 +1302,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
        InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * 4;

    size_t num_data_bytes_in =
-        num_columns_in * ALIGN(num_rows_in, noOfInputsDivisor) * inputs->getPrecision().size();
+        num_columns_in * ALIGN(num_rows_in, num_of_inputs_divisor) * inputs->getPrecision().size();

    connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
    connectOutput(layer, ptr_outputs, num_data_bytes_out);
@ -1326,8 +1325,9 @@ void GNAGraphCompiler::SlicePrimitive(InferenceEngine::CNNLayerPtr layer) {
void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
    auto& eltwise = dynamic_cast<EltwiseLayer&>(*layer.get());
    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
-    const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor
-                                                                               : limitations::noOfInputsDivisor;
+    const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
+                                               ? Limitations::kNoOfInputsLowPrecDivisor
+                                               : Limitations::kNoOfInputsDivisor;

    // for eltwise sum/sub in 16-bit precision one input should be 4 bytes and one 2 bytes - detecting that below
    // the names of variables are left for clarity although not always reflecting the real precision/size
@ -1409,7 +1409,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
    uint32_t num_columns_in = 1;
    uint32_t num_rows_out = num_rows_in;
    uint32_t num_columns_out = num_columns_in;
-    uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
+    uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;

    void* ptr_inputs = nullptr;
    void* ptr_outputs = nullptr;
@ -1518,7 +1518,6 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
    auto outputs = *layer->outData.begin();
    auto input1_precision = quantized ? Precision(Precision::I16) : input_1->getPrecision();
    auto input2_precision = quantized ? Precision(Precision::I16) : input_2->getPrecision();
-    uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;

    auto in_dims = input_1->getDims();
    auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
@ -1527,7 +1526,7 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
    const auto out_dims = outputs->getDims();
    const auto out_dims_size = ngraph::shape_size(out_dims);
    uint32_t num_rows_out = InferenceEngine::GetDimFromBack(out_dims, 1);
-    uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
+    uint32_t num_padding = ALIGN(num_rows_in, Limitations::kNoOfInputsDivisor) - num_rows_in;

    // Gemm gets two inputs
    void* ptr_input_1 = nullptr;  // the first input
@ -1578,7 +1577,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
    auto outputs = *layer->outData.begin();
    const auto out_dims = outputs->getDims();
    Precision inputPrecision;
-    uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;
+    uint32_t num_of_inputs_divisor = Limitations::kNoOfInputsDivisor;

    if (!quantized) {
        inputPrecision = inputs->getPrecision();
@ -1586,11 +1585,11 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
        inputPrecision = Precision(Precision::I16);
    } else {
        inputPrecision = Precision(Precision::I8);
-        noOfInputsDivisor = limitations::noOfInputsLowPrecDivisor;
+        num_of_inputs_divisor = Limitations::kNoOfInputsLowPrecDivisor;
    }

    auto input_data = HasTo2DReshapeData(layer)
-                          ? Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8)
+                          ? Get2DReshapedData(inputs, Limitations::get_min_batch_to_fit_in_buffer(inputs), 8)
                          : inputs;
    auto in_dims = input_data->getDims();
    auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
@ -1598,7 +1597,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
    uint32_t num_columns_in = batch_size;
    uint32_t num_rows_out = isDiag ? num_rows_in : InferenceEngine::GetDimFromBack(out_dims, 1);
    uint32_t num_columns_out = num_columns_in;
-    uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
+    uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
    uint32_t num_padding_out = isDiag ? num_padding : 0;

    void* ptr_inputs = nullptr;
@ -1803,12 +1802,13 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
    auto outputs = *layer->outData.begin();
    auto inputs = layer->insData.begin()->lock();

-    const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor
-                                                                               : limitations::noOfInputsDivisor;
+    const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
+                                               ? Limitations::kNoOfInputsLowPrecDivisor
+                                               : Limitations::kNoOfInputsDivisor;
    uint32_t num_columns_in = GetDimFromBack(inputs->getDims(), 2);
    uint32_t num_rows_out = GetDimFromBack(outputs->getDims(), 1);
    uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
-    uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in;
+    uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;

    auto numRowsPadded = filterLayer->GetParamAsInt("num_rows_padded");
    // number of rows we handled by inserting copy layer
@ -1877,7 +1877,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
                  false);

    size_t num_data_bytes_out = num_rows_out * num_columns_in * outputs->getPrecision().size();
-    size_t num_data_bytes_in = num_columns_in * ALIGN(num_rows_in, noOfInputsDivisor) * inputs->getPrecision().size();
+    size_t num_data_bytes_in =
+        num_columns_in * ALIGN(num_rows_in, num_of_inputs_divisor) * inputs->getPrecision().size();

    connectInput(layer, ptr_inputs, num_data_bytes_in, num_rows_copied * inputs->getPrecision().size(), 0);
    connectOutput(layer, ptr_outputs, num_data_bytes_out);
@ -1940,8 +1941,8 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
    auto outputs = *layer->outData.begin();
    auto inputs = layer->insData.begin()->lock();

-    const auto noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor
-                                                                           : limitations::noOfInputsDivisor;
+    const auto num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision ? Limitations::kNoOfInputsLowPrecDivisor
+                                                                               : Limitations::kNoOfInputsDivisor;
    const uint32_t orginalInputSize =
        InferenceEngine::details::product(std::next(inputs->getDims().begin()), inputs->getDims().end());
    const uint32_t orginalOutputSize =
@ -1956,7 +1957,7 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
    const auto filterWidth = filterLayer->_kernel_x;
    const auto minOutputsPerFilter = ALIGN(orginalOutputSize, numberOfFilters) / numberOfFilters;
    const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth;
-    const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, noOfInputsDivisor);
+    const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, num_of_inputs_divisor);

    auto numOutputs =
        gna_convolution_layer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride);
@ -2278,14 +2279,15 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|||||||
<< std::min(squeezedInputOrder[0], squeezedInputOrder[1]) << " > 8)";
|
<< std::min(squeezedInputOrder[0], squeezedInputOrder[1]) << " > 8)";
|
||||||
}
|
}
|
||||||
|
|
||||||
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor
|
const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
|
||||||
: limitations::noOfInputsDivisor;
|
? Limitations::kNoOfInputsLowPrecDivisor
|
||||||
|
: Limitations::kNoOfInputsDivisor;
|
||||||
|
|
||||||
// now this can be run on GNA
|
// now this can be run on GNA
|
||||||
if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case
|
if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case
|
||||||
if (ALIGN(squeezedInputOrder[1], noOfInputsDivisor) != squeezedInputOrder[1]) {
|
if (ALIGN(squeezedInputOrder[1], num_of_inputs_divisor) != squeezedInputOrder[1]) {
|
||||||
THROW_GNA_LAYER_EXCEPTION(layer)
|
THROW_GNA_LAYER_EXCEPTION(layer)
|
||||||
<< "unsupported permute (row size not a multiple of " << noOfInputsDivisor << ")";
|
<< "unsupported permute (row size not a multiple of " << num_of_inputs_divisor << ")";
|
||||||
} else {
|
} else {
|
||||||
auto& currentComponent = dnnComponents.addComponent(layer->name, "interleave");
|
auto& currentComponent = dnnComponents.addComponent(layer->name, "interleave");
|
||||||
dnn->InitInterleaveComponent(currentComponent,
|
dnn->InitInterleaveComponent(currentComponent,
|
||||||
@ -2299,9 +2301,9 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
} else { // deinterleave case
|
} else { // deinterleave case
|
||||||
if (ALIGN(squeezedInputOrder[0], noOfInputsDivisor) != squeezedInputOrder[0]) {
|
if (ALIGN(squeezedInputOrder[0], num_of_inputs_divisor) != squeezedInputOrder[0]) {
|
||||||
THROW_GNA_LAYER_EXCEPTION(layer)
|
THROW_GNA_LAYER_EXCEPTION(layer)
|
||||||
<< "[GNA plugin] unsupported permute (column size not a multiple of " << noOfInputsDivisor << ")";
|
<< "[GNA plugin] unsupported permute (column size not a multiple of " << num_of_inputs_divisor << ")";
|
||||||
} else {
|
} else {
|
||||||
auto& currentComponent = dnnComponents.addComponent(layer->name, "deinterleave");
|
auto& currentComponent = dnnComponents.addComponent(layer->name, "deinterleave");
|
||||||
dnn->InitDeinterleaveComponent(currentComponent,
|
dnn->InitDeinterleaveComponent(currentComponent,
|
||||||
@ -2317,7 +2319,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
|
|||||||
|
|
||||||
size_t num_data_bytes_out =
|
size_t num_data_bytes_out =
|
||||||
ALIGN(InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())),
|
ALIGN(InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())),
|
||||||
noOfInputsDivisor) *
|
num_of_inputs_divisor) *
|
||||||
outputs->getPrecision().size();
|
outputs->getPrecision().size();
|
||||||
size_t num_data_bytes_in = squeezedInputOrder[0] * squeezedInputOrder[1] * inputs->getPrecision().size();
|
size_t num_data_bytes_in = squeezedInputOrder[0] * squeezedInputOrder[1] * inputs->getPrecision().size();
|
||||||
|
|
||||||
@ -2610,12 +2612,12 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
|
|||||||
// if request for allocation less that realTensorInput - we need to extend request
|
// if request for allocation less that realTensorInput - we need to extend request
|
||||||
auto minInput = inputs_ptr_->at(prevLayer->name).get_required_size();
|
auto minInput = inputs_ptr_->at(prevLayer->name).get_required_size();
|
||||||
if (num_data_bytes_in < minInput) {
|
if (num_data_bytes_in < minInput) {
|
||||||
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision
|
const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
|
||||||
? limitations::noOfInputsLowPrecDivisor
|
? Limitations::kNoOfInputsLowPrecDivisor
|
||||||
: limitations::noOfInputsDivisor;
|
: Limitations::kNoOfInputsDivisor;
|
||||||
log::debug() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to"
|
log::debug() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to"
|
||||||
<< ALIGN(minInput, noOfInputsDivisor);
|
<< ALIGN(minInput, num_of_inputs_divisor);
|
||||||
num_data_bytes_in = ALIGN(minInput, noOfInputsDivisor);
|
num_data_bytes_in = ALIGN(minInput, num_of_inputs_divisor);
|
||||||
}
|
}
|
||||||
|
|
||||||
// real allocation pointer will be kept in ptr not in ptr_inputs_global
|
// real allocation pointer will be kept in ptr not in ptr_inputs_global
|
||||||
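Both hunks above lean on the same ALIGN idiom: a requested byte count is rounded up to the next multiple of the active input divisor before memory is reserved. A minimal stand-alone sketch of that rounding; align_up is a stand-in for the plugin's ALIGN macro, and the divisors 8 and 16 are quoted only as the usual values behind the two renamed constants:

    #include <cassert>
    #include <cstdint>

    // Stand-in for ALIGN(value, divisor): round value up to the next
    // multiple of divisor (divisor must be non-zero).
    static uint32_t align_up(uint32_t value, uint32_t divisor) {
        return ((value + divisor - 1) / divisor) * divisor;
    }

    int main() {
        assert(align_up(100, 8) == 104);   // default input divisor (assumed 8)
        assert(align_up(100, 16) == 112);  // low-precision divisor (assumed 16)
        assert(align_up(96, 16) == 96);    // already aligned: unchanged
        return 0;
    }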
@@ -54,20 +54,22 @@ private:
 uint32_t num_rows,
 uint32_t num_cols);

-std::unique_ptr<const limitations::cnn2d::AbstractValidator> cnn2dValidator;

 bool ShouldUseOnlyConv2DGnaIface() const;

+std::shared_ptr<limitations::cnn2d::AbstractValidator> m_cnn2d_validator;

 public:
 backend::DnnComponents dnnComponents;
 MemoryConnection memory_connection;
 ConcatConnection concat_connection;
 ConstConnections const_connections;

-GNAGraphCompiler(const Config& gna_config);
+GNAGraphCompiler(const Config& gna_config,
+std::shared_ptr<backend::AMIntelDNN> dnn_ptr,
+std::shared_ptr<GnaInputs> inputs_ptr,
+std::shared_ptr<limitations::cnn2d::AbstractValidator> cnn2d_validator,
+std::shared_ptr<gna_memory_type> gna_mem_ptr);
 void setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr);
-void setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr);
-void setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr);

 void fillMemoryConnections(std::unordered_map<std::string, std::vector<InferenceEngine::CNNLayerPtr>>& memoryPairs);

@@ -93,8 +95,6 @@ public:
 const uint32_t strideH,
 const uint32_t strideW) const;

-void SetValidatorTarget(const target::DeviceVersion& target);
-
 /**
 * Connects either memory output, or generic output to a layer
 * @param layer - layer pointer
@@ -29,6 +29,7 @@
 #include <vector>

 #include "backend/am_intel_dnn.hpp"
+#include "backend/gna_limitations.hpp"
 #include "common/gna_target.hpp"
 #include "frontend/model_quantizer.hpp"
 #include "frontend/scale_factor_calc.hpp"
@@ -55,6 +56,7 @@
 #include "scale_factor_helper.hpp"
 #include "serial/gna_model_serial.hpp"

+using namespace ov::intel_gna::limitations;
 using namespace ov::intel_gna::graph_utils;

 inline uint32_t ToByteSize(const Gna2DataType type) {
@@ -357,17 +359,23 @@ void GNAPlugin::PrePostProcess(InferenceEngine::Blob::Ptr input_blob,
 }
 }

-GNAPlugin::GNAPlugin() : graphCompiler(config) {
+GNAPlugin::GNAPlugin() {
 Init();
 UpdateFieldsFromConfig();
 InitGNADevice();
+Limitations::init(config.target->get_effective_compile_target());
+InitGNAMemory();
+InitGraphCompiler();
 }

-GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) : graphCompiler(config) {
+GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) {
 Init();
 SetConfig(configMap);
 log::set_log_level(gnaFlags->log_level);
 InitGNADevice();
+Limitations::init(config.target->get_effective_compile_target());
+InitGNAMemory();
+InitGraphCompiler();
 }

 void GNAPlugin::Init() {
@@ -376,27 +384,36 @@ void GNAPlugin::Init() {
 gnaFlags = std::make_shared<GNAFlags>(GNAFlags());
 inputs_ptr_ = std::make_shared<GnaInputs>(GnaInputs());
 outputs_ = GnaOutputs();

-graphCompiler.setDNNPtr(dnn);
-graphCompiler.setInputsPtr(inputs_ptr_);
-
 requestWorkerPool_ = std::make_shared<request::WorkerPoolImpl>();
 }

 void GNAPlugin::InitGNADevice() {
 OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNADevice");
-if (gnaFlags->sw_fp32) {
-gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
-} else {
+if (!gnaFlags->sw_fp32) {
 gnadevice = std::make_shared<GNADeviceHelper>(config.target,
 gnaFlags->performance_counting,
 !config.embedded_export_path.empty());
-
-gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice),
-gnadevice->getMemAlignment(),
-limitations::kMemoryPageSize);
 }
-graphCompiler.setGNAMemoryPtr(gnamem);
+}

+void GNAPlugin::InitGNAMemory() {
+OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNAMemory");
+
+if (gnaFlags->sw_fp32) {
+gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
+} else {
+gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice),
+Limitations::get_instance()->get_memory_alignment(),
+Limitations::kMemoryPageSize);
+}
+}
+
+void GNAPlugin::InitGraphCompiler() {
+OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGraphCompiler");
+
+m_graph_compiler = std::make_shared<GNAGraphCompiler>(
+GNAGraphCompiler(config, dnn, inputs_ptr_, Limitations::get_instance()->get_cnn_validator(), gnamem));
 }

 void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network) {
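The two constructors above now follow a fixed order: InitGNADevice() runs first, Limitations::init(...) is then called exactly once for the effective compile target, and only afterwards do InitGNAMemory() and InitGraphCompiler() dereference Limitations::get_instance(). A minimal sketch of that init-then-get singleton contract, with hypothetical names rather than the plugin's actual class:

    #include <cstddef>
    #include <memory>
    #include <stdexcept>

    // Hypothetical sketch: get_instance() is only valid after init(), which
    // mirrors why init() precedes InitGNAMemory()/InitGraphCompiler() above.
    class LimitationsSketch {
    public:
        static void init(std::size_t memory_alignment) {
            instance_.reset(new LimitationsSketch(memory_alignment));
        }
        static LimitationsSketch* get_instance() {
            if (!instance_)
                throw std::runtime_error("Limitations used before init()");
            return instance_.get();
        }
        std::size_t get_memory_alignment() const { return memory_alignment_; }

    private:
        explicit LimitationsSketch(std::size_t a) : memory_alignment_(a) {}
        std::size_t memory_alignment_;  // device-dependent in the real plugin
        static std::unique_ptr<LimitationsSketch> instance_;
    };

    std::unique_ptr<LimitationsSketch> LimitationsSketch::instance_;

Keeping the target-dependent constants behind one initialized instance is what lets the call sites below drop their explicit compile-target parameters.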
@@ -428,8 +445,7 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network
 GNAFakeQuantizeLayer fqLayer(next_layer);
 auto inputRange = fqLayer.getInputRange();
 auto outputRange = fqLayer.getOutputRange();
-if (inputRange.second.size() != 1 || inputRange.second.size() != 1 || outputRange.second.size() != 1 ||
-outputRange.second.size() != 1) {
+if (inputRange.second.size() != 1 || outputRange.second.size() != 1) {
 THROW_GNA_LAYER_EXCEPTION(next_layer)
 << "unsupported, per-channel quantization for input layer : " << input.second->name();
 }
@@ -552,12 +568,12 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN
 };

 // probing gna_primitives
-auto irLayerAvatar = std::find_if(graphCompiler.dnnComponents.components.begin(),
-graphCompiler.dnnComponents.components.end(),
+auto irLayerAvatar = std::find_if(m_graph_compiler->dnnComponents.components.begin(),
+m_graph_compiler->dnnComponents.components.end(),
 [&layer](const backend::DnnComponents::storage_type::value_type& value) {
 return value.name == layer->name;
 });
-if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) {
+if (irLayerAvatar != m_graph_compiler->dnnComponents.components.end()) {
 initOutput(irLayerAvatar->dnnComponent.orientation_out,
 irLayerAvatar->dnnComponent.num_bytes_per_output,
 irLayerAvatar->dnnComponent.num_rows_out,
@@ -567,8 +583,8 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN

 // probing concatInfo
 if (LayerInfo(layer).isConcat()) {
-auto concatConnection = graphCompiler.concat_connection.find(layer->name);
-if (concatConnection != graphCompiler.concat_connection.end()) {
+auto concatConnection = m_graph_compiler->concat_connection.find(layer->name);
+if (concatConnection != m_graph_compiler->concat_connection.end()) {
 auto precision = layer->outData.front()->getPrecision().size();
 initOutput(kDnnInterleavedOrientation,
 precision,
@@ -581,8 +597,8 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN
 // probing a constant info, for constant trivial networks support
 if (LayerInfo(layer).isConst()) {
 auto const_blob = layer->blobs["custom"];
-auto constConnection = graphCompiler.const_connections.find(layer->name);
-if (constConnection != graphCompiler.const_connections.end()) {
+auto constConnection = m_graph_compiler->const_connections.find(layer->name);
+if (constConnection != m_graph_compiler->const_connections.end()) {
 initOutput(kDnnInterleavedOrientation,
 layer->outData.front()->getPrecision().size(),
 const_blob->size(),
@@ -696,16 +712,13 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
 _network_name = _network.getName();
 std::shared_ptr<InferenceEngine::details::CNNNetworkImpl> convertedNetwork;

-const auto effectiveCompileTarget = config.target->get_effective_compile_target();
-graphCompiler.SetValidatorTarget(effectiveCompileTarget);
-
 auto transformer = TransformationsPipeline(config);

 if (_network.getFunction()) {
 CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network);
 auto model = clonedNetwork.getFunction();
 transformer.apply(model, &m_input_output_subgraphs);
-limitations::check_all_ops_supported(model, effectiveCompileTarget, config.gnaPrecision);
+Limitations::get_instance()->check_all_ops_supported(model, config.gnaPrecision);
 convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(model, clonedNetwork);
 }
 IE_SUPPRESS_DEPRECATED_START
@@ -717,7 +730,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
 // Check the network

 std::string error;
-if (!limitations::AreLayersSupported(network, error)) {
+if (!Limitations::are_layers_supported(network, error)) {
 THROW_GNA_EXCEPTION << error.c_str();
 }

@@ -805,17 +818,17 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
 memoryPairs[id][generic->GetParamAsInt("index")] = layer;
 continue;
 } else if (layerInfo.isConcat()) {
-graphCompiler.fillConcatConnections(layer);
+m_graph_compiler->fillConcatConnections(layer);
 } else if (layerInfo.isSplit() || layerInfo.isSlice()) {
-graphCompiler.fillSplitConnections(layer);
+m_graph_compiler->fillSplitConnections(layer);
 }
 sortedNoMem.push_back(layer);
 }

 // fill in extra storage with memory layers
-graphCompiler.fillMemoryConnections(memoryPairs);
+m_graph_compiler->fillMemoryConnections(memoryPairs);

-if (!graphCompiler.memory_connection.empty() && gnaFlags->num_requests != 1) {
+if (!m_graph_compiler->memory_connection.empty() && gnaFlags->num_requests != 1) {
 gnaFlags->num_requests = 1;
 }

@@ -837,17 +850,17 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {

 // Creating Layer primitives
 for (auto& layer : sortedNoMem) {
-graphCompiler.CreateLayerPrimitive(layer);
+m_graph_compiler->CreateLayerPrimitive(layer);
 }

 for (auto& inputLayer : inputLayers) {
 auto layerInfo = LayerInfo(inputLayer);
 if (layerInfo.isInput() && 0 == inputs_ptr_->at(inputLayer->name).get_allocated_size()) {
-graphCompiler.connectOutput(inputLayer, &inputs_ptr_->at(inputLayer->name).ptrs.front(), 0);
+m_graph_compiler->connectOutput(inputLayer, &inputs_ptr_->at(inputLayer->name).ptrs.front(), 0);
 }
 }

-if (graphCompiler.dnnComponents.components.empty()) {
+if (m_graph_compiler->dnnComponents.components.empty()) {
 log::warning() << "No GNA primitives created based on topology. This might indicate trivial topology\n";
 trivialTopology = true;
 }
@@ -861,7 +874,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
 // Memory layers are not dnnComponents hence we need to make switch with identity layer
 if (outLayer->type == "Memory") {
 // traverse memory connection to find corresponding output_memory
-for (auto&& memConnection : graphCompiler.memory_connection) {
+for (auto&& memConnection : m_graph_compiler->memory_connection) {
 if (memConnection.second.getInput()->name == outLayer->name) {
 // if connection is found, replace memory input layer with memory output layer
 outLayer = memConnection.second.getOutput();
@@ -909,11 +922,11 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
 dnn->Init(gnamem.get(), gnaFlags->sw_fp32 ? kDnnFloat : kDnnInt, 1);

 // TODO: this copy is unneeded; in fact, we can directly create gna structs from list
-auto execOrder = graphCompiler.dnnComponents.getExecutionOrder();
+auto execOrder = m_graph_compiler->dnnComponents.getExecutionOrder();
 dnn->component.insert(dnn->component.begin(), execOrder.begin(), execOrder.end());

 // in fp32 mode last PWL cannot be computed without that
-if (!graphCompiler.dnnComponents.components.empty()) {
+if (!m_graph_compiler->dnnComponents.components.empty()) {
 dnn->InitActiveList(NULL);
 }

@@ -965,7 +978,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
 for (auto& inputLayer : inputLayers) {
 if (LayerInfo(inputLayer).isInput()) {
 ov::intel_gna::helpers::updateModelInputOrientationWithoutConvolution(*inputLayer,
-graphCompiler.dnnComponents,
+m_graph_compiler->dnnComponents,
 *inputs_ptr_);
 }
 }
@@ -976,7 +989,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
 if (outLayer && LayerInfo(outLayer).isOutput()) {
 ov::intel_gna::helpers::updateModelOutputOrientation(outPort.first,
 outLayer->name,
-graphCompiler.dnnComponents,
+m_graph_compiler->dnnComponents,
 outputs_);
 }
 }
@@ -1101,7 +1114,7 @@ void GNAPlugin::DumpXNNToFile() const {
 uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap& inputs, InferenceEngine::BlobMap& result) {
 auto freeWorker = requestWorkerPool_->findFreeModelWorker();
 if (freeWorker == nullptr) {
-if (!graphCompiler.memory_connection.empty()) {
+if (!m_graph_compiler->memory_connection.empty()) {
 Wait(requestWorkerPool_->firstWorker().representingIndex());
 freeWorker = requestWorkerPool_->findFreeModelWorker();
 if (freeWorker == nullptr) {
@@ -1412,7 +1425,7 @@ RequestStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
 }

 void GNAPlugin::Reset() {
-graphCompiler.Reset();
+m_graph_compiler->Reset();
 }

 bool GNAPlugin::Infer(const InferenceEngine::Blob& input, InferenceEngine::Blob& output) {
@@ -1479,9 +1492,9 @@ Blob::Ptr GNAPlugin::GetInputBlob(const std::string& name, InferenceEngine::Prec
 }

 std::vector<InferenceEngine::IVariableStateInternal::Ptr> GNAPlugin::QueryState() {
-if (memoryStates.size() != graphCompiler.memory_connection.size()) {
+if (memoryStates.size() != m_graph_compiler->memory_connection.size()) {
 memoryStates.clear();
-for (auto& connection : graphCompiler.memory_connection) {
+for (auto& connection : m_graph_compiler->memory_connection) {
 auto state =
 std::make_shared<memory::GNAVariableState>(connection.first,
 std::make_shared<GNAMemoryLayer>(connection.second));
@@ -1575,7 +1588,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
 GNAMemoryLayer memoryLayer(nullptr, nullptr, gnaFlags->sw_fp32 ? 4 : 2);
 std::string name;
 std::tie(memoryLayer.gna_ptr, memoryLayer.reserved_size, name, memoryLayer.scale_factor) = memory;
-graphCompiler.memory_connection.emplace_back(make_pair(name, memoryLayer));
+m_graph_compiler->memory_connection.emplace_back(make_pair(name, memoryLayer));
 }

 // TODO update documenation to allow exporting tlv with importing cep only for sue creek
@@ -1607,7 +1620,7 @@ void GNAPlugin::Export(std::ostream& outStream) {
 .SetInputRotation(transpose_inputs_info)
 .SetOutputRotation(transpose_outputs_info);

-for (auto&& memoryConnection : graphCompiler.memory_connection) {
+for (auto&& memoryConnection : m_graph_compiler->memory_connection) {
 auto state =
 std::make_shared<memory::GNAVariableState>(memoryConnection.first,
 std::make_shared<GNAMemoryLayer>(memoryConnection.second));
@@ -1691,7 +1704,6 @@ InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork(
 Config qn_config(config);
 qn_config.UpdateFromMap(config_map);

-const auto effectiveCompileTarget = qn_config.target->get_effective_compile_target();
 auto model = network.getFunction();
 if (model) {
 auto supported = GetSupportedNodes(
@@ -1700,7 +1712,8 @@ InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork(
 TransformationsPipeline(qn_config).apply(model);
 },
 [&](const std::shared_ptr<ngraph::Node>& op) {
-return limitations::is_op_supported(op, effectiveCompileTarget, qn_config.gnaPrecision);
+const auto res = Limitations::get_instance()->is_op_supported(op, qn_config.gnaPrecision);
+return res;
 });
 for (auto&& op_name : supported) {
 res.supportedLayersMap.emplace(op_name, GetName());
@@ -47,8 +47,7 @@ protected:
 std::shared_ptr<gna_memory_type> gnamem;
 std::shared_ptr<GnaInputs> inputs_ptr_;
 GnaOutputs outputs_;
-GNAGraphCompiler graphCompiler;
+std::shared_ptr<GNAGraphCompiler> m_graph_compiler;

 uint32_t activeLayerIndex = 0xffffffff;
 // TODO: transpose_inputs_info and transpose_outputs_info should be moved to GNAModelSerial class when ngraph
@@ -189,6 +188,8 @@ protected:
 void Init();

 void InitGNADevice();
+void InitGNAMemory();
+void InitGraphCompiler();

 void DumpXNNToFile() const;
 /**
@@ -83,11 +83,9 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
 manager.register_pass<ov::pass::LSTMCellDecomposition>();
 manager.register_pass<ov::intel_gna::pass::ConvertDWSCToScaleShifts>();
 manager.register_pass<ov::intel_gna::pass::ConvertPaddedToValidConv>();
-manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(effective_compile_target,
-config.gnaPrecision);
-manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(effective_compile_target,
-config.gnaPrecision);
-manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(effective_compile_target, config.gnaPrecision);
+manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(config.gnaPrecision);
+manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(config.gnaPrecision);
+manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(config.gnaPrecision);
 if (!has_convolution) {
 manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithFqToPointWiseConvolution>();
 manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithBiasToPointWiseConvolution>();
@@ -385,7 +385,7 @@ public:
 auto cropLayer = dynamic_cast<InferenceEngine::CropLayer*>(layer);
 if (cropLayer != nullptr && !cropLayer->offset.empty()) {
 const auto crop_params = GetCropParams(cropLayer);
-return limitations::isCropAffinedOffset(crop_params.start_offset);
+return limitations::Limitations::get_instance()->is_crop_affined_offset(crop_params.start_offset);
 }
 return false;
 }
@@ -50,7 +50,7 @@ public:
 // @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size
 inline std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize,
 uint32_t maxSplitSize,
-uint32_t alignment = limitations::inputByteAlignment) {
+uint32_t alignment = limitations::Limitations::kInputByteAlignment) {
 std::vector<uint32_t> splitSizes;
 uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment);
 uint32_t usedSize = 0;
@@ -73,7 +73,7 @@ inline std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
 IE_ASSERT(firstValuableDim != std::end(dims));
 auto splittedElementsSize = *firstValuableDim;
 auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim);
-auto alignment = limitations::inputByteAlignment;
+auto alignment = limitations::Limitations::kInputByteAlignment;

 // Split output size should be multiple by 64 to avoid align filters insertion,
 // but we need to check if our input size to split exceeds 64; if not we can always
@@ -85,8 +85,9 @@ inline std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
 return {splittedDimIx, splitSizes};
 }
 }
-splitSizes = GetAlignedSplitSizes(splittedElementsSize,
-limitations::bufferMaxSize * splittedElementsSize / totalElementsSize,
-alignment);
+splitSizes =
+GetAlignedSplitSizes(splittedElementsSize,
+limitations::Limitations::kBufferMaxSize * splittedElementsSize / totalElementsSize,
+alignment);
 return {splittedDimIx, splitSizes};
 }
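For reference, the visible logic of GetAlignedSplitSizes caps each chunk at the largest multiple of the alignment that does not exceed maxSplitSize, then emits chunks until the total is exhausted. An illustrative re-implementation under that reading, with 64 assumed as the value of kInputByteAlignment:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Sketch of the split: full chunks of maxAligned, remainder last.
    static std::vector<uint32_t> aligned_split_sizes(uint32_t totalSize,
                                                     uint32_t maxSplitSize,
                                                     uint32_t alignment = 64) {
        std::vector<uint32_t> sizes;
        const uint32_t maxAligned =
            std::max(maxSplitSize - maxSplitSize % alignment, alignment);
        for (uint32_t used = 0; used < totalSize; used += sizes.back())
            sizes.push_back(std::min(maxAligned, totalSize - used));
        return sizes;
    }

    // e.g. totalSize = 300, maxSplitSize = 100: maxAligned = 64, and the
    // result is {64, 64, 64, 64, 44}.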
@@ -15,11 +15,14 @@
 #include <string>
 #include <vector>

+#include "backend/gna_limitations.hpp"
 #include "gna2-model-api.h"
 #include "gna2_model_helper.hpp"
 #include "gna_device.hpp"
 #include "log.hpp"

+using namespace ov::intel_gna::limitations;
+
 namespace ov {
 namespace intel_gna {
 namespace dump {
@@ -486,8 +489,9 @@ void DumpGna2Model(const Gna2Model& gnaModel,
 }
 dumpFile << "\tOperand " << j << " (" << GetOperandName(operation.Type, j) << ")"
 << " type: " << GetOperandType(operand.Type) << " shape: " << GetSimpleString(operand.Shape)
-<< " tag: " << foundName << " offset: " << offset
-<< " size: " << Gna2RoundUpTo64(GetGnaShapeSize(operand.Shape, GetTypeByteSize(operand.Type)))
+<< " tag: " << foundName << " offset: " << offset << " size: "
+<< Gna2RoundUp(GetGnaShapeSize(operand.Shape, GetTypeByteSize(operand.Type)),
+Limitations::get_instance()->get_memory_alignment())
 << " data: " << operand.Data << " baseAlloc: " << foundPtr << " layout: ";

 DumpCharArray(dumpFile, operand.Layout, GNA2_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS);
@@ -50,6 +50,7 @@ using namespace InferenceEngine::details;
 using namespace ov::intel_gna::frontend;
 using namespace ov::intel_gna::common;
 using namespace ov::intel_gna::pre_post_processing;
+using namespace ov::intel_gna::limitations;

 namespace ov {
 namespace intel_gna {
@@ -149,10 +150,11 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
 return LayerInfo(ptr).isNonValuesChangable();
 });
 IE_ASSERT(inputLayer != nullptr);
-size_t weightsSize =
-LayerInfo(prevLayer).has32BOutput()
+size_t weightsSize = LayerInfo(prevLayer).has32BOutput()
 ? nextLayer->outData[0]->getDims().back()
-: Get2DReshapedData(nextLayer->outData[0], limitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8)
+: Get2DReshapedData(nextLayer->outData[0],
+Limitations::get_min_batch_to_fit_in_buffer(nextLayer->outData[0]),
+8)
 ->getDims()[1];
 std::vector<float> weightsValues(weightsSize, fillValue);
 IE_ASSERT(diagLayer != nullptr);
@@ -1531,19 +1533,19 @@ void InsertSplitAligningFilterPass::run() {

 // encodes offset to beginning of split layer input
 filterLayer->params["offset"] =
-std::to_string(aligned64_offset / limitations::bytesPerSplitElement);
+std::to_string(aligned64_offset / Limitations::kBytesPerSplitElement);
 auto dims = splitOutput->getTensorDesc().getDims();
 if (dims.size() > 3) {
 THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
 }

 const auto offsetOfUnalignment =
-(currentOffset - aligned64_offset) / limitations::bytesPerSplitElement;
+(currentOffset - aligned64_offset) / Limitations::kBytesPerSplitElement;
 // TODO consider to use a different number of filters do decrese the number of trailing zeros
 // (additionalPaddingOfFilter)
-const auto numberOfFilters = limitations::convMinFiltersNum;
+const auto numberOfFilters = Limitations::kConvMinFiltersNum;
 const auto filterSize =
-ALIGN(offsetOfUnalignment + numberOfFilters, limitations::convFilterSizeDivider);
+ALIGN(offsetOfUnalignment + numberOfFilters, Limitations::kConvFilterSizeDivider);

 // filterWeights: numberOfFilters X (offsetOfUnalignment + additionalPaddingOfFilter +
 // numberOfFilters) offsetOfUnalignment - the leading zeros in the filter
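The filter sizing above is plain round-up arithmetic. A worked instance, assuming the usual values kConvMinFiltersNum = 4 and kConvFilterSizeDivider = 16:

    #include <cstdint>

    constexpr uint32_t align(uint32_t n, uint32_t d) {
        return ((n + d - 1) / d) * d;
    }
    // offsetOfUnalignment = 10 leading zeros: each of the 4 filters is
    // padded out to ALIGN(10 + 4, 16) = 16 coefficients.
    static_assert(align(10 + 4, 16) == 16, "worked example");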
@@ -1598,7 +1600,7 @@ void InsertSplitAligningFilterPass::run() {
 }

 // search data that starts from unaligned location
-currentOffset += outputSize * limitations::bytesPerSplitElement;
+currentOffset += outputSize * Limitations::kBytesPerSplitElement;
 splitOutIndex++;
 }
 }
@@ -1636,7 +1638,7 @@ void EltwiseSplitOverChannelsPass::run() {
 auto oData = l->outData.front();
 auto oDims = oData->getDims();
 auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims));
-if (totalElementsSize <= limitations::bufferMaxSize) {
+if (totalElementsSize <= Limitations::kBufferMaxSize) {
 continue;
 }
 auto splitSizesPerAxis = AlignedSplitSizesPerAxis(oDims);
@@ -1747,8 +1749,9 @@ void SubstituteScaleShiftBroadCastPass::run() {
 if (was_reshaped) {
 dataDims = reshaped_data[insData->getName()];
 } else {
-dataDims = HasTo2DReshapeData(l)
-? Get2DReshapedData(insData, limitations::GetMinBatchToFitInBuffer(insData), 8)->getDims()
+dataDims =
+HasTo2DReshapeData(l)
+? Get2DReshapedData(insData, Limitations::get_min_batch_to_fit_in_buffer(insData), 8)->getDims()
 : insData->getDims();
 }

@@ -17,6 +17,7 @@
 #include "log/debug.hpp"

 using namespace ov::intel_gna::gna_convolution_layer;
+using namespace ov::intel_gna::limitations;

 void CNNFilter32(intel_dnn_component_t* component) {
 auto filters = reinterpret_cast<float*>(component->op.conv1D.ptr_filters);
@@ -306,7 +307,7 @@ void CNN2DFilter32(intel_dnn_component_t* component) {
 }
 }
 // kernel padded to 16B = 4 * sizeof(float)
-kernelIndex += ALIGN(kh * kw * kc, ov::intel_gna::limitations::convEachKernelByteAlignment / sizeof(float));
+kernelIndex += ALIGN(kh * kw * kc, Limitations::kConvEachKernelByteAlignment / sizeof(float));
 }
 }

@@ -15,6 +15,7 @@

 using namespace ov::intel_gna;
 using namespace ov::intel_gna::pass;
+using namespace ov::intel_gna::limitations;

 static bool BiasValidation(const ngraph::Output<ngraph::Node>& output) {
 auto bias_output_shape = output.get_node()->get_output_shape(0);
@@ -49,9 +50,9 @@ static std::tuple<bool, uint32_t, uint32_t, uint32_t> VerifyAndGetConvParams(
 const uint32_t width = input1_shape.front();
 const uint32_t in_channels = input2_shape.back();
 const uint32_t out_channels = input2_shape.front();
-if (input1_shape.front() <= limitations::affineMaxBatchSize ||
-out_channels % limitations::convFiltersNumDivider != 0 || out_channels > limitations::convMaxFiltersNum ||
-in_channels > limitations::convFilterMaxSize) {
+if (input1_shape.front() <= Limitations::kAffineMaxBatchSize ||
+out_channels % Limitations::kConvFiltersNumDivider != 0 || out_channels > Limitations::kConvMaxFiltersNum ||
+in_channels > Limitations::kConvFilterMaxSize) {
 return std::make_tuple(false, 0, 0, 0);
 }

@@ -20,6 +20,7 @@
 namespace ov {
 namespace intel_gna {
 using namespace target;
+using namespace limitations;
 namespace pass {
 using namespace helper;

@@ -55,7 +56,7 @@ static bool VerifyAndGetConvData(std::shared_ptr<ngraph::opset7::Convolution> co
 size_t filter_height = filters.get_shape()[2];
 size_t filter_width = filters.get_shape()[3];

-if (filter_width > limitations::copyMaxGrouping || filter_height > limitations::copyMaxGrouping) {
+if (filter_width > Limitations::kCopyMaxGrouping || filter_height > Limitations::kCopyMaxGrouping) {
 return false;
 }

@@ -76,7 +77,7 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr<ngraph::opset7:
 (max_pool->get_auto_pad() != ngraph::op::PadType::EXPLICIT ||
 max_pool->get_pads_begin() != ngraph::Shape({0, 0}) || max_pool->get_pads_end() != ngraph::Shape({0, 0}))) ||
 pool_filter.size() != 2 || pool_strides.size() != 2 || pool_filter[0] > 1 || pool_strides[0] > 1 ||
-pool_filter[0] > limitations::maxPoolMaxWindowSize)
+pool_filter[0] > Limitations::kMaxPoolMaxWindowSize)
 return false;

 graph_data.pool_size_width = pool_filter[1];
@@ -84,16 +85,15 @@
 return true;
 }

-static bool GNA30SupportedConv(const DeviceVersion& compile_target,
-const InferenceEngine::Precision& gnaPrecision,
+static bool GNA30SupportedConv(const InferenceEngine::Precision& gnaPrecision,
 const GraphData& graph_data,
 const ConvData& conv_data) {
-const auto cnn2dValidatorPtr = limitations::cnn2d::AbstractValidator::Create(compile_target);
+const auto cnn2dValidatorPtr = Limitations::get_instance()->get_cnn_validator();

 if (!cnn2dValidatorPtr) {
 return false;
 }
-const auto& cnn2dValidator = *cnn2dValidatorPtr;
-const auto cnnIsValid = cnn2dValidator.ValidateCnn2D(graph_data.conv->get_friendly_name(),
+const auto cnnIsValid = cnn2dValidatorPtr->ValidateCnn2D(graph_data.conv->get_friendly_name(),
 conv_data.input_height,
 conv_data.input_width,
 conv_data.input_channel_count,
@@ -112,7 +112,7 @@ static bool GNA30SupportedConv(const DeviceVersion& compile_target,
 if (!graph_data.max_pool) {
 return true;
 }
-const auto poolingValid = cnn2dValidator.ValidatePooling2D(graph_data.conv->get_friendly_name(),
+const auto poolingValid = cnn2dValidatorPtr->ValidatePooling2D(graph_data.conv->get_friendly_name(),
 graph_data.max_pool->get_kernel()[0],
 graph_data.max_pool->get_kernel()[1],
 graph_data.max_pool->get_strides()[0],
@@ -126,7 +126,7 @@ static size_t CalculateConvCount(const ConvData& conv_data) {
 size_t conv_count = 1;
 size_t total_factorized_conv_channel_count =
 (conv_data.input_channel_count * conv_data.filter_height * conv_data.filter_width);
-while (total_factorized_conv_channel_count / conv_count > limitations::convFilterMaxSize ||
+while (total_factorized_conv_channel_count / conv_count > Limitations::kConvFilterMaxSize ||
 total_factorized_conv_channel_count % conv_count != 0 || conv_data.filter_channel_count % conv_count != 0)
 conv_count++;

@@ -139,7 +139,7 @@ static bool ShouldDecompose(GraphData& graph_data, const ConvData& conv_data) {

 // Concat (copy) layer limitation allows to split up to a certain limit
 // Currently we are able to split only convolutions without pooling in horizontal dimension
-if (graph_data.conv_count > limitations::copyMaxGrouping ||
+if (graph_data.conv_count > Limitations::kCopyMaxGrouping ||
 ((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1))
 return false;

@@ -561,8 +561,7 @@ static void Decompose(const GraphData& graph_data, ConvData& conv_data) {
 conv_result->set_friendly_name(conv_result_name);
 }

-static bool Convert(const DeviceVersion& compile_target,
-const InferenceEngine::Precision& gnaPrecision,
+static bool Convert(const InferenceEngine::Precision& gnaPrecision,
 std::shared_ptr<ngraph::Node> leading_transpose,
 std::shared_ptr<ngraph::Node> fq_filters,
 std::shared_ptr<ngraph::Node> conv,
@@ -598,7 +597,7 @@ static bool Convert(const DeviceVersion& compile_target,
 return false;

 // If compile target is GNA 3.0 and the convolution is supported on it, then skip decomposition
-if (GNA30SupportedConv(compile_target, gnaPrecision, graph_data, conv_data))
+if (GNA30SupportedConv(gnaPrecision, graph_data, conv_data))
 return false;

 // We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC)
@@ -618,7 +617,7 @@ static bool Convert(const DeviceVersion& compile_target,
 return true;
 }

-Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const InferenceEngine::Precision& gnaPrecision) {
+Decompose2DConv::Decompose2DConv(const InferenceEngine::Precision& gnaPrecision) {
 MATCHER_SCOPE(Decompose2DConv);

 auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
@@ -735,8 +734,7 @@ Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const Infe
 }
 }

-return Convert(compile_target,
-gnaPrecision,
+return Convert(gnaPrecision,
 pattern_map.at(leading_transpose).get_node_shared_ptr(),
 fq_filters_node,
 pattern_map.at(conv).get_node_shared_ptr(),
@@ -755,8 +753,7 @@ Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const Infe
 this->register_matcher(m, callback);
 }

-Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const DeviceVersion& compile_target,
-const InferenceEngine::Precision& gnaPrecision) {
+Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const InferenceEngine::Precision& gnaPrecision) {
 MATCHER_SCOPE(Decompose2DConvTransposedWithBias);

 auto const_input_i64 =
@@ -781,8 +778,7 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const Devic
 pattern_map.at(bias).get_node_shared_ptr())))
 return false;

-return Convert(compile_target,
-gnaPrecision,
+return Convert(gnaPrecision,
 pattern_map.at(leading_transpose).get_node_shared_ptr(),
 nullptr,
 pattern_map.at(conv).get_node_shared_ptr(),
@@ -802,7 +798,6 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const Devic
 }

 Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF(
-const DeviceVersion& compile_target,
 const InferenceEngine::Precision& gnaPrecision) {
 MATCHER_SCOPE(Decompose2DConvTransposedWithBiasAF);

@@ -836,8 +831,7 @@ Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF(
 pattern_map.at(bias).get_node_shared_ptr())))
 return false;

-return Convert(compile_target,
-gnaPrecision,
+return Convert(gnaPrecision,
 pattern_map.at(leading_transpose).get_node_shared_ptr(),
 nullptr,
 pattern_map.at(conv).get_node_shared_ptr(),
@@ -35,7 +35,7 @@ namespace pass {
 class Decompose2DConv : public ngraph::pass::MatcherPass {
 public:
 OPENVINO_RTTI("Decompose2DConv", "0");
-Decompose2DConv(const target::DeviceVersion& compile_target, const InferenceEngine::Precision& gnaPrecision);
+Decompose2DConv(const InferenceEngine::Precision& gnaPrecision);
 };

 /**
@@ -56,8 +56,7 @@ public:
 class Decompose2DConvTransposedWithBias : public ngraph::pass::MatcherPass {
 public:
 OPENVINO_RTTI("Decompose2DConvTransposedWithBias", "0");
-Decompose2DConvTransposedWithBias(const target::DeviceVersion& compile_target,
-const InferenceEngine::Precision& gnaPrecision);
+Decompose2DConvTransposedWithBias(const InferenceEngine::Precision& gnaPrecision);
 };

 /**
@@ -80,8 +79,7 @@ public:
 class Decompose2DConvTransposedWithBiasAF : public ngraph::pass::MatcherPass {
 public:
 OPENVINO_RTTI("Decompose2DConvTransposedWithBiasAF", "0");
-Decompose2DConvTransposedWithBiasAF(const target::DeviceVersion& compile_target,
-const InferenceEngine::Precision& gnaPrecision);
+Decompose2DConvTransposedWithBiasAF(const InferenceEngine::Precision& gnaPrecision);
 };

 } // namespace pass
@@ -13,6 +13,7 @@
 #include "backend/gna_limitations.hpp"

 using namespace ngraph;
+using namespace ov::intel_gna::limitations;

 namespace ov {
 namespace intel_gna {
@@ -81,7 +82,7 @@ static bool GetVerifiedMVNData(const std::shared_ptr<opset8::MVN> mvn, MVNData&

 // Check if average must be split
 mvn_data.num_parts = 1;
-while (mvn_data.W / mvn_data.num_parts > limitations::convFilterMaxSize) {
+while (mvn_data.W / mvn_data.num_parts > Limitations::kConvFilterMaxSize) {
 mvn_data.num_parts *= 2;
 }

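The renamed condition above searches for the smallest power-of-two part count at which each slice of the row width fits under the convolution filter cap. A sketch of that search, with 768 assumed as the value behind kConvFilterMaxSize:

    #include <cstdint>

    // Double num_parts until W / num_parts fits under the cap.
    static uint32_t parts_for_width(uint32_t W, uint32_t cap = 768) {
        uint32_t num_parts = 1;
        while (W / num_parts > cap)
            num_parts *= 2;
        return num_parts;  // e.g. parts_for_width(2000) == 4
    }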
|
@ -16,6 +16,7 @@
|
|||||||
#include "backend/gna_limitations.hpp"
|
#include "backend/gna_limitations.hpp"
|
||||||
|
|
||||||
using namespace ov::intel_gna::pass;
|
using namespace ov::intel_gna::pass;
|
||||||
|
using namespace ov::intel_gna::limitations;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
@ -160,7 +161,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (prev_node) {
|
if (prev_node) {
|
||||||
if (limitations::IsTransposeSupported(prev_node->get_output_shape(0))) {
|
if (Limitations::is_transpose_supported(prev_node->get_output_shape(0))) {
|
||||||
InsertTranspose(prev_node, matmul_node->get_friendly_name(), true);
|
InsertTranspose(prev_node, matmul_node->get_friendly_name(), true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -170,7 +171,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() {
|
|||||||
auto iter = pattern_map.find(fq);
|
auto iter = pattern_map.find(fq);
|
||||||
if (iter != pattern_map.end() || (iter = pattern_map.find(constant)) != pattern_map.end()) {
|
if (iter != pattern_map.end() || (iter = pattern_map.find(constant)) != pattern_map.end()) {
|
||||||
auto prev_node = iter->second.get_node_shared_ptr();
|
auto prev_node = iter->second.get_node_shared_ptr();
|
||||||
if (limitations::IsTranspose2d(prev_node->get_output_shape(0))) {
|
if (Limitations::is_transpose_2d(prev_node->get_output_shape(0))) {
|
||||||
InsertTranspose(prev_node, prev_node->get_friendly_name(), true);
|
InsertTranspose(prev_node, prev_node->get_friendly_name(), true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -187,7 +188,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (prev_node) {
|
if (prev_node) {
|
||||||
if (limitations::IsTransposeSupported(prev_node->get_output_shape(0))) {
|
if (Limitations::is_transpose_supported(prev_node->get_output_shape(0))) {
|
||||||
InsertTranspose(prev_node, matmul_node->get_friendly_name(), true);
|
InsertTranspose(prev_node, matmul_node->get_friendly_name(), true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -243,7 +244,7 @@ HandleTransposeAfterMatMul::HandleTransposeAfterMatMul() {
|
|||||||
ReplaceTransposeWithReshape(transpose_it->second.get_node_shared_ptr());
|
ReplaceTransposeWithReshape(transpose_it->second.get_node_shared_ptr());
|
||||||
} else {
|
} else {
|
||||||
auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
|
auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
|
||||||
if (!limitations::IsTransposeSupported(reshape_node->get_input_shape(0)))
|
if (!Limitations::is_transpose_supported(reshape_node->get_input_shape(0)))
|
||||||
return false;
|
return false;
|
||||||
auto iter = pattern_map.find(act);
|
auto iter = pattern_map.find(act);
|
||||||
if (iter == pattern_map.end() && (iter = pattern_map.find(fq2)) == pattern_map.end() &&
|
if (iter == pattern_map.end() && (iter = pattern_map.find(fq2)) == pattern_map.end() &&
|
||||||
|
@ -4,6 +4,7 @@
 #include "transformations/remove_in_out_processing.hpp"
 
+#include "backend/gna_limitations.hpp"
 #include "common/graph_utils.hpp"
 #include "openvino/cc/pass/itt.hpp"
 #include "openvino/opsets/opset1.hpp"
@ -17,6 +18,7 @@
 
 using namespace ov::opset10;
 using namespace ov::intel_gna::pass;
+using namespace ov::intel_gna::limitations;
 
 namespace {
 
@ -29,7 +31,7 @@ inline bool is_preprocessing_layer_not_supported(std::shared_ptr<ov::Node>& laye
 
     // Verify that transpose layer cannot be executed on GNA
     if (std::dynamic_pointer_cast<ov::opset1::Transpose>(layer)) {
-        return !limitations::is_transpose_supported(layer);
+        return !Limitations::is_transpose_supported(layer);
     }
 
     return false;
@ -14,6 +14,8 @@
 #include "layers/gna_convolution_layer.hpp"
 #include "layers/gna_split_layer.hpp"
 
+using namespace ov::intel_gna::limitations;
+
 namespace ov {
 namespace intel_gna {
 namespace pass {
@ -56,13 +58,13 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
                                        std::end(conv->get_input_shape(0)),
                                        size_t(1),
                                        std::multiplies<size_t>());
-    if (input_size <= limitations::bufferMaxSize) {
+    if (input_size <= Limitations::kBufferMaxSize) {
        return false;
     }
     auto& input = conv->get_input_shape(0);
     uint32_t width = input.back();
     uint32_t in_channels = input.at(1);
-    auto split_sizes = GetAlignedSplitSizes(width, limitations::bufferMaxSize / in_channels);
+    auto split_sizes = GetAlignedSplitSizes(width, Limitations::kBufferMaxSize / in_channels);
     IE_ASSERT(split_sizes.size() > 1);
     std::vector<int64_t> split_sizes_casted(split_sizes.size());
     std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) {
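To illustrate the gate above: a convolution is only split when its flattened input exceeds the GNA buffer limit, and the split is then computed per width so each slice of width x in_channels elements fits. A self-contained sketch (the greedy chunking below is illustrative; the plugin's real GetAlignedSplitSizes also enforces alignment):

#include <algorithm>
#include <cstdint>
#include <vector>

// Illustrative stand-in for GetAlignedSplitSizes(): greedily chop the
// width into chunks no larger than max_width. The real helper in
// layers/gna_split_layer.hpp additionally aligns the chunk sizes.
std::vector<uint32_t> greedy_split_sizes(uint32_t width, uint32_t max_width) {
    std::vector<uint32_t> sizes;
    while (width > 0) {
        const uint32_t chunk = std::min(width, max_width);
        sizes.push_back(chunk);
        width -= chunk;
    }
    return sizes;
}

// Usage mirroring the hunk above: the max width per slice is the buffer
// limit divided by the channel count (65536 is an assumed limit value):
// auto split_sizes = greedy_split_sizes(width, 65536 / in_channels);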
@ -15,6 +15,8 @@
 #include "legacy/ngraph_ops/eltwise.hpp"
 #include "log/log.hpp"
 
+using namespace ov::intel_gna::limitations;
+
 namespace ov {
 namespace intel_gna {
 namespace pass {
@ -25,7 +27,7 @@ inline bool is_eltwise_has_to_be_splitted(const ngraph::Output<ngraph::Node>& no
         return false;
     auto o_dims = eltwise->get_output_shape(0);
     auto total_elem_size = std::accumulate(std::begin(o_dims), std::end(o_dims), 1, std::multiplies<size_t>());
-    return (total_elem_size > limitations::bufferMaxSize);
+    return (total_elem_size > Limitations::kBufferMaxSize);
 }
 
 static std::shared_ptr<ngraph::opset9::VariadicSplit> split_input(
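For intuition about the predicate above: it multiplies the output dimensions and compares the element count against the per-layer buffer limit. A compilable sketch (the 65536 constant is an assumed placeholder for Limitations::kBufferMaxSize, whose actual value depends on the target):

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Sketch of the size gate: true means the eltwise must be split.
bool needs_split(const std::vector<size_t>& out_dims, size_t buffer_max = 65536) {
    const size_t total =
        std::accumulate(out_dims.begin(), out_dims.end(), size_t{1}, std::multiplies<size_t>());
    return total > buffer_max;
}

// Example: a {1, 64, 1024} output holds 65536 elements, which does not
// exceed the assumed limit, so needs_split({1, 64, 1024}) == false,
// while needs_split({1, 64, 2048}) == true.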
@ -11,7 +11,7 @@ namespace intel_gna {
 namespace pass {
 
 /**
- * @brief Split over channels for Eltwise to avoid GNA-HW bufferMaxSize limitation per eltwise
+ * @brief Split over channels for Eltwise to avoid GNA-HW kBufferMaxSize limitation per eltwise
  */
 class SplitEltwise : public ov::pass::MatcherPass {
 public:
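The pass's split_input helper (shown partially above) produces a VariadicSplit. A minimal sketch of how such a node is typically assembled with ngraph (the wrapper function and its argument choices are illustrative):

#include <memory>
#include <vector>

#include <ngraph/opsets/opset9.hpp>

// Illustrative only: build a VariadicSplit that cuts `input` along
// `axis` into the requested part sizes, e.g. {32, 32} for a 64-wide dim.
std::shared_ptr<ngraph::opset9::VariadicSplit> make_variadic_split(
        const ngraph::Output<ngraph::Node>& input,
        int64_t axis,
        const std::vector<int64_t>& part_sizes) {
    auto axis_const = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape{}, {axis});
    auto sizes_const = ngraph::opset9::Constant::create(ngraph::element::i64,
                                                        ngraph::Shape{part_sizes.size()},
                                                        part_sizes);
    return std::make_shared<ngraph::opset9::VariadicSplit>(input, axis_const, sizes_const);
}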
@ -2,20 +2,24 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <vector>
 #include <gtest/gtest.h>
-#include <legacy/layer_transform.hpp>
-#include "frontend/model_quantizer.hpp"
-#include "frontend/layer_quantizer.hpp"
-#include "gna_matcher.hpp"
 #include <ie_core.hpp>
+#include <legacy/layer_transform.hpp>
+#include <vector>
+
+#include "backend/gna_limitations.hpp"
+#include "frontend/layer_quantizer.hpp"
+#include "frontend/model_quantizer.hpp"
+#include "gna_matcher.hpp"
 
 using namespace InferenceEngine;
+using namespace ov::intel_gna::limitations;
 using namespace ov::intel_gna::frontend;
 using namespace GNATestIRs;
 
 class I8QuantisationTest : public GNATest<> {
 protected:
     InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) {
         auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
         Config gna_config;
@ -26,7 +30,8 @@ class I8QuantisationTest : public GNATest<> {
         return newLayer;
     };
 
-    InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model, float scale_factor) const {
+    InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model,
+                                                            float scale_factor) const {
         auto scale_factors = std::vector<float>({scale_factor});
 
         GnaInputs inputs;
@ -41,30 +46,30 @@ class I8QuantisationTest : public GNATest<> {
 
         auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);
 
-        return ModelQuantizer(transformer).quantize(
-            model,
-            inputs);
+        return ModelQuantizer(transformer).quantize(model, inputs);
     }
 
-    void SetUp() override {}
+    void SetUp() override {
+        Limitations::init(target::DeviceVersion::Default);
+    }
 };
 
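Worth noting: with the limits now held by a Limitations singleton, every consumer must see init() run before the limits are queried, which is why the fixtures gain an explicit SetUp. A sketch of the contract, using only the calls visible in this commit (the fixture name is illustrative):

#include <gtest/gtest.h>

#include "backend/gna_limitations.hpp"

// Sketch: a suite touching quantization re-initializes the singleton per
// test so a previously selected target cannot leak into the next test.
class GnaLimitsAwareTest : public ::testing::Test {  // illustrative name
protected:
    void SetUp() override {
        ov::intel_gna::limitations::Limitations::init(
            ov::intel_gna::target::DeviceVersion::Default);
    }
};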
 // TODO: add test for FC weights after quantization
-TEST_F(I8QuantisationTest, canQuantizeFCLayer){
+TEST_F(I8QuantisationTest, canQuantizeFCLayer) {
 
     auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32});
     fc->_out_num = 9;
-    auto weights = make_shared_blob<float>({ Precision::FP32, {1, 1}, Layout::NC });
+    auto weights = make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC});
     fc->_weights = weights;
-    fc->_biases = make_shared_blob<float>({ Precision::FP32, {1, 1}, Layout::NC });
+    fc->_biases = make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC});
     fc->_weights->allocate();
     fc->_biases->allocate();
-    std::shared_ptr<Data> outData = std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({ 1, 1 }), Layout::NC));
+    std::shared_ptr<Data> outData =
+        std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC));
     fc->outData.push_back(outData);
     fc->insData.push_back(outData);
 
     // actual quantisation algorithm is involved
-    for (auto && w : *weights) {
+    for (auto&& w : *weights) {
         w = MAX_OUT_MULTIPLIER * MAX_VAL_1B_WEIGHT;
     }
 
@ -73,17 +78,16 @@ TEST_F(I8QuantisationTest, canQuantizeFCLayer){
     ASSERT_NO_THROW(quantize(fc));
 }
 
-TEST_F(I8QuantisationTest, canQuantizeActivation){
-    auto sigmoid = std::make_shared<GenericLayer >(LayerParams{"name", "type", Precision::FP32});
+TEST_F(I8QuantisationTest, canQuantizeActivation) {
+    auto sigmoid = std::make_shared<GenericLayer>(LayerParams{"name", "type", Precision::FP32});
     sigmoid->params["value"] = 2;
     sigmoid->type = "Activation";
 
     ASSERT_NO_THROW(quantize(sigmoid));
 }
 
-TEST_F(I8QuantisationTest, inputPrecisionIs16Bits){
-    auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C });
+TEST_F(I8QuantisationTest, inputPrecisionIs16Bits) {
+    auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
     weights->allocate();
     fillWeights(weights);
 
@ -92,13 +96,15 @@ TEST_F(I8QuantisationTest, inputPrecisionIs16Bits){
 
     auto newNet = quantize_single_input_model(network, 1000);
     InputsDataMap inputs = newNet.getInputsInfo();
-    auto inputLayer = getCreatorLayer(getInputTo(inputs.begin()->second->getInputData()).begin()->second->insData.front().lock()).lock();
+    auto inputLayer =
+        getCreatorLayer(getInputTo(inputs.begin()->second->getInputData()).begin()->second->insData.front().lock())
+            .lock();
 
     ASSERT_EQ(inputLayer->precision, Precision::I16);
 }
 
-TEST_F(I8QuantisationTest, FCDimensionIs1){
-    auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C });
+TEST_F(I8QuantisationTest, FCDimensionIs1) {
+    auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
     weights->allocate();
     fillWeights(weights);
 
@ -108,8 +114,8 @@ TEST_F(I8QuantisationTest, FCDimensionIs1){
     ASSERT_NO_THROW(quantize_single_input_model(network, 1000));
 }
 
-TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits){
-    auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C });
+TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits) {
+    auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
     weights->allocate();
     fillWeights(weights);
 
@ -124,7 +130,7 @@ TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits){
 }
 
 TEST_F(I8QuantisationTest, fp16tofp32_on_fullyConnected_model) {
-    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {220}, Layout::C });
+    auto weights = make_shared_blob<uint8_t>({Precision::U8, {220}, Layout::C});
     weights->allocate();
     fillWeights(weights);
 
@ -135,7 +141,7 @@ TEST_F(I8QuantisationTest, fp16tofp32_on_fullyConnected_model) {
 }
 
 TEST_F(I8QuantisationTest, LSTMCell_quantize) {
-    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {33664}, C });
+    auto weights = make_shared_blob<uint8_t>({Precision::U8, {33664}, C});
     weights->allocate();
     fillWeights(weights);
 
@ -146,7 +152,7 @@ TEST_F(I8QuantisationTest, LSTMCell_quantize) {
 }
 
 TEST_F(I8QuantisationTest, LSTMCell_unaligned_quantize) {
-    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {3480}, C });
+    auto weights = make_shared_blob<uint8_t>({Precision::U8, {3480}, C});
     weights->allocate();
     fillWeights(weights);
 
@ -157,7 +163,7 @@ TEST_F(I8QuantisationTest, LSTMCell_unaligned_quantize) {
 }
 
 TEST_F(I8QuantisationTest, TI_quantize) {
-    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {249748}, C });
+    auto weights = make_shared_blob<uint8_t>({Precision::U8, {249748}, C});
     weights->allocate();
     fillWeights(weights);
 
@ -2,23 +2,27 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <vector>
 #include <gtest/gtest.h>
-#include <legacy/layer_transform.hpp>
-#include "backend/gna_types.hpp"
-#include "frontend/model_quantizer.hpp"
-#include "frontend/layer_quantizer.hpp"
-#include "gna_matcher.hpp"
 #include <ie_core.hpp>
+#include <legacy/layer_transform.hpp>
+#include <vector>
+
+#include "backend/gna_limitations.hpp"
+#include "backend/gna_types.hpp"
+#include "frontend/layer_quantizer.hpp"
+#include "frontend/model_quantizer.hpp"
+#include "gna_matcher.hpp"
 #include "ngraph_functions/builders.hpp"
 
 using namespace InferenceEngine;
+using namespace ov::intel_gna::limitations;
 using namespace ov::intel_gna::frontend;
 using namespace GNATestIRs;
 
 class I16QuantisationTest : public GNATest<> {
 protected:
-    InferenceEngine::CNNLayerPtr quantize (InferenceEngine::CNNLayerPtr lp) {
+    InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) {
         auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
         Config gna_config;
         gna_config.gnaPrecision = InferenceEngine::Precision::I16;
@ -28,7 +32,8 @@ class I16QuantisationTest : public GNATest<> {
         return newLayer;
     };
 
-    InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model, float scale_factor) const {
+    InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model,
+                                                            float scale_factor) const {
         auto scale_factors = std::vector<float>({scale_factor});
 
         GnaInputs inputs;
@ -43,21 +48,20 @@ class I16QuantisationTest : public GNATest<> {
 
         auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);
 
-        return ModelQuantizer(transformer).quantize(
-            model,
-            inputs);
+        return ModelQuantizer(transformer).quantize(model, inputs);
     }
 
     void SetUp() override {
+        Limitations::init(target::DeviceVersion::Default);
     }
-
 };
 
 template <class T>
 T setWeights(T blob) {
     blob->allocate();
-    // actual quantisation algorithm is involved - we need to provide weights that will be quantized with scale factor of 1
-    for (auto && w : *blob) {
+    // actual quantisation algorithm is involved - we need to provide weights that will be quantized with scale factor
+    // of 1
+    for (auto&& w : *blob) {
         w = MAX_VAL_2B_WEIGHT;
     }
     return blob;
@ -75,36 +79,34 @@ TBlob<uint8_t>::Ptr setWeights(TBlob<uint8_t>::Ptr blob) {
     return blob;
 }
 
-
 // TODO: add test for FC weights after quantization
-TEST_F(I16QuantisationTest, canQuantizeFCLayer){
+TEST_F(I16QuantisationTest, canQuantizeFCLayer) {
 
     auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32});
     fc->_out_num = 9;
-    fc->_weights = setWeights(make_shared_blob<float>({ Precision::FP32, {1, 1}, Layout::NC }));
+    fc->_weights = setWeights(make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC}));
     fillWeights(fc->_weights);
-    fc->_biases = make_shared_blob<float>({ Precision::FP32, {1, 1}, Layout::NC });
+    fc->_biases = make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC});
     fc->_biases->allocate();
     fillWeights(fc->_biases);
 
-    std::shared_ptr<Data> outData = std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC));
+    std::shared_ptr<Data> outData =
+        std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC));
     fc->outData.push_back(outData);
     fc->insData.push_back(outData);
 
     ASSERT_NO_THROW(quantize(fc));
 }
 
-TEST_F(I16QuantisationTest, canQuantizeActivation){
-    auto sigmoid = std::make_shared<GenericLayer >(LayerParams{"name", "type", Precision::FP32});
+TEST_F(I16QuantisationTest, canQuantizeActivation) {
+    auto sigmoid = std::make_shared<GenericLayer>(LayerParams{"name", "type", Precision::FP32});
     sigmoid->params["value"] = 2;
     sigmoid->type = "Activation";
 
     ASSERT_NO_THROW(quantize(sigmoid));
 }
 
-TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits){
-    auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C });
+TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits) {
+    auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
     weights->allocate();
     fillWeights(weights);
 
@ -118,10 +120,9 @@ TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits){
     ASSERT_EQ(affineDataPtr->getTensorDesc().getPrecision(), Precision::I32);
 }
 
-
 TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) {
-    auto weights = setWeights(make_shared_blob<uint8_t >({ Precision::U8, {440}, C }));
-    //std::fill_n(weights->buffer().as<float*>(), weights->byteSize()/sizeof(float), 0);
+    auto weights = setWeights(make_shared_blob<uint8_t>({Precision::U8, {440}, C}));
+    // std::fill_n(weights->buffer().as<float*>(), weights->byteSize()/sizeof(float), 0);
 
     Core ie;
     auto network = ie.ReadNetwork(affineToMemoryModel(), weights);
@ -129,13 +130,13 @@ TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) {
     ASSERT_NO_THROW(quantize_single_input_model(network, 1000));
 }
 
-TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){
+TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect) {
     const float inputScaleFactorTest = 1000;
     const float weightValueTest = 100;
 
-    auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C });
+    auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
     weights->allocate();
-    fillWeights(weights, { weightValueTest });
+    fillWeights(weights, {weightValueTest});
 
     Core ie;
     auto network = ie.ReadNetwork(Fc2DOutputModel(), weights);
@ -153,51 +154,70 @@ TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){
 TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion) {
     assert_that()
         .onInferModel(Fc2DOutputModel())
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_without().pwl_inserted_into_nnet();
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_without()
+        .pwl_inserted_into_nnet();
 }
 
 TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion_ProfilingEnabled) {
     assert_that()
         .onInferModel(Fc2DOutputModel())
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_without().pwl_inserted_into_nnet().profiling_counters();
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_without()
+        .pwl_inserted_into_nnet()
+        .profiling_counters();
 }
 
 TEST_F(I16QuantisationTest, OnlyAffineWithNanScaleFactorFails) {
-    gna()
-        .onInferModel(Fc2DOutputModel())
-        .withNanScaleFactor()
-        .propagate_forward().throws();
+    gna().onInferModel(Fc2DOutputModel()).withNanScaleFactor().propagate_forward().throws();
 }
 
 TEST_F(I16QuantisationTest, OnlyAffineWithInfScaleFactorFails) {
-    gna()
-        .onInferModel(Fc2DOutputModel())
-        .withInfScaleFactor()
-        .propagate_forward().throws();
+    gna().onInferModel(Fc2DOutputModel()).withInfScaleFactor().propagate_forward().throws();
 }
 
 TEST_F(I16QuantisationTest, AffineToMemoryWillResultInActivationInsertion) {
     assert_that()
         .onInferModel(affineToMemoryModel())
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().pwl_inserted_into_nnet();
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwl_inserted_into_nnet();
 }
 
 TEST_F(I16QuantisationTest, EltwiseToMemoryWithNoOutputActivationInsertion) {
-    assert_that().inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .onInferModel(eltwiseToMemoryModelNoOutput(), [](CNNNetwork & net){
+    assert_that()
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .onInferModel(eltwiseToMemoryModelNoOutput(),
+                      [](CNNNetwork& net) {
             net.addOutput("Eltwise_8");
-        }).gna().propagate_forward().called_with().pwl_inserted_into_nnet();
+                      })
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwl_inserted_into_nnet();
 }
 
 TEST_F(I16QuantisationTest, EltwiseToMemory_ActivationInsertion) {
-    assert_that().onInferModel(eltwiseToMemoryModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet();
+    assert_that()
+        .onInferModel(eltwiseToMemoryModel())
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .inNotCompactMode()
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwl_inserted_into_nnet();
 }
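Most of this hunk and the following ones is clang-format putting each matcher step on its own line. That is purely cosmetic because the test DSL is a fluent builder; a generic sketch of the shape (not the actual GnaMatcher internals):

#include <map>
#include <string>
#include <utility>

// Generic fluent-builder sketch: every step mutates state and returns
// *this, so splitting a chain across lines cannot change behavior.
class MatcherSketch {
public:
    MatcherSketch& onInferModel(std::string model) {
        model_ = std::move(model);
        return *this;
    }
    MatcherSketch& inNotCompactMode() {
        compact_ = false;
        return *this;
    }
    MatcherSketch& withGNAConfig(const std::string& key, float value) {
        config_[key] = value;
        return *this;
    }

private:
    std::string model_;
    bool compact_ = true;
    std::map<std::string, float> config_;
};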
 
 
 TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInsertion) {
     auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 20});
     const auto axis_node = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});
@ -205,41 +225,73 @@ TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInserti
     auto tanh = std::make_shared<ngraph::opset8::Tanh>(split->outputs()[0]);
     auto add = std::make_shared<ngraph::opset8::Add>(split->outputs()[1], tanh);
     auto result = std::make_shared<ngraph::opset8::Result>(add);
-    auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
-    assert_that().onInferNgraphModel(function)
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
+    auto function =
+        std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
+    assert_that()
+        .onInferNgraphModel(function)
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .diagonal_inserted_into_nnet();
 }
 
 TEST_F(I16QuantisationTest, SliceFollowedBy2FCsAnd2Eltwises_AlignedFilterInsertion) {
-    assert_that().onInferModel(twoFCWithPaddingAfterSliceModel())
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
+    assert_that()
+        .onInferModel(twoFCWithPaddingAfterSliceModel())
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .diagonal_inserted_into_nnet();
 }
 
 // ToDo requires implementation of aligning filter for concat inputs and improvement of
 // qunatization/scaling algorithm for concat
 TEST_F(I16QuantisationTest, DISABLED_DoubleConcatPropageteForwardWithSuccess_AlignedFilterInsertion) {
-    assert_that().onInferModel(doubleConcatModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
+    assert_that()
+        .onInferModel(doubleConcatModel())
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .inNotCompactMode()
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .diagonal_inserted_into_nnet();
 }
 
 TEST_F(I16QuantisationTest, EltwiseSumm_onlyOneIdentityInsertion) {
-    assert_that().onInferModel(eltwiseSummModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
+    assert_that()
+        .onInferModel(eltwiseSummModel())
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .inNotCompactMode()
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwl_inserted_into_nnet()
+        .once();
 }
 
 
 TEST_F(I16QuantisationTest, canDetectLeakyRelu) {
-    assert_that().onInferModel(TFLeakyReluModel())
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().pwl_inserted_into_nnet();
+    assert_that()
+        .onInferModel(TFLeakyReluModel())
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwl_inserted_into_nnet();
 }
 
 TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) {
-    assert_that().onInferModel(maxpoolAfterRelu())
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with()
+    assert_that()
+        .onInferModel(maxpoolAfterRelu())
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
         .convolution_inserted_into_nnet()
         .And()
         .pwl_inserted_into_nnet()
@ -248,28 +300,53 @@ TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) {
 }
 
 TEST_F(I16QuantisationTest, EltwiseMull_willInsertTwoIdentities) {
-    assert_that().onInferModel(eltwiseMulModel())
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice();
+    assert_that()
+        .onInferModel(eltwiseMulModel())
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwl_inserted_into_nnet()
+        .twice();
 }
 
 TEST_F(I16QuantisationTest, multiple_inputs_supported) {
     std::string configKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_");
-    assert_that().onInferModel(two_inputs_to_affine())
-        .inNotCompactMode().withGNAConfig(configKey + std::to_string(0), 1.0f)
-        .withGNAConfig(configKey + std::to_string(1), 2.0f).gna().propagate_forward()
-        .called_with().pwl_inserted_into_nnet().once();
+    assert_that()
+        .onInferModel(two_inputs_to_affine())
+        .inNotCompactMode()
+        .withGNAConfig(configKey + std::to_string(0), 1.0f)
+        .withGNAConfig(configKey + std::to_string(1), 2.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwl_inserted_into_nnet()
+        .once();
 }
 
 TEST_F(I16QuantisationTest, DISABLED_multiple_inputs_into_concat_supported) {
-    assert_that().onInferModel(two_inputs_to_concat())
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f).gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
+    assert_that()
+        .onInferModel(two_inputs_to_concat())
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwl_inserted_into_nnet()
+        .once();
 }
 
 TEST_F(I16QuantisationTest, ScaleShift_Affine_WillResultInIdentityInsertion) {
-    assert_that().onInferModel(scaleShiftAffineModel())
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
+    assert_that()
+        .onInferModel(scaleShiftAffineModel())
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwl_inserted_into_nnet()
+        .once();
 }
 
 TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) {
@ -277,10 +354,17 @@ TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) {
     auto clamp = std::make_shared<ngraph::opset8::Clamp>(input_params, -50, 50);
     auto tanh = std::make_shared<ngraph::opset8::Tanh>(clamp);
     auto result = std::make_shared<ngraph::opset8::Result>(tanh);
-    auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
-    assert_that().onInferNgraphModel(function)
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice();
+    auto function =
+        std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
+    assert_that()
+        .onInferNgraphModel(function)
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .diagonal_inserted_into_nnet()
+        .twice();
 }
 
 TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInTwoDiagonalsInsertion) {
@ -296,76 +380,127 @@ TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInTwoDiago
     auto result = std::make_shared<ngraph::opset8::Result>(add);
     mem_w->add_control_dependency(mem_r);
     result->add_control_dependency(mem_w);
-    auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
-    assert_that().onInferNgraphModel(function)
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice();
+    auto function =
+        std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
+    assert_that()
+        .onInferNgraphModel(function)
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .diagonal_inserted_into_nnet()
+        .twice();
 }
 
 TEST_F(I16QuantisationTest, AffineWith2AffineOutputs_ResultInOnlyOneIdentityInsertion) {
     // one Identity activation from first FC, and one Identity activation for eltwise
-    assert_that().onInferModel(AffineWith2AffineOutputsModel())
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice();
+    assert_that()
+        .onInferModel(AffineWith2AffineOutputsModel())
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwl_inserted_into_nnet()
+        .twice();
 }
 
 TEST_F(I16QuantisationTest, ScaleShiftWithBroadcast_ResultInDiagonalInsertion) {
-    auto & affineWeights = storage<std::vector<uint16_t>>();
+    auto& affineWeights = storage<std::vector<uint16_t>>();
 
     affineWeights = {
-        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
-        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
-        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
-        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
-        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288,
+        14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192,
+        10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
     };
 
-    assert_that().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f).onInferModel(ScaleShift3DModel())
-        .withWeigthsPattern({1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f})
-        .inNotCompactMode().gna().propagate_forward().called_with().called_with().affine_weights_eq(affineWeights);
+    assert_that()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .onInferModel(ScaleShift3DModel())
+        .withWeigthsPattern({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f})
+        .inNotCompactMode()
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .called_with()
+        .affine_weights_eq(affineWeights);
 }
 
 TEST_F(I16QuantisationTest, MemoryAfterConcat_ResultInCopyInsertion) {
-    assert_that().onInferModel(MemoryAfterConcatModel()).inNotCompactMode().gna().propagate_forward().
-        called_with().copy_inserted_into_nnet();
+    assert_that()
+        .onInferModel(MemoryAfterConcatModel())
+        .inNotCompactMode()
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .copy_inserted_into_nnet();
 }
 
 TEST_F(I16QuantisationTest, MemoryAndConcatAfterOneNode_ResultInCopyInsertion) {
-    assert_that().onInferModel(MemoryAndConcatAfterOneNode()).inNotCompactMode().gna().propagate_forward().
-        called_with().copy_inserted_into_nnet();
+    assert_that()
+        .onInferModel(MemoryAndConcatAfterOneNode())
+        .inNotCompactMode()
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .copy_inserted_into_nnet();
 }
 
 TEST_F(I16QuantisationTest, DISABLED_permutationOfWeightsBetweenConvAndAffine) {
-    auto & affineWeights = storage<std::vector<uint16_t>>();
+    auto& affineWeights = storage<std::vector<uint16_t>>();
 
     // least likely that width and height both are multiple of 7
     auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
 
     // here weights are transpozed
-    save().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern)
-        .inNotCompactMode().from().propagate_forward().affine_weights_transpozed({128, 61}).to(affineWeights);
+    save()
+        .onInferModel(affineAfterConvNoPermute())
+        .withWeigthsPattern(weigthsPattern)
+        .inNotCompactMode()
+        .from()
+        .propagate_forward()
+        .affine_weights_transpozed({128, 61})
+        .to(affineWeights);
 
     // here weights shouldn't be transposed
-    assert_that().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern)
-        .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_eq(affineWeights);
+    assert_that()
+        .onInferModel(affineAfterConvWithPermute())
+        .withWeigthsPattern(weigthsPattern)
+        .inNotCompactMode()
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .affine_weights_eq(affineWeights);
 }
 
 TEST_F(I16QuantisationTest, DISABLED_noPermutationOfWeightsBetweenConvAndAffineIfPermuteLayerWithCorrectArgs) {
-    auto & affineWeights = storage<std::vector<uint16_t>>();
+    auto& affineWeights = storage<std::vector<uint16_t>>();
 
     // least likely that width and height both are multiple of 7
     auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
 
-    save().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern)
-        .inNotCompactMode().from().propagate_forward().affine_weights().to(affineWeights);
+    save()
+        .onInferModel(affineAfterConvWithPermute())
+        .withWeigthsPattern(weigthsPattern)
+        .inNotCompactMode()
+        .from()
+        .propagate_forward()
+        .affine_weights()
+        .to(affineWeights);
 
-    assert_that().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern)
-        .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_transposed(affineWeights, {128, 61});
+    assert_that()
+        .onInferModel(affineAfterConvNoPermute())
+        .withWeigthsPattern(weigthsPattern)
+        .inNotCompactMode()
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .affine_weights_transposed(affineWeights, {128, 61});
 }
 
 TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) {
-    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {220}, Layout::C });
+    auto weights = make_shared_blob<uint8_t>({Precision::U8, {220}, Layout::C});
     weights->allocate();
     fillWeights(weights);
 
@ -375,7 +510,8 @@ TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) {
     quantize_single_input_model(network, 1000);
 }
 
-TEST_F(I16QuantisationTest, MultipleActivationsAfterAffineWithIdentityActivation_MultipleDiagonalLayersWithActivaitons) {
+TEST_F(I16QuantisationTest,
+       MultipleActivationsAfterAffineWithIdentityActivation_MultipleDiagonalLayersWithActivaitons) {
     auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 10});
     const auto constant = ngraph::opset8::Constant::create(ngraph::element::f32, ngraph::Shape{10, 10}, {1});
     auto matmul1 = std::make_shared<ngraph::opset8::MatMul>(input_params, constant);
@ -386,11 +522,17 @@ TEST_F(I16QuantisationTest, MultipleActivationsAfterAffineWithIdentityActivation
     auto mul = std::make_shared<ngraph::opset8::Multiply>(sigmoid, relu);
     auto add2 = std::make_shared<ngraph::opset8::Add>(add, mul);
     auto result = std::make_shared<ngraph::opset8::Result>(add);
-    auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
+    auto function =
+        std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
     // identiy came from automatic insertion due to
-    assert_that().onInferNgraphModel(function)
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActSigmoid, kActRelu, kActIdentity, kActIdentity});
+    assert_that()
+        .onInferNgraphModel(function)
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwls_inserted_into_nnet({kActSigmoid, kActRelu, kActIdentity, kActIdentity});
 }
 
 TEST_F(I16QuantisationTest, MultipleActivationsAfterAffine_ResultInMultipleDiagonalLayersWithActivaitons) {
@ -401,24 +543,36 @@ TEST_F(I16QuantisationTest, MultipleActivationsAfterAffine_ResultInMultipleDiago
     auto relu = std::make_shared<ngraph::opset8::Relu>(matmul);
     auto mul = std::make_shared<ngraph::opset8::Multiply>(sigmoid, relu);
     auto result = std::make_shared<ngraph::opset8::Result>(mul);
-    auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
+    auto function =
+        std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
     // extra identity inserted for affine
-    assert_that().onInferNgraphModel(function)
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with()
+    assert_that()
+        .onInferNgraphModel(function)
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
         // 1 diag for second activation, 1 for eltwise
-        .pwls_inserted_into_nnet({kActRelu, kActSigmoid}).diagonal_inserted_into_nnet().times(3);
+        .pwls_inserted_into_nnet({kActRelu, kActSigmoid})
+        .diagonal_inserted_into_nnet()
+        .times(3);
 }
 
 // TODO: build a regression test on top of it using real quantisation accuracy checking
 TEST_F(I16QuantisationTest, ConcatWithConstInputPropagatedForward) {
-    assert_that().onInferModel(concatModelWithConstLayer())
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity});
+    assert_that()
+        .onInferModel(concatModelWithConstLayer())
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwls_inserted_into_nnet({kActIdentity});
 }
 
 TEST_F(I16QuantisationTest, LSTMCell_quantize) {
-    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {33664}, C });
+    auto weights = make_shared_blob<uint8_t>({Precision::U8, {33664}, C});
     weights->allocate();
     fillWeights(weights);
 
@ -429,7 +583,7 @@ TEST_F(I16QuantisationTest, LSTMCell_quantize) {
 }
 
 TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) {
-    auto weights = make_shared_blob<uint8_t>({ Precision::U8, {3480}, C });
+    auto weights = make_shared_blob<uint8_t>({Precision::U8, {3480}, C});
     weights->allocate();
     fillWeights(weights);
 
@ -440,15 +594,27 @@ TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) {
 }
 
 TEST_F(I16QuantisationTest, EltwisetWithConstInputPropagatedForward) {
-    assert_that().onInferModel(eltwiseSumModelWithConstLayer())
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
+    assert_that()
+        .onInferModel(eltwiseSumModelWithConstLayer())
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .diagonal_inserted_into_nnet();
 }
 
 TEST_F(I16QuantisationTest, PowerWithScaleFactorPropagateForward) {
-    assert_that().onInferModel(PowerWithScaleFactor1())
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity}).And().diagonal_inserted_into_nnet();
+    assert_that()
+        .onInferModel(PowerWithScaleFactor1())
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwls_inserted_into_nnet({kActIdentity})
+        .And()
+        .diagonal_inserted_into_nnet();
 }
 
 TEST_F(I16QuantisationTest, ConcatWithDifferentInputScaleFactorsPropagateForward) {
@ -459,14 +625,20 @@ TEST_F(I16QuantisationTest, ConcatWithDifferentInputScaleFactorsPropagateForward
     auto tanh = std::make_shared<ngraph::opset8::Tanh>(split->outputs()[1]);
     auto concat = std::make_shared<ngraph::opset8::Concat>(ngraph::OutputVector{sigmoid, tanh}, 1);
     auto result = std::make_shared<ngraph::opset8::Result>(concat);
-    auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
-    assert_that().onInferNgraphModel(function)
-        .inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
-        .gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity});
+    auto function =
+        std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
+    assert_that()
+        .onInferNgraphModel(function)
+        .inNotCompactMode()
+        .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwls_inserted_into_nnet({kActIdentity});
 }
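The remaining hunks reflow two TensorIterator-based tests. For orientation, a minimal sketch of the wiring such a test performs, using the standard ngraph sub-graph API (the helper, its slicing parameters, and the single carried state are illustrative simplifications of the test below):

#include <memory>

#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset8.hpp>

// Sketch: wrap a body function into a TensorIterator that slices input
// `data` along axis 1 and carries `h_param` between iterations.
std::shared_ptr<ngraph::opset8::TensorIterator> wrap_in_iterator(
        const std::shared_ptr<ngraph::Function>& body,
        const std::shared_ptr<ngraph::opset8::Parameter>& x_param,
        const std::shared_ptr<ngraph::opset8::Parameter>& h_param,
        const ngraph::Output<ngraph::Node>& data,
        const ngraph::Output<ngraph::Node>& h_init,
        const ngraph::Output<ngraph::Node>& h_out) {
    auto ti = std::make_shared<ngraph::opset8::TensorIterator>();
    ti->set_body(body);
    // Per-iteration slice of the input: start 0, stride 1, one step, axis 1.
    ti->set_sliced_input(x_param, data, 0, 1, 1, -1, 1);
    // Recurrent state: initialized once, then fed back each iteration.
    ti->set_merged_input(h_param, h_init, h_out);
    return ti;  // callers read ti->get_iter_value(h_out, -1), as the test does
}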
||||||
|
|
||||||
TEST_F(I16QuantisationTest, TI_quantize) {
|
TEST_F(I16QuantisationTest, TI_quantize) {
|
||||||
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {249748}, C });
|
auto weights = make_shared_blob<uint8_t>({Precision::U8, {249748}, C});
|
||||||
weights->allocate();
|
weights->allocate();
|
||||||
fillWeights(weights);
|
fillWeights(weights);
|
||||||
|
|
||||||
@@ -477,40 +649,52 @@ TEST_F(I16QuantisationTest, TI_quantize) {
 }
 
 TEST_F(I16QuantisationTest, TI_PropagateForward) {
-    auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 10 });
-    auto mul = std::make_shared<ngraph::opset8::Multiply>(input_params,
-        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{ 1, 10 }));
-    auto add = std::make_shared<ngraph::opset8::Add>(mul,
-        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{ 1, 10 }));
-    auto reshape = std::make_shared<ngraph::opset8::Reshape>(add,
-        std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{ 3 }, std::vector<size_t>{ 1, 1, 10 }), false);
+    auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 10});
+    auto mul = std::make_shared<ngraph::opset8::Multiply>(
+        input_params,
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1, 10}));
+    auto add = std::make_shared<ngraph::opset8::Add>(
+        mul,
+        std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1, 10}));
+    auto reshape = std::make_shared<ngraph::opset8::Reshape>(
+        add,
+        std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{3}, std::vector<size_t>{1, 1, 10}),
+        false);
 
     auto reshape_shape = reshape->output(0).get_shape();
     const size_t batch_size = 1;
     const size_t hiddenSize = 10;
 
-    auto H_init = ngraph::builder::makeConstant<float>(ngraph::element::f32, { batch_size, hiddenSize }, {}, true);
-    auto C_init = ngraph::builder::makeConstant<float>(ngraph::element::f32, { batch_size, hiddenSize }, {}, true);
+    auto H_init = ngraph::builder::makeConstant<float>(ngraph::element::f32, {batch_size, hiddenSize}, {}, true);
+    auto C_init = ngraph::builder::makeConstant<float>(ngraph::element::f32, {batch_size, hiddenSize}, {}, true);
 
-    auto H_t = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{ batch_size, hiddenSize });
-    auto C_t = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{ batch_size, hiddenSize });
+    auto H_t = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{batch_size, hiddenSize});
+    auto C_t = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{batch_size, hiddenSize});
 
-    //Body
-    auto X = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{ batch_size, 1, reshape_shape[2] });
-    auto weightsNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, { 4 * hiddenSize, reshape_shape[2] }, {}, true);
-    auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, { 4 * hiddenSize, hiddenSize }, {}, true);
+    // Body
+    auto X = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32,
+                                                         ngraph::Shape{batch_size, 1, reshape_shape[2]});
+    auto weightsNode =
+        ngraph::builder::makeConstant<float>(ngraph::element::f32, {4 * hiddenSize, reshape_shape[2]}, {}, true);
+    auto reccurrenceWeightsNode =
+        ngraph::builder::makeConstant<float>(ngraph::element::f32, {4 * hiddenSize, hiddenSize}, {}, true);
 
     // lstm
-    auto constantX = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{ 2 }, { batch_size, reshape_shape[2] });
-    auto lstm1 = std::make_shared<ngraph::opset8::LSTMCell>(std::make_shared<ngraph::opset8::Reshape>(X, constantX, false),
-        H_t, C_t,
-        weightsNode, reccurrenceWeightsNode, hiddenSize);
+    auto constantX =
+        ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {batch_size, reshape_shape[2]});
+    auto lstm1 =
+        std::make_shared<ngraph::opset8::LSTMCell>(std::make_shared<ngraph::opset8::Reshape>(X, constantX, false),
+                                                   H_t,
+                                                   C_t,
+                                                   weightsNode,
+                                                   reccurrenceWeightsNode,
+                                                   hiddenSize);
 
     auto H_o = lstm1->output(0);
     auto C_o = lstm1->output(1);
 
-    auto body = std::make_shared<ngraph::Function>(
-        ngraph::OutputVector{ H_o, C_o }, ngraph::ParameterVector{ X, H_t, C_t });
+    auto body =
+        std::make_shared<ngraph::Function>(ngraph::OutputVector{H_o, C_o}, ngraph::ParameterVector{X, H_t, C_t});
 
     auto tensor_iterator = std::make_shared<ngraph::opset8::TensorIterator>();
     tensor_iterator->set_body(body);
@@ -522,16 +706,29 @@ TEST_F(I16QuantisationTest, TI_PropagateForward) {
     auto out0 = tensor_iterator->get_iter_value(H_o, -1);
 
     const size_t output_size = 12;
-    auto fc = ngraph::builder::makeFullyConnected(out0, ngraph::element::f32, output_size, true, { hiddenSize, output_size }, { 1 }, { 1 });
+    auto fc = ngraph::builder::makeFullyConnected(out0,
+                                                  ngraph::element::f32,
+                                                  output_size,
+                                                  true,
+                                                  {hiddenSize, output_size},
+                                                  {1},
+                                                  {1});
     auto result = std::make_shared<ngraph::opset8::Result>(fc);
-    auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
-    assert_that().onInferNgraphModel(function).withWeigthsPattern({0.1f})
-        .inNotCompactMode().gna().propagate_forward()
-        .called_with().pwls_inserted_into_nnet({kActIdentity});
+    auto function =
+        std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
+    assert_that()
+        .onInferNgraphModel(function)
+        .withWeigthsPattern({0.1f})
+        .inNotCompactMode()
+        .gna()
+        .propagate_forward()
+        .called_with()
+        .pwls_inserted_into_nnet({kActIdentity});
 }
 
 TEST_F(I16QuantisationTest, SplitToConcatWith2Inputs1360NotAlignedNoFC) {
-    assert_that().onInferModel(SplitToConcatWith2Inputs1360NotAlignedNoFC())
+    assert_that()
+        .onInferModel(SplitToConcatWith2Inputs1360NotAlignedNoFC())
         .inNotCompactMode()
         .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
         .gna()
@@ -282,10 +282,12 @@ struct Validatecnn2dParams {
 class GNAcnn2dValidatorTest : public ::testing::TestWithParam<GNACnn2DValidatorTestParam> {
 protected:
     void SetUp() override {
-        validator = cnn2d::AbstractValidator::Create(GetParam().target);
-        ASSERT_TRUE(validator != nullptr);
+        Limitations::init(GetParam().target);
+        validator = Limitations::get_instance()->get_cnn_validator();
+        ASSERT_TRUE(validator);
     }
-    std::unique_ptr<cnn2d::AbstractValidator> validator;
+
+    std::shared_ptr<cnn2d::AbstractValidator> validator;
 };
 
 class GNAcnn2dValidatorTestPadding : public GNAcnn2dValidatorTest {
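The hunk above is the template for every consumer of the refactored API: the validator is no longer built per call site via cnn2d::AbstractValidator::Create(), it is owned by the Limitations singleton. A minimal sketch of the new access pattern, using only members visible in this diff (Limitations::init, get_instance, get_cnn_validator); treat it as an illustration, not as code from the PR:

    #include "backend/gna_limitations.hpp"

    using namespace ov::intel_gna::limitations;
    using namespace ov::intel_gna::target;

    void query_cnn_validator() {
        // Bind the singleton to a concrete HW generation first...
        Limitations::init(DeviceVersion::GNA3_5);
        // ...then every consumer shares the same validator instance.
        auto validator = Limitations::get_instance()->get_cnn_validator();
        if (!validator) {
            // No 2D-CNN validator exists for the selected target.
        }
    }
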
@@ -18,7 +18,7 @@ class GNAPluginForNetworkMetricsTest : public GNAPlugin {
 public:
     GNAPluginForNetworkMetricsTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
         gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
-        graphCompiler.setGNAMemoryPtr(gnamem);
+        m_graph_compiler->setGNAMemoryPtr(gnamem);
         gnadevice.reset();
     }
 };
@@ -81,11 +81,11 @@ class GNAPluginForPWLExtraSegmentsTest : public GNAPlugin {
 public:
     GNAPluginForPWLExtraSegmentsTest(const std::map<std::string, std::string>& config) : GNAPlugin(config) {
         gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
-        graphCompiler.setGNAMemoryPtr(gnamem);
+        m_graph_compiler->setGNAMemoryPtr(gnamem);
         gnadevice.reset();
     }
     void Test(const size_t expected_segments) {
-        for (const auto& component : graphCompiler.dnnComponents.components) {
+        for (const auto& component : m_graph_compiler->dnnComponents.components) {
             if (component.dnnComponent.operation == kDnnPiecewiselinearOp) {
                 EXPECT_EQ(expected_segments, component.dnnComponent.op.pwl.num_segments);
             }
@@ -58,7 +58,7 @@ TEST(CheckSplitSupported, CheckVariadicSplitSupported) {
                                              ngraph::opset9::Constant::create(ngraph::element::i64,
                                                                               ngraph::Shape({split_lengths.size()}),
                                                                               split_lengths));
-        ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result);
+        ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result);
     }
 }
 
@@ -86,7 +86,7 @@ TEST(CheckSplitSupported, CheckSplitSupported) {
             std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape),
             ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}),
             num_splits);
-        ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result);
+        ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result);
     }
 }
 }  // namespace
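For reference, is_split_supported() is now a static member of Limitations rather than a free function in ov::intel_gna::limitations. A hedged call-site sketch built from the same opset9 helpers these tests use; the shapes are made up, and reading the second argument as an "is exception allowed" flag (false = just report, do not throw) is an assumption based on how the tests call it:

    #include <ngraph/opsets/opset9.hpp>

    #include "backend/gna_limitations.hpp"

    bool split_supported_example() {
        auto param = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, ngraph::Shape{1, 64});
        auto axis = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {1});
        auto split = std::make_shared<ngraph::opset9::Split>(param, axis, 2);
        // false: report support only, assumed to suppress exception throwing.
        return ov::intel_gna::limitations::Limitations::is_split_supported(split, false);
    }
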
@@ -17,7 +17,7 @@ class GNAPluginForPrecisionTest : public GNAPlugin {
 public:
     GNAPluginForPrecisionTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
         gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
-        graphCompiler.setGNAMemoryPtr(gnamem);
+        m_graph_compiler->setGNAMemoryPtr(gnamem);
         gnadevice.reset();
     }
     std::vector<intel_dnn_component_t> get_components() {
@@ -45,9 +45,9 @@ public:
     GNAPluginForMemoryAlignmentTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
         if (gnadevice) {
             gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{},
-                                              gnadevice->getMemAlignment(),
-                                              limitations::kMemoryPageSize));
-            graphCompiler.setGNAMemoryPtr(gnamem);
+                                              Limitations::get_instance()->get_memory_alignment(),
+                                              Limitations::kMemoryPageSize));
+            m_graph_compiler->setGNAMemoryPtr(gnamem);
             gnadevice.reset();
         }
     }
@@ -149,16 +149,14 @@ INSTANTIATE_TEST_SUITE_P(MemoryAlignment_GNA_4_0,
 
 class MemoryAlignmentTest : public ::testing::Test {};
 
-TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_ExpectExceptionWhenTargetIsUnset) {
-    EXPECT_ANY_THROW(getMemoryAlignmentBytes(DeviceVersion::NotSet));
-}
-
-TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect64ByteAlignmentWhenTargetIsGNA3_0) {
-    EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_0), 64);
+TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect64ByteAlignmentWhenTargetIsGNA3_5) {
+    Limitations::init(DeviceVersion::GNA3_5);
+    EXPECT_EQ(Limitations::get_instance()->get_memory_alignment(), 64);
 }
 
 TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect16ByteAlignmentWhenTargetIsGNA3_6) {
-    EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_6), 16);
+    Limitations::init(DeviceVersion::GNA3_6);
+    EXPECT_EQ(Limitations::get_instance()->get_memory_alignment(), 16);
 }
 
 }  // namespace testing
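Same pattern for the alignment constant: the free function getMemoryAlignmentBytes(DeviceVersion) is gone, and the value is read from the initialized singleton instead. A short sketch of what these two tests exercise; the 64/16 values are exactly the ones asserted above, and the ability to re-initialize for a second target within one process is what running both tests back to back relies on:

    #include "backend/gna_limitations.hpp"

    using namespace ov::intel_gna::limitations;
    using namespace ov::intel_gna::target;

    size_t alignment_for(DeviceVersion target) {
        Limitations::init(target);  // re-binds the singleton to the new target
        return Limitations::get_instance()->get_memory_alignment();  // 64 for GNA3_5, 16 for GNA3_6
    }
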
@@ -297,7 +297,7 @@ public:
     GNAPluginTested() : GNAPlugin() {
         gnamem_t = std::make_shared<GNAMemoryTested>();
         gnamem = gnamem_t;
-        graphCompiler.setGNAMemoryPtr(gnamem);
+        m_graph_compiler->setGNAMemoryPtr(gnamem);
         gnadevice.reset();
     }
     void Test() {
@@ -15,6 +15,7 @@
 #include "common_test_utils/ngraph_test_utils.hpp"
 #include "transformations/decompose_2d_convolution.hpp"
 
+using namespace ov::intel_gna::limitations;
 namespace testing {
 
 namespace {
@@ -312,6 +313,8 @@ void Decompose2DConvTestInvalidFixture::SetUp() {
     std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) =
         params;
 
+    Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
+
     function = get_initial_function(fq,
                                     model,
                                     input_shape,
@@ -342,6 +345,7 @@ class Decompose2DConvTestFixture : public CommonTestUtils::TestsCommon,
                                   public ::testing::WithParamInterface<fqDecompose2DConvParams> {
 public:
     void SetUp() override;
 
     std::shared_ptr<ngraph::Function> get_reference(const bool& fq,
                                                     const modelType& model,
                                                     const ngraph::PartialShape& input_shape,
@@ -365,6 +369,8 @@ void Decompose2DConvTestFixture::SetUp() {
     std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) =
         params;
 
+    Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
+
     function = get_initial_function(fq,
                                     model,
                                     input_shape,
@@ -779,7 +785,7 @@ static size_t CalculateConvCount(const ConvParams& conv_params) {
     size_t conv_count = 1;
     size_t total_factorized_conv_channel_count =
         (conv_params.input_channel_count * conv_params.filter_height * conv_params.filter_width);
-    while (total_factorized_conv_channel_count / conv_count > ov::intel_gna::limitations::convFilterMaxSize ||
+    while (total_factorized_conv_channel_count / conv_count > Limitations::kConvFilterMaxSize ||
            total_factorized_conv_channel_count % conv_count != 0 || conv_params.filter_channel_count % conv_count != 0)
         conv_count++;
 
@@ -792,7 +798,7 @@ static bool ShouldDecompose(GraphData& graph_data, const ConvParams& conv_params
 
     // Concat (copy) layer limitation allows to split up to a certain limit
     // Currently we are able to split only convolutions without pooling in horizontal dimension
-    if (graph_data.conv_count > ov::intel_gna::limitations::copyMaxGrouping ||
+    if (graph_data.conv_count > Limitations::kCopyMaxGrouping ||
         ((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1))
         return false;
 
@@ -884,18 +890,13 @@ void execute_test(modelType model,
     case modelType::TranspConvBcastAddMaxPoolTransp:
     case modelType::TranspConvBcastAddActTransp:
     case modelType::TranspConvBcastAddMaxPoolActTransp:
-        manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(ov::intel_gna::target::DeviceVersion::Default,
-                                                                    gnaPrecision);
+        manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(gnaPrecision);
         break;
     case modelType::TranspConvTranspBcastAdd:
-        manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(
-            ov::intel_gna::target::DeviceVersion::Default,
-            gnaPrecision);
+        manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(gnaPrecision);
         break;
     case modelType::TranspConvTranspBcastAddAct:
-        manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(
-            ov::intel_gna::target::DeviceVersion::Default,
-            gnaPrecision);
+        manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(gnaPrecision);
         break;
     }
 
@@ -13,6 +13,8 @@
 #include "transformations/decompose_mvn.hpp"
 #include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp"
 
+using namespace ov::intel_gna::limitations;
+
 namespace decomposeMVN {
 
 typedef std::tuple<ngraph::Shape,  // Input shape
@@ -264,7 +266,7 @@ std::shared_ptr<ngraph::Function> getReferenceFunction(const ngraph::Shape& inpu
     mvn_data.normalize_variance = normalize_variance;
     mvn_data.num_parts = 1;
 
-    while (mvn_data.W / mvn_data.num_parts > ov::intel_gna::limitations::convFilterMaxSize) {
+    while (mvn_data.W / mvn_data.num_parts > Limitations::kConvFilterMaxSize) {
         mvn_data.num_parts *= 2;
     }
 
@@ -11,6 +11,7 @@
 #include <transformations/init_node_info.hpp>
 #include <transformations/utils/utils.hpp>
 
+#include "backend/gna_limitations.hpp"
 #include "common_test_utils/ngraph_test_utils.hpp"
 #include "ngraph_functions/builders.hpp"
 #include "ops/copy.hpp"
@@ -54,10 +55,10 @@ void InsertCopyLayerTest::Validate() {
 
 void InsertCopyLayerTest::SetUp() {
     std::tie(m_axis, m_inputs_num) = this->GetParam();
+    ov::intel_gna::limitations::Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
 }
 
 void InsertCopyLayerTest::Run() {
-    SetUp();
     Validate();
 }
 
@@ -176,6 +177,11 @@ public:
     }
 };
 
+void RunPasses(ngraph::pass::Manager& m, std::shared_ptr<ov::Model> func) {
+    ov::intel_gna::limitations::Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
+    m.run_passes(func);
+}
+
 // [Parameter]     [Parameter]
 //      \    /  =>     |
 //     [Concat]      [Copy]
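The RunPasses() helper added above becomes the single entry point for every transformation test in this file: it initializes the Limitations singleton for the default target before running the pass pipeline, presumably because the copy-insertion passes now query the singleton. Every m.run_passes(func) call below is rewritten accordingly; a sketch of the resulting call-site shape (the insert_copy_layer.hpp header name is an assumption, the rest mirrors the tests):

    #include <ngraph/pass/manager.hpp>
    #include <transformations/init_node_info.hpp>

    #include "transformations/insert_copy_layer.hpp"  // assumed header for the GNA copy passes

    void run_insert_copy_pipeline(std::shared_ptr<ov::Model> func) {
        ngraph::pass::Manager m;
        m.register_pass<ov::pass::InitNodeInfo>();
        m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
        RunPasses(m, func);  // Limitations::init(...) runs here, before run_passes()
    }
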
@@ -211,7 +217,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -263,7 +269,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -324,7 +330,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMultiNFLConcatTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -382,7 +388,7 @@ TEST(TransformationTests, InsertCopyLayerMultiConstConcatTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -442,7 +448,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -510,7 +516,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerNFLConcatTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -573,7 +579,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMemoryTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -633,7 +639,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatMemoryTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -705,7 +711,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatMemoryTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -776,7 +782,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatMemoryTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -851,7 +857,7 @@ TEST(TransformationTests, InsertCopyLayerCropMemoryTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -918,7 +924,7 @@ TEST(TransformationTests, InsertCopyLayerCropNFLMemoryTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -987,7 +993,7 @@ TEST(TransformationTests, InsertCopyLayerConcatMemoryTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -1060,7 +1066,7 @@ TEST(TransformationTests, InsertCopyLayerConcatNFLMemoryTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -1122,7 +1128,7 @@ TEST(TransformationTests, InsertCopyLayerSplitMemoryTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -1189,7 +1195,7 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLMemoryTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -1244,7 +1250,7 @@ TEST(TransformationTests, InsertCopyLayerCropConcatTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -1289,7 +1295,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -1338,7 +1344,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoSubgraphsTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -1385,7 +1391,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoResultsTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -1442,7 +1448,7 @@ TEST(TransformationTests, InsertCopyLayerNFLBranchTest) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -1499,7 +1505,7 @@ TEST(TransformationTests, InsertCopyLayerNFLvsFLSubgraphTestt) {
     ngraph::pass::Manager m;
     m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 
@@ -1550,7 +1556,7 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLConcatTest) {
     ngraph::pass::Manager m;
    m.register_pass<ov::pass::InitNodeInfo>();
     m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
-    m.run_passes(func);
+    RunPasses(m, func);
 
     ASSERT_NO_THROW(check_rt_info(func));
 