Merge branch 'transpose_sinking_fakequantize' into gna_layout_debug

This commit is contained in:
Evgeny Kotov 2023-03-21 16:16:56 +01:00
commit 4481041f86
8 changed files with 103 additions and 36 deletions

View File

@ -354,5 +354,6 @@ def test_flush_fp32_subnormals_to_zero():
apply_moc_transformations(model, cf=False, smart_reshape=True) # apply_flush_fp32_subnormals_to_zero is called inside
assert np.all(weights.data[4:8] != subnorm_val)
assert np.all(weights.data[4:8] == 0.0)
new_weights = add_node.input_value(1).get_node()
assert np.all(new_weights.data[4:8] != subnorm_val)
assert np.all(new_weights.data[4:8] == 0.0)

View File

@ -36,14 +36,28 @@ ov::pass::FlushFP32SubnormalsToZero::FlushFP32SubnormalsToZero() {
bool has_subnormals = false;
for (size_t i = 0; i < size; ++i) {
if (fpclassify(std::abs(data[i])) == FP_SUBNORMAL) {
data[i] = 0.0f;
has_subnormals = true;
break;
}
}
if (has_subnormals)
return true;
if (!has_subnormals)
return false;
return false;
auto new_constant = std::make_shared<ov::opset8::Constant>(ov::element::f32, node->get_shape());
auto* dst_data = const_cast<float*>(new_constant->get_data_ptr<float>());
for (size_t i = 0; i < size; ++i) {
if (fpclassify(std::abs(data[i])) != FP_SUBNORMAL)
dst_data[i] = data[i];
else
dst_data[i] = 0.0f;
}
new_constant->set_friendly_name(node->get_friendly_name());
ov::copy_runtime_info(node, new_constant);
ov::replace_node(node, new_constant);
return true;
};
auto m = make_shared<pattern::Matcher>(node_pattern, matcher_name);

View File

@ -56,14 +56,15 @@ TSUnaryForward::TSUnaryForward() {
MATCHER_SCOPE(TSUnaryForward);
auto transpose_label = wrap_type<Transpose>({any_input(), any_input()});
auto unary_label =
wrap_type<UnaryElementwiseArithmetic, Clamp, Elu, SoftPlus, LogicalNot, Convert, IsInf, IsNaN, IsFinite>(
{transpose_label});
auto fq_label = wrap_type<FakeQuantize>({transpose_label, any_input(), any_input(), any_input(), any_input()});
auto unary_op_label =
wrap_type<UnaryElementwiseArithmetic, Clamp, Elu, SoftPlus, LogicalNot, Convert, IsInf, IsNaN, IsFinite>({transpose_label});
auto unary_label = std::make_shared<pattern::op::Or>(OutputVector{fq_label, unary_op_label});
ov::matcher_pass_callback matcher_pass_callback = [=](Matcher& m) {
const auto& pattern_to_output = m.get_pattern_value_map();
auto transpose = pattern_to_output.at(transpose_label).get_node_shared_ptr();
auto unary = pattern_to_output.at(unary_label).get_node_shared_ptr();
auto unary = GetPatternNode(pattern_to_output, NodeVector{unary_op_label, fq_label});
const NodePair new_nodes = SwapNodes(transpose, unary);
@ -74,7 +75,7 @@ TSUnaryForward::TSUnaryForward() {
return true;
};
auto m = std::make_shared<Matcher>(unary_label, "ov::pass::TSUnaryForward");
auto m = std::make_shared<Matcher>(unary_label, matcher_name);
register_matcher(m, matcher_pass_callback);
}
@ -91,10 +92,12 @@ TSUnaryBackward::TSUnaryBackward() {
return HasSameOutputTransposeNodes(output);
};
auto unary_label =
wrap_type<UnaryElementwiseArithmetic, Clamp, Elu, SoftPlus, LogicalNot, Convert, IsInf, IsNaN, IsFinite>(
{any_input()},
unary_restrictions);
auto fq_label =
wrap_type<FakeQuantize>({any_input(), any_input(), any_input(), any_input(), any_input()}, unary_restrictions);
auto unary_op_label =
wrap_type<UnaryElementwiseArithmetic, Clamp, Elu, SoftPlus, LogicalNot, Convert, IsInf, IsNaN, IsFinite>({any_input()},
unary_restrictions);
auto unary_label = std::make_shared<pattern::op::Or>(OutputVector{fq_label, unary_op_label});
auto transpose_const_label = wrap_type<Constant>();
@ -104,9 +107,11 @@ TSUnaryBackward::TSUnaryBackward() {
const auto& pattern_to_output = m.get_pattern_value_map();
auto transpose_const = as_type_ptr<Constant>(pattern_to_output.at(transpose_const_label).get_node_shared_ptr());
auto transpose = pattern_to_output.at(transpose_label).get_node_shared_ptr();
auto unary = pattern_to_output.at(unary_label).get_node_shared_ptr();
auto unary = GetPatternNode(pattern_to_output, NodeVector{unary_op_label, fq_label});
for (auto& new_node : sink_backward::InsertTransposeBeforeNode(unary, transpose_const)) {
for (auto& new_node : sink_backward::InsertTransposeBeforeNode(unary,
transpose_const,
/* input_indexes */ {0})) {
register_new_node(new_node);
}
unary->validate_and_infer_types();
@ -116,6 +121,6 @@ TSUnaryBackward::TSUnaryBackward() {
return true;
};
auto m = std::make_shared<Matcher>(transpose_label, "ov::pass::TSUnaryBackward");
auto m = std::make_shared<Matcher>(transpose_label, matcher_name);
register_matcher(m, matcher_pass_callback);
}

View File

@ -285,6 +285,7 @@ bool CanPropagateForwardThrough(Node* node) {
CHECK_TRANSPOSE_SINKING_SUPPORTED(Split, node);
CHECK_TRANSPOSE_SINKING_SUPPORTED(Transpose, node);
CHECK_TRANSPOSE_SINKING_SUPPORTED(PRelu, node);
CHECK_TRANSPOSE_SINKING_SUPPORTED(FakeQuantize, node);
return false;
}

View File

@ -85,6 +85,15 @@ NodePtr UnaryFactory<Convert>::create(const OutputVector& inputs) const {
return std::make_shared<Convert>(inputs[0], element::f64);
}
// Factory specialization for FakeQuantize: unlike the true unary ops it needs
// four extra range inputs, so fixed scalar range constants are created here
// and only inputs[0] comes from the test graph.
template <>
NodePtr UnaryFactory<FakeQuantize>::create(const OutputVector& inputs) const {
// NOTE(review): the second Shape{...} argument is used as the *value* vector
// of Constant(type, shape, values) — each constant is a 1-element f32 tensor.
// Input range [1, 20] is mapped onto output range [0, 10]; presumably
// arbitrary test values — confirm against the test expectations.
auto input_low = std::make_shared<Constant>(element::f32, Shape{1}, Shape{1});
auto input_high = std::make_shared<Constant>(element::f32, Shape{1}, Shape{20});
auto output_low = std::make_shared<Constant>(element::f32, Shape{1}, Shape{0});
auto output_high = std::make_shared<Constant>(element::f32, Shape{1}, Shape{10});
// 11 = number of quantization levels.
return std::make_shared<FakeQuantize>(inputs[0], input_low, input_high, output_low, output_high, 11);
}
template <typename UnaryT>
FactoryPtr CreateUnaryFactory(const std::string& type_name) {
return std::make_shared<UnaryFactory<UnaryT>>(type_name);
@ -361,7 +370,7 @@ std::vector<FactoryPtr> unary_factories = {
CREATE_UNARY_FACTORY(Log), CREATE_UNARY_FACTORY(Negative), CREATE_UNARY_FACTORY(Relu),
CREATE_UNARY_FACTORY(Sigmoid), CREATE_UNARY_FACTORY(Sign), CREATE_UNARY_FACTORY(Sin),
CREATE_UNARY_FACTORY(Sinh), CREATE_UNARY_FACTORY(SoftSign), CREATE_UNARY_FACTORY(Sqrt),
CREATE_UNARY_FACTORY(Tan), CREATE_UNARY_FACTORY(Tanh)};
CREATE_UNARY_FACTORY(Tan), CREATE_UNARY_FACTORY(Tanh), CREATE_UNARY_FACTORY(FakeQuantize)};
TEST_P(TransposeSinkingUnaryTestFixture, CompareFunctions) {
FactoryPtr unary_factory;

View File

@ -179,14 +179,26 @@ ov::Tensor or_tensor(const ov::Tensor& lhs, const ov::Tensor& rhs) {
}
// Strict-weak-ordering comparator for ov::TensorVector keys (for use in
// ordered associative containers such as std::set / std::map).
//
// NOTE(review): the file as given contained two fused implementations — an
// older std::any_of-based one followed by the indexed loop. The any_of form
// returned unconditionally, leaving the loop unreachable, dereferenced
// rhs_it past the end when the vectors differ in length, and was not a
// valid strict weak ordering. Only the correct indexed-loop comparator is
// kept here.
struct TensorVectorCmp {
    // Compares first by vector size, then lexicographically by the address
    // of each tensor's data buffer. An indexed loop is used to preserve the
    // order of comparison.
    bool operator()(const ov::TensorVector& lhs, const ov::TensorVector& rhs) const {
        const auto lhs_size = lhs.size();
        const auto rhs_size = rhs.size();

        // Shorter vector orders first; different sizes can never be "equal".
        if (lhs_size < rhs_size)
            return true;
        if (lhs_size > rhs_size)
            return false;

        // Equal sizes: element-wise comparison by data pointer.
        for (size_t i = 0; i < lhs_size; ++i) {
            if (lhs[i].data() < rhs[i].data())
                return true;
            if (lhs[i].data() > rhs[i].data())
                return false;
        }

        // All elements compare equal -> lhs is not less than rhs.
        return false;
    }
};
@ -281,17 +293,14 @@ bool ov::interval_bound_evaluator(const Node* node,
auto low_1 = ov::evaluate_lower_bound(node->get_input_source_output(1));
auto up_0 = ov::evaluate_upper_bound(node->get_input_source_output(0));
auto up_1 = ov::evaluate_upper_bound(node->get_input_source_output(1));
if (!low_0 || !low_1 || !up_0 || !up_1)
return false;
std::set<TensorVector, TensorVectorCmp> input_variants = {{low_0, low_1},
{low_0, up_1},
{up_0, low_1},
{up_0, up_1}};
for (const auto& variant_of_input_vector : input_variants)
for (const auto& input_tensor : variant_of_input_vector)
if (!input_tensor)
return false;
if (input_variants.size() == 1)
return node->evaluate(upper_output_values, *input_variants.begin()) &&
node->evaluate(lower_output_values, *input_variants.begin());

View File

@ -51,3 +51,31 @@ TEST_F(EvaluateBoundTest, no_exception_when_node_has_output_with_dynamic_element
EXPECT_NO_THROW(evaluate_both_bounds(fn_op));
}
// Plain (non-fixture) test suite alias for bound-evaluator checks.
using BoundEvaluatorTest = ::testing::Test;
// Verifies that evaluating bounds when only ONE bound (lower) of an input is
// known neither throws nor writes to the output, and that once the missing
// upper bound is supplied both bounds evaluate correctly.
TEST(BoundEvaluatorTest, no_exception_on_single_bound) {
constexpr auto et = element::i32;
const auto s = Shape{1, 1};
const auto a = std::make_shared<Parameter>(et, PartialShape{s});
const auto b = Constant::create(et, s, {1});
const auto sub = std::make_shared<Subtract>(a, b);
// Only the lower bound of parameter `a` is set at this point.
int32_t a_l[1] = {1};
a->get_output_tensor(0).set_lower_value(Tensor{et, s, a_l});
int32_t o_[1] = {INT32_MIN}; // initial value of output tensor is not needed, it's set to check whether changed
TensorVector output{{et, s, o_}};
// evaluations won't be performed due to missing upper bound tensor of parameter a
ASSERT_NO_THROW(sub->evaluate_lower(output));
EXPECT_EQ(o_[0], INT32_MIN); // sentinel untouched: evaluation was skipped, not thrown
ASSERT_NO_THROW(sub->evaluate_upper(output));
EXPECT_EQ(o_[0], INT32_MIN);
// Supply the missing upper bound of `a`.
int32_t a_u[1] = {11};
a->get_output_tensor(0).set_upper_value(Tensor{et, s, a_u});
// now both bounds of sub node can be calculated
ASSERT_NO_THROW(sub->evaluate_lower(output));
EXPECT_EQ(o_[0], 0); // lower bound: 1 - 1
ASSERT_NO_THROW(sub->evaluate_upper(output));
EXPECT_EQ(o_[0], 10); // upper bound: 11 - 1
}

View File

@ -895,9 +895,9 @@ std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map<std::string
} else {
for (auto&& device : devicesToBeMerged) {
if (!isAnyDev(device, deviceList)) {
DeviceIDParser parsed{device};
auto iter = std::find(devicesMerged.begin(), devicesMerged.end(), parsed.getDeviceName());
if (iter != devicesMerged.end() && parsed.getDeviceName() != device && parsed.getDeviceID() == "0")
ov::DeviceIDParser parsed{device};
auto iter = std::find(devicesMerged.begin(), devicesMerged.end(), parsed.get_device_name());
if (iter != devicesMerged.end() && parsed.get_device_name() != device && parsed.get_device_id() == "0")
// The device is the device with default device ID (eg. GPU.0) and
// its wide name (eg. GPU) has been in device candidate list.
continue;
@ -912,8 +912,8 @@ std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map<std::string
auto iter = std::find(devicesMerged.begin(), devicesMerged.end(), deviceWithDefaultID(item));
// Remove the device with default device id from candidate device list (eg. GPU.0)
// if its wide name is a single device (eg. GPU).
DeviceIDParser parsed{item};
if (parsed.getDeviceName() == item && iter != devicesMerged.end())
ov::DeviceIDParser parsed{item};
if (parsed.get_device_name() == item && iter != devicesMerged.end())
devicesMerged.erase(iter);
// continue if target device has been in the candidate device list.
if (std::find(devicesMerged.begin(), devicesMerged.end(), item) != devicesMerged.end())