Merge branch 'transpose_sinking_fakequantize' into gna_layout_debug
commit 4481041f86
@@ -354,5 +354,6 @@ def test_flush_fp32_subnormals_to_zero():
    apply_moc_transformations(model, cf=False, smart_reshape=True)  # apply_flush_fp32_subnormals_to_zero is called inside

    assert np.all(weights.data[4:8] != subnorm_val)
    assert np.all(weights.data[4:8] == 0.0)
    new_weights = add_node.input_value(1).get_node()
    assert np.all(new_weights.data[4:8] != subnorm_val)
    assert np.all(new_weights.data[4:8] == 0.0)

@@ -36,14 +36,28 @@ ov::pass::FlushFP32SubnormalsToZero::FlushFP32SubnormalsToZero() {
        bool has_subnormals = false;
        for (size_t i = 0; i < size; ++i) {
            if (fpclassify(std::abs(data[i])) == FP_SUBNORMAL) {
                data[i] = 0.0f;
                has_subnormals = true;
                break;
            }
        }
        if (has_subnormals)
            return true;
        if (!has_subnormals)
            return false;

        return false;
        auto new_constant = std::make_shared<ov::opset8::Constant>(ov::element::f32, node->get_shape());
        auto* dst_data = const_cast<float*>(new_constant->get_data_ptr<float>());

        for (size_t i = 0; i < size; ++i) {
            if (fpclassify(std::abs(data[i])) != FP_SUBNORMAL)
                dst_data[i] = data[i];
            else
                dst_data[i] = 0.0f;
        }

        new_constant->set_friendly_name(node->get_friendly_name());
        ov::copy_runtime_info(node, new_constant);
        ov::replace_node(node, new_constant);

        return true;
    };

    auto m = make_shared<pattern::Matcher>(node_pattern, matcher_name);

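A note on the check above: a float32 value is subnormal when its magnitude is positive but smaller than FLT_MIN, and std::fpclassify reports it as FP_SUBNORMAL. The rewritten pass no longer zeroes the original buffer in place; it detects the first subnormal, writes a flushed copy into a freshly created Constant, and swaps it into the graph via replace_node, which is why the Python test above re-reads the weights from the Add node's input. A minimal standalone sketch of the detect-and-flush idea (plain C++, no OpenVINO types; buffer and values are illustrative only):

    #include <cfloat>
    #include <cmath>
    #include <vector>

    // Returns true if any value was flushed; writes the flushed copy into `out`.
    bool flush_subnormals_to_zero(const std::vector<float>& in, std::vector<float>& out) {
        bool has_subnormals = false;
        out.resize(in.size());
        for (size_t i = 0; i < in.size(); ++i) {
            if (std::fpclassify(in[i]) == FP_SUBNORMAL) {  // e.g. FLT_MIN / 2.0f lands here
                out[i] = 0.0f;
                has_subnormals = true;
            } else {
                out[i] = in[i];
            }
        }
        return has_subnormals;
    }
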
@@ -56,14 +56,15 @@ TSUnaryForward::TSUnaryForward() {
    MATCHER_SCOPE(TSUnaryForward);

    auto transpose_label = wrap_type<Transpose>({any_input(), any_input()});
    auto unary_label =
        wrap_type<UnaryElementwiseArithmetic, Clamp, Elu, SoftPlus, LogicalNot, Convert, IsInf, IsNaN, IsFinite>(
            {transpose_label});
    auto fq_label = wrap_type<FakeQuantize>({transpose_label, any_input(), any_input(), any_input(), any_input()});
    auto unary_op_label =
        wrap_type<UnaryElementwiseArithmetic, Clamp, Elu, SoftPlus, LogicalNot, Convert, IsInf, IsNaN, IsFinite>({transpose_label});
    auto unary_label = std::make_shared<pattern::op::Or>(OutputVector{fq_label, unary_op_label});

    ov::matcher_pass_callback matcher_pass_callback = [=](Matcher& m) {
        const auto& pattern_to_output = m.get_pattern_value_map();
        auto transpose = pattern_to_output.at(transpose_label).get_node_shared_ptr();
        auto unary = pattern_to_output.at(unary_label).get_node_shared_ptr();
        auto unary = GetPatternNode(pattern_to_output, NodeVector{unary_op_label, fq_label});

        const NodePair new_nodes = SwapNodes(transpose, unary);

@@ -74,7 +75,7 @@ TSUnaryForward::TSUnaryForward() {
        return true;
    };

    auto m = std::make_shared<Matcher>(unary_label, "ov::pass::TSUnaryForward");
    auto m = std::make_shared<Matcher>(unary_label, matcher_name);
    register_matcher(m, matcher_pass_callback);
}

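The forward matcher now accepts either a plain unary op or a FakeQuantize through a pattern::op::Or, so the callback can no longer look up one fixed label; GetPatternNode (defined elsewhere in the transpose-sinking sources) presumably returns the node for whichever candidate label actually matched. A hedged sketch of that idea, not the actual helper:

    #include <memory>

    // Hypothetical helper: return the matched node for the first candidate label
    // found in the pattern-value map, or an empty pointer if none matched.
    template <typename PatternMap, typename Labels>
    auto get_pattern_node(const PatternMap& pattern_map, const Labels& labels)
        -> decltype(pattern_map.begin()->second.get_node_shared_ptr()) {
        for (const auto& label : labels) {
            const auto it = pattern_map.find(label);
            if (it != pattern_map.end())
                return it->second.get_node_shared_ptr();
        }
        return nullptr;
    }
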
@@ -91,10 +92,12 @@ TSUnaryBackward::TSUnaryBackward() {
        return HasSameOutputTransposeNodes(output);
    };

    auto unary_label =
        wrap_type<UnaryElementwiseArithmetic, Clamp, Elu, SoftPlus, LogicalNot, Convert, IsInf, IsNaN, IsFinite>(
            {any_input()},
            unary_restrictions);
    auto fq_label =
        wrap_type<FakeQuantize>({any_input(), any_input(), any_input(), any_input(), any_input()}, unary_restrictions);
    auto unary_op_label =
        wrap_type<UnaryElementwiseArithmetic, Clamp, Elu, SoftPlus, LogicalNot, Convert, IsInf, IsNaN, IsFinite>({any_input()},
                                                                                                                 unary_restrictions);
    auto unary_label = std::make_shared<pattern::op::Or>(OutputVector{fq_label, unary_op_label});

    auto transpose_const_label = wrap_type<Constant>();

@@ -104,9 +107,11 @@ TSUnaryBackward::TSUnaryBackward() {
        const auto& pattern_to_output = m.get_pattern_value_map();
        auto transpose_const = as_type_ptr<Constant>(pattern_to_output.at(transpose_const_label).get_node_shared_ptr());
        auto transpose = pattern_to_output.at(transpose_label).get_node_shared_ptr();
        auto unary = pattern_to_output.at(unary_label).get_node_shared_ptr();
        auto unary = GetPatternNode(pattern_to_output, NodeVector{unary_op_label, fq_label});

        for (auto& new_node : sink_backward::InsertTransposeBeforeNode(unary, transpose_const)) {
        for (auto& new_node : sink_backward::InsertTransposeBeforeNode(unary,
                                                                       transpose_const,
                                                                       /* input_indexes */ {0})) {
            register_new_node(new_node);
        }
        unary->validate_and_infer_types();
@@ -116,6 +121,6 @@ TSUnaryBackward::TSUnaryBackward() {
        return true;
    };

    auto m = std::make_shared<Matcher>(transpose_label, "ov::pass::TSUnaryBackward");
    auto m = std::make_shared<Matcher>(transpose_label, matcher_name);
    register_matcher(m, matcher_pass_callback);
}

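The extra /* input_indexes */ {0} argument means a Transpose is inserted only before input 0 of the matched node, so for FakeQuantize the four range inputs keep their original layout. A rough sketch of what an InsertTransposeBeforeNode-style routine could do with such a list (an assumed shape of the utility, not the real implementation; opset9 is used here purely for illustration):

    #include <memory>
    #include <vector>

    #include "openvino/opsets/opset9.hpp"

    // Assumed sketch: for each requested input index, wire a Transpose between the
    // producer of that input and the node, reusing the given permutation constant.
    std::vector<std::shared_ptr<ov::Node>> insert_transpose_before_node(
        const std::shared_ptr<ov::Node>& node,
        const std::shared_ptr<ov::opset9::Constant>& transpose_const,
        const std::vector<size_t>& input_indexes) {
        std::vector<std::shared_ptr<ov::Node>> new_nodes;
        for (const auto idx : input_indexes) {
            auto parent = node->input_value(idx);
            auto transpose = std::make_shared<ov::opset9::Transpose>(parent, transpose_const->output(0));
            node->input(idx).replace_source_output(transpose->output(0));
            new_nodes.push_back(transpose);
        }
        return new_nodes;
    }
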
@@ -285,6 +285,7 @@ bool CanPropagateForwardThrough(Node* node) {
    CHECK_TRANSPOSE_SINKING_SUPPORTED(Split, node);
    CHECK_TRANSPOSE_SINKING_SUPPORTED(Transpose, node);
    CHECK_TRANSPOSE_SINKING_SUPPORTED(PRelu, node);
    CHECK_TRANSPOSE_SINKING_SUPPORTED(FakeQuantize, node);

    return false;
}

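CHECK_TRANSPOSE_SINKING_SUPPORTED is defined elsewhere in this file; from its usage it is an early-return type check, and the new line simply adds FakeQuantize to the supported set. A hedged guess at the macro's shape (the real definition may rely on ov::is_type rather than dynamic_cast):

    // Hypothetical expansion: return true as soon as the node is one of the
    // supported operation types.
    #define CHECK_TRANSPOSE_SINKING_SUPPORTED(TYPE, node) \
        do {                                              \
            if (dynamic_cast<const TYPE*>(node))          \
                return true;                              \
        } while (0)
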
@@ -85,6 +85,15 @@ NodePtr UnaryFactory<Convert>::create(const OutputVector& inputs) const {
    return std::make_shared<Convert>(inputs[0], element::f64);
}

template <>
NodePtr UnaryFactory<FakeQuantize>::create(const OutputVector& inputs) const {
    auto input_low = std::make_shared<Constant>(element::f32, Shape{1}, Shape{1});
    auto input_high = std::make_shared<Constant>(element::f32, Shape{1}, Shape{20});
    auto output_low = std::make_shared<Constant>(element::f32, Shape{1}, Shape{0});
    auto output_high = std::make_shared<Constant>(element::f32, Shape{1}, Shape{10});
    return std::make_shared<FakeQuantize>(inputs[0], input_low, input_high, output_low, output_high, 11);
}

template <typename UnaryT>
FactoryPtr CreateUnaryFactory(const std::string& type_name) {
    return std::make_shared<UnaryFactory<UnaryT>>(type_name);
@@ -361,7 +370,7 @@ std::vector<FactoryPtr> unary_factories = {
    CREATE_UNARY_FACTORY(Log), CREATE_UNARY_FACTORY(Negative), CREATE_UNARY_FACTORY(Relu),
    CREATE_UNARY_FACTORY(Sigmoid), CREATE_UNARY_FACTORY(Sign), CREATE_UNARY_FACTORY(Sin),
    CREATE_UNARY_FACTORY(Sinh), CREATE_UNARY_FACTORY(SoftSign), CREATE_UNARY_FACTORY(Sqrt),
    CREATE_UNARY_FACTORY(Tan), CREATE_UNARY_FACTORY(Tanh)};
    CREATE_UNARY_FACTORY(Tan), CREATE_UNARY_FACTORY(Tanh), CREATE_UNARY_FACTORY(FakeQuantize)};

TEST_P(TransposeSinkingUnaryTestFixture, CompareFunctions) {
    FactoryPtr unary_factory;

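A note on the new factory specialization: the third argument of each Constant is a Shape (a vector of size_t) used here as the value container, so Shape{20} produces a one-element constant holding 20. Assuming the templated Constant(type, shape, std::vector<T>) constructor, an equivalent and arguably clearer spelling would be:

    // Same constants with explicit float value vectors (a sketch; values as in the test).
    auto input_low = std::make_shared<Constant>(element::f32, Shape{1}, std::vector<float>{1.0f});
    auto input_high = std::make_shared<Constant>(element::f32, Shape{1}, std::vector<float>{20.0f});
    auto output_low = std::make_shared<Constant>(element::f32, Shape{1}, std::vector<float>{0.0f});
    auto output_high = std::make_shared<Constant>(element::f32, Shape{1}, std::vector<float>{10.0f});
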
@@ -179,14 +179,26 @@ ov::Tensor or_tensor(const ov::Tensor& lhs, const ov::Tensor& rhs) {
}

struct TensorVectorCmp {
    // Comparing Tensor vectors as numbers composed with pointers as digits.
    // Indexed loop used to preserve order of comparison.
    bool operator()(const ov::TensorVector& lhs, const ov::TensorVector& rhs) const {
        auto rhs_it = rhs.begin();
        return std::any_of(lhs.begin(), lhs.end(), [&rhs_it](const ov::Tensor& lhs) {
            bool is_less =
                (lhs && *rhs_it) ? lhs.data() < rhs_it->data() : static_cast<bool>(lhs) < static_cast<bool>(*rhs_it);
            ++rhs_it;
            return is_less;
        });
        const auto lhs_size = lhs.size();
        const auto rhs_size = rhs.size();

        if (lhs_size < rhs_size)
            return true;
        if (lhs_size > rhs_size)
            return false;

        for (size_t i = 0; i < lhs_size; ++i) {
            if (lhs[i].data() < rhs[i].data())
                return true;
            if (lhs[i].data() > rhs[i].data())
                return false;
        }

        // if all equals
        return false;
    }
};

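This rewrite matters because TensorVectorCmp orders the std::set of input variants below. The previous std::any_of form could report both lhs < rhs and rhs < lhs for the same pair (and advanced the rhs iterator without a size check), so it was not a strict weak ordering. The new version orders vectors by size first and then element-wise by data pointer; the same ordering, sketched on plain pointer vectors standing in for TensorVector:

    #include <algorithm>
    #include <vector>

    // Shorter vector first; otherwise the first differing data pointer decides.
    bool less_by_size_then_pointers(const std::vector<const void*>& lhs,
                                    const std::vector<const void*>& rhs) {
        if (lhs.size() != rhs.size())
            return lhs.size() < rhs.size();
        return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
    }
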
@@ -281,17 +293,14 @@ bool ov::interval_bound_evaluator(const Node* node,
    auto low_1 = ov::evaluate_lower_bound(node->get_input_source_output(1));
    auto up_0 = ov::evaluate_upper_bound(node->get_input_source_output(0));
    auto up_1 = ov::evaluate_upper_bound(node->get_input_source_output(1));
    if (!low_0 || !low_1 || !up_0 || !up_1)
        return false;

    std::set<TensorVector, TensorVectorCmp> input_variants = {{low_0, low_1},
                                                              {low_0, up_1},
                                                              {up_0, low_1},
                                                              {up_0, up_1}};

    for (const auto& variant_of_input_vector : input_variants)
        for (const auto& input_tensor : variant_of_input_vector)
            if (!input_tensor)
                return false;

    if (input_variants.size() == 1)
        return node->evaluate(upper_output_values, *input_variants.begin()) &&
               node->evaluate(lower_output_values, *input_variants.begin());

@@ -51,3 +51,31 @@ TEST_F(EvaluateBoundTest, no_exception_when_node_has_output_with_dynamic_element

    EXPECT_NO_THROW(evaluate_both_bounds(fn_op));
}

using BoundEvaluatorTest = ::testing::Test;
TEST(BoundEvaluatorTest, no_exception_on_single_bound) {
    constexpr auto et = element::i32;
    const auto s = Shape{1, 1};
    const auto a = std::make_shared<Parameter>(et, PartialShape{s});
    const auto b = Constant::create(et, s, {1});
    const auto sub = std::make_shared<Subtract>(a, b);

    int32_t a_l[1] = {1};
    a->get_output_tensor(0).set_lower_value(Tensor{et, s, a_l});

    int32_t o_[1] = {INT32_MIN};  // the initial value of the output tensor does not matter; it is set only to check whether evaluation overwrites it
    TensorVector output{{et, s, o_}};
    // evaluations won't be performed because the upper-bound tensor of parameter a is still missing
    ASSERT_NO_THROW(sub->evaluate_lower(output));
    EXPECT_EQ(o_[0], INT32_MIN);
    ASSERT_NO_THROW(sub->evaluate_upper(output));
    EXPECT_EQ(o_[0], INT32_MIN);

    int32_t a_u[1] = {11};
    a->get_output_tensor(0).set_upper_value(Tensor{et, s, a_u});
    // now both bounds of the sub node can be calculated
    ASSERT_NO_THROW(sub->evaluate_lower(output));
    EXPECT_EQ(o_[0], 0);
    ASSERT_NO_THROW(sub->evaluate_upper(output));
    EXPECT_EQ(o_[0], 10);
}

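The expected 0 and 10 in the last two checks follow from interval arithmetic on Subtract with a in [1, 11] and b fixed at 1; this is a reading of the observed behaviour, not the library code:

    // For y = a - b with a in [a_lo, a_hi] and b in [b_lo, b_hi]:
    //   lower(y) = a_lo - b_hi   // here: 1  - 1 = 0
    //   upper(y) = a_hi - b_lo   // here: 11 - 1 = 10
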
@@ -895,9 +895,9 @@ std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map<std::string
    } else {
        for (auto&& device : devicesToBeMerged) {
            if (!isAnyDev(device, deviceList)) {
                DeviceIDParser parsed{device};
                auto iter = std::find(devicesMerged.begin(), devicesMerged.end(), parsed.getDeviceName());
                if (iter != devicesMerged.end() && parsed.getDeviceName() != device && parsed.getDeviceID() == "0")
                ov::DeviceIDParser parsed{device};
                auto iter = std::find(devicesMerged.begin(), devicesMerged.end(), parsed.get_device_name());
                if (iter != devicesMerged.end() && parsed.get_device_name() != device && parsed.get_device_id() == "0")
                    // This device has the default device ID (eg. GPU.0) and
                    // its wide name (eg. GPU) is already in the device candidate list.
                    continue;

@@ -912,8 +912,8 @@ std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map<std::string
                auto iter = std::find(devicesMerged.begin(), devicesMerged.end(), deviceWithDefaultID(item));
                // Remove the device with the default device ID (eg. GPU.0) from the candidate device list
                // if its wide name is a single device (eg. GPU).
                DeviceIDParser parsed{item};
                if (parsed.getDeviceName() == item && iter != devicesMerged.end())
                ov::DeviceIDParser parsed{item};
                if (parsed.get_device_name() == item && iter != devicesMerged.end())
                    devicesMerged.erase(iter);
                // continue if the target device is already in the candidate device list.
                if (std::find(devicesMerged.begin(), devicesMerged.end(), item) != devicesMerged.end())

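The only functional change in these last two hunks is the switch to the ov::DeviceIDParser spelling and its snake_case accessors, summarized side by side (names taken from the hunks above, not from the header):

    // old:  DeviceIDParser parsed{device};      parsed.getDeviceName();    parsed.getDeviceID();
    // new:  ov::DeviceIDParser parsed{device};  parsed.get_device_name();  parsed.get_device_id();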