From ecc729973ced77ee2cf2cb6b891f2628e0e06ac6 Mon Sep 17 00:00:00 2001
From: Roman Kazantsev
Date: Thu, 15 Sep 2022 16:45:16 +0300
Subject: [PATCH] [TF FE] Add translators for CTCGreedyDecoder and CTCLoss operations (#13029)

* [TF FE] Add translators for CTCGreedyDecoder and CTCLoss operations

Signed-off-by: Kazantsev, Roman

* Remove unused variables

Signed-off-by: Kazantsev, Roman
---
 .../tensorflow/src/op/ctc_greedy_decoder.cpp | 107 +++++++++++++++++++
 src/frontends/tensorflow/src/op/ctc_loss.cpp |  77 ++++++++++++++
 src/frontends/tensorflow/src/op_table.cpp    |   4 +
 3 files changed, 188 insertions(+)
 create mode 100644 src/frontends/tensorflow/src/op/ctc_greedy_decoder.cpp
 create mode 100644 src/frontends/tensorflow/src/op/ctc_loss.cpp

diff --git a/src/frontends/tensorflow/src/op/ctc_greedy_decoder.cpp b/src/frontends/tensorflow/src/op/ctc_greedy_decoder.cpp
new file mode 100644
index 00000000000..7841a48c9f6
--- /dev/null
+++ b/src/frontends/tensorflow/src/op/ctc_greedy_decoder.cpp
@@ -0,0 +1,107 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "op_table.hpp"
+#include "openvino/opsets/opset8.hpp"
+
+using namespace std;
+using namespace ov;
+using namespace opset8;
+using namespace ov::frontend;
+using namespace frontend::tensorflow::detail;
+
+namespace ov {
+namespace frontend {
+namespace tensorflow {
+namespace op {
+
+// Translates the TensorFlow CTCGreedyDecoder operation into an OpenVINO sub-graph built around
+// CTCGreedyDecoderSeqLen.
+//
+// Inputs taken from the node: logits (input 0) and sequence lengths (input 1).
+// Attributes taken from the node: merge_repeated (default false) and blank_index (default -1).
+//
+// Returns four outputs in this order: the indices of the decoded elements, the decoded values
+// gathered at these indices, the dense shape [batch_size, max_seq_len] and the negated sum of
+// the per-frame maximum logits.
+OutputVector translate_ctc_greedy_decoder_op(const NodeContext& node) {
+    default_op_checks(node, 2, {"CTCGreedyDecoder"});
+    auto inputs = node.get_input(0);
+    auto sequence_length = node.get_input(1);
+
+    // retrieve attributes for CTCGreedyDecoder
+    // the TensorFlow operation declares merge_repeated = false and blank_index = -1 as defaults
+    auto merge_repeated = node.get_attribute<bool>("merge_repeated", false);
+    auto blank_index = node.get_attribute<int64_t>("blank_index", -1);
+
+    // In TensorFlow the input is going in a format [time_size, batch_size, num_classes]
+    // CTCGreedyDecoder expects inputs in a format [batch_size, time_size, num_classes]
+    ov::AxisVector inputs_order = {1, 0, 2};
+    inputs = ov::frontend::tensorflow::make_transpose(inputs, inputs_order);
+
+    shared_ptr<CTCGreedyDecoderSeqLen> ctc_greedy_decoder = nullptr;
+    if (blank_index == -1) {
+        // default value for blank index means it should be equal to num_classes - 1
+        // in this case it is not required to specify the third input for OpenVINO CTCGreedyDecoderSeqLen
+        // both output types are set to i64, as in the other branch, because the sequence length
+        // output is concatenated with the i64 batch size below
+        ctc_greedy_decoder = make_shared<CTCGreedyDecoderSeqLen>(inputs,
+                                                                 sequence_length,
+                                                                 merge_repeated,
+                                                                 ov::element::i64,
+                                                                 ov::element::i64);
+    } else {
+        auto blank_index_const = make_shared<Constant>(sequence_length.get_element_type(), ov::Shape{}, blank_index);
+        ctc_greedy_decoder = make_shared<CTCGreedyDecoderSeqLen>(inputs,
+                                                                 sequence_length,
+                                                                 blank_index_const,
+                                                                 merge_repeated,
+                                                                 ov::element::i64,
+                                                                 ov::element::i64);
+    }
+
+    // CTCGreedyDecoderSeqLen returns dense tensor holding the decoded results.
+    // We need to transform this output into a sparse format.
+    auto minus_one_const = make_shared<Constant>(ctc_greedy_decoder->output(0).get_element_type(), ov::Shape{}, -1);
+    auto decoded_mask = make_shared<NotEqual>(ctc_greedy_decoder->output(0), minus_one_const);
+    auto decoded_indices = make_shared<NonZero>(decoded_mask, ov::element::i64)->output(0);
+
+    // Since the indices are in row-major format, we need to transpose them before gathering values
+    auto decoded_indices_transposed = ov::frontend::tensorflow::make_transpose(decoded_indices, {1, 0});
+    auto decoded_values = make_shared<GatherND>(ctc_greedy_decoder->output(0), decoded_indices_transposed);
+
+    // Compute the shape of the smallest dense tensor that can contain the sparse
+    // matrix represented by decoded_indices_transposed and decoded_values.
+    auto max_seq_len_axis = make_shared<Constant>(ov::element::i64, ov::Shape{}, 0);
+    auto max_seq_len = make_shared<ReduceMax>(ctc_greedy_decoder->output(1), max_seq_len_axis, true);
+    // inputs shape is in the form [batch_size, time_size, num_classes]
+    auto inputs_shape = make_shared<ShapeOf>(inputs, ov::element::i64);
+    // Slice takes start, stop and step as 1D tensors, hence Shape{1}
+    auto slice_start = make_shared<Constant>(ov::element::i64, ov::Shape{1}, 0);
+    auto slice_end = make_shared<Constant>(ov::element::i64, ov::Shape{1}, 1);
+    auto slice_step = make_shared<Constant>(ov::element::i64, ov::Shape{1}, 1);
+    auto batch_size = make_shared<Slice>(inputs_shape, slice_start, slice_end, slice_step);
+    auto dense_shape = make_shared<Concat>(OutputVector{batch_size, max_seq_len}, 0);
+
+    // Compute the negative of the sum of the greatest logit at each timeframe
+    // the inputs are in a form [batch_size, time_size, num_classes]
+    // NOTE(review): TensorFlow documents this output as [batch_size, 1]; keep_dims is false here - confirm
+    auto max_log_probs_axis = make_shared<Constant>(ov::element::i64, ov::Shape{}, 2);
+    auto max_log_probs = make_shared<ReduceMax>(inputs, max_log_probs_axis, false);
+    auto sum_max_log_probs_axis = make_shared<Constant>(ov::element::i64, ov::Shape{}, 1);
+    auto sum_max_log_probs = make_shared<ReduceSum>(max_log_probs, sum_max_log_probs_axis, false);
+    auto neg_sum_logits = make_shared<Negative>(sum_max_log_probs);
+
+    set_node_name(node.get_name() + ":0", decoded_indices_transposed);
+    set_node_name(node.get_name() + ":1", decoded_values);
+    set_node_name(node.get_name() + ":2", dense_shape);
+    set_node_name(node.get_name() + ":3", neg_sum_logits);
+
+    // return the transposed indices, i.e. the same node that is named ":0" above
+    return {decoded_indices_transposed, decoded_values, dense_shape, neg_sum_logits};
+}
+}  // namespace op
+}  // namespace tensorflow
+}  // namespace frontend
+}  // namespace ov
diff --git a/src/frontends/tensorflow/src/op/ctc_loss.cpp b/src/frontends/tensorflow/src/op/ctc_loss.cpp
new file mode 100644
index 00000000000..9d482b2b54d
--- /dev/null
+++ b/src/frontends/tensorflow/src/op/ctc_loss.cpp
@@ -0,0 +1,77 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "op_table.hpp"
+#include "openvino/opsets/opset8.hpp"
+
+using namespace std;
+using
namespace ov; +using namespace opset8; +using namespace ov::frontend; +using namespace frontend::tensorflow::detail; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_ctc_loss_op(const NodeContext& node) { + // This is a translator for CTCLoss v1 aka tf.compat.v1.nn.ctc_loss + default_op_checks(node, 4, {"CTCLoss"}); + auto logits = node.get_input(0); + auto decoded_indices = node.get_input(1); + auto decoded_values = node.get_input(2); + auto logit_length = node.get_input(3); + + // retrieve all attributes for CTCLoss + auto preprocess_collapse_repeated = node.get_attribute("preprocess_collapse_repeated", false); + auto ctc_merge_repeated = node.get_attribute("preprocess_collapse_repeated", true); + auto time_major = node.get_attribute("time_major", true); + + if (time_major) { + // since OpenVINO CTCLoss accepts only batch-major logist + // we need to transpose it into [batch_size, time_size, num_classes] format + // from [time_size, batch_size, num_classes] + ov::AxisVector logits_order = {1, 0, 2}; + logits = ov::frontend::tensorflow::make_transpose(logits, logits_order); + } + + // Transform decoded labels from the sparse format into dense format + // Convert to the signed type since the mask with minus one is formed below + decoded_values = make_shared(decoded_values, ov::element::i64); + // OpenVINO ScatterND operation requires indices to be signed + decoded_indices = make_shared(decoded_indices, ov::element::i64); + // OpenVINO CTCLoss requires logit_length to be signed + logit_length = make_shared(logit_length, ov::element::i64); + + auto logits_shape = make_shared(logits, ov::element::i64); + auto dense_shape = make_shared(logits_shape, + make_shared(ov::element::i64, ov::Shape{}, 0), + make_shared(ov::element::i64, ov::Shape{}, 2), + make_shared(ov::element::i64, ov::Shape{}, 1)); + auto minus_one_value = make_shared(decoded_values.get_element_type(), ov::Shape{}, -1); + auto init_decoded_values = 
make_shared(minus_one_value, dense_shape); + auto decoded_values_dense = make_shared(init_decoded_values, decoded_indices, decoded_values); + + // Compute label_lenght for each batch + auto minus_one_mask = make_shared(decoded_values_dense, minus_one_value); + auto mask01 = make_shared