[TF FE] Add translators for CTCGreedyDecoder and CTCLoss operations (#13029)

* [TF FE] Add translators for CTCGreedyDecoder and CTCLoss operations

Signed-off-by: Kazantsev, Roman <roman.kazantsev@intel.com>

* Remove unused variables

Signed-off-by: Kazantsev, Roman <roman.kazantsev@intel.com>
This commit is contained in:
Roman Kazantsev
2022-09-15 16:45:16 +03:00
committed by GitHub
parent 8cfb285dfa
commit ecc729973c
3 changed files with 170 additions and 0 deletions

View File

@@ -0,0 +1,89 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "op_table.hpp"
#include "openvino/opsets/opset8.hpp"
using namespace std;
using namespace ov;
using namespace opset8;
using namespace ov::frontend;
using namespace frontend::tensorflow::detail;
namespace ov {
namespace frontend {
namespace tensorflow {
namespace op {
OutputVector translate_ctc_greedy_decoder_op(const NodeContext& node) {
    // Translates TensorFlow CTCGreedyDecoder into OpenVINO CTCGreedyDecoderSeqLen
    // plus a sub-graph that converts the dense decoder output into the sparse
    // triple (indices, values, dense_shape) that TensorFlow produces, and
    // computes the fourth output (negative sum of the maximum logits).
    default_op_checks(node, 2, {"CTCGreedyDecoder"});
    auto inputs = node.get_input(0);
    auto sequence_length = node.get_input(1);

    // retrieve attributes for CTCGreedyDecoder
    auto merge_repeated = node.get_attribute<bool>("merge_repeated", true);
    auto blank_index = node.get_attribute<int64_t>("blank_index", -1);

    // In TensorFlow the input is going in a format [time_size, batch_size, num_classes]
    // CTCGreedyDecoder expects inputs in a format [batch_size, time_size, num_classes]
    ov::AxisVector inputs_order = {1, 0, 2};
    inputs = ov::frontend::tensorflow::make_transpose(inputs, inputs_order);

    shared_ptr<CTCGreedyDecoderSeqLen> ctc_greedy_decoder = nullptr;
    if (blank_index == -1) {
        // default value for blank index means it should be equal to num_classes - 1
        // in this case it is not required to specify the third input for OpenVINO CTCGreedyDecoderSeqLen
        ctc_greedy_decoder =
            make_shared<CTCGreedyDecoderSeqLen>(inputs, sequence_length, merge_repeated, ov::element::i64);
    } else {
        auto blank_index_const = make_shared<Constant>(sequence_length.get_element_type(), ov::Shape{}, blank_index);
        ctc_greedy_decoder = make_shared<CTCGreedyDecoderSeqLen>(inputs,
                                                                 sequence_length,
                                                                 blank_index_const,
                                                                 merge_repeated,
                                                                 ov::element::i64,
                                                                 ov::element::i64);
    }

    // CTCGreedyDecoderSeqLen returns a dense tensor (padded with -1) holding the
    // decoded results. We need to transform this output into a sparse format.
    auto minus_one_const = make_shared<Constant>(ctc_greedy_decoder->output(0).get_element_type(), ov::Shape{}, -1);
    auto decoded_mask = make_shared<NotEqual>(ctc_greedy_decoder->output(0), minus_one_const);
    auto decoded_indices = make_shared<NonZero>(decoded_mask, ov::element::i64)->output(0);
    // NonZero emits indices in a [rank, num_non_zero] layout; TensorFlow sparse
    // indices are [num_non_zero, rank], so transpose before gathering values
    auto decoded_indices_transposed = ov::frontend::tensorflow::make_transpose(decoded_indices, {1, 0});
    auto decoded_values = make_shared<GatherND>(ctc_greedy_decoder->output(0), decoded_indices_transposed);

    // Compute the shape of the smallest dense tensor that can contain the sparse
    // matrix represented by the indices and values: [batch_size, max_seq_len]
    auto max_seq_len_axis = make_shared<Constant>(ov::element::i64, ov::Shape{}, 0);
    auto max_seq_len = make_shared<ReduceMax>(ctc_greedy_decoder->output(1), max_seq_len_axis, true);
    // inputs shape is in the form [batch_size, time_size, num_classes]
    auto inputs_shape = make_shared<ShapeOf>(inputs, ov::element::i64);
    // Slice-8 requires 1D start/stop/step inputs, so create them with Shape{1}
    auto slice_start = make_shared<Constant>(ov::element::i64, ov::Shape{1}, 0);
    auto slice_end = make_shared<Constant>(ov::element::i64, ov::Shape{1}, 1);
    auto slice_step = make_shared<Constant>(ov::element::i64, ov::Shape{1}, 1);
    auto batch_size = make_shared<Slice>(inputs_shape, slice_start, slice_end, slice_step);
    auto dense_shape = make_shared<Concat>(OutputVector{batch_size, max_seq_len}, 0);

    // Compute the negative of the sum of the greatest logit at each timeframe;
    // the inputs are in a form [batch_size, time_size, num_classes]
    // NOTE(review): TF reports log_probability with shape [batch_size, 1]; this
    // reduction is done with keep_dims=false ([batch_size]) — confirm downstream
    // consumers accept the squeezed form.
    auto max_log_probs_axis = make_shared<Constant>(ov::element::i64, ov::Shape{}, 2);
    auto max_log_probs = make_shared<ReduceMax>(inputs, max_log_probs_axis, false);
    auto sum_max_log_probs_axis = make_shared<Constant>(ov::element::i64, ov::Shape{}, 1);
    auto sum_max_log_probs = make_shared<ReduceSum>(max_log_probs, sum_max_log_probs_axis, false);
    auto neg_sum_logits = make_shared<Negative>(sum_max_log_probs);

    set_node_name(node.get_name() + ":0", decoded_indices_transposed);
    set_node_name(node.get_name() + ":1", decoded_values);
    set_node_name(node.get_name() + ":2", dense_shape);
    set_node_name(node.get_name() + ":3", neg_sum_logits);
    // fix: return the transposed indices — the node tagged as output ":0" above;
    // the untransposed NonZero output has the wrong [rank, N] layout for TF
    return {decoded_indices_transposed, decoded_values, dense_shape, neg_sum_logits};
}
} // namespace op
} // namespace tensorflow
} // namespace frontend
} // namespace ov

View File

@@ -0,0 +1,77 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "op_table.hpp"
#include "openvino/opsets/opset8.hpp"
using namespace std;
using namespace ov;
using namespace opset8;
using namespace ov::frontend;
using namespace frontend::tensorflow::detail;
namespace ov {
namespace frontend {
namespace tensorflow {
namespace op {
OutputVector translate_ctc_loss_op(const NodeContext& node) {
    // This is a translator for CTCLoss v1 aka tf.compat.v1.nn.ctc_loss.
    // It densifies the sparse labels and maps the result onto OpenVINO CTCLoss.
    default_op_checks(node, 4, {"CTCLoss"});
    auto logits = node.get_input(0);
    auto decoded_indices = node.get_input(1);
    auto decoded_values = node.get_input(2);
    auto logit_length = node.get_input(3);

    // retrieve all attributes for CTCLoss
    auto preprocess_collapse_repeated = node.get_attribute<bool>("preprocess_collapse_repeated", false);
    // fix: read ctc_merge_repeated by its own attribute name
    // (previously copy-pasted "preprocess_collapse_repeated")
    auto ctc_merge_repeated = node.get_attribute<bool>("ctc_merge_repeated", true);
    auto time_major = node.get_attribute<bool>("time_major", true);
    if (time_major) {
        // since OpenVINO CTCLoss accepts only batch-major logits
        // we need to transpose it into [batch_size, time_size, num_classes] format
        // from [time_size, batch_size, num_classes]
        ov::AxisVector logits_order = {1, 0, 2};
        logits = ov::frontend::tensorflow::make_transpose(logits, logits_order);
    }

    // Transform decoded labels from the sparse format into dense format
    // Convert to the signed type since the mask with minus one is formed below
    decoded_values = make_shared<Convert>(decoded_values, ov::element::i64);
    // OpenVINO ScatterND operation requires indices to be signed
    decoded_indices = make_shared<Convert>(decoded_indices, ov::element::i64);
    // OpenVINO CTCLoss requires logit_length to be signed
    logit_length = make_shared<Convert>(logit_length, ov::element::i64);

    // dense labels shape is the [batch_size, time_size] prefix of the logits shape;
    // Slice-8 requires 1D start/stop/step inputs, hence Shape{1} constants
    auto logits_shape = make_shared<ShapeOf>(logits, ov::element::i64);
    auto dense_shape = make_shared<Slice>(logits_shape,
                                          make_shared<Constant>(ov::element::i64, ov::Shape{1}, 0),
                                          make_shared<Constant>(ov::element::i64, ov::Shape{1}, 2),
                                          make_shared<Constant>(ov::element::i64, ov::Shape{1}, 1));
    auto minus_one_value = make_shared<Constant>(decoded_values.get_element_type(), ov::Shape{}, -1);
    auto init_decoded_values = make_shared<Broadcast>(minus_one_value, dense_shape);
    auto decoded_values_dense = make_shared<ScatterNDUpdate>(init_decoded_values, decoded_indices, decoded_values);

    // Compute label_length for each batch: count positions NOT equal to the
    // -1 padding value.
    // fix: the Select branches were swapped — padding positions must contribute
    // 0 and real labels 1, otherwise the sum counts padding instead of labels
    auto minus_one_mask = make_shared<Equal>(decoded_values_dense, minus_one_value);
    auto mask01 = make_shared<Select>(minus_one_mask,
                                      make_shared<Constant>(logit_length.get_element_type(), ov::Shape{}, 0),
                                      make_shared<Constant>(logit_length.get_element_type(), ov::Shape{}, 1));
    auto label_length_axis = make_shared<Constant>(ov::element::i64, ov::Shape{}, 1);
    auto label_length = make_shared<ReduceSum>(mask01, label_length_axis, false);

    auto ctc_loss = make_shared<CTCLoss>(logits,
                                         logit_length,
                                         decoded_values_dense,
                                         label_length,
                                         preprocess_collapse_repeated,
                                         ctc_merge_repeated);
    set_node_name(node.get_name(), ctc_loss);
    return {ctc_loss};
}
} // namespace op
} // namespace tensorflow
} // namespace frontend
} // namespace ov

View File

@@ -38,6 +38,8 @@ OP_CONVERTER(translate_conv_2d_op);
OP_CONVERTER(translate_conv_2d_backprop_input_op);
OP_CONVERTER(translate_conv_3d_op);
OP_CONVERTER(translate_conv_3d_backprop_input_v2_op);
OP_CONVERTER(translate_ctc_greedy_decoder_op);
OP_CONVERTER(translate_ctc_loss_op);
OP_CONVERTER(translate_cumsum_op);
OP_CONVERTER(translate_crop_and_resize_op);
OP_CONVERTER(translate_depth_to_space_op);
@@ -201,6 +203,8 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
{"Conv3D", translate_conv_3d_op},
{"Conv3DBackpropInputV2", translate_conv_3d_backprop_input_v2_op},
{"CropAndResize", translate_crop_and_resize_op},
{"CTCGreedyDecoder", translate_ctc_greedy_decoder_op},
{"CTCLoss", translate_ctc_loss_op},
{"Cumsum", translate_cumsum_op},
{"DepthToSpace", translate_depth_to_space_op},
{"DepthwiseConv2dNative", translate_depthwise_conv_2d_native_op},