[TF FE] Add translators for CTCGreedyDecoder and CTCLoss operations (#13029)
* [TF FE] Add translators for CTCGreedyDecoder and CTCLoss operations Signed-off-by: Kazantsev, Roman <roman.kazantsev@intel.com> * Remove unused variables Signed-off-by: Kazantsev, Roman <roman.kazantsev@intel.com>
This commit is contained in:
89
src/frontends/tensorflow/src/op/ctc_greedy_decoder.cpp
Normal file
89
src/frontends/tensorflow/src/op/ctc_greedy_decoder.cpp
Normal file
@@ -0,0 +1,89 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "op_table.hpp"
|
||||
#include "openvino/opsets/opset8.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace ov;
|
||||
using namespace opset8;
|
||||
using namespace ov::frontend;
|
||||
using namespace frontend::tensorflow::detail;
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace tensorflow {
|
||||
namespace op {
|
||||
|
||||
// Translates TensorFlow CTCGreedyDecoder into an OpenVINO sub-graph built around CTCGreedyDecoderSeqLen.
//
// Inputs (taken from the node):
//   0 - inputs (logits), time-major: [time_size, batch_size, num_classes]
//   1 - sequence_length
// Attributes read: "merge_repeated" (default true), "blank_index" (default -1).
//
// Returns four outputs, in this order:
//   0 - decoded indices of the sparse result, one row per decoded element
//   1 - decoded values gathered at those indices
//   2 - dense shape: [batch_size, max decoded sequence length]
//   3 - negated sum over time of the per-step maximum logit
OutputVector translate_ctc_greedy_decoder_op(const NodeContext& node) {
    default_op_checks(node, 2, {"CTCGreedyDecoder"});
    auto inputs = node.get_input(0);
    auto sequence_length = node.get_input(1);

    // retrieve attributes for CTCGreedyDecoder
    auto merge_repeated = node.get_attribute<bool>("merge_repeated", true);
    auto blank_index = node.get_attribute<int64_t>("blank_index", -1);

    // In TensorFlow the input comes in a format [time_size, batch_size, num_classes]
    // CTCGreedyDecoderSeqLen expects inputs in a format [batch_size, time_size, num_classes]
    ov::AxisVector inputs_order = {1, 0, 2};
    inputs = ov::frontend::tensorflow::make_transpose(inputs, inputs_order);

    shared_ptr<CTCGreedyDecoderSeqLen> ctc_greedy_decoder = nullptr;
    if (blank_index == -1) {
        // default value for blank index means it should be equal to num_classes - 1
        // in this case it is not required to specify the third input for OpenVINO CTCGreedyDecoderSeqLen.
        // Both output types are requested as i64 (same as in the branch below): the decoded lengths
        // are later concatenated with the i64 batch size, so the element types must match.
        ctc_greedy_decoder = make_shared<CTCGreedyDecoderSeqLen>(inputs,
                                                                 sequence_length,
                                                                 merge_repeated,
                                                                 ov::element::i64,
                                                                 ov::element::i64);
    } else {
        auto blank_index_const = make_shared<Constant>(sequence_length.get_element_type(), ov::Shape{}, blank_index);
        ctc_greedy_decoder = make_shared<CTCGreedyDecoderSeqLen>(inputs,
                                                                 sequence_length,
                                                                 blank_index_const,
                                                                 merge_repeated,
                                                                 ov::element::i64,
                                                                 ov::element::i64);
    }

    // CTCGreedyDecoderSeqLen returns dense tensor holding the decoded results.
    // We need to transform this output into a sparse format.
    // Positions equal to -1 are treated as padding and excluded from the sparse result.
    auto minus_one_const = make_shared<Constant>(ctc_greedy_decoder->output(0).get_element_type(), ov::Shape{}, -1);
    auto decoded_mask = make_shared<NotEqual>(ctc_greedy_decoder->output(0), minus_one_const);
    auto decoded_indices = make_shared<NonZero>(decoded_mask, ov::element::i64)->output(0);

    // NonZero yields one row per dimension; transpose so that each row is the full index
    // of one decoded element, which is the layout GatherND and the sparse output need
    auto decoded_indices_transposed = ov::frontend::tensorflow::make_transpose(decoded_indices, {1, 0});
    auto decoded_values = make_shared<GatherND>(ctc_greedy_decoder->output(0), decoded_indices_transposed);

    // Compute the shape of the smallest dense tensor that can contain the sparse
    // matrix represented by decoded_indices_transposed and decoded_values.
    auto max_seq_len_axis = make_shared<Constant>(ov::element::i64, ov::Shape{}, 0);
    // keep_dims = true keeps the result 1D so that it can be concatenated with batch_size
    auto max_seq_len = make_shared<ReduceMax>(ctc_greedy_decoder->output(1), max_seq_len_axis, true);
    // inputs shape is in the form [batch_size, time_size, num_classes]
    auto inputs_shape = make_shared<ShapeOf>(inputs, ov::element::i64);
    auto slice_start = make_shared<Constant>(ov::element::i64, ov::Shape{}, 0);
    auto slice_end = make_shared<Constant>(ov::element::i64, ov::Shape{}, 1);
    auto slice_step = make_shared<Constant>(ov::element::i64, ov::Shape{}, 1);
    auto batch_size = make_shared<Slice>(inputs_shape, slice_start, slice_end, slice_step);
    auto dense_shape = make_shared<Concat>(OutputVector{batch_size, max_seq_len}, 0);

    // Compute the negative of the sum of the greatest logit at each timeframe
    // the inputs are in a form [batch_size, time_size, num_classes]
    // NOTE(review): the sum runs over all time steps and the result has shape [batch_size];
    // confirm this matches what TensorFlow produces for log_probability
    auto max_log_probs_axis = make_shared<Constant>(ov::element::i64, ov::Shape{}, 2);
    auto max_log_probs = make_shared<ReduceMax>(inputs, max_log_probs_axis, false);
    auto sum_max_log_probs_axis = make_shared<Constant>(ov::element::i64, ov::Shape{}, 1);
    auto sum_max_log_probs = make_shared<ReduceSum>(max_log_probs, sum_max_log_probs_axis, false);
    auto neg_sum_logits = make_shared<Negative>(sum_max_log_probs);

    set_node_name(node.get_name() + ":0", decoded_indices_transposed);
    set_node_name(node.get_name() + ":1", decoded_values);
    set_node_name(node.get_name() + ":2", dense_shape);
    set_node_name(node.get_name() + ":3", neg_sum_logits);

    // the first output must be the transposed indices: this is the node named ":0" above
    return {decoded_indices_transposed, decoded_values, dense_shape, neg_sum_logits};
}
|
||||
} // namespace op
|
||||
} // namespace tensorflow
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
77
src/frontends/tensorflow/src/op/ctc_loss.cpp
Normal file
77
src/frontends/tensorflow/src/op/ctc_loss.cpp
Normal file
@@ -0,0 +1,77 @@
|
||||
// Copyright (C) 2018-2022 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "op_table.hpp"
|
||||
#include "openvino/opsets/opset8.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace ov;
|
||||
using namespace opset8;
|
||||
using namespace ov::frontend;
|
||||
using namespace frontend::tensorflow::detail;
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace tensorflow {
|
||||
namespace op {
|
||||
|
||||
// Translates TensorFlow CTCLoss (v1, aka tf.compat.v1.nn.ctc_loss) into OpenVINO CTCLoss.
//
// Inputs (taken from the node):
//   0 - logits, [time_size, batch_size, num_classes] when time_major is true,
//       otherwise [batch_size, time_size, num_classes]
//   1 - indices of the sparse labels tensor
//   2 - values of the sparse labels tensor
//   3 - logit (sequence) length per batch
// Attributes read: "preprocess_collapse_repeated" (default false),
//                  "ctc_merge_repeated" (default true), "time_major" (default true).
//
// Returns a single output holding the CTCLoss node.
OutputVector translate_ctc_loss_op(const NodeContext& node) {
    // This is a translator for CTCLoss v1 aka tf.compat.v1.nn.ctc_loss
    default_op_checks(node, 4, {"CTCLoss"});
    auto logits = node.get_input(0);
    auto decoded_indices = node.get_input(1);
    auto decoded_values = node.get_input(2);
    auto logit_length = node.get_input(3);

    // retrieve all attributes for CTCLoss
    auto preprocess_collapse_repeated = node.get_attribute<bool>("preprocess_collapse_repeated", false);
    auto ctc_merge_repeated = node.get_attribute<bool>("ctc_merge_repeated", true);
    auto time_major = node.get_attribute<bool>("time_major", true);

    if (time_major) {
        // since OpenVINO CTCLoss accepts only batch-major logits
        // we need to transpose it into [batch_size, time_size, num_classes] format
        // from [time_size, batch_size, num_classes]
        ov::AxisVector logits_order = {1, 0, 2};
        logits = ov::frontend::tensorflow::make_transpose(logits, logits_order);
    }

    // Transform decoded labels from the sparse format into dense format
    // Convert to the signed type since the mask with minus one is formed below
    decoded_values = make_shared<Convert>(decoded_values, ov::element::i64);
    // OpenVINO ScatterND operation requires indices to be signed
    decoded_indices = make_shared<Convert>(decoded_indices, ov::element::i64);
    // OpenVINO CTCLoss requires logit_length to be signed
    logit_length = make_shared<Convert>(logit_length, ov::element::i64);

    // The dense labels tensor takes the first two logits dimensions: [batch_size, time_size]
    auto logits_shape = make_shared<ShapeOf>(logits, ov::element::i64);
    auto dense_shape = make_shared<Slice>(logits_shape,
                                          make_shared<Constant>(ov::element::i64, ov::Shape{}, 0),
                                          make_shared<Constant>(ov::element::i64, ov::Shape{}, 2),
                                          make_shared<Constant>(ov::element::i64, ov::Shape{}, 1));
    // Fill the dense tensor with -1 (padding marker) and scatter the real labels on top of it
    auto minus_one_value = make_shared<Constant>(decoded_values.get_element_type(), ov::Shape{}, -1);
    auto init_decoded_values = make_shared<Broadcast>(minus_one_value, dense_shape);
    auto decoded_values_dense = make_shared<ScatterNDUpdate>(init_decoded_values, decoded_indices, decoded_values);

    // Compute label_length for each batch: the number of cells that hold a real label.
    // Padding cells (equal to -1) contribute 0 and every other cell contributes 1.
    auto minus_one_mask = make_shared<Equal>(decoded_values_dense, minus_one_value);
    auto valid_label_mask =
        make_shared<Select>(minus_one_mask,
                            make_shared<Constant>(logit_length.get_element_type(), ov::Shape{}, 0),
                            make_shared<Constant>(logit_length.get_element_type(), ov::Shape{}, 1));
    auto label_length_axis = make_shared<Constant>(ov::element::i64, ov::Shape{}, 1);
    auto label_length = make_shared<ReduceSum>(valid_label_mask, label_length_axis, false);

    auto ctc_loss = make_shared<CTCLoss>(logits,
                                         logit_length,
                                         decoded_values_dense,
                                         label_length,
                                         preprocess_collapse_repeated,
                                         ctc_merge_repeated);
    set_node_name(node.get_name(), ctc_loss);
    return {ctc_loss};
}
|
||||
} // namespace op
|
||||
} // namespace tensorflow
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
@@ -38,6 +38,8 @@ OP_CONVERTER(translate_conv_2d_op);
|
||||
OP_CONVERTER(translate_conv_2d_backprop_input_op);
|
||||
OP_CONVERTER(translate_conv_3d_op);
|
||||
OP_CONVERTER(translate_conv_3d_backprop_input_v2_op);
|
||||
OP_CONVERTER(translate_ctc_greedy_decoder_op);
|
||||
OP_CONVERTER(translate_ctc_loss_op);
|
||||
OP_CONVERTER(translate_cumsum_op);
|
||||
OP_CONVERTER(translate_crop_and_resize_op);
|
||||
OP_CONVERTER(translate_depth_to_space_op);
|
||||
@@ -201,6 +203,8 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
|
||||
{"Conv3D", translate_conv_3d_op},
|
||||
{"Conv3DBackpropInputV2", translate_conv_3d_backprop_input_v2_op},
|
||||
{"CropAndResize", translate_crop_and_resize_op},
|
||||
{"CTCGreedyDecoder", translate_ctc_greedy_decoder_op},
|
||||
{"CTCLoss", translate_ctc_loss_op},
|
||||
{"Cumsum", translate_cumsum_op},
|
||||
{"DepthToSpace", translate_depth_to_space_op},
|
||||
{"DepthwiseConv2dNative", translate_depthwise_conv_2d_native_op},
|
||||
|
||||
Reference in New Issue
Block a user