[pdpd] Specify SEQ_LEN for each batch (#8057)

* Specify SEQ_LEN for each sequence in the batch (use the "SequenceLength" input when it is present)

* Generate a different seq_len for each sequence in the test batches

* Remove print
This commit is contained in:
Mang Guo 2021-10-27 14:00:13 +08:00 committed by GitHub
parent bf8f9164ed
commit 3ce0f2573a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 61 additions and 18 deletions

View File

@ -94,12 +94,16 @@ struct LSTMNgInputMap {
auto batch_size_node =
std::make_shared<opset6::Gather>(shape_of_x, opset6::Constant::create(element::i64, Shape{1}, {0}), axes);
auto seq_length_node =
std::make_shared<opset6::Gather>(shape_of_x, opset6::Constant::create(element::i64, Shape{1}, {1}), axes);
// TODO Specify SEQ_LEN for each batch #55404
m_input_map[LSTMInput::LSTM_INPUT_SEQ_LENGTHS] =
std::make_shared<opset6::Broadcast>(seq_length_node, batch_size_node);
if (node.has_ng_input("SequenceLength")) {
m_input_map[LSTMInput::LSTM_INPUT_SEQ_LENGTHS] = node.get_ng_input("SequenceLength");
} else {
auto seq_length_node =
std::make_shared<opset6::Gather>(shape_of_x,
opset6::Constant::create(element::i64, Shape{1}, {1}),
axes);
m_input_map[LSTMInput::LSTM_INPUT_SEQ_LENGTHS] =
std::make_shared<opset6::Broadcast>(seq_length_node, batch_size_node);
}
auto init_states = node.get_ng_inputs("PreState");
// 0 for init_h, 1 for init_cell, update bidirect_len for init states

View File

@ -189,6 +189,8 @@ static const std::vector<std::string> models{std::string("argmax"),
std::string("rnn_lstm_layer_1_forward"),
std::string("rnn_lstm_layer_2_bidirectional"),
std::string("rnn_lstm_layer_2_forward"),
std::string("rnn_lstm_layer_1_forward_seq_len_4"),
std::string("rnn_lstm_layer_2_bidirectional_seq_len_4"),
std::string("scale_bias_after_float32"),
std::string("scale_bias_after_int32"),
std::string("scale_bias_after_int64"),

View File

@ -3,7 +3,7 @@ from save_model import saveModel
import sys
def pdpd_rnn_lstm(input_size, hidden_size, layers, direction):
def pdpd_rnn_lstm(input_size, hidden_size, layers, direction, seq_len):
import paddle as pdpd
pdpd.enable_static()
main_program = pdpd.static.Program()
@ -14,22 +14,40 @@ def pdpd_rnn_lstm(input_size, hidden_size, layers, direction):
rnn = pdpd.nn.LSTM(input_size, hidden_size, layers, direction)
data = pdpd.static.data(name='x', shape=[4, 3, input_size], dtype='float32')
prev_h = pdpd.ones(shape=[layers * num_of_directions, 4, hidden_size], dtype=np.float32)
prev_c = pdpd.ones(shape=[layers * num_of_directions, 4, hidden_size], dtype=np.float32)
data = pdpd.static.data(
name='x', shape=[4, 3, input_size], dtype='float32')
prev_h = pdpd.ones(
shape=[layers * num_of_directions, 4, hidden_size], dtype=np.float32)
prev_c = pdpd.ones(
shape=[layers * num_of_directions, 4, hidden_size], dtype=np.float32)
y, (h, c) = rnn(data, (prev_h, prev_c))
if seq_len:
seq_lengths = pdpd.static.data(name='sl', shape=[4], dtype='int32')
y, (h, c) = rnn(data, (prev_h, prev_c), seq_lengths)
else:
y, (h, c) = rnn(data, (prev_h, prev_c))
cpu = pdpd.static.cpu_places(1)
exe = pdpd.static.Executor(cpu[0])
exe.run(startup_program)
outs = exe.run(
feed={'x': np.ones([4, 3, input_size]).astype(np.float32)},
fetch_list=[y, h, c],
program=main_program)
saveModel("rnn_lstm_layer_" + str(layers) + '_' + str(direction), exe, feedkeys=['x'],
fetchlist=[y, h, c], inputs=[np.ones([4, 3, input_size]).astype(np.float32)], outputs=[outs[0], outs[1], outs[2]], target_dir=sys.argv[1])
if seq_len:
outs = exe.run(
feed={'x': np.ones([4, 3, input_size]).astype(
np.float32), 'sl': np.array(seq_len).astype(np.int32)},
fetch_list=[y, h, c],
program=main_program)
saveModel("rnn_lstm_layer_" + str(layers) + '_' + str(direction) + '_seq_len_' + str(len(seq_len)), exe, feedkeys=['x', 'sl'],
fetchlist=[y, h, c], inputs=[np.ones([4, 3, input_size]).astype(np.float32), np.array(seq_len).astype(np.int32)], outputs=[outs[0], outs[1], outs[2]], target_dir=sys.argv[1])
else:
outs = exe.run(
feed={'x': np.ones([4, 3, input_size]).astype(
np.float32)},
fetch_list=[y, h, c],
program=main_program)
saveModel("rnn_lstm_layer_" + str(layers) + '_' + str(direction), exe, feedkeys=['x'],
fetchlist=[y, h, c], inputs=[np.ones([4, 3, input_size]).astype(np.float32)], outputs=[outs[0], outs[1], outs[2]], target_dir=sys.argv[1])
return outs[0]
@ -41,26 +59,45 @@ if __name__ == "__main__":
'hidden_size': 2,
'layers': 1,
'direction': 'forward',
'seq_len': [],
},
{
'input_size': 2,
'hidden_size': 2,
'layers': 1,
'direction': 'bidirectional',
'seq_len': [],
},
{
'input_size': 2,
'hidden_size': 2,
'layers': 2,
'direction': 'forward',
'seq_len': [],
},
{
'input_size': 2,
'hidden_size': 2,
'layers': 2,
'direction': 'bidirectional',
'seq_len': [],
},
{
'input_size': 2,
'hidden_size': 2,
'layers': 1,
'direction': 'forward',
'seq_len': [1, 2, 3, 3],
},
{
'input_size': 2,
'hidden_size': 2,
'layers': 2,
'direction': 'bidirectional',
'seq_len': [2, 2, 3, 3],
}
]
for test in testCases:
pdpd_rnn_lstm(test['input_size'], test['hidden_size'], test['layers'], test['direction'])
pdpd_rnn_lstm(test['input_size'], test['hidden_size'],
test['layers'], test['direction'], test['seq_len'])