[CPU] Enable bf16 RNN primitives (#4942)

Egor Duplensky 2021-04-25 22:18:38 +03:00 committed by GitHub
parent 8bb73273f1
commit 39e1a21c42
12 changed files with 1425 additions and 222 deletions


@@ -5,12 +5,17 @@
 #include "mkldnn_rnn.h"
 #include "mkldnn_extension_utils.h"
+#include "mkldnn_node.h"
 #include "utils/general_utils.h"
 #include "nodes/common/cpu_memcpy.h"
+#include "utils/bfloat16.hpp"
+#include "nodes/common/cpu_convert.h"
 #include <string>
 #include <utility>
+
+#define THROW_ERROR IE_THROW() << NameFromType(getType()) << " layer '" << getName() << "' "
+
 using namespace mkldnn;
 using namespace InferenceEngine;
@@ -39,7 +44,7 @@ static algorithm ie2mkl(RNNCellBase::CellType cell_type) {
         case RNNCellBase::GRU: return algorithm::vanilla_gru;
         case RNNCellBase::GRU_LBR: return algorithm::lbr_gru;
         default:
-            IE_THROW() << "Unsupported cell type";
+            IE_THROW() << "RNN node. Unsupported cell type";
             return algorithm::undef;
     }
 }
@@ -51,7 +56,7 @@ size_t gatesCount(algorithm alg) {
         case algorithm::lbr_gru: return 3;
         case algorithm::vanilla_lstm: return 4;
         default:
-            IE_THROW() << "Unsupported cell type";
+            IE_THROW() << "RNN node. Unsupported cell type";
             return 0;
     }
 }
@@ -63,11 +68,24 @@ size_t statesCount(algorithm alg) {
         case algorithm::lbr_gru: return 1;
         case algorithm::vanilla_lstm: return 2;
         default:
-            IE_THROW() << "Unsupported cell type";
+            IE_THROW() << "RNN node. Unsupported cell type";
             return 0;
     }
 }
+
+bool haveCellState(algorithm alg) {
+    return alg == algorithm::vanilla_lstm;
+}
+
+const std::map<InferenceEngine::Precision, InferenceEngine::Precision> MKLDNNRNN::weightsByLayerPrec {
+    // layer precision,                weights precision
+    {InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32},
+    {InferenceEngine::Precision::BF16, InferenceEngine::Precision::BF16},
+    // FP16 and U8 are not supported yet
+    // {InferenceEngine::Precision::FP16, InferenceEngine::Precision::FP16},
+    // {InferenceEngine::Precision::U8,   InferenceEngine::Precision::I8},
+};
+
 MKLDNNRNN::MKLDNNRNN(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
         MKLDNNNode(layer, eng, cache) {
     is_cell = one_of(layer->type, "LSTMCell", "GRUCell", "RNNCell");
@@ -78,6 +96,8 @@ bool MKLDNNRNN::created() const {
 }
 
 void MKLDNNRNN::getSupportedDescriptors() {
+    runtimePrecision = getCnnLayer()->insData[0].lock()->getPrecision();
+
     if (is_cell)
         fillCellDesc();
     else
@@ -89,14 +109,14 @@ void MKLDNNRNN::fillCellDesc() {
     auto cellLayer = std::dynamic_pointer_cast<RNNCellBase>(getCnnLayer());
 
     if (!cellLayer)
-        IE_THROW() << "No original layer for RNNCell.";
+        THROW_ERROR << "No original layer for RNNCell.";
 
     cell_type = ie2mkl(cellLayer->cellType);
     cell_act = ie2mkl(cellLayer->activations[0]);  // Works only for RNN with one gate
 
     if (cellLayer->clip != 0.0f) {
         // TODO [oneDNN]: No more supported
-        IE_THROW() << "Clipping is not supported for RNN primitive";
+        THROW_ERROR << "Clipping is not supported for RNN primitive";
         // cell_desc.set_clipping(cellLayer->clip);
     }
@@ -104,16 +124,16 @@ void MKLDNNRNN::fillCellDesc() {
     auto &outs = cellLayer->outData;
 
     if (!one_of(ins.size(), 3, 2))
-        IE_THROW() << "Incorrect number of input ports for layer " << getName();
+        THROW_ERROR << "Incorrect number of input ports for layer " << getName();
     if (!one_of(outs.size(), 2, 1))
-        IE_THROW() << "Incorrect number of output ports for layer " << getName();
+        THROW_ERROR << "Incorrect number of output ports for layer " << getName();
 
     auto in_data_dims = getParentEdgeAt(0)->getDims();
     auto in_h_state_dims = getParentEdgeAt(1)->getDims();
     auto out_h_state_dims = getChildEdgeAt(0)->getDims();
 
     if (in_data_dims.ndims() != 2 || in_h_state_dims.ndims() != 2)
-        IE_THROW() << "Incorrect shape of input/output ports for layer " << getName();
+        THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName();
 
     G = gatesCount(cell_type);
     S = statesCount(cell_type);
@@ -130,7 +150,7 @@ void MKLDNNRNN::fillCellDesc() {
     if (in_data_dims != D_shape
         || in_h_state_dims != S_shape
         || out_h_state_dims != S_shape)
-        IE_THROW() << "Incorrect shape of input/output ports for layer " << getName();
+        THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName();
 
     if (S == 2) {
         auto in_c_state_dims = getParentEdgeAt(2)->getDims();
@@ -138,7 +158,7 @@ void MKLDNNRNN::fillCellDesc() {
         if (in_c_state_dims != S_shape
             || out_c_state_dims != S_shape)
-            IE_THROW() << "Incorrect shape of input/output ports for layer " << getName();
+            THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName();
     }
 
     auto blobs = cellLayer->blobs;
@@ -147,40 +167,53 @@ void MKLDNNRNN::fillCellDesc() {
     if (blobs.find("biases") != blobs.end()) bias = blobs["biases"];
 
     if (!weights)
-        IE_THROW() << "RNN Layer. Weights do not present.";
+        THROW_ERROR << "RNN Layer. Weights do not present.";
 
-    if (weights->size() != G*SC*(SC+DC))
-        IE_THROW() << "RNN Layer. Weights size is not correct. Expected size:" << G*SC*(SC+DC);
+    if (weights->size() != G * SC * (SC + DC))
+        THROW_ERROR << "RNN Layer. Weights size is not correct. Expected size:" << G * SC * (SC + DC);
 
-    if (bias && bias->size() != Gb*SC)
-        IE_THROW() << "RNN Layer. Biases size is not correct. Expected size:" << G*SC;
+    if (bias && bias->size() != Gb * SC)
+        THROW_ERROR << "RNN Layer. Biases size is not correct. Expected size:" << G * SC;
+
+    auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision);
+
+    // layer input plus states
+    in_data_d.resize(S + 1);
+    out_data_d.resize(S + 1);
 
     // Shapes and Attributes are correct. Can start internal stuff initialization.
-    for (size_t i = 0; i < S; i++) {
-        in_states_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc);
-        out_states_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc);
-    }
+    in_data_d[RNNInOutKind::Layer] = {{T, N, DC}, dataType, memory::format_tag::tnc};
+    out_data_d[RNNInOutKind::Layer] = {{T, N, SC}, dataType, memory::format_tag::tnc};
 
-    in_data_d = {{T, N, DC}, memory::data_type::f32, memory::format_tag::tnc};;
-    out_data_d = {{T, N, SC}, memory::data_type::f32, memory::format_tag::tnc};;
+    in_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc};
+    out_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc};
 
-    w_data_d = {{L, D, DC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
-    w_state_d = {{L, D, SC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
+    if (haveCellState(cell_type)) {
+        in_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
+        out_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
+    }
+
+    w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo};
+    w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo};
 
     if (bias)
         w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo};
 
     std::vector<TensorDesc> in_candidate, out_candidate;
-    std::vector<memory::format_tag> outputFormats;
 
-    in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, memory::data_type::f32, memory::format_tag::nc});
-    in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
-    out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
-    outputFormats.emplace_back(memory::format_tag::nc);
+    in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, dataType, memory::format_tag::nc});
+    in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc});
+    out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc});
 
-    if (S == 2) {
+    if (haveCellState(cell_type)) {
         in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
         out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
-        outputFormats.emplace_back(memory::format_tag::nc);
+    }
+
+    Precision weights_prec = as<MemoryBlob>(weights)->getTensorDesc().getPrecision();
+    if (!verifyWeightsPrecision(runtimePrecision, weights_prec)) {
+        if (runtimePrecision == Precision::BF16 && weights_prec == Precision::FP32)
+            convertWeightsBlobToBF16();
     }
 
     createDescriptor(in_candidate, out_candidate);
@@ -191,10 +224,10 @@ void MKLDNNRNN::fillSeqDesc() {
     auto rnnLayer = std::dynamic_pointer_cast<RNNSequenceLayer>(getCnnLayer());
 
     if (!rnnLayer)
-        IE_THROW() << "Wrong RNN layer representation. Cannot cast to RNNSequenceLayer.";
+        THROW_ERROR << "Wrong RNN layer representation. Cannot cast to RNNSequenceLayer.";
 
     if (!one_of(rnnLayer->cellType, _RNN::LSTM, _RNN::GRU, _RNN::GRU_LBR, _RNN::RNN))
-        IE_THROW() << "RNN layer supports only LSTM/GRU/RNN cell";
+        THROW_ERROR << "RNN layer supports only LSTM/GRU/RNN cell";
 
     cell_type = ie2mkl(rnnLayer->cellType);
     cell_act = algorithm::undef;
@@ -203,31 +236,31 @@ void MKLDNNRNN::fillSeqDesc() {
     // TODO [oneDNN]: No more supported
     if (rnnLayer->clip != 0.0f) {
-        IE_THROW() << "Clipping is not supported for RNN primitive";
+        THROW_ERROR << "Clipping is not supported for RNN primitive";
         // cell_desc.set_clipping(rnnLayer->clip);
     }
 
     if (!one_of(rnnLayer->axis, 0, 1))
-        IE_THROW() << "RNN layer supports only sequence axis 0 or 1";
+        THROW_ERROR << "RNN layer supports only sequence axis 0 or 1";
 
     nativeOrder = rnnLayer->axis == 0;
 
     if (!one_of(rnnLayer->direction, _RNN::FWD, _RNN::BWD))
-        IE_THROW() << "RNN layer supports only unidirectional RNN layer";
+        THROW_ERROR << "RNN layer supports only unidirectional RNN layer";
 
     direction = ie2mkl(rnnLayer->direction);
 
     auto &ins = rnnLayer->insData;
     auto &outs = rnnLayer->outData;
 
     if (!one_of(ins.size(), 3, 2, 1))
-        IE_THROW() << "Incorrect number of input ports for layer " << getName();
+        THROW_ERROR << "Incorrect number of input ports for layer " << getName();
     if (!one_of(outs.size(), 3, 2, 1))
-        IE_THROW() << "Incorrect number of output ports for layer " << getName();
+        THROW_ERROR << "Incorrect number of output ports for layer " << getName();
 
     auto in_data_dims = getParentEdgeAt(0)->getDims();
     auto out_data_dims = getChildEdgeAt(0)->getDims();
 
     if (in_data_dims.ndims() != 3 || out_data_dims.ndims() != 3)
-        IE_THROW() << "Incorrect shape of input/output ports for layer " << getName();
+        THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName();
 
     if (!nativeOrder) {
         std::swap(in_data_dims[0], in_data_dims[1]);
@@ -246,125 +279,153 @@ void MKLDNNRNN::fillSeqDesc() {
     MKLDNNDims ID_shape {T, N, DC}, OD_shape {T, N, SC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC};
 
     if (out_data_dims != OD_shape)
-        IE_THROW() << "Incorrect shape of input/output ports for layer " << getName();
+        THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName();
 
-    in_states_d.resize(S);
-    out_states_d.resize(S);
-
-    for (int i = 1; i < ins.size(); i++) {
-        if (getParentEdgeAt(i)->getDims() != S_shape)
-            IE_THROW() << "Incorrect shape of state ports for layer " << getName();
-        in_states_d[i - 1] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
-    }
-
-    for (int i = 1; i < outs.size(); i++) {
-        if (getChildEdgeAt(i)->getDims() != S_shape)
-            IE_THROW() << "Incorrect shape of state ports for layer " << getName();
-        out_states_d[i - 1] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
-    }
-
-    auto blobs = rnnLayer->blobs;
+    auto& blobs = rnnLayer->blobs;
     Blob::Ptr weights, bias;
     if (blobs.find("weights") != blobs.end()) weights = blobs["weights"];
     if (blobs.find("biases") != blobs.end()) bias = blobs["biases"];
 
     if (!weights)
-        IE_THROW() << "RNN Layer. Weights do not present.";
+        THROW_ERROR << "RNN Layer. Weights do not present.";
 
-    if (weights->size() != G*SC*(SC+DC))
-        IE_THROW() << "RNN Layer. Weights size is not correct. Expected size:" << G*SC*(SC+DC);
+    if (weights->size() != G * SC * (SC + DC))
+        THROW_ERROR << "RNN Layer. Weights size is not correct. Expected size:" << G * SC * (SC + DC);
 
-    w_data_d = {{L, D, DC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
-    w_state_d = {{L, D, SC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
+    for (int i = 1; i < ins.size(); i++) {
+        if (getParentEdgeAt(i)->getDims() != S_shape)
+            THROW_ERROR << "Incorrect shape of state ports for layer " << getName();
+    }
 
-    if (bias && bias->size() != Gb*SC)
-        IE_THROW() << "RNN Layer. Biases size is not correct. Expected size:" << G*SC;
+    for (int i = 1; i < outs.size(); i++) {
+        if (getChildEdgeAt(i)->getDims() != S_shape)
+            THROW_ERROR << "Incorrect shape of state ports for layer " << getName();
+    }
+
+    // layer input plus states
+    in_data_d.resize(S + 1);
+    out_data_d.resize(S + 1);
+
+    auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision);
+
+    // Try to create descriptor and corresponding configuration
+    in_data_d[RNNInOutKind::Layer] = {in_data_dims, dataType, memory::format_tag::tnc};
+    out_data_d[RNNInOutKind::Layer] = {out_data_dims, dataType, memory::format_tag::tnc};
+
+    in_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc};
+    out_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc};
+
+    if (haveCellState(cell_type)) {
+        in_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
+        out_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
+    }
+
+    w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo};
+    w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo};
+
+    if (bias && bias->size() != Gb * SC)
+        THROW_ERROR << "RNN Layer. Biases size is not correct. Expected size:" << G * SC;
 
     if (bias)
         w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo};
 
-    // Try to create descriptor and corresponding configuration
-    in_data_d = {in_data_dims, memory::data_type::f32, memory::format_tag::tnc};
-    out_data_d = {out_data_dims, memory::data_type::f32, memory::format_tag::tnc};
-
-    std::vector<TensorDesc> in_candidate;
-    if (nativeOrder)
-        in_candidate.push_back(in_data_d);
-    else
-        in_candidate.push_back(MKLDNNMemoryDesc{{N, T, DC}, memory::data_type::f32, memory::format_tag::ntc});
-
-    for (int i = 1; i < ins.size(); i++)
-        in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
-
-    std::vector<TensorDesc> out_candidate;
+    std::vector<TensorDesc> in_candidate, out_candidate;
 
     if (nativeOrder) {
-        out_candidate.push_back(out_data_d);
+        in_candidate.push_back(in_data_d[RNNInOutKind::Layer]);
+        out_candidate.push_back(out_data_d[RNNInOutKind::Layer]);
     } else {
-        out_candidate.push_back(MKLDNNMemoryDesc{{N, T, SC}, memory::data_type::f32, memory::format_tag::ntc});
+        in_candidate.emplace_back(MKLDNNMemoryDesc{{N, T, DC}, dataType, memory::format_tag::ntc});
+        out_candidate.emplace_back(MKLDNNMemoryDesc{{N, T, SC}, dataType, memory::format_tag::ntc});
     }
 
-    for (int i = 1; i < outs.size(); i++) {
+    in_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, dataType, memory::format_tag::nc});
+    out_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, dataType, memory::format_tag::nc});
+
+    if (haveCellState(cell_type)) {
+        in_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, memory::data_type::f32, memory::format_tag::nc});
         out_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, memory::data_type::f32, memory::format_tag::nc});
     }
 
+    Precision weights_prec = as<MemoryBlob>(weights)->getTensorDesc().getPrecision();
+    if (!verifyWeightsPrecision(runtimePrecision, weights_prec)) {
+        if (runtimePrecision == Precision::BF16 && weights_prec == Precision::FP32)
+            convertWeightsBlobToBF16();
+    }
+
     createDescriptor(in_candidate, out_candidate);
 }
+void MKLDNNRNN::convertWeightsBlobToBF16() {
+    Blob::Ptr &weights = getCnnLayer()->blobs["weights"];
+
+    MemoryBlob::Ptr cur_weights = as<MemoryBlob>(weights);
+    TensorDesc td(Precision::BF16, cur_weights->getTensorDesc().getDims(), cur_weights->getTensorDesc().getLayout());
+    MemoryBlob::Ptr new_weights_blob = make_shared_blob<uint16_t>(td);
+    new_weights_blob->allocate();
+
+    bfloat16_t *dst = new_weights_blob->wmap();
+    float* fp32src = cur_weights->rmap().as<float*>();
+    cpu_convert(fp32src, dst, Precision::FP32, Precision::BF16, new_weights_blob->size());
+    weights = new_weights_blob;
+}
+
 void MKLDNNRNN::createDescriptor(const std::vector<TensorDesc> &inputDesc,
                                  const std::vector<TensorDesc> &outputDesc) {
     switch (cell_type) {
         case mkldnn::algorithm::vanilla_rnn: {
             MKLDNNDescriptor desc(std::shared_ptr<vanilla_rnn_forward::desc>(
                     new vanilla_rnn_forward::desc(prop_kind::forward_scoring, cell_act, direction,
-                            /* In Data       */ in_data_d,
-                            /* In State      */ in_states_d[0],
+                            /* In Data       */ in_data_d[RNNInOutKind::Layer],
+                            /* In State      */ in_data_d[RNNInOutKind::HiddenState],
                             /* Weights data  */ w_data_d,
                             /* Weights state */ w_state_d,
                             /* Bias          */ w_bias_d,
-                            /* Out Data      */ out_data_d,
-                            /* Out State     */ out_states_d[0])));
+                            /* Out Data      */ out_data_d[RNNInOutKind::Layer],
+                            /* Out State     */ out_data_d[RNNInOutKind::HiddenState])));
             descs.push_back(desc);
         } break;
         case mkldnn::algorithm::vanilla_gru: {
             MKLDNNDescriptor desc(std::shared_ptr<gru_forward::desc>(
                     new gru_forward::desc(prop_kind::forward_scoring, direction,
-                            /* In Data       */ in_data_d,
-                            /* In State      */ in_states_d[0],
+                            /* In Data       */ in_data_d[RNNInOutKind::Layer],
+                            /* In State      */ in_data_d[RNNInOutKind::HiddenState],
                             /* Weights data  */ w_data_d,
                             /* Weights state */ w_state_d,
                             /* Bias          */ w_bias_d,
-                            /* Out Data      */ out_data_d,
-                            /* Out State     */ out_states_d[0])));
+                            /* Out Data      */ out_data_d[RNNInOutKind::Layer],
+                            /* Out State     */ out_data_d[RNNInOutKind::HiddenState])));
             descs.push_back(desc);
         } break;
         case mkldnn::algorithm::lbr_gru: {
             MKLDNNDescriptor desc(std::shared_ptr<lbr_gru_forward::desc>(
                     new lbr_gru_forward::desc(prop_kind::forward_scoring, direction,
-                            /* In Data       */ in_data_d,
-                            /* In State      */ in_states_d[0],
+                            /* In Data       */ in_data_d[RNNInOutKind::Layer],
+                            /* In State      */ in_data_d[RNNInOutKind::HiddenState],
                             /* Weights data  */ w_data_d,
                             /* Weights state */ w_state_d,
                             /* Bias          */ w_bias_d,
-                            /* Out Data      */ out_data_d,
-                            /* Out State     */ out_states_d[0])));
+                            /* Out Data      */ out_data_d[RNNInOutKind::Layer],
+                            /* Out State     */ out_data_d[RNNInOutKind::HiddenState])));
             descs.push_back(desc);
         } break;
         case mkldnn::algorithm::vanilla_lstm: {
             MKLDNNDescriptor desc(std::shared_ptr<lstm_forward::desc>(
                     new lstm_forward::desc(prop_kind::forward_scoring, direction,
-                            /* In Data       */ in_data_d,
-                            /* In State H    */ in_states_d[0],
-                            /* In State C    */ in_states_d[1],
+                            /* In Data       */ in_data_d[RNNInOutKind::Layer],
+                            /* In State      */ in_data_d[RNNInOutKind::HiddenState],
+                            /* In State C    */ in_data_d[RNNInOutKind::CellState],
                             /* Weights data  */ w_data_d,
                             /* Weights state */ w_state_d,
                             /* Bias          */ w_bias_d,
-                            /* Out Data      */ out_data_d,
-                            /* Out State H   */ out_states_d[0],
-                            /* Out State C   */ out_states_d[1])));
+                            /* Out Data      */ out_data_d[RNNInOutKind::Layer],
+                            /* Out State     */ out_data_d[RNNInOutKind::HiddenState],
+                            /* Out State C   */ out_data_d[RNNInOutKind::CellState])));
             descs.push_back(desc);
         } break;
         default:
-            IE_THROW() << "Unknown cell type";
+            THROW_ERROR << "Unknown cell type";
     }
 
     // Fill supported config
@@ -389,130 +450,170 @@ void MKLDNNRNN::createDescriptor(const std::vector<TensorDesc> &inputDesc,
     supportedPrimitiveDescriptors.emplace_back(config, ref_any);
 }
 
+bool MKLDNNRNN::verifyWeightsPrecision(const Precision &layerPrec, const Precision &weightsPrec) {
+    if (!weightsByLayerPrec.count(layerPrec))
+        THROW_ERROR << "Unsupported layer precision " << layerPrec;
+    return weightsPrec == weightsByLayerPrec.at(layerPrec);
+}
+
+void MKLDNNRNN::verifyWeights() {
+    auto layer = getCnnLayer();
+    auto weightsIt = layer->blobs.find("weights");
+    if (weightsIt == layer->blobs.end())
+        THROW_ERROR << "Missed weights blob.";
+
+    const auto& weightsPrec = weightsIt->second->getTensorDesc().getPrecision();
+    if (!verifyWeightsPrecision(runtimePrecision, weightsPrec)) {
+        THROW_ERROR << "Weights precision " << weightsPrec <<
+                       " does not match runtime precision" << runtimePrecision;
+    }
+}
+
+void MKLDNNRNN::verifyBiases() {
+    auto layer = getCnnLayer();
+    if (layer->blobs.find("biases") != layer->blobs.end()
+            && layer->blobs["biases"]->getTensorDesc().getPrecision() != Precision::FP32)
+        THROW_ERROR << "Invalid biases precision: " << layer->blobs["biases"]->getTensorDesc().getPrecision();
+}
+
 void MKLDNNRNN::createPrimitive() {
     if (prim) return;
 
-    std::string errorPrefix = "RNN layer '" + getCnnLayer()->name + "'";
-    auto weightsIt = getCnnLayer()->blobs.find("weights");
-    if (weightsIt == getCnnLayer()->blobs.end())
-        IE_THROW() << errorPrefix << " does not have weights blob.";
-    if (weightsIt->second->getTensorDesc().getPrecision() != Precision::FP32)
-        IE_THROW() << errorPrefix << " has invalid weights precision: " << weightsIt->second->getTensorDesc().getPrecision();
-    if (getCnnLayer()->blobs.find("biases") != getCnnLayer()->blobs.end()
-            && getCnnLayer()->blobs["biases"]->getTensorDesc().getPrecision() != Precision::FP32)
-        IE_THROW() << errorPrefix << " has invalid biases precision: " << getCnnLayer()->blobs["biases"]->getTensorDesc().getPrecision();
-
-    auto pd = descs[0].createPrimitiveDescriptorIterator(getEngine());
-
-    auto src_data_mem = getParentEdgeAt(0)->getMemoryPtr();
-    auto dst_data_mem = getChildEdgeAt(0)->getMemoryPtr();
-
-    // create weight blobs (data and state part)
-    auto w_data_mem = std::make_shared<MKLDNNMemory>(getEngine());
-    w_data_mem->Create(w_data_d);
-    internalBlobMemory.push_back(w_data_mem);
-
-    auto w_state_mem = std::make_shared<MKLDNNMemory>(getEngine());
-    w_state_mem->Create(w_state_d);
-    internalBlobMemory.push_back(w_state_mem);
-
-    auto w_bias_mem = std::make_shared<MKLDNNMemory>(getEngine());
-    w_bias_mem->Create(w_bias_d);
-    internalBlobMemory.push_back(w_bias_mem);
-
-    {
-        /* Copy Weight data
-         * IE format:
-         *   W - [gates, out_state_size, in_data_size + in_state_size]
-         *   B - [gates, out_state_size]
-         *
-         * MKLDNN format:
-         *   W - [1, 1, in_date_size, gates, out_state_size]
-         *   R - [1, 1, in_state_size, gates, out_state_size]
-         *   B - [gates, out_state_size]
-         *
-         * Gate order
-         * ====== LSTM ======
-         * Caffe - IFOC, ONNX - IOFC
-         * IE - FICO, mkldnn - IFCO
-         *
-         * ====== GRU ======
-         * IE - URO, mkldnn - URO
-         */
-        const int gate_map_lstm[] = {1, 0, 2, 3};  // FICO -> IFCO
-        const int gate_map_gru[]  = {0, 1, 2, 3};
-        const int gate_map_rnn[]  = {0};
-        const int *gate_map;
-        const int gate_map_lstm_size = sizeof(gate_map_lstm) / sizeof(int);
-        const int gate_map_gru_size = sizeof(gate_map_gru) / sizeof(int);
-        const int gate_map_rnn_size = sizeof(gate_map_rnn) / sizeof(int);
-        if (cell_type == algorithm::vanilla_lstm) {
-            gate_map = gate_map_lstm;
-            if (G > gate_map_lstm_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        } else if (cell_type == algorithm::vanilla_gru) {
-            gate_map = gate_map_gru;
-            if (G > gate_map_gru_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        } else if (cell_type == algorithm::lbr_gru) {
-            gate_map = gate_map_gru;
-            if (G > gate_map_gru_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        } else if (cell_type == algorithm::vanilla_rnn) {
-            gate_map = gate_map_rnn;
-            if (G > gate_map_rnn_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        } else {
-            gate_map = gate_map_gru;
-            if (G > gate_map_gru_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        }
-
-        auto ie_w_ptr = getCnnLayer()->blobs["weights"]->buffer().as<const float*>();
-        auto w_ptr = static_cast<float*>(w_data_mem->GetData());
-        auto r_ptr = static_cast<float*>(w_state_mem->GetData());
-        const int step = SC * G;
-
-        for (int g = 0; g < G; g++) {
-            for (int out_i = 0; out_i < SC; out_i++) {
-                float *l_w_ptr = w_ptr + gate_map[g]*SC + out_i;
-                float *l_r_ptr = r_ptr + gate_map[g]*SC+ out_i;
-                for (int in_i = 0; in_i < DC; in_i++) {
-                    *l_w_ptr = *ie_w_ptr;
-                    ie_w_ptr++;
-                    l_w_ptr += step;
-                }
-
-                for (int in_i = 0; in_i < SC; in_i++) {
-                    *l_r_ptr = *ie_w_ptr;
-                    ie_w_ptr++;
-                    l_r_ptr += step;
-                }
-            }
-        }
-
-        if (w_bias_d) {
-            auto ie_b_ptr = getCnnLayer()->blobs["biases"]->buffer().as<const float*>();
-            auto b_ptr = static_cast<float*>(w_bias_mem->GetData());
-            for (int g = 0; g < Gb; g++) {
-                float *l_b_ptr = b_ptr + gate_map[g]*SC;
-                const float *l_ie_b_ptr = ie_b_ptr + g * SC;
-                cpu_memcpy(l_b_ptr, l_ie_b_ptr, SC * sizeof(float));
-            }
-        }
-    }
-
-    prim.reset(new mkldnn::primitive(pd));
+    verifyWeights();
+    verifyBiases();
+
+    /*
+     * Gate order
+     * ====== LSTM ======
+     * Caffe - IFOC, ONNX - IOFC
+     * IE - FICO, mkldnn - IFCO
+     *
+     * ====== GRU ======
+     * IE - URO, mkldnn - URO
+     */
+    const int gate_map_lstm[] = {1, 0, 2, 3};  // FICO -> IFCO
+    const int gate_map_gru[]  = {0, 1, 2, 3};
+    const int gate_map_rnn[]  = {0};
+    const int *gate_map;
+    const int gate_map_lstm_size = sizeof(gate_map_lstm) / sizeof(int);
+    const int gate_map_gru_size = sizeof(gate_map_gru) / sizeof(int);
+    const int gate_map_rnn_size = sizeof(gate_map_rnn) / sizeof(int);
+    if (cell_type == algorithm::vanilla_lstm) {
+        gate_map = gate_map_lstm;
+        if (G > gate_map_lstm_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    } else if (cell_type == algorithm::vanilla_gru) {
+        gate_map = gate_map_gru;
+        if (G > gate_map_gru_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    } else if (cell_type == algorithm::lbr_gru) {
+        gate_map = gate_map_gru;
+        if (G > gate_map_gru_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    } else if (cell_type == algorithm::vanilla_rnn) {
+        gate_map = gate_map_rnn;
+        if (G > gate_map_rnn_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    } else {
+        gate_map = gate_map_gru;
+        if (G > gate_map_gru_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    }
+
+    if (runtimePrecision == Precision::BF16)
+        fillWeights<bfloat16_t>(gate_map);
+    else if (runtimePrecision == Precision::FP32)
+        fillWeights<float>(gate_map);
+    else // TODO FP16 and INT8 support
+        THROW_ERROR << "Unsupported data type";
+
+    if (runtimePrecision == Precision::BF16 ||
+        runtimePrecision == Precision::FP32)
+        fillBiases<float>(gate_map);
+
+    auto pd = descs[0].createPrimitiveDescriptorIterator(getEngine());
+    prim.reset(new mkldnn::primitive(pd));
+}
+
+/*
+ * IE format:
+ *   B - [gates, out_state_size]
+ *
+ * MKLDNN format:
+ *   B - [gates, out_state_size]
+ *
+ */
+template <typename Prec>
+void MKLDNNRNN::fillBiases(const int *gate_map) {
+    if (!w_bias_d)
+        return;
+
+    auto w_bias_mem = std::make_shared<MKLDNNMemory>(getEngine());
+    w_bias_mem->Create(w_bias_d);
+    internalBlobMemory.push_back(w_bias_mem);
+
+    auto ie_b_ptr = getCnnLayer()->blobs["biases"]->buffer().as<const Prec*>();
+    auto b_ptr = static_cast<Prec*>(w_bias_mem->GetData());
+    for (int g = 0; g < Gb; g++) {
+        Prec *l_b_ptr = b_ptr + gate_map[g]*SC;
+        const Prec *l_ie_b_ptr = ie_b_ptr + g * SC;
+        cpu_memcpy(l_b_ptr, l_ie_b_ptr, SC * sizeof(Prec));
+    }
+}
+
+/*
+ * IE format:
+ *   W - [gates, out_state_size, in_data_size + in_state_size]
+ *
+ * MKLDNN format:
+ *   W - [1, 1, in_date_size, gates, out_state_size]
+ *   R - [1, 1, in_state_size, gates, out_state_size]
+ *
+ */
+template <typename Prec>
+void MKLDNNRNN::fillWeights(const int *gate_map) {
+    // create weight blobs (data and state part)
+    auto w_data_mem = std::make_shared<MKLDNNMemory>(getEngine());
+    w_data_mem->Create(w_data_d);
+    internalBlobMemory.push_back(w_data_mem);
+
+    auto w_state_mem = std::make_shared<MKLDNNMemory>(getEngine());
+    w_state_mem->Create(w_state_d);
+    internalBlobMemory.push_back(w_state_mem);
+
+    auto ie_w_ptr = getCnnLayer()->blobs["weights"]->buffer().as<const Prec*>();
+    auto w_ptr = static_cast<Prec*>(w_data_mem->GetData());
+    auto r_ptr = static_cast<Prec*>(w_state_mem->GetData());
+    const int step = SC * G;
+
+    for (int g = 0; g < G; g++) {
+        for (int out_i = 0; out_i < SC; out_i++) {
+            Prec *l_w_ptr = w_ptr + gate_map[g]*SC + out_i;
+            Prec *l_r_ptr = r_ptr + gate_map[g]*SC+ out_i;
+            for (int in_i = 0; in_i < DC; in_i++) {
+                *l_w_ptr = *ie_w_ptr;
+                ie_w_ptr++;
+                l_w_ptr += step;
+            }
+
+            for (int in_i = 0; in_i < SC; in_i++) {
+                *l_r_ptr = *ie_w_ptr;
+                ie_w_ptr++;
+                l_r_ptr += step;
+            }
+        }
+    }
+}
 
 void MKLDNNRNN::execute(mkldnn::stream strm) {
     if (!prim)
-        IE_THROW() << "No initialized primitive to execute";
+        THROW_ERROR << "No initialized primitive to execute";
 
     const auto src_data_mem = getParentEdgeAt(0)->getMemoryPtr();
     const auto dst_data_mem = getChildEdgeAt(0)->getMemoryPtr();
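For reference, bfloat16 is just the upper 16 bits of an IEEE-754 float32 (same sign and exponent, 7 mantissa bits), so the FP32 -> BF16 weight repacking that convertWeightsBlobToBF16() above hands off to cpu_convert() amounts to rounding each float and keeping its high half. A minimal standalone sketch of that idea; the helper names and the round-to-nearest-even policy are illustrative assumptions, not the plugin's exact implementation:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Illustrative helper (assumption): convert one float to bfloat16 by rounding
// to nearest-even and keeping the upper 16 bits of the float32 bit pattern.
static uint16_t f32_to_bf16(float v) {
    uint32_t bits;
    std::memcpy(&bits, &v, sizeof(bits));
    const uint32_t rounding = 0x7FFFu + ((bits >> 16) & 1u);  // round to nearest even
    return static_cast<uint16_t>((bits + rounding) >> 16);
}

// Repack an FP32 weight buffer into a BF16 buffer element by element.
static std::vector<uint16_t> repack_weights_bf16(const float* src, size_t count) {
    std::vector<uint16_t> dst(count);
    for (size_t i = 0; i < count; ++i)
        dst[i] = f32_to_bf16(src[i]);
    return dst;
}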


@@ -28,8 +28,19 @@ public:
 private:
     void fillCellDesc();
     void fillSeqDesc();
+    bool verifyWeightsPrecision(const InferenceEngine::Precision& layerPrec,
+                                const InferenceEngine::Precision& weightsPrec);
+    void verifyWeights();
+    void verifyBiases();
+    void convertWeightsBlobToBF16();
+    template <typename Prec>
+    void fillWeights(const int* gate_map);
+    template <typename Prec>
+    void fillBiases(const int* gate_map);
 
 private:
+    InferenceEngine::Precision runtimePrecision;
     /** Specify mode Cell or Seq. true - Cell, false - Seq */
     bool is_cell = false;
@@ -56,11 +67,14 @@ private:
     const ptrdiff_t L = 1;   /**< What is it??. Constant for mkldnn impl */
     const ptrdiff_t D = 1;   /**< Num of direction. 1 or 2 */
 
-    MKLDNNMemoryDesc in_data_d;
-    MKLDNNMemoryDesc out_data_d;
-    std::vector<MKLDNNMemoryDesc> in_states_d;
-    std::vector<MKLDNNMemoryDesc> out_states_d;
+    std::vector<MKLDNNMemoryDesc> in_data_d;
+    std::vector<MKLDNNMemoryDesc> out_data_d;
+
+    enum RNNInOutKind {
+        Layer = 0,
+        HiddenState = 1,
+        CellState = 2
+    };
 
     MKLDNNMemoryDesc w_data_d;
     MKLDNNMemoryDesc w_state_d;
@@ -69,7 +83,7 @@ private:
     // List of in/out reorders if required
     std::vector<mkldnn::reorder> exec_before;
     std::vector<mkldnn::reorder> exec_after;
-};
+
+    static const std::map<InferenceEngine::Precision, InferenceEngine::Precision> weightsByLayerPrec;
+};  // class MKLDNNRNN
 
 }  // namespace MKLDNNPlugin
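The gate_map tables introduced above exist because the IE weight blob stores LSTM gates in FICO order while oneDNN expects IFCO; fillWeights() and fillBiases() scatter IE gate g into slot gate_map[g] while copying. A small self-contained sketch of just that reordering for a [gates, out_state_size] buffer; the function name and flat layout are assumptions for illustration:

#include <cstddef>
#include <vector>

// Reorder a [gates, out_state_size] buffer between gate orders:
// destination gate index = gate_map[source gate index], as in the plugin code.
std::vector<float> reorder_gates(const std::vector<float>& src, size_t gates,
                                 size_t out_size, const int* gate_map) {
    std::vector<float> dst(src.size());
    for (size_t g = 0; g < gates; ++g)
        for (size_t o = 0; o < out_size; ++o)
            dst[static_cast<size_t>(gate_map[g]) * out_size + o] = src[g * out_size + o];
    return dst;
}

// Example: for LSTM, gate_map = {1, 0, 2, 3} maps IE's FICO layout to oneDNN's IFCO,
// i.e. IE gate 0 (F) lands in oneDNN slot 1.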


@@ -0,0 +1,135 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph/op/gru_cell.hpp"
#include <shared_test_classes/single_layer/gru_cell.hpp>
#include "test_utils/cpu_test_utils.hpp"
#include "transformations/op_conversions/gru_cell_decomposition.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
using GRUCellCpuSpecificParams = typename std::tuple<LayerTestsDefinitions::GRUCellParams, CPUSpecificParams, std::map<std::string, std::string>>;
class GRUCellCPUTest : public testing::WithParamInterface<GRUCellCpuSpecificParams>,
virtual public LayerTestsUtils::LayerTestsCommon,
public CPUTestsBase {
public:
static std::string getTestCaseName(const testing::TestParamInfo<GRUCellCpuSpecificParams> &obj) {
CPUSpecificParams cpuParams;
LayerTestsDefinitions::GRUCellParams basicParamsSet;
std::map<std::string, std::string> additionalConfig;
std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;
std::ostringstream result;
result << LayerTestsDefinitions::GRUCellTest::getTestCaseName(
testing::TestParamInfo<LayerTestsDefinitions::GRUCellParams>(basicParamsSet, 0));
result << CPUTestsBase::getTestCaseName(cpuParams);
if (!additionalConfig.empty()) {
result << "_PluginConf";
for (auto &item : additionalConfig) {
if (item.second == PluginConfigParams::YES)
result << "_" << item.first << "=" << item.second;
}
}
return result.str();
}
protected:
void SetUp() {
CPUSpecificParams cpuParams;
LayerTestsDefinitions::GRUCellParams basicParamsSet;
std::map<std::string, std::string> additionalConfig;
bool should_decompose;
size_t batch;
size_t hidden_size;
size_t input_size;
std::vector<std::string> activations;
std::vector<float> activations_alpha;
std::vector<float> activations_beta;
float clip;
bool linear_before_reset;
InferenceEngine::Precision netPrecision;
std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
std::tie(should_decompose, batch, hidden_size, input_size, activations, clip, linear_before_reset, netPrecision, targetDevice) = basicParamsSet;
std::vector<std::vector<size_t>> inputShapes = {
{{batch, input_size},
{batch, hidden_size},
{3 * hidden_size, input_size},
{3 * hidden_size, hidden_size},
{(linear_before_reset ? 4 : 3) * hidden_size}},
};
configuration.insert(additionalConfig.begin(), additionalConfig.end());
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
inPrc = outPrc = Precision::BF16;
} else {
inPrc = outPrc = netPrecision;
}
selectedType += "_";
selectedType += outPrc.name();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
std::vector<ngraph::Shape> WRB = {inputShapes[2], inputShapes[3], inputShapes[4]};
auto gru_cell = ngraph::builder::makeGRU(
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), WRB, hidden_size, activations, {}, {}, clip, linear_before_reset);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(gru_cell->output(0))};
function = makeNgraphFunction(ngPrc, params, gru_cell, "gru_cell");
}
};
TEST_P(GRUCellCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
CheckPluginRelatedResults(executableNetwork, "RNNCell");
}
namespace {
/* CPU PARAMS */
std::vector<std::map<std::string, std::string>> additionalConfig
= {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}},
{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
CPUSpecificParams cpuParams{{nc, nc}, {nc}, {"ref_any"}, "ref_any"};
std::vector<bool> should_decompose{false};
std::vector<size_t> batch{1, 5};
std::vector<size_t> hidden_size{1, 10};
std::vector<size_t> input_size{1, 30};
// oneDNN supports only sigmoid-tanh
std::vector<std::vector<std::string>> activations = {{"sigmoid", "tanh"}};
// oneDNN supports only zero clip
std::vector<float> clip = {0.f};
std::vector<bool> linear_before_reset = {true, false};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
INSTANTIATE_TEST_CASE_P(smoke_GRUCellCPU,
GRUCellCPUTest,
::testing::Combine(::testing::Combine(::testing::ValuesIn(should_decompose),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(linear_before_reset),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(cpuParams),
::testing::ValuesIn(additionalConfig)),
GRUCellCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions
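The tests above flip bf16 execution through the ENFORCE_BF16 plugin config key; the same switch applies when loading a network from application code. A minimal sketch against the 2021-era Inference Engine API (model.xml is an assumed example path, not part of this commit):

#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core ie;
    // Any IR containing LSTM/GRU/RNN layers would do; the path is illustrative.
    auto network = ie.ReadNetwork("model.xml");
    // Force bf16 inference on CPU, the mode these RNN primitives now support.
    auto execNet = ie.LoadNetwork(network, "CPU",
        {{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16,
          InferenceEngine::PluginConfigParams::YES}});
    auto request = execNet.CreateInferRequest();
    request.Infer();
    return 0;
}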


@@ -0,0 +1,202 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/single_layer/gru_sequence.hpp"
#include "ngraph/pass/visualize_tree.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
using GRUSequenceCpuSpecificParams = typename std::tuple<LayerTestsDefinitions::GRUSequenceParams, CPUSpecificParams, std::map<std::string, std::string>>;
class GRUSequenceCPUTest : public testing::WithParamInterface<GRUSequenceCpuSpecificParams>,
virtual public LayerTestsUtils::LayerTestsCommon,
public CPUTestsBase {
public:
static std::string getTestCaseName(const testing::TestParamInfo<GRUSequenceCpuSpecificParams> &obj) {
CPUSpecificParams cpuParams;
LayerTestsDefinitions::GRUSequenceParams basicParamsSet;
std::map<std::string, std::string> additionalConfig;
std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;
std::ostringstream result;
result << LayerTestsDefinitions::GRUSequenceTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::GRUSequenceParams>(basicParamsSet, 0));
result << CPUTestsBase::getTestCaseName(cpuParams);
if (!additionalConfig.empty()) {
result << "_PluginConf";
for (auto &item : additionalConfig) {
if (item.second == PluginConfigParams::YES)
result << "_" << item.first << "=" << item.second;
}
}
return result.str();
}
protected:
void SetUp() {
LayerTestsDefinitions::GRUSequenceParams basicParamsSet;
CPUSpecificParams cpuParams;
std::map<std::string, std::string> additionalConfig;
size_t seq_lenghts;
size_t batch;
size_t hidden_size;
size_t input_size = 10;
std::vector<std::string> activations;
std::vector<float> activations_alpha;
std::vector<float> activations_beta;
float clip;
bool linear_before_reset;
ngraph::op::RecurrentSequenceDirection direction;
InferenceEngine::Precision netPrecision;
std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
std::tie(m_mode, seq_lenghts, batch, hidden_size, activations, clip, linear_before_reset, direction, netPrecision, targetDevice) = basicParamsSet;
size_t num_directions = direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 2 : 1;
std::vector<std::vector<size_t>> inputShapes = {
{{batch, seq_lenghts, input_size},
{batch, num_directions, hidden_size},
{batch},
{num_directions, 3 * hidden_size, input_size},
{num_directions, 3 * hidden_size, hidden_size},
{num_directions, (linear_before_reset ? 4 : 3) * hidden_size}},
};
configuration.insert(additionalConfig.begin(), additionalConfig.end());
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
inPrc = outPrc = Precision::BF16;
} else {
inPrc = outPrc = netPrecision;
}
selectedType += "_";
selectedType += outPrc.name();
m_max_seq_len = seq_lenghts;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
if (m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_PARAM
|| m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM) {
auto seq_lengths = ngraph::builder::makeParams(ngraph::element::i64, {inputShapes[2]}).at(0);
seq_lengths->set_friendly_name("seq_lengths");
params.push_back(seq_lengths);
}
std::vector<ngraph::Shape> WRB = {inputShapes[3], inputShapes[4], inputShapes[5], inputShapes[2]};
auto gru_sequence = ngraph::builder::makeGRU(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)),
WRB,
hidden_size,
activations,
{},
{},
clip,
linear_before_reset,
true,
direction,
m_mode);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(gru_sequence->output(0)),
std::make_shared<ngraph::opset1::Result>(gru_sequence->output(1))};
function = makeNgraphFunction(ngPrc, params, gru_sequence, "gru_sequence");
if (m_mode != ngraph::helpers::SequenceTestsMode::PURE_SEQ) {
ngraph::pass::Manager manager;
if (direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL)
manager.register_pass<ngraph::pass::BidirectionalGRUSequenceDecomposition>();
manager.register_pass<ngraph::pass::ConvertGRUSequenceToTensorIterator>();
manager.run_passes(function);
bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
EXPECT_EQ(ti_found, true);
} else {
bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
EXPECT_EQ(ti_found, false);
}
}
void GenerateInputs() {
for (const auto &input : executableNetwork.GetInputsInfo()) {
const auto &info = input.second;
auto blob = GenerateInput(*info);
if (input.first == "seq_lengths") {
blob = FuncTestUtils::createAndFillBlob(info->getTensorDesc(), m_max_seq_len, 0);
}
inputs.push_back(blob);
}
}
private:
ngraph::helpers::SequenceTestsMode m_mode;
int64_t m_max_seq_len = 0;
};
TEST_P(GRUSequenceCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
CheckPluginRelatedResults(executableNetwork, "RNNSeq");
}
namespace {
/* CPU PARAMS */
std::vector<std::map<std::string, std::string>> additionalConfig
= {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
CPUSpecificParams cpuParams{{ntc, nc}, {ntc, nc}, {"ref_any"}, "ref_any"};
CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc}, {tnc, nc}, {"ref_any"}, "ref_any"};
std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ};
// output values increase rapidly without clip, so use only seq_lenghts = 2
std::vector<size_t> seq_lengths_zero_clip{2};
std::vector<size_t> batch{10};
std::vector<size_t> batch_size_one{1};
std::vector<size_t> hidden_size{1, 10};
std::vector<std::vector<std::string>> activations = {{"sigmoid", "tanh"}};
std::vector<bool> linear_before_reset = {true, false};
std::vector<float> clip{0.f};
std::vector<ngraph::op::RecurrentSequenceDirection> direction = {ngraph::op::RecurrentSequenceDirection::FORWARD};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
INSTANTIATE_TEST_CASE_P(smoke_GRUSequenceCPU,
GRUSequenceCPUTest,
::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
::testing::ValuesIn(seq_lengths_zero_clip),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(linear_before_reset),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(cpuParams),
::testing::ValuesIn(additionalConfig)),
GRUSequenceCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_GRUSequenceCPUBatchSizeOne,
GRUSequenceCPUTest,
::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
::testing::ValuesIn(seq_lengths_zero_clip),
::testing::ValuesIn(batch_size_one),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(linear_before_reset),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(cpuParamsBatchSizeOne),
::testing::ValuesIn(additionalConfig)),
GRUSequenceCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions


@@ -0,0 +1,132 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph/op/lstm_cell.hpp"
#include <shared_test_classes/single_layer/lstm_cell.hpp>
#include "test_utils/cpu_test_utils.hpp"
#include "transformations/op_conversions/lstm_cell_decomposition.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
using LSTMCellCpuSpecificParams = typename std::tuple<LayerTestsDefinitions::LSTMCellParams, CPUSpecificParams, std::map<std::string, std::string>>;
class LSTMCellLayerCPUTest : public testing::WithParamInterface<LSTMCellCpuSpecificParams>,
virtual public LayerTestsUtils::LayerTestsCommon,
public CPUTestsBase {
public:
static std::string getTestCaseName(const testing::TestParamInfo<LSTMCellCpuSpecificParams>& obj) {
CPUSpecificParams cpuParams;
LayerTestsDefinitions::LSTMCellParams basicParamsSet;
std::map<std::string, std::string> additionalConfig;
std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;
std::ostringstream result;
result << LayerTestsDefinitions::LSTMCellTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::LSTMCellParams>(
basicParamsSet, 0));
result << CPUTestsBase::getTestCaseName(cpuParams);
if (!additionalConfig.empty()) {
result << "_PluginConf";
for (auto& item : additionalConfig) {
if (item.second == PluginConfigParams::YES)
result << "_" << item.first << "=" << item.second;
}
}
return result.str();
}
protected:
void SetUp() {
LayerTestsDefinitions::LSTMCellParams basicParamsSet;
CPUSpecificParams cpuParams;
std::map<std::string, std::string> additionalConfig;
bool should_decompose;
size_t batch;
size_t hidden_size;
size_t input_size;
std::vector<std::string> activations;
std::vector<float> activations_alpha;
std::vector<float> activations_beta;
float clip;
InferenceEngine::Precision netPrecision;
threshold = 0.05;
std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
std::tie(should_decompose, batch, hidden_size, input_size, activations, clip, netPrecision, targetDevice) = basicParamsSet;
std::vector<std::vector<size_t>> inputShapes = {
{{batch, input_size}, {batch, hidden_size}, {batch, hidden_size}, {4 * hidden_size, input_size}, {4 * hidden_size, hidden_size}, {4 * hidden_size}},
};
configuration.insert(additionalConfig.begin(), additionalConfig.end());
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
inPrc = outPrc = Precision::BF16;
} else {
inPrc = outPrc = netPrecision;
}
selectedType += "_";
selectedType += outPrc.name();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1], inputShapes[2]});
std::vector<ngraph::Shape> WRB = {inputShapes[3], inputShapes[4], inputShapes[5]};
auto lstm_cell = ngraph::builder::makeLSTM(
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), WRB, hidden_size, activations, {}, {}, clip);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(lstm_cell->output(0)),
std::make_shared<ngraph::opset1::Result>(lstm_cell->output(1))};
function = makeNgraphFunction(ngPrc, params, lstm_cell, "lstm_cell");
}
};
TEST_P(LSTMCellLayerCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
CheckPluginRelatedResults(executableNetwork, "RNNCell");
}
namespace {
/* CPU PARAMS */
std::vector<std::map<std::string, std::string>> additionalConfig
= {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}},
{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}};
CPUSpecificParams cpuParams{{nc, nc, nc}, {nc}, {"ref_any"}, "ref_any"};
std::vector<bool> should_decompose{false};
std::vector<size_t> batch{5};
std::vector<size_t> hidden_size{1, 10};
std::vector<size_t> input_size{1, 30};
// oneDNN supports only sigmoid-tanh-tanh
std::vector<std::vector<std::string>> activations = {{"sigmoid", "tanh", "tanh"}};
// oneDNN supports only zero clip
std::vector<float> clip{0.f};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16};
INSTANTIATE_TEST_CASE_P(smoke_LSTMCellCPU,
LSTMCellLayerCPUTest,
::testing::Combine(::testing::Combine(::testing::ValuesIn(should_decompose),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(cpuParams),
::testing::ValuesIn(additionalConfig)),
LSTMCellLayerCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions


@@ -0,0 +1,205 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/single_layer/lstm_sequence.hpp"
#include "ngraph/pass/visualize_tree.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
using LSTMSequenceCpuSpecificParams = typename std::tuple<LayerTestsDefinitions::LSTMSequenceParams, CPUSpecificParams, std::map<std::string, std::string>>;
class LSTMSequenceCPUTest : public testing::WithParamInterface<LSTMSequenceCpuSpecificParams>,
virtual public LayerTestsUtils::LayerTestsCommon,
public CPUTestsBase {
public:
static std::string getTestCaseName(const testing::TestParamInfo<LSTMSequenceCpuSpecificParams> &obj) {
CPUSpecificParams cpuParams;
LayerTestsDefinitions::LSTMSequenceParams basicParamsSet;
std::map<std::string, std::string> additionalConfig;
std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;
std::ostringstream result;
result << LayerTestsDefinitions::LSTMSequenceTest::getTestCaseName(
testing::TestParamInfo<LayerTestsDefinitions::LSTMSequenceParams>(basicParamsSet, 0));
result << CPUTestsBase::getTestCaseName(cpuParams);
if (!additionalConfig.empty()) {
result << "_PluginConf";
for (auto &item : additionalConfig) {
if (item.second == PluginConfigParams::YES)
result << "_" << item.first << "=" << item.second;
}
}
return result.str();
}
protected:
void SetUp() {
LayerTestsDefinitions::LSTMSequenceParams basicParamsSet;
CPUSpecificParams cpuParams;
std::map<std::string, std::string> additionalConfig;
size_t seq_lenghts;
size_t batch;
size_t hidden_size;
size_t input_size;
std::vector<std::string> activations;
std::vector<float> activations_alpha;
std::vector<float> activations_beta;
float clip;
ngraph::op::RecurrentSequenceDirection direction;
InferenceEngine::Precision netPrecision;
std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
std::tie(m_mode, seq_lenghts, batch, hidden_size, input_size, activations, clip, direction, netPrecision, targetDevice) = basicParamsSet;
size_t num_directions = direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 2 : 1;
m_max_seq_len = seq_lenghts;
std::vector<std::vector<size_t>> inputShapes = {
{{batch, seq_lenghts, input_size},
{batch, num_directions, hidden_size},
{batch, num_directions, hidden_size},
{batch},
{num_directions, 4 * hidden_size, input_size},
{num_directions, 4 * hidden_size, hidden_size},
{num_directions, 4 * hidden_size}},
};
configuration.insert(additionalConfig.begin(), additionalConfig.end());
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
inPrc = outPrc = Precision::BF16;
} else {
inPrc = outPrc = netPrecision;
}
selectedType += "_";
selectedType += outPrc.name();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1], inputShapes[2]});
if (m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_PARAM
|| m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM) {
auto seq_lengths = ngraph::builder::makeParams(ngraph::element::i64, {inputShapes[3]}).at(0);
seq_lengths->set_friendly_name("seq_lengths");
params.push_back(seq_lengths);
}
std::vector<ngraph::Shape> WRB = {inputShapes[4], inputShapes[5], inputShapes[6], inputShapes[3]};
auto lstm_sequence = ngraph::builder::makeLSTM(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)),
WRB,
hidden_size,
activations,
{},
{},
clip,
true,
direction,
m_mode);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(lstm_sequence->output(0)),
std::make_shared<ngraph::opset1::Result>(lstm_sequence->output(1)),
std::make_shared<ngraph::opset1::Result>(lstm_sequence->output(2))};
function = makeNgraphFunction(ngPrc, params, lstm_sequence, "lstm_sequence");
if (m_mode != ngraph::helpers::SequenceTestsMode::PURE_SEQ) {
ngraph::pass::Manager manager;
if (direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL)
manager.register_pass<ngraph::pass::BidirectionalLSTMSequenceDecomposition>();
manager.register_pass<ngraph::pass::ConvertLSTMSequenceToTensorIterator>();
manager.run_passes(function);
bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
EXPECT_EQ(ti_found, true);
} else {
bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
EXPECT_EQ(ti_found, false);
}
}
void GenerateInputs() {
for (const auto &input : executableNetwork.GetInputsInfo()) {
const auto &info = input.second;
auto blob = GenerateInput(*info);
if (input.first == "seq_lengths") {
blob = FuncTestUtils::createAndFillBlob(info->getTensorDesc(), m_max_seq_len, 0);
}
inputs.push_back(blob);
}
}
private:
ngraph::helpers::SequenceTestsMode m_mode;
int64_t m_max_seq_len = 0;
};
TEST_P(LSTMSequenceCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
CheckPluginRelatedResults(executableNetwork, "RNNSeq");
}
namespace {
/* CPU PARAMS */
std::vector<std::map<std::string, std::string>> additionalConfig
= {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}},
{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
CPUSpecificParams cpuParams{{ntc, nc, nc}, {ntc, nc, nc}, {"ref_any"}, "ref_any"};
CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc, nc}, {tnc, nc, nc}, {"ref_any"}, "ref_any"};
std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ};
std::vector<size_t> seq_lengths_zero_clip{2};
std::vector<size_t> batch_size_one{1};
std::vector<size_t> batch{10};
std::vector<size_t> hidden_size{1, 10};
std::vector<size_t> input_size{10};
// oneDNN supports only the sigmoid-tanh-tanh activation combination for LSTM
std::vector<std::vector<std::string>> activations = {{"sigmoid", "tanh", "tanh"}};
// oneDNN supports only zero clip
std::vector<float> clip{0.f};
std::vector<ngraph::op::RecurrentSequenceDirection> direction = {ngraph::op::RecurrentSequenceDirection::FORWARD};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
INSTANTIATE_TEST_CASE_P(smoke_LSTMSequenceCPU,
LSTMSequenceCPUTest,
::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
::testing::ValuesIn(seq_lengths_zero_clip),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(cpuParams),
::testing::ValuesIn(additionalConfig)),
LSTMSequenceCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_LSTMSequenceCPUbatchSizeOne,
LSTMSequenceCPUTest,
::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
::testing::ValuesIn(seq_lengths_zero_clip),
::testing::ValuesIn(batch_size_one),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(cpuParamsBatchSizeOne),
::testing::ValuesIn(additionalConfig)),
LSTMSequenceCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions
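The two ENFORCE_BF16 entries in additionalConfig above are the same plugin option an application would pass to force the bf16 path this commit enables. A minimal usage sketch, assuming a hypothetical IR file model.xml; only the config key/value pair is taken from the tests, the rest is standard Inference Engine boilerplate:

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>

int main() {
    InferenceEngine::Core ie;
    // "model.xml" is a placeholder path, not part of the tests above.
    InferenceEngine::CNNNetwork network = ie.ReadNetwork("model.xml");
    // Same key/value pair the tests inject through 'configuration'.
    InferenceEngine::ExecutableNetwork execNet = ie.LoadNetwork(network, "CPU",
        {{InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16,
          InferenceEngine::PluginConfigParams::YES}});
    InferenceEngine::InferRequest request = execNet.CreateInferRequest();
    request.Infer();
    return 0;
}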

@@ -0,0 +1,124 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph/op/rnn_cell.hpp"
#include <shared_test_classes/single_layer/rnn_cell.hpp>
#include "test_utils/cpu_test_utils.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
using RNNCellCpuSpecificParams = typename std::tuple<LayerTestsDefinitions::RNNCellParams, CPUSpecificParams, std::map<std::string, std::string>>;
class RNNCellCPUTest : public testing::WithParamInterface<RNNCellCpuSpecificParams>,
virtual public LayerTestsUtils::LayerTestsCommon,
public CPUTestsBase {
public:
static std::string getTestCaseName(const testing::TestParamInfo<RNNCellCpuSpecificParams> &obj) {
CPUSpecificParams cpuParams;
LayerTestsDefinitions::RNNCellParams basicParamsSet;
std::map<std::string, std::string> additionalConfig;
std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;
std::ostringstream result;
result << LayerTestsDefinitions::RNNCellTest::getTestCaseName(
testing::TestParamInfo<LayerTestsDefinitions::RNNCellParams>(basicParamsSet, 0));
result << CPUTestsBase::getTestCaseName(cpuParams);
if (!additionalConfig.empty()) {
result << "_PluginConf";
for (auto &item : additionalConfig) {
if (item.second == PluginConfigParams::YES)
result << "_" << item.first << "=" << item.second;
}
}
return result.str();
}
protected:
void SetUp() {
CPUSpecificParams cpuParams;
LayerTestsDefinitions::RNNCellParams basicParamsSet;
std::map<std::string, std::string> additionalConfig;
bool should_decompose;
size_t batch;
size_t hidden_size;
size_t input_size;
std::vector<std::string> activations;
std::vector<float> activations_alpha;
std::vector<float> activations_beta;
float clip;
InferenceEngine::Precision netPrecision;
std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
std::tie(should_decompose, batch, hidden_size, input_size, activations, clip, netPrecision, targetDevice) = basicParamsSet;
std::vector<std::vector<size_t>> inputShapes = {{batch, input_size}, {batch, hidden_size},
{hidden_size, input_size}, {hidden_size, hidden_size}, {hidden_size}};
configuration.insert(additionalConfig.begin(), additionalConfig.end());
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
inPrc = outPrc = Precision::BF16;
} else {
inPrc = outPrc = netPrecision;
}
selectedType += "_";
selectedType += outPrc.name();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
std::vector<ngraph::Shape> WRB = {inputShapes[2], inputShapes[3], inputShapes[4]};
auto rnn_cell = ngraph::builder::makeRNN(
ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)),
WRB, hidden_size, activations, {}, {}, clip);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(rnn_cell)};
function = makeNgraphFunction(ngPrc, params, rnn_cell, "rnn_cell");
}
};
TEST_P(RNNCellCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
CheckPluginRelatedResults(executableNetwork, "RNNCell");
}
namespace {
/* CPU PARAMS */
std::vector<std::map<std::string, std::string>> additionalConfig
= {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
CPUSpecificParams cpuParams{{nc, nc}, {nc}, {"ref_any"}, "ref_any"};
std::vector<bool> should_decompose{false};
std::vector<size_t> batch{1, 5};
std::vector<size_t> hidden_size{1, 10};
std::vector<size_t> input_size{1, 30};
std::vector<std::vector<std::string>> activations = {{"relu"}, {"sigmoid"}, {"tanh"}};
// oneDNN supports only zero clip
std::vector<float> clip = {0.f};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
INSTANTIATE_TEST_CASE_P(smoke_RNNCellCPU,
RNNCellCPUTest,
::testing::Combine(::testing::Combine(::testing::ValuesIn(should_decompose),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(cpuParams),
::testing::ValuesIn(additionalConfig)),
RNNCellCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions
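For orientation, the reference these RNNCell tests compare against computes one recurrent step, Ht = act(Xt * W^T + Ht-1 * R^T + B), over the shapes declared in SetUp. A naive FP32 sketch of that step (an illustration only, not the plugin kernel; tanh is shown, while the tests also cover relu and sigmoid):

#include <cmath>
#include <vector>

// Naive vanilla RNN cell step: Ht = tanh(Xt * W^T + Ht_1 * R^T + B).
// Shapes: X[batch][input_size], H[batch][hidden_size],
//         W[hidden_size][input_size], R[hidden_size][hidden_size], B[hidden_size].
std::vector<std::vector<float>> rnn_cell_ref(const std::vector<std::vector<float>>& X,
                                             const std::vector<std::vector<float>>& H,
                                             const std::vector<std::vector<float>>& W,
                                             const std::vector<std::vector<float>>& R,
                                             const std::vector<float>& B) {
    const size_t batch = X.size();
    const size_t hidden = W.size();
    std::vector<std::vector<float>> Ht(batch, std::vector<float>(hidden, 0.0f));
    for (size_t b = 0; b < batch; ++b) {
        for (size_t h = 0; h < hidden; ++h) {
            float acc = B[h];
            for (size_t i = 0; i < X[b].size(); ++i)
                acc += X[b][i] * W[h][i];   // input projection
            for (size_t j = 0; j < H[b].size(); ++j)
                acc += H[b][j] * R[h][j];   // recurrent projection
            Ht[b][h] = std::tanh(acc);      // clip is 0 in the tests, so no clamping
        }
    }
    return Ht;
}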

@@ -0,0 +1,202 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "shared_test_classes/single_layer/rnn_sequence.hpp"
#include "ngraph/pass/visualize_tree.hpp"
#include "test_utils/cpu_test_utils.hpp"
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace CPULayerTestsDefinitions {
using RNNSequenceCpuSpecificParams = typename std::tuple<LayerTestsDefinitions::RNNSequenceParams, CPUSpecificParams, std::map<std::string, std::string>>;
class RNNSequenceCPUTest : public testing::WithParamInterface<RNNSequenceCpuSpecificParams>,
virtual public LayerTestsUtils::LayerTestsCommon,
public CPUTestsBase {
public:
static std::string getTestCaseName(const testing::TestParamInfo<RNNSequenceCpuSpecificParams> &obj) {
CPUSpecificParams cpuParams;
LayerTestsDefinitions::RNNSequenceParams basicParamsSet;
std::map<std::string, std::string> additionalConfig;
std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;
std::ostringstream result;
result << LayerTestsDefinitions::RNNSequenceTest::getTestCaseName(testing::TestParamInfo<LayerTestsDefinitions::RNNSequenceParams>(basicParamsSet, 0));
result << CPUTestsBase::getTestCaseName(cpuParams);
if (!additionalConfig.empty()) {
result << "_PluginConf";
for (auto &item : additionalConfig) {
if (item.second == PluginConfigParams::YES)
result << "_" << item.first << "=" << item.second;
}
}
return result.str();
}
protected:
void SetUp() {
LayerTestsDefinitions::RNNSequenceParams basicParamsSet;
CPUSpecificParams cpuParams;
std::map<std::string, std::string> additionalConfig;
size_t seq_lenghts;
size_t batch;
size_t hidden_size;
size_t input_size;
std::vector<std::string> activations;
std::vector<float> activations_alpha;
std::vector<float> activations_beta;
float clip;
ngraph::op::RecurrentSequenceDirection direction;
InferenceEngine::Precision netPrecision;
std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
std::tie(m_mode, seq_lenghts, batch, hidden_size, input_size, activations, clip, direction, netPrecision, targetDevice) = basicParamsSet;
size_t num_directions = direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 2 : 1;
std::vector<std::vector<size_t>> inputShapes = {
{{batch, seq_lenghts, input_size},
{batch, num_directions, hidden_size},
{batch},
{num_directions, hidden_size, input_size},
{num_directions, hidden_size, hidden_size},
{num_directions, hidden_size}},
};
configuration.insert(additionalConfig.begin(), additionalConfig.end());
if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
inPrc = outPrc = Precision::BF16;
} else {
inPrc = outPrc = netPrecision;
}
selectedType += "_";
selectedType += outPrc.name();
m_max_seq_len = seq_lenghts;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
if (m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_PARAM
|| m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM) {
auto seq_lengths = ngraph::builder::makeParams(ngraph::element::i64, {inputShapes[2]}).at(0);
seq_lengths->set_friendly_name("seq_lengths");
params.push_back(seq_lengths);
}
std::vector<ngraph::Shape> WRB = {inputShapes[3], inputShapes[4], inputShapes[5], inputShapes[2]};
auto rnn_sequence = ngraph::builder::makeRNN(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)),
WRB,
hidden_size,
activations,
{},
{},
clip,
true,
direction,
m_mode);
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(rnn_sequence->output(0)),
std::make_shared<ngraph::opset1::Result>(rnn_sequence->output(1))};
function = makeNgraphFunction(ngPrc, params, rnn_sequence, "rnn_sequence");
if (m_mode != ngraph::helpers::SequenceTestsMode::PURE_SEQ) {
ngraph::pass::Manager manager;
if (direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL)
manager.register_pass<ngraph::pass::BidirectionalRNNSequenceDecomposition>();
manager.register_pass<ngraph::pass::ConvertRNNSequenceToTensorIterator>();
manager.run_passes(function);
bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
EXPECT_EQ(ti_found, true);
} else {
bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
EXPECT_EQ(ti_found, false);
}
}
void GenerateInputs() {
for (const auto &input : executableNetwork.GetInputsInfo()) {
const auto &info = input.second;
auto blob = GenerateInput(*info);
if (input.first == "seq_lengths") {
blob = FuncTestUtils::createAndFillBlob(info->getTensorDesc(), m_max_seq_len, 0);
}
inputs.push_back(blob);
}
}
private:
ngraph::helpers::SequenceTestsMode m_mode;
int64_t m_max_seq_len = 0;
};
TEST_P(RNNSequenceCPUTest, CompareWithRefs) {
SKIP_IF_CURRENT_TEST_IS_DISABLED()
Run();
CheckPluginRelatedResults(executableNetwork, "RNNSeq");
}
namespace {
/* CPU PARAMS */
std::vector<std::map<std::string, std::string>> additionalConfig
= {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
CPUSpecificParams cpuParams{{ntc, nc}, {ntc, nc}, {"ref_any"}, "ref_any"};
CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc}, {tnc, nc}, {"ref_any"}, "ref_any"};
std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ};
// output values increase rapidly without clip, so use only seq_lengths = 2
std::vector<size_t> seq_lengths_zero_clip{2};
std::vector<size_t> batch{10};
std::vector<size_t> batch_size_one{1};
std::vector<size_t> hidden_size{10};
// std::vector<size_t> hidden_size{1, 10};
std::vector<size_t> input_size{10};
std::vector<std::vector<std::string>> activations = {{"relu"}, {"sigmoid"}, {"tanh"}};
// oneDNN supports only zero clip
std::vector<float> clip{0.f};
std::vector<ngraph::op::RecurrentSequenceDirection> direction{ngraph::op::RecurrentSequenceDirection::FORWARD};
std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
INSTANTIATE_TEST_CASE_P(smoke_RNNSequenceCPU,
RNNSequenceCPUTest,
::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
::testing::ValuesIn(seq_lengths_zero_clip),
::testing::ValuesIn(batch),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(cpuParams),
::testing::ValuesIn(additionalConfig)),
RNNSequenceCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_RNNSequenceCPUBatchSizeOne,
RNNSequenceCPUTest,
::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
::testing::ValuesIn(seq_lengths_zero_clip),
::testing::ValuesIn(batch_size_one),
::testing::ValuesIn(hidden_size),
::testing::ValuesIn(input_size),
::testing::ValuesIn(activations),
::testing::ValuesIn(clip),
::testing::ValuesIn(direction),
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(cpuParamsBatchSizeOne),
::testing::ValuesIn(additionalConfig)),
RNNSequenceCPUTest::getTestCaseName);
} // namespace
} // namespace CPULayerTestsDefinitions
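The sequence tests exercise the same cell unrolled over time in forward direction (PURE_SEQ mode). A sketch of that unrolling, reusing the rnn_cell_ref helper sketched after the RNNCell tests above (again an illustration, not the oneDNN primitive):

#include <vector>

// Declaration of the single-step helper from the RNNCell sketch above.
std::vector<std::vector<float>> rnn_cell_ref(const std::vector<std::vector<float>>& X,
                                             const std::vector<std::vector<float>>& H,
                                             const std::vector<std::vector<float>>& W,
                                             const std::vector<std::vector<float>>& R,
                                             const std::vector<float>& B);

// Forward unrolling: Y collects Ht for every step; the last Ht corresponds to Ho.
// X is laid out as [seq_len][batch][input_size], H0 as [batch][hidden_size].
std::vector<std::vector<std::vector<float>>> rnn_seq_ref(
        const std::vector<std::vector<std::vector<float>>>& X,
        std::vector<std::vector<float>> H,
        const std::vector<std::vector<float>>& W,
        const std::vector<std::vector<float>>& R,
        const std::vector<float>& B) {
    std::vector<std::vector<std::vector<float>>> Y;
    Y.reserve(X.size());
    for (const auto& Xt : X) {
        H = rnn_cell_ref(Xt, H, W, R, B);  // one time step
        Y.push_back(H);                    // per-step hidden state
    }
    return Y;                              // Y.back() is the final hidden state
}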

@@ -8,16 +8,29 @@
 namespace CPUTestUtils {
 const char *CPUTestsBase::cpu_fmt2str(cpu_memory_format_t v) {
-    if (v == nchw) return "nchw";
-    if (v == nChw8c) return "nChw8c";
-    if (v == nChw16c) return "nChw16c";
-    if (v == nhwc) return "nhwc";
-    if (v == ncdhw) return "ncdhw";
-    if (v == nCdhw8c) return "nCdhw8c";
-    if (v == nCdhw16c) return "nCdhw16c";
-    if (v == ndhwc) return "ndhwc";
-    if (v == nc) return "nc";
-    if (v == x) return "x";
+#define CASE(_fmt) do { \
+    if (v == _fmt) return #_fmt; \
+} while (0)
+    CASE(undef);
+    CASE(nchw);
+    CASE(nChw8c);
+    CASE(nChw16c);
+    CASE(nhwc);
+    CASE(ncdhw);
+    CASE(nCdhw8c);
+    CASE(nCdhw16c);
+    CASE(ndhwc);
+    CASE(nc);
+    CASE(x);
+    CASE(tnc);
+    CASE(ntc);
+    CASE(ldnc);
+    CASE(ldigo);
+    CASE(ldgoi);
+    CASE(ldio);
+    CASE(ldoi);
+    CASE(ldgo);
+#undef CASE
     assert(!"unknown fmt");
     return "undef";
 }
@@ -39,6 +52,10 @@ cpu_memory_format_t CPUTestsBase::cpu_str2fmt(const char *str) {
     CASE(acdeb);
     CASE(aBcde8b);
     CASE(aBcde16b);
+    CASE(abc);
+    CASE(bac);
+    CASE(abdc);
+    CASE(abdec);
     CASE(nchw);
     CASE(nChw8c);
     CASE(nChw16c);
@@ -49,6 +66,14 @@ cpu_memory_format_t CPUTestsBase::cpu_str2fmt(const char *str) {
     CASE(ndhwc);
     CASE(nc);
     CASE(x);
+    CASE(tnc);
+    CASE(ntc);
+    CASE(ldnc);
+    CASE(ldigo);
+    CASE(ldgoi);
+    CASE(ldio);
+    CASE(ldoi);
+    CASE(ldgo);
 #undef CASE
     assert(!"unknown memory format");
     return undef;
@@ -120,18 +145,38 @@ void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork
             auto shape = parentNode->get_output_tensor(0).get_shape();
             auto actualInputMemoryFormat = getExecValueOutputsLayout(parentNode);
-            if (!should_be_skipped(shape, inFmts[i]))
+            if (!should_be_skipped(shape, inFmts[i])) {
                 ASSERT_EQ(inFmts[i], cpu_str2fmt(actualInputMemoryFormat.c_str()));
+            }
         }
     }
-    for (int i = 0; i < outFmts.size(); i++) {
-        const auto actualOutputMemoryFormat = getExecValue(ExecGraphInfoSerialization::OUTPUT_LAYOUTS);
-        const auto shape = node->get_output_shape(i);
-        if (!should_be_skipped(shape, outFmts[i]))
-            ASSERT_EQ(outFmts[i], cpu_str2fmt(actualOutputMemoryFormat.c_str()));
-    }
+    /* actual output formats are represented as a single string, for example 'fmt1' or 'fmt1, fmt2, fmt3'
+     * convert it to the list of formats */
+    auto getActualOutputMemoryFormats = [] (const std::string& fmtStr) -> std::vector<std::string> {
+        std::vector<std::string> result;
+        std::stringstream ss(fmtStr);
+        std::string str;
+        while (std::getline(ss, str, ',')) {
+            result.push_back(str);
+        }
+        return result;
+    };
+    auto actualOutputMemoryFormats = getActualOutputMemoryFormats(getExecValueOutputsLayout(node));
+    for (size_t i = 0; i < outFmts.size(); i++) {
+        const auto shape = node->get_output_shape(i);
+        if (should_be_skipped(shape, outFmts[i]))
+            continue;
+        ASSERT_EQ(outFmts[i], cpu_str2fmt(actualOutputMemoryFormats[i].c_str()));
+    }
     auto primType = getExecValue(ExecGraphInfoSerialization::IMPL_TYPE);
     ASSERT_EQ(selectedType, primType);
 }
 }
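A small standalone illustration of the splitting approach used by getActualOutputMemoryFormats above, with a made-up layout string rather than one taken from a real execution graph; note that std::getline performs no whitespace trimming, so the tokens are exactly the characters between commas:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

int main() {
    const std::string fmtStr = "ntc,nc,nc";  // hypothetical three-output layout string
    std::vector<std::string> fmts;
    std::stringstream ss(fmtStr);
    std::string token;
    while (std::getline(ss, token, ','))
        fmts.push_back(token);
    for (const auto& f : fmts)
        std::cout << f << '\n';              // prints ntc, nc, nc on separate lines
    return 0;
}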
@@ -197,8 +242,11 @@ std::shared_ptr<ngraph::Function>
 CPUTestsBase::makeNgraphFunction(const ngraph::element::Type &ngPrc, ngraph::ParameterVector &params,
                                  const std::shared_ptr<ngraph::Node> &lastNode, std::string name) const {
     auto newLastNode = modifyGraph(ngPrc, params, lastNode);
-    ngraph::ResultVector results = {std::make_shared<ngraph::opset1::Result>(newLastNode)};
+    ngraph::ResultVector results;
+    for (int i = 0; i < newLastNode->get_output_size(); i++)
+        results.push_back(std::make_shared<ngraph::opset1::Result>(newLastNode->output(i)));
     return std::make_shared<ngraph::Function>(results, params, name);
 }

@@ -24,6 +24,11 @@ namespace CPUTestUtils {
         acdeb,
         aBcde8b,
         aBcde16b,
+        // RNN layouts
+        abc,
+        bac,
+        abdc,
+        abdec,
         x = a,
         nc = ab,
@@ -34,7 +39,41 @@
         ncdhw = abcde,
         nCdhw8c = aBcde8b,
         nCdhw16c = aBcde16b,
-        ndhwc = acdeb
+        ndhwc = acdeb,
+        // RNN layouts
+        tnc = abc,
+        /// 3D RNN data tensor in the format (batch, seq_length, input channels).
+        ntc = bac,
+        /// 4D RNN states tensor in the format (num_layers, num_directions,
+        /// batch, state channels).
+        ldnc = abcd,
+        /// 5D RNN weights tensor in the format (num_layers, num_directions,
+        /// input_channels, num_gates, output_channels).
+        ///
+        /// - For LSTM cells, the gates order is input, forget, candidate
+        ///   and output gate.
+        /// - For GRU cells, the gates order is update, reset and output gate.
+        ldigo = abcde,
+        /// 5D RNN weights tensor in the format (num_layers, num_directions,
+        /// num_gates, output_channels, input_channels).
+        ///
+        /// - For LSTM cells, the gates order is input, forget, candidate
+        ///   and output gate.
+        /// - For GRU cells, the gates order is update, reset and output gate.
+        ldgoi = abdec,
+        /// 4D LSTM projection tensor in the format (num_layers, num_directions,
+        /// num_channels_in_hidden_state, num_channels_in_recurrent_projection).
+        ldio = abcd,
+        /// 4D LSTM projection tensor in the format (num_layers, num_directions,
+        /// num_channels_in_recurrent_projection, num_channels_in_hidden_state).
+        ldoi = abdc,
+        /// 4D RNN bias tensor in the format (num_layers, num_directions,
+        /// num_gates, output_channels).
+        ///
+        /// - For LSTM cells, the gates order is input, forget, candidate
+        ///   and output gate.
+        /// - For GRU cells, the gates order is update, reset and output gate.
+        ldgo = abcd,
     } cpu_memory_format_t;

     using CPUSpecificParams = std::tuple<
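To make the layouts above concrete: for a hypothetical single-layer, unidirectional LSTM with input_size = 10, hidden_size = 32, batch = 4 and seq_len = 7 (numbers chosen only for illustration), the corresponding dense tensor shapes would be:

    tnc   (seq_length, batch, channels)                               -> {7, 4, 10}
    ntc   (batch, seq_length, channels)                               -> {4, 7, 10}
    ldnc  (num_layers, num_directions, batch, state_channels)         -> {1, 1, 4, 32}
    ldigo (num_layers, num_directions, input_channels, gates, out_ch) -> {1, 1, 10, 4, 32}  (4 LSTM gates)
    ldgoi (num_layers, num_directions, gates, out_ch, input_channels) -> {1, 1, 4, 32, 10}
    ldgo  (num_layers, num_directions, gates, out_ch)                 -> {1, 1, 4, 32}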

@@ -4,6 +4,7 @@
 #pragma once
+#include <gtest/gtest.h>
 #include <tuple>
 #include <string>
 #include <vector>
@ -26,7 +27,7 @@ using LSTMCellParams = typename std::tuple<
        std::string>;                     // Device name

class LSTMCellTest : public testing::WithParamInterface<LSTMCellParams >,
                     virtual public LayerTestsUtils::LayerTestsCommon {
public:
    static std::string getTestCaseName(const testing::TestParamInfo<LSTMCellParams> &obj);

@@ -1 +1 @@
-Subproject commit 0813c00df7558bc9b858d3a73c725bab2ce1b1eb
+Subproject commit 462982a2f9272ad26473ec13d983b10dbd193cd3