diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp
index 62e4a64eda0..9b220b0a9a6 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp
@@ -5,12 +5,17 @@
 #include "mkldnn_rnn.h"
 #include "mkldnn_extension_utils.h"
+#include "mkldnn_node.h"
 #include "utils/general_utils.h"
 #include "nodes/common/cpu_memcpy.h"
+#include "utils/bfloat16.hpp"
+#include "nodes/common/cpu_convert.h"
 #include <string>
 #include <utility>
 
+#define THROW_ERROR IE_THROW() << NameFromType(getType()) << " layer '" << getName() << "' "
+
 using namespace mkldnn;
 using namespace InferenceEngine;
 
@@ -39,7 +44,7 @@ static algorithm ie2mkl(RNNCellBase::CellType cell_type) {
     case RNNCellBase::GRU:     return algorithm::vanilla_gru;
     case RNNCellBase::GRU_LBR: return algorithm::lbr_gru;
     default:
-        IE_THROW() << "Unsupported cell type";
+        IE_THROW() << "RNN node. Unsupported cell type";
         return algorithm::undef;
     }
 }
@@ -51,7 +56,7 @@ size_t gatesCount(algorithm alg) {
     case algorithm::lbr_gru:      return 3;
     case algorithm::vanilla_lstm: return 4;
     default:
-        IE_THROW() << "Unsupported cell type";
+        IE_THROW() << "RNN node. Unsupported cell type";
         return 0;
     }
 }
@@ -63,11 +68,24 @@ size_t statesCount(algorithm alg) {
     case algorithm::lbr_gru:      return 1;
     case algorithm::vanilla_lstm: return 2;
     default:
-        IE_THROW() << "Unsupported cell type";
+        IE_THROW() << "RNN node. Unsupported cell type";
         return 0;
     }
 }
 
+bool haveCellState(algorithm alg) {
+    return alg == algorithm::vanilla_lstm;
+}
+
+const std::map<InferenceEngine::Precision, InferenceEngine::Precision> MKLDNNRNN::weightsByLayerPrec {
+    // layer precision,                weights precision
+    {InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32},
+    {InferenceEngine::Precision::BF16, InferenceEngine::Precision::BF16},
+    // FP16 and U8 are not supported yet
+    // {InferenceEngine::Precision::FP16, InferenceEngine::Precision::FP16},
+    // {InferenceEngine::Precision::U8,   InferenceEngine::Precision::I8},
+};
+
 MKLDNNRNN::MKLDNNRNN(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
         MKLDNNNode(layer, eng, cache) {
     is_cell = one_of(layer->type, "LSTMCell", "GRUCell", "RNNCell");
@@ -78,6 +96,8 @@ bool MKLDNNRNN::created() const {
 }
 
 void MKLDNNRNN::getSupportedDescriptors() {
+    runtimePrecision = getCnnLayer()->insData[0].lock()->getPrecision();
+
     if (is_cell)
         fillCellDesc();
     else
@@ -89,14 +109,14 @@ void MKLDNNRNN::fillCellDesc() {
     auto cellLayer = std::dynamic_pointer_cast<RNNCellBase>(getCnnLayer());
 
     if (!cellLayer)
-        IE_THROW() << "No original layer for RNNCell.";
+        THROW_ERROR << "No original layer for RNNCell.";
 
     cell_type = ie2mkl(cellLayer->cellType);
     cell_act = ie2mkl(cellLayer->activations[0]);  // Works only for RNN with one gate
 
     if (cellLayer->clip != 0.0f) {
         // TODO [oneDNN]: No more supported
-        IE_THROW() << "Clipping is not supported for RNN primitive";
+        THROW_ERROR << "Clipping is not supported for RNN primitive";
         // cell_desc.set_clipping(cellLayer->clip);
     }
 
@@ -104,16 +124,16 @@
     auto &ins = cellLayer->insData;
     auto &outs = cellLayer->outData;
 
     if (!one_of(ins.size(), 3, 2))
-        IE_THROW() << "Incorrect number of input ports for layer " << getName();
+        THROW_ERROR << "Incorrect number of input ports for layer " << getName();
     if (!one_of(outs.size(), 2, 1))
-        IE_THROW() << "Incorrect number of output ports for layer " << getName();
+        THROW_ERROR << "Incorrect number of output ports for layer " << getName();
 
     auto in_data_dims = getParentEdgeAt(0)->getDims();
     auto 
in_h_state_dims = getParentEdgeAt(1)->getDims(); auto out_h_state_dims = getChildEdgeAt(0)->getDims(); if (in_data_dims.ndims() != 2 || in_h_state_dims.ndims() != 2) - IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); + THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); G = gatesCount(cell_type); S = statesCount(cell_type); @@ -130,7 +150,7 @@ void MKLDNNRNN::fillCellDesc() { if (in_data_dims != D_shape || in_h_state_dims != S_shape || out_h_state_dims != S_shape) - IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); + THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); if (S == 2) { auto in_c_state_dims = getParentEdgeAt(2)->getDims(); @@ -138,7 +158,7 @@ void MKLDNNRNN::fillCellDesc() { if (in_c_state_dims != S_shape || out_c_state_dims != S_shape) - IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); + THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); } auto blobs = cellLayer->blobs; @@ -147,40 +167,53 @@ void MKLDNNRNN::fillCellDesc() { if (blobs.find("biases") != blobs.end()) bias = blobs["biases"]; if (!weights) - IE_THROW() << "RNN Layer. Weights do not present."; + THROW_ERROR << "RNN Layer. Weights do not present."; - if (weights->size() != G*SC*(SC+DC)) - IE_THROW() << "RNN Layer. Weights size is not correct. Expected size:" << G*SC*(SC+DC); + if (weights->size() != G * SC * (SC + DC)) + THROW_ERROR << "RNN Layer. Weights size is not correct. Expected size:" << G * SC * (SC + DC); - if (bias && bias->size() != Gb*SC) - IE_THROW() << "RNN Layer. Biases size is not correct. Expected size:" << G*SC; + if (bias && bias->size() != Gb * SC) + THROW_ERROR << "RNN Layer. Biases size is not correct. Expected size:" << G * SC; + + auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); + + // layer input plus states + in_data_d.resize(S + 1); + out_data_d.resize(S + 1); // Shapes and Attributes are correct. Can start internal stuff initialization. 
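
[Editor's aside] Later in this hunk, `fillCellDesc()` falls back to `convertWeightsBlobToBF16()` when the layer runs in BF16 but the weights blob is still FP32, delegating the element-wise narrowing to `cpu_convert`. As a self-contained illustration of what that FP32-to-BF16 narrowing amounts to (round-to-nearest-even to the upper 16 bits of the IEEE-754 float; the helper name `f32_to_bf16` is hypothetical, not plugin API, and NaN handling is omitted):

```cpp
#include <cstdint>
#include <cstring>
#include <cstdio>

// Sketch: FP32 -> BF16 narrowing with round-to-nearest-even, i.e. the kind of
// conversion cpu_convert(src, dst, FP32, BF16, n) applies to each weight.
static uint16_t f32_to_bf16(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof bits);        // type-pun without UB
    bits += 0x7FFFu + ((bits >> 16) & 1u);      // rounding bias, ties to even
    return static_cast<uint16_t>(bits >> 16);   // keep sign + exponent + 7 mantissa bits
}

int main() {
    std::printf("bf16(1.0f) = 0x%04x\n", f32_to_bf16(1.0f));  // 0x3f80, exact
    std::printf("bf16(0.1f) = 0x%04x\n", f32_to_bf16(0.1f));  // 0x3dcd, rounds up
    return 0;
}
```
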
-    for (size_t i = 0; i < S; i++) {
-        in_states_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc);
-        out_states_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc);
+    in_data_d[RNNInOutKind::Layer]  = {{T, N, DC}, dataType, memory::format_tag::tnc};
+    out_data_d[RNNInOutKind::Layer] = {{T, N, SC}, dataType, memory::format_tag::tnc};
+
+    in_data_d[RNNInOutKind::HiddenState]  = {S_4D_shape, dataType, memory::format_tag::ldnc};
+    out_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc};
+
+    if (haveCellState(cell_type)) {
+        in_data_d[RNNInOutKind::CellState]  = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
+        out_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
     }
 
-    in_data_d  = {{T, N, DC}, memory::data_type::f32, memory::format_tag::tnc};;
-    out_data_d = {{T, N, SC}, memory::data_type::f32, memory::format_tag::tnc};;
-
-    w_data_d   = {{L, D, DC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
-    w_state_d  = {{L, D, SC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
+    w_data_d  = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo};
+    w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo};
 
     if (bias)
         w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo};
 
     std::vector<TensorDesc> in_candidate, out_candidate;
-    std::vector<memory::format_tag> outputFormats;
-    in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, memory::data_type::f32, memory::format_tag::nc});
-    in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
-    out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
-    outputFormats.emplace_back(memory::format_tag::nc);
+    in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, dataType, memory::format_tag::nc});
+    in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc});
+    out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc});
 
-    if (S == 2) {
+    if (haveCellState(cell_type)) {
         in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
         out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
-        outputFormats.emplace_back(memory::format_tag::nc);
+    }
+
+    Precision weights_prec = as<MemoryBlob>(weights)->getTensorDesc().getPrecision();
+
+    if (!verifyWeightsPrecision(runtimePrecision, weights_prec)) {
+        if (runtimePrecision == Precision::BF16 && weights_prec == Precision::FP32)
+            convertWeightsBlobToBF16();
     }
 
     createDescriptor(in_candidate, out_candidate);
@@ -191,10 +224,10 @@ void MKLDNNRNN::fillSeqDesc() {
     auto rnnLayer = std::dynamic_pointer_cast<RNNSequenceLayer>(getCnnLayer());
 
     if (!rnnLayer)
-        IE_THROW() << "Wrong RNN layer representation. Cannot cast to RNNSequenceLayer.";
+        THROW_ERROR << "Wrong RNN layer representation. 
Cannot cast to RNNSequenceLayer."; if (!one_of(rnnLayer->cellType, _RNN::LSTM, _RNN::GRU, _RNN::GRU_LBR, _RNN::RNN)) - IE_THROW() << "RNN layer supports only LSTM/GRU/RNN cell"; + THROW_ERROR << "RNN layer supports only LSTM/GRU/RNN cell"; cell_type = ie2mkl(rnnLayer->cellType); cell_act = algorithm::undef; @@ -203,31 +236,31 @@ void MKLDNNRNN::fillSeqDesc() { // TODO [oneDNN]: No more supported if (rnnLayer->clip != 0.0f) { - IE_THROW() << "Clipping is not supported for RNN primitive"; + THROW_ERROR << "Clipping is not supported for RNN primitive"; // cell_desc.set_clipping(rnnLayer->clip); } if (!one_of(rnnLayer->axis, 0, 1)) - IE_THROW() << "RNN layer supports only sequence axis 0 or 1"; + THROW_ERROR << "RNN layer supports only sequence axis 0 or 1"; nativeOrder = rnnLayer->axis == 0; if (!one_of(rnnLayer->direction, _RNN::FWD, _RNN::BWD)) - IE_THROW() << "RNN layer supports only unidirectional RNN layer"; + THROW_ERROR << "RNN layer supports only unidirectional RNN layer"; direction = ie2mkl(rnnLayer->direction); auto &ins = rnnLayer->insData; auto &outs = rnnLayer->outData; if (!one_of(ins.size(), 3, 2, 1)) - IE_THROW() << "Incorrect number of input ports for layer " << getName(); + THROW_ERROR << "Incorrect number of input ports for layer " << getName(); if (!one_of(outs.size(), 3, 2, 1)) - IE_THROW() << "Incorrect number of output ports for layer " << getName(); + THROW_ERROR << "Incorrect number of output ports for layer " << getName(); auto in_data_dims = getParentEdgeAt(0)->getDims(); auto out_data_dims = getChildEdgeAt(0)->getDims(); if (in_data_dims.ndims() != 3 || out_data_dims.ndims() != 3) - IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); + THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); if (!nativeOrder) { std::swap(in_data_dims[0], in_data_dims[1]); @@ -246,125 +279,153 @@ void MKLDNNRNN::fillSeqDesc() { MKLDNNDims ID_shape {T, N, DC}, OD_shape {T, N, SC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; if (out_data_dims != OD_shape) - IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); + THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); - in_states_d.resize(S); - out_states_d.resize(S); - - for (int i = 1; i < ins.size(); i++) { - if (getParentEdgeAt(i)->getDims() != S_shape) - IE_THROW() << "Incorrect shape of state ports for layer " << getName(); - in_states_d[i - 1] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; - } - - for (int i = 1; i < outs.size(); i++) { - if (getChildEdgeAt(i)->getDims() != S_shape) - IE_THROW() << "Incorrect shape of state ports for layer " << getName(); - out_states_d[i - 1] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; - } - - auto blobs = rnnLayer->blobs; + auto& blobs = rnnLayer->blobs; Blob::Ptr weights, bias; if (blobs.find("weights") != blobs.end()) weights = blobs["weights"]; if (blobs.find("biases") != blobs.end()) bias = blobs["biases"]; if (!weights) - IE_THROW() << "RNN Layer. Weights do not present."; + THROW_ERROR << "RNN Layer. Weights do not present."; - if (weights->size() != G*SC*(SC+DC)) - IE_THROW() << "RNN Layer. Weights size is not correct. Expected size:" << G*SC*(SC+DC); + if (weights->size() != G * SC * (SC + DC)) + THROW_ERROR << "RNN Layer. Weights size is not correct. 
Expected size:" << G * SC * (SC + DC);
 
-    w_data_d  = {{L, D, DC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
-    w_state_d = {{L, D, SC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
+    for (int i = 1; i < ins.size(); i++) {
+        if (getParentEdgeAt(i)->getDims() != S_shape)
+            THROW_ERROR << "Incorrect shape of state ports for layer " << getName();
+    }
 
-    if (bias && bias->size() != Gb*SC)
-        IE_THROW() << "RNN Layer. Biases size is not correct. Expected size:" << G*SC;
+    for (int i = 1; i < outs.size(); i++) {
+        if (getChildEdgeAt(i)->getDims() != S_shape)
+            THROW_ERROR << "Incorrect shape of state ports for layer " << getName();
+    }
+
+    // layer input plus states
+    in_data_d.resize(S + 1);
+    out_data_d.resize(S + 1);
+
+    auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision);
+
+    // Try to create descriptor and corresponding configuration
+    in_data_d[RNNInOutKind::Layer]  = {in_data_dims, dataType, memory::format_tag::tnc};
+    out_data_d[RNNInOutKind::Layer] = {out_data_dims, dataType, memory::format_tag::tnc};
+
+    in_data_d[RNNInOutKind::HiddenState]  = {S_4D_shape, dataType, memory::format_tag::ldnc};
+    out_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc};
+
+    if (haveCellState(cell_type)) {
+        in_data_d[RNNInOutKind::CellState]  = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
+        out_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
+    }
+
+    w_data_d  = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo};
+    w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo};
+
+    if (bias && bias->size() != Gb * SC)
+        THROW_ERROR << "RNN Layer. Biases size is not correct. Expected size:" << G * SC;
 
     if (bias)
         w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo};
 
-    // Try to create descriptor and corresponding configuration
-    in_data_d = {in_data_dims, memory::data_type::f32, memory::format_tag::tnc};
-    out_data_d = {out_data_dims, memory::data_type::f32, memory::format_tag::tnc};
+    std::vector<TensorDesc> in_candidate, out_candidate;
 
-    std::vector<TensorDesc> in_candidate;
-    if (nativeOrder)
-        in_candidate.push_back(in_data_d);
-    else
-        in_candidate.push_back(MKLDNNMemoryDesc{{N, T, DC}, memory::data_type::f32, memory::format_tag::ntc});
-
-    for (int i = 1; i < ins.size(); i++)
-        in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
-
-    std::vector<TensorDesc> out_candidate;
     if (nativeOrder) {
-        out_candidate.push_back(out_data_d);
+        in_candidate.push_back(in_data_d[RNNInOutKind::Layer]);
+        out_candidate.push_back(out_data_d[RNNInOutKind::Layer]);
     } else {
-        out_candidate.push_back(MKLDNNMemoryDesc{{N, T, SC}, memory::data_type::f32, memory::format_tag::ntc});
+        in_candidate.emplace_back(MKLDNNMemoryDesc{{N, T, DC}, dataType, memory::format_tag::ntc});
+        out_candidate.emplace_back(MKLDNNMemoryDesc{{N, T, SC}, dataType, memory::format_tag::ntc});
     }
 
-    for (int i = 1; i < outs.size(); i++) {
+    in_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, dataType, memory::format_tag::nc});
+    out_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, dataType, memory::format_tag::nc});
+
+    if (haveCellState(cell_type)) {
+        in_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, memory::data_type::f32, memory::format_tag::nc});
         out_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, memory::data_type::f32, memory::format_tag::nc});
     }
 
+    Precision weights_prec = as<MemoryBlob>(weights)->getTensorDesc().getPrecision();
+
+    if (!verifyWeightsPrecision(runtimePrecision, weights_prec)) {
+        if (runtimePrecision == Precision::BF16 && weights_prec == Precision::FP32)
+            convertWeightsBlobToBF16();
+    }
+
     createDescriptor(in_candidate, out_candidate);
 }
 
+void MKLDNNRNN::convertWeightsBlobToBF16() {
+    Blob::Ptr &weights = getCnnLayer()->blobs["weights"];
+    MemoryBlob::Ptr cur_weights = as<MemoryBlob>(weights);
+    TensorDesc td(Precision::BF16, cur_weights->getTensorDesc().getDims(), cur_weights->getTensorDesc().getLayout());
+    MemoryBlob::Ptr new_weights_blob = make_shared_blob<bfloat16_t>(td);
+
+    new_weights_blob->allocate();
+    bfloat16_t *dst = new_weights_blob->wmap();
+
+    float* fp32src = cur_weights->rmap().as<float*>();
+    cpu_convert(fp32src, dst, Precision::FP32, Precision::BF16, new_weights_blob->size());
+    weights = new_weights_blob;
+}
+
 void MKLDNNRNN::createDescriptor(const std::vector<TensorDesc> &inputDesc,
                                  const std::vector<TensorDesc> &outputDesc) {
     switch (cell_type) {
         case mkldnn::algorithm::vanilla_rnn: {
             MKLDNNDescriptor desc(std::shared_ptr<vanilla_rnn_forward::desc>(
                     new vanilla_rnn_forward::desc(prop_kind::forward_scoring, cell_act, direction,
-                        /* In Data       */ in_data_d,
-                        /* In State      */ in_states_d[0],
+                        /* In Data       */ in_data_d[RNNInOutKind::Layer],
+                        /* In State      */ in_data_d[RNNInOutKind::HiddenState],
                         /* Weights data  */ w_data_d,
                         /* Weights state */ w_state_d,
                         /* Bias          */ w_bias_d,
-                        /* Out Data      */ out_data_d,
-                        /* Out State     */ out_states_d[0])));
+                        /* Out Data      */ out_data_d[RNNInOutKind::Layer],
+                        /* Out State     */ out_data_d[RNNInOutKind::HiddenState])));
             descs.push_back(desc);
         } break;
         case mkldnn::algorithm::vanilla_gru: {
             MKLDNNDescriptor desc(std::shared_ptr<gru_forward::desc>(
                     new gru_forward::desc(prop_kind::forward_scoring, direction,
-                        /* In Data       */ in_data_d,
-                        /* In State      */ in_states_d[0],
+                        /* In Data       */ in_data_d[RNNInOutKind::Layer],
+                        /* In State      */ in_data_d[RNNInOutKind::HiddenState],
                         /* Weights data  */ w_data_d,
                         /* Weights state */ w_state_d,
                         /* Bias          */ w_bias_d,
-                        /* Out Data      */ out_data_d,
-                        /* Out State     */ out_states_d[0])));
+                        /* Out Data      */ out_data_d[RNNInOutKind::Layer],
+                        /* Out State     */ out_data_d[RNNInOutKind::HiddenState])));
             descs.push_back(desc);
         } break;
         case mkldnn::algorithm::lbr_gru: {
             MKLDNNDescriptor desc(std::shared_ptr<lbr_gru_forward::desc>(
                     new lbr_gru_forward::desc(prop_kind::forward_scoring, direction,
-                        /* In Data       */ in_data_d,
-                        /* In State      */ in_states_d[0],
+                        /* In Data       */ in_data_d[RNNInOutKind::Layer],
+                        /* In State      */ in_data_d[RNNInOutKind::HiddenState],
                         /* Weights data  */ w_data_d,
                         /* Weights state */ w_state_d,
                         /* Bias          */ w_bias_d,
-                        /* Out Data      */ out_data_d,
-                        /* Out State     */ out_states_d[0])));
+                        /* Out Data      */ out_data_d[RNNInOutKind::Layer],
+                        /* Out State     */ out_data_d[RNNInOutKind::HiddenState])));
             descs.push_back(desc);
         } break;
         case mkldnn::algorithm::vanilla_lstm: {
             MKLDNNDescriptor desc(std::shared_ptr<lstm_forward::desc>(
                     new lstm_forward::desc(prop_kind::forward_scoring, direction,
-                        /* In Data       */ in_data_d,
-                        /* In State H    */ in_states_d[0],
-                        /* In State C    */ in_states_d[1],
+                        /* In Data       */ in_data_d[RNNInOutKind::Layer],
+                        /* In State      */ in_data_d[RNNInOutKind::HiddenState],
+                        /* In State C    */ in_data_d[RNNInOutKind::CellState],
                         /* Weights data  */ w_data_d,
                         /* Weights state */ w_state_d,
                         /* Bias          */ w_bias_d,
-                        /* Out Data      */ out_data_d,
-                        /* Out State H   */ out_states_d[0],
-                        /* Out State C   */ out_states_d[1])));
+                        /* Out Data      */ out_data_d[RNNInOutKind::Layer],
+                        /* Out State     */ out_data_d[RNNInOutKind::HiddenState],
+                        /* Out State C   */ out_data_d[RNNInOutKind::CellState])));
             descs.push_back(desc);
         } break;
         default:
-            IE_THROW() << "Unknown cell type";
+            THROW_ERROR << "Unknown cell type";
     }
 
     // Fill supported config
@@ -389,130 +450,170 @@ void MKLDNNRNN::createDescriptor(const std::vector<TensorDesc> &inputDesc,
     supportedPrimitiveDescriptors.emplace_back(config, ref_any);
 }
 
+bool MKLDNNRNN::verifyWeightsPrecision(const Precision &layerPrec, const Precision &weightsPrec) {
+    if (!weightsByLayerPrec.count(layerPrec))
+        THROW_ERROR << "Unsupported layer precision " << layerPrec;
+    return weightsPrec == weightsByLayerPrec.at(layerPrec);
+}
+
+void MKLDNNRNN::verifyWeights() {
+    auto layer = getCnnLayer();
+    auto weightsIt = layer->blobs.find("weights");
+
+    if (weightsIt == layer->blobs.end())
+        THROW_ERROR << "Missed weights blob.";
+
+    const auto& weightsPrec = weightsIt->second->getTensorDesc().getPrecision();
+
+    if (!verifyWeightsPrecision(runtimePrecision, weightsPrec)) {
+        THROW_ERROR << "Weights precision " << weightsPrec <<
+                       " does not match runtime precision " << runtimePrecision;
+    }
+}
+
+void MKLDNNRNN::verifyBiases() {
+    auto layer = getCnnLayer();
+    if (layer->blobs.find("biases") != layer->blobs.end()
+            && layer->blobs["biases"]->getTensorDesc().getPrecision() != Precision::FP32)
+        THROW_ERROR << "Invalid biases precision: " << layer->blobs["biases"]->getTensorDesc().getPrecision();
+}
+
 void MKLDNNRNN::createPrimitive() {
     if (prim)
         return;
 
-    std::string errorPrefix = "RNN layer '" + getCnnLayer()->name + "'";
-    auto weightsIt = getCnnLayer()->blobs.find("weights");
-    if (weightsIt == getCnnLayer()->blobs.end())
-        IE_THROW() << errorPrefix << " does not have weights blob.";
-    if (weightsIt->second->getTensorDesc().getPrecision() != Precision::FP32)
-        IE_THROW() << errorPrefix << " has invalid weights precision: " << weightsIt->second->getTensorDesc().getPrecision();
-    if (getCnnLayer()->blobs.find("biases") != getCnnLayer()->blobs.end()
-            && getCnnLayer()->blobs["biases"]->getTensorDesc().getPrecision() != Precision::FP32)
-        IE_THROW() << errorPrefix << " has invalid biases precision: " << getCnnLayer()->blobs["biases"]->getTensorDesc().getPrecision();
+    verifyWeights();
+    verifyBiases();
+
+    /*
+     * Gate order
+     * ====== LSTM ======
+     * Caffe - IFOC, ONNX   - IOFC
+     * IE    - FICO, mkldnn - IFCO
+     *
+     * ====== GRU ======
+     * IE - URO, mkldnn - URO
+     */
+    const int gate_map_lstm[] = {1, 0, 2, 3};  // FICO -> IFCO
+    const int gate_map_gru[]  = {0, 1, 2, 3};
+    const int gate_map_rnn[]  = {0};
+    const int *gate_map;
+    const int gate_map_lstm_size = sizeof(gate_map_lstm) / sizeof(int);
+    const int gate_map_gru_size = sizeof(gate_map_gru) / sizeof(int);
+    const int gate_map_rnn_size = sizeof(gate_map_rnn) / sizeof(int);
+    if (cell_type == algorithm::vanilla_lstm) {
+        gate_map = gate_map_lstm;
+        if (G > gate_map_lstm_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    } else if (cell_type == algorithm::vanilla_gru) {
+        gate_map = gate_map_gru;
+        if (G > gate_map_gru_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    } else if (cell_type == algorithm::lbr_gru) {
+        gate_map = gate_map_gru;
+        if (G > gate_map_gru_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    } else if (cell_type == algorithm::vanilla_rnn) {
+        gate_map = gate_map_rnn;
+        if (G > gate_map_rnn_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    } else {
+        gate_map = gate_map_gru;
+        if (G > gate_map_gru_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    }
+
+    if (runtimePrecision == Precision::BF16)
+        fillWeights<bfloat16_t>(gate_map);
+    else if (runtimePrecision == Precision::FP32)
+        fillWeights<float>(gate_map);
+    else  // TODO FP16 and INT8 support
+        THROW_ERROR << "Unsupported data type";
+
+    if (runtimePrecision == Precision::BF16 ||
+        runtimePrecision == Precision::FP32)
+        fillBiases<float>(gate_map);
 
     auto pd = descs[0].createPrimitiveDescriptorIterator(getEngine());
+    prim.reset(new mkldnn::primitive(pd));
+}
 
-    auto src_data_mem = getParentEdgeAt(0)->getMemoryPtr();
-    auto dst_data_mem = getChildEdgeAt(0)->getMemoryPtr();
-
-    // create weight blobs (data and state part)
-    auto w_data_mem = std::make_shared<MKLDNNMemory>(getEngine());
-    w_data_mem->Create(w_data_d);
-    internalBlobMemory.push_back(w_data_mem);
-
-    auto w_state_mem = std::make_shared<MKLDNNMemory>(getEngine());
-    w_state_mem->Create(w_state_d);
-    internalBlobMemory.push_back(w_state_mem);
+/*
+ * IE format:
+ *   B - [gates, out_state_size]
+ *
+ * MKLDNN format:
+ *   B - [gates, out_state_size]
+ *
+ */
+template <typename Prec>
+void MKLDNNRNN::fillBiases(const int *gate_map) {
+    if (!w_bias_d)
+        return;
 
     auto w_bias_mem = std::make_shared<MKLDNNMemory>(getEngine());
     w_bias_mem->Create(w_bias_d);
     internalBlobMemory.push_back(w_bias_mem);
 
-    {
-        /* Copy Weight data
-         * IE format:
-         *   W - [gates, out_state_size, in_data_size + in_state_size]
-         *   B - [gates, out_state_size]
-         *
-         * MKLDNN format:
-         *   W - [1, 1, in_date_size, gates, out_state_size]
-         *   R - [1, 1, in_state_size, gates, out_state_size]
-         *   B - [gates, out_state_size]
-         *
-         * Gate order
-         * ====== LSTM ======
-         * Caffe - IFOC, ONNX   - IOFC
-         * IE    - FICO, mkldnn - IFCO
-         *
-         * ====== GRU ======
-         * IE - URO, mkldnn - URO
-         */
-        const int gate_map_lstm[] = {1, 0, 2, 3};  // FICO -> IFCO
-        const int gate_map_gru[]  = {0, 1, 2, 3};
-        const int gate_map_rnn[]  = {0};
-        const int *gate_map;
-        const int gate_map_lstm_size = sizeof(gate_map_lstm) / sizeof(int);
-        const int gate_map_gru_size = sizeof(gate_map_gru) / sizeof(int);
-        const int gate_map_rnn_size = sizeof(gate_map_rnn) / sizeof(int);
-        if (cell_type == algorithm::vanilla_lstm) {
-            gate_map = gate_map_lstm;
-            if (G > gate_map_lstm_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        } else if (cell_type == algorithm::vanilla_gru) {
-            gate_map = gate_map_gru;
-            if (G > gate_map_gru_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        } else if (cell_type == algorithm::lbr_gru) {
-            gate_map = gate_map_gru;
-            if (G > gate_map_gru_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        } else if (cell_type == algorithm::vanilla_rnn) {
-            gate_map = gate_map_rnn;
-            if (G > gate_map_rnn_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        } else {
-            gate_map = gate_map_gru;
-            if (G > gate_map_gru_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        }
+    auto ie_b_ptr = getCnnLayer()->blobs["biases"]->buffer().as<const Prec*>();
+    auto b_ptr = static_cast<Prec*>(w_bias_mem->GetData());
+    for (int g = 0; g < Gb; g++) {
+        Prec *l_b_ptr = b_ptr + gate_map[g] * SC;
+        const Prec *l_ie_b_ptr = ie_b_ptr + g * SC;
+        cpu_memcpy(l_b_ptr, l_ie_b_ptr, SC * sizeof(Prec));
+    }
+}
 
-        auto ie_w_ptr = getCnnLayer()->blobs["weights"]->buffer().as<const float*>();
-        auto w_ptr = static_cast<float*>(w_data_mem->GetData());
-        auto r_ptr = static_cast<float*>(w_state_mem->GetData());
-        const int step = SC * G;
+/*
+ * IE format:
+ *   W - [gates, out_state_size, in_data_size + in_state_size]
+ *
+ * MKLDNN format:
+ *   W - [1, 1, in_date_size, gates, out_state_size]
+ *   R - [1, 1, in_state_size, gates, out_state_size]
+ *
+ */
+template <typename Prec>
+void MKLDNNRNN::fillWeights(const int *gate_map) {
+    // create weight blobs (data and state part)
+    auto w_data_mem = std::make_shared<MKLDNNMemory>(getEngine());
+    w_data_mem->Create(w_data_d);
+    internalBlobMemory.push_back(w_data_mem);
+    auto w_state_mem = std::make_shared<MKLDNNMemory>(getEngine());
+    w_state_mem->Create(w_state_d);
+    internalBlobMemory.push_back(w_state_mem);
 
-        for (int g = 0; g < G; g++) {
-            for (int out_i = 0; out_i < SC; out_i++) {
-                float *l_w_ptr = w_ptr + gate_map[g]*SC + out_i;
-                float *l_r_ptr = r_ptr + gate_map[g]*SC + out_i;
-                for (int in_i = 0; in_i < DC; in_i++) {
-                    *l_w_ptr = *ie_w_ptr;
-                    ie_w_ptr++;
-                    l_w_ptr += step;
-                }
+    auto ie_w_ptr = getCnnLayer()->blobs["weights"]->buffer().as<const Prec*>();
+    auto w_ptr = static_cast<Prec*>(w_data_mem->GetData());
+    auto r_ptr = static_cast<Prec*>(w_state_mem->GetData());
+    const int step = SC * G;
 
-                for (int in_i = 0; in_i < SC; in_i++) {
-                    *l_r_ptr = *ie_w_ptr;
-                    ie_w_ptr++;
-                    l_r_ptr += step;
-                }
+    for (int g = 0; g < G; g++) {
+        for (int out_i = 0; out_i < SC; out_i++) {
+            Prec *l_w_ptr = w_ptr + gate_map[g] * SC + out_i;
+            Prec *l_r_ptr = r_ptr + gate_map[g] * SC + out_i;
+            for (int in_i = 0; in_i < DC; in_i++) {
+                *l_w_ptr = *ie_w_ptr;
+                ie_w_ptr++;
+                l_w_ptr += step;
             }
-        }
 
-        if (w_bias_d) {
-            auto ie_b_ptr = getCnnLayer()->blobs["biases"]->buffer().as<const float*>();
-            auto b_ptr = static_cast<float*>(w_bias_mem->GetData());
-            for (int g = 0; g < Gb; g++) {
-                float *l_b_ptr = b_ptr + gate_map[g]*SC;
-                const float *l_ie_b_ptr = ie_b_ptr + g * SC;
-                cpu_memcpy(l_b_ptr, l_ie_b_ptr, SC * sizeof(float));
+            for (int in_i = 0; in_i < SC; in_i++) {
+                *l_r_ptr = *ie_w_ptr;
+                ie_w_ptr++;
+                l_r_ptr += step;
             }
         }
     }
-
-    prim.reset(new mkldnn::primitive(pd));
 }
 
 void MKLDNNRNN::execute(mkldnn::stream strm) {
     if (!prim)
-        IE_THROW() << "No initialized primitive to execute";
+        THROW_ERROR << "No initialized primitive to execute";
 
     const auto src_data_mem = getParentEdgeAt(0)->getMemoryPtr();
     const auto dst_data_mem = getChildEdgeAt(0)->getMemoryPtr();
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h
index cb16a3d242d..2cf51f09913 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h
@@ -28,8 +28,19 @@ public:
 private:
     void fillCellDesc();
     void fillSeqDesc();
+    bool verifyWeightsPrecision(const InferenceEngine::Precision& layerPrec,
+                                const InferenceEngine::Precision& weightsPrec);
+    void verifyWeights();
+    void verifyBiases();
+    void convertWeightsBlobToBF16();
+
+    template <typename Prec>
+    void fillWeights(const int* gate_map);
+    template <typename Prec>
+    void fillBiases(const int* gate_map);
 
 private:
+    InferenceEngine::Precision runtimePrecision;
     /** Specify mode Cell or Seq. true - Cell, false - Seq */
     bool is_cell = false;
 
@@ -56,11 +67,14 @@ private:
     const ptrdiff_t L = 1;  /**< What is it??. Constant for mkldnn impl */
     const ptrdiff_t D = 1;  /**< Num of direction. 1 or 2 */
 
-    MKLDNNMemoryDesc in_data_d;
-    MKLDNNMemoryDesc out_data_d;
+    std::vector<MKLDNNMemoryDesc> in_data_d;
+    std::vector<MKLDNNMemoryDesc> out_data_d;
 
-    std::vector<MKLDNNMemoryDesc> in_states_d;
-    std::vector<MKLDNNMemoryDesc> out_states_d;
+    enum RNNInOutKind {
+        Layer = 0,
+        HiddenState = 1,
+        CellState = 2
+    };
 
     MKLDNNMemoryDesc w_data_d;
     MKLDNNMemoryDesc w_state_d;
@@ -69,7 +83,7 @@ private:
     // List of in/out reorders if required
     std::vector<mkldnn::reorder> exec_before;
    std::vector<mkldnn::reorder> exec_after;
-};
+
+    static const std::map<InferenceEngine::Precision, InferenceEngine::Precision> weightsByLayerPrec;
+};  // class MKLDNNRNN
 
 }  // namespace MKLDNNPlugin
-
diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp
new file mode 100644
index 00000000000..6f58eeda2f4
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp
@@ -0,0 +1,135 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ngraph/op/gru_cell.hpp"
+#include <shared_test_classes/single_layer/gru_cell.hpp>
+#include "test_utils/cpu_test_utils.hpp"
+#include "transformations/op_conversions/gru_cell_decomposition.hpp"
+
+using namespace InferenceEngine;
+using namespace CPUTestUtils;
+
+namespace CPULayerTestsDefinitions {
+
+using GRUCellCpuSpecificParams = typename std::tuple<LayerTestsDefinitions::GRUCellParams,
+                                                     CPUSpecificParams,
+                                                     std::map<std::string, std::string>>;
+
+class GRUCellCPUTest : public testing::WithParamInterface<GRUCellCpuSpecificParams>,
+                       virtual public LayerTestsUtils::LayerTestsCommon,
+                       public CPUTestsBase {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<GRUCellCpuSpecificParams> &obj) {
+        CPUSpecificParams cpuParams;
+        LayerTestsDefinitions::GRUCellParams basicParamsSet;
+        std::map<std::string, std::string> additionalConfig;
+
+        std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;
+
+        std::ostringstream result;
+        result << LayerTestsDefinitions::GRUCellTest::getTestCaseName(
+            testing::TestParamInfo<LayerTestsDefinitions::GRUCellParams>(basicParamsSet, 0));
+        result << CPUTestsBase::getTestCaseName(cpuParams);
+
+        if (!additionalConfig.empty()) {
+            result << "_PluginConf";
+            for (auto &item : additionalConfig) {
+                if (item.second == PluginConfigParams::YES)
+                    result << "_" << item.first << "=" << item.second;
+            }
+        }
+        return result.str();
+    }
+
+protected:
+    void SetUp() {
+        CPUSpecificParams cpuParams;
+        LayerTestsDefinitions::GRUCellParams basicParamsSet;
+        std::map<std::string, std::string> additionalConfig;
+
+        bool should_decompose;
+        size_t batch;
+        size_t hidden_size;
+        size_t input_size;
+        std::vector<std::string> activations;
+        std::vector<float> activations_alpha;
+        std::vector<float> activations_beta;
+        float clip;
+        bool linear_before_reset;
+        InferenceEngine::Precision netPrecision;
+
+        std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
+        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
+        std::tie(should_decompose, batch, hidden_size, input_size, activations, clip, linear_before_reset, netPrecision, targetDevice) = basicParamsSet;
+
+        std::vector<std::vector<size_t>> inputShapes = {
+            {{batch, input_size},
+             {batch, hidden_size},
+             {3 * hidden_size, input_size},
+             {3 * hidden_size, hidden_size},
+             {(linear_before_reset ? 
4 : 3) * hidden_size}}, + }; + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) { + inPrc = outPrc = Precision::BF16; + } else { + inPrc = outPrc = netPrecision; + } + + selectedType += "_"; + selectedType += outPrc.name(); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32); + auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]}); + std::vector WRB = {inputShapes[2], inputShapes[3], inputShapes[4]}; + auto gru_cell = ngraph::builder::makeGRU( + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), WRB, hidden_size, activations, {}, {}, clip, linear_before_reset); + ngraph::ResultVector results{std::make_shared(gru_cell->output(0))}; + + function = makeNgraphFunction(ngPrc, params, gru_cell, "gru_cell"); + } +}; + +TEST_P(GRUCellCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckPluginRelatedResults(executableNetwork, "RNNCell"); +} + +namespace { +/* CPU PARAMS */ +std::vector> additionalConfig + = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, + {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}}; + +CPUSpecificParams cpuParams{{nc, nc}, {nc}, {"ref_any"}, "ref_any"}; + +std::vector should_decompose{false}; +std::vector batch{1, 5}; +std::vector hidden_size{1, 10}; +std::vector input_size{1, 30}; +// oneDNN supports only sigmoid-tanh +std::vector> activations = {{"sigmoid", "tanh"}}; +// oneDNN supports only zero clip +std::vector clip = {0.f}; +std::vector linear_before_reset = {true, false}; +std::vector netPrecisions = {InferenceEngine::Precision::FP32}; + +INSTANTIATE_TEST_CASE_P(smoke_GRUCellCPU, + GRUCellCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(should_decompose), + ::testing::ValuesIn(batch), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(input_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(linear_before_reset), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParams), + ::testing::ValuesIn(additionalConfig)), + GRUCellCPUTest::getTestCaseName); +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp new file mode 100644 index 00000000000..5efa57cb808 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp @@ -0,0 +1,202 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/gru_sequence.hpp" +#include "ngraph/pass/visualize_tree.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" +#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using GRUSequenceCpuSpecificParams = typename std::tuple>; + +class GRUSequenceCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, + public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + CPUSpecificParams cpuParams; + 
LayerTestsDefinitions::GRUSequenceParams basicParamsSet; + std::map additionalConfig; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param; + std::ostringstream result; + + result << LayerTestsDefinitions::GRUSequenceTest::getTestCaseName(testing::TestParamInfo(basicParamsSet, 0)); + result << CPUTestsBase::getTestCaseName(cpuParams); + + if (!additionalConfig.empty()) { + result << "_PluginConf"; + for (auto &item : additionalConfig) { + if (item.second == PluginConfigParams::YES) + result << "_" << item.first << "=" << item.second; + } + } + return result.str(); + } + +protected: + void SetUp() { + LayerTestsDefinitions::GRUSequenceParams basicParamsSet; + CPUSpecificParams cpuParams; + std::map additionalConfig; + + size_t seq_lenghts; + size_t batch; + size_t hidden_size; + size_t input_size = 10; + std::vector activations; + std::vector activations_alpha; + std::vector activations_beta; + float clip; + bool linear_before_reset; + ngraph::op::RecurrentSequenceDirection direction; + InferenceEngine::Precision netPrecision; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(m_mode, seq_lenghts, batch, hidden_size, activations, clip, linear_before_reset, direction, netPrecision, targetDevice) = basicParamsSet; + + size_t num_directions = direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 2 : 1; + std::vector> inputShapes = { + {{batch, seq_lenghts, input_size}, + {batch, num_directions, hidden_size}, + {batch}, + {num_directions, 3 * hidden_size, input_size}, + {num_directions, 3 * hidden_size, hidden_size}, + {num_directions, (linear_before_reset ? 4 : 3) * hidden_size}}, + }; + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) { + inPrc = outPrc = Precision::BF16; + } else { + inPrc = outPrc = netPrecision; + } + + selectedType += "_"; + selectedType += outPrc.name(); + + m_max_seq_len = seq_lenghts; + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32); + auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]}); + if (m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_PARAM + || m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM) { + auto seq_lengths = ngraph::builder::makeParams(ngraph::element::i64, {inputShapes[2]}).at(0); + seq_lengths->set_friendly_name("seq_lengths"); + params.push_back(seq_lengths); + } + std::vector WRB = {inputShapes[3], inputShapes[4], inputShapes[5], inputShapes[2]}; + auto gru_sequence = ngraph::builder::makeGRU(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), + WRB, + hidden_size, + activations, + {}, + {}, + clip, + linear_before_reset, + true, + direction, + m_mode); + ngraph::ResultVector results{std::make_shared(gru_sequence->output(0)), + std::make_shared(gru_sequence->output(1))}; + + function = makeNgraphFunction(ngPrc, params, gru_sequence, "gru_sequence"); + + if (m_mode != ngraph::helpers::SequenceTestsMode::PURE_SEQ) { + ngraph::pass::Manager manager; + if (direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL) + manager.register_pass(); + manager.register_pass(); + manager.run_passes(function); + bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, true); + } else { + bool ti_found = 
ngraph::helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, false); + } + } + + void GenerateInputs() { + for (const auto &input : executableNetwork.GetInputsInfo()) { + const auto &info = input.second; + auto blob = GenerateInput(*info); + if (input.first == "seq_lengths") { + blob = FuncTestUtils::createAndFillBlob(info->getTensorDesc(), m_max_seq_len, 0); + } + inputs.push_back(blob); + } + } + +private: + ngraph::helpers::SequenceTestsMode m_mode; + int64_t m_max_seq_len = 0; +}; + +TEST_P(GRUSequenceCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckPluginRelatedResults(executableNetwork, "RNNSeq"); +} + +namespace { +/* CPU PARAMS */ +std::vector> additionalConfig + = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}}; + +CPUSpecificParams cpuParams{{ntc, nc}, {ntc, nc}, {"ref_any"}, "ref_any"}; +CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc}, {tnc, nc}, {"ref_any"}, "ref_any"};; + +std::vector mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ}; +// output values increase rapidly without clip, so use only seq_lenghts = 2 +std::vector seq_lengths_zero_clip{2}; +std::vector batch{10}; +std::vector batch_size_one{1}; +std::vector hidden_size{1, 10}; +std::vector> activations = {{"sigmoid", "tanh"}}; +std::vector linear_before_reset = {true, false}; +std::vector clip{0.f}; +std::vector direction = {ngraph::op::RecurrentSequenceDirection::FORWARD}; + +std::vector netPrecisions = {InferenceEngine::Precision::FP32}; + +INSTANTIATE_TEST_CASE_P(smoke_GRUSequenceCPU, + GRUSequenceCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(mode), + ::testing::ValuesIn(seq_lengths_zero_clip), + ::testing::ValuesIn(batch), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(linear_before_reset), + ::testing::ValuesIn(direction), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParams), + ::testing::ValuesIn(additionalConfig)), + GRUSequenceCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_GRUSequenceCPUBatchSizeOne, + GRUSequenceCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(mode), + ::testing::ValuesIn(seq_lengths_zero_clip), + ::testing::ValuesIn(batch_size_one), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(linear_before_reset), + ::testing::ValuesIn(direction), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParamsBatchSizeOne), + ::testing::ValuesIn(additionalConfig)), + GRUSequenceCPUTest::getTestCaseName); +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp new file mode 100644 index 00000000000..4ee12f62ea9 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp @@ -0,0 +1,132 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph/op/lstm_cell.hpp" +#include +#include "test_utils/cpu_test_utils.hpp" +#include "transformations/op_conversions/lstm_cell_decomposition.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using 
LSTMCellCpuSpecificParams = typename std::tuple>; + +class LSTMCellLayerCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, + public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + CPUSpecificParams cpuParams; + LayerTestsDefinitions::LSTMCellParams basicParamsSet; + std::map additionalConfig; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param; + std::ostringstream result; + + result << LayerTestsDefinitions::LSTMCellTest::getTestCaseName(testing::TestParamInfo( + basicParamsSet, 0)); + result << CPUTestsBase::getTestCaseName(cpuParams); + + if (!additionalConfig.empty()) { + result << "_PluginConf"; + for (auto& item : additionalConfig) { + if (item.second == PluginConfigParams::YES) + result << "_" << item.first << "=" << item.second; + } + } + return result.str(); + } + +protected: + void SetUp() { + LayerTestsDefinitions::LSTMCellParams basicParamsSet; + CPUSpecificParams cpuParams; + std::map additionalConfig; + + bool should_decompose; + size_t batch; + size_t hidden_size; + size_t input_size; + std::vector activations; + std::vector activations_alpha; + std::vector activations_beta; + float clip; + InferenceEngine::Precision netPrecision; + threshold = 0.05; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(should_decompose, batch, hidden_size, input_size, activations, clip, netPrecision, targetDevice) = basicParamsSet; + + std::vector> inputShapes = { + {{batch, input_size}, {batch, hidden_size}, {batch, hidden_size}, {4 * hidden_size, input_size}, {4 * hidden_size, hidden_size}, {4 * hidden_size}}, + }; + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) { + inPrc = outPrc = Precision::BF16; + } else { + inPrc = outPrc = netPrecision; + } + + selectedType += "_"; + selectedType += outPrc.name(); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32); + auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1], inputShapes[2]}); + std::vector WRB = {inputShapes[3], inputShapes[4], inputShapes[5]}; + + auto lstm_cell = ngraph::builder::makeLSTM( + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), WRB, hidden_size, activations, {}, {}, clip); + + ngraph::ResultVector results{std::make_shared(lstm_cell->output(0)), + std::make_shared(lstm_cell->output(1))}; + + function = makeNgraphFunction(ngPrc, params, lstm_cell, "lstm_cell"); + } +}; + +TEST_P(LSTMCellLayerCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckPluginRelatedResults(executableNetwork, "RNNCell"); +} + +namespace { +/* CPU PARAMS */ +std::vector> additionalConfig + = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}, + {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}}; + +CPUSpecificParams cpuParams{{nc, nc, nc}, {nc}, {"ref_any"}, "ref_any"}; + +std::vector should_decompose{false}; +std::vector batch{5}; +std::vector hidden_size{1, 10}; +std::vector input_size{1, 30}; +// oneDNN supports only sigmoid-tanh-tanh +std::vector> activations = {{"sigmoid", "tanh", "tanh"}}; +// oneDNN supports only zero clip +std::vector clip{0.f}; +std::vector netPrecisions = {InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16}; + 
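
[Editor's aside] `SetUp()` above appends `"_" + outPrc.name()` to `selectedType`, so with `KEY_ENFORCE_BF16 = YES` the expected primitive descriptor string becomes `ref_any_BF16`. A minimal standalone sketch of that string assembly (the exact matching performed by `CheckPluginRelatedResults()` is assumed, not shown here):

```cpp
#include <cassert>
#include <string>

int main() {
    // Mirrors the SetUp() logic: the precision suffix depends on plugin config.
    bool enforceBF16 = true;                         // KEY_ENFORCE_BF16 == YES
    std::string outPrcName = enforceBF16 ? "BF16" : "FP32";

    std::string selectedType = "ref_any";
    selectedType += "_";
    selectedType += outPrcName;

    assert(selectedType == "ref_any_BF16");
    return 0;
}
```
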
+INSTANTIATE_TEST_CASE_P(smoke_LSTMCellCPU, + LSTMCellLayerCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(should_decompose), + ::testing::ValuesIn(batch), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(input_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParams), + ::testing::ValuesIn(additionalConfig)), + LSTMCellLayerCPUTest::getTestCaseName); +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp new file mode 100644 index 00000000000..50e9717de51 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp @@ -0,0 +1,205 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/lstm_sequence.hpp" +#include "ngraph/pass/visualize_tree.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" +#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using LSTMSequenceCpuSpecificParams = typename std::tuple>; + +class LSTMSequenceCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, + public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + CPUSpecificParams cpuParams; + LayerTestsDefinitions::LSTMSequenceParams basicParamsSet; + std::map additionalConfig; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param; + std::ostringstream result; + + result << LayerTestsDefinitions::LSTMSequenceTest::getTestCaseName( + testing::TestParamInfo(basicParamsSet, 0)); + result << CPUTestsBase::getTestCaseName(cpuParams); + + if (!additionalConfig.empty()) { + result << "_PluginConf"; + for (auto &item : additionalConfig) { + if (item.second == PluginConfigParams::YES) + result << "_" << item.first << "=" << item.second; + } + } + return result.str(); + } + +protected: + void SetUp() { + LayerTestsDefinitions::LSTMSequenceParams basicParamsSet; + CPUSpecificParams cpuParams; + std::map additionalConfig; + + size_t seq_lenghts; + size_t batch; + size_t hidden_size; + size_t input_size; + std::vector activations; + std::vector activations_alpha; + std::vector activations_beta; + float clip; + ngraph::op::RecurrentSequenceDirection direction; + InferenceEngine::Precision netPrecision; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(m_mode, seq_lenghts, batch, hidden_size, input_size, activations, clip, direction, netPrecision, targetDevice) = basicParamsSet; + + size_t num_directions = direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 
2 : 1; + m_max_seq_len = seq_lenghts; + std::vector> inputShapes = { + {{batch, seq_lenghts, input_size}, + {batch, num_directions, hidden_size}, + {batch, num_directions, hidden_size}, + {batch}, + {num_directions, 4 * hidden_size, input_size}, + {num_directions, 4 * hidden_size, hidden_size}, + {num_directions, 4 * hidden_size}}, + }; + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) { + inPrc = outPrc = Precision::BF16; + } else { + inPrc = outPrc = netPrecision; + } + + selectedType += "_"; + selectedType += outPrc.name(); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32); + auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1], inputShapes[2]}); + if (m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_PARAM + || m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM) { + auto seq_lengths = ngraph::builder::makeParams(ngraph::element::i64, {inputShapes[3]}).at(0); + seq_lengths->set_friendly_name("seq_lengths"); + params.push_back(seq_lengths); + } + std::vector WRB = {inputShapes[4], inputShapes[5], inputShapes[6], inputShapes[3]}; + auto lstm_sequence = ngraph::builder::makeLSTM(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), + WRB, + hidden_size, + activations, + {}, + {}, + clip, + true, + direction, + m_mode); + ngraph::ResultVector results{std::make_shared(lstm_sequence->output(0)), + std::make_shared(lstm_sequence->output(1)), + std::make_shared(lstm_sequence->output(2))}; + + function = makeNgraphFunction(ngPrc, params, lstm_sequence, "lstm_sequence"); + + if (m_mode != ngraph::helpers::SequenceTestsMode::PURE_SEQ) { + ngraph::pass::Manager manager; + if (direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL) + manager.register_pass(); + manager.register_pass(); + manager.run_passes(function); + bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, true); + } else { + bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, false); + } + } + + void GenerateInputs() { + for (const auto &input : executableNetwork.GetInputsInfo()) { + const auto &info = input.second; + auto blob = GenerateInput(*info); + if (input.first == "seq_lengths") { + blob = FuncTestUtils::createAndFillBlob(info->getTensorDesc(), m_max_seq_len, 0); + } + + inputs.push_back(blob); + } + } + +private: + ngraph::helpers::SequenceTestsMode m_mode; + int64_t m_max_seq_len = 0; +}; + +TEST_P(LSTMSequenceCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckPluginRelatedResults(executableNetwork, "RNNSeq"); +} + +namespace { +/* CPU PARAMS */ +std::vector> additionalConfig + = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, + {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}}; + +CPUSpecificParams cpuParams{{ntc, nc, nc}, {ntc, nc, nc}, {"ref_any"}, "ref_any"}; +CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc, nc}, {tnc, nc, nc}, {"ref_any"}, "ref_any"}; + +std::vector mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ}; +std::vector seq_lengths_zero_clip{2}; +std::vector batch_size_one{1}; +std::vector batch{10}; +std::vector hidden_size{1, 10}; +std::vector input_size{10}; +// oneDNN supports only sigmoid-tanh-tanh +std::vector> activations = {{"sigmoid", "tanh", "tanh"}}; +// oneDNN supports only zero clip 
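
[Editor's aside] On the zero-clip comment just above: the plugin side rejects any nonzero clip outright (`THROW_ERROR << "Clipping is not supported for RNN primitive"`), which is why the `clip` vector immediately below holds only `0.f`. For reference, the conventional cell-clipping semantics a nonzero value would imply; a sketch only, not plugin code:

```cpp
#include <algorithm>

// Sketch: conventional RNN cell clipping. A nonzero clip bounds pre-activations
// to [-clip, clip]; clip == 0.f means "no clipping", the only mode the oneDNN
// path exercised by these tests accepts.
inline float apply_clip(float x, float clip) {
    return clip == 0.f ? x : std::max(-clip, std::min(x, clip));
}
```
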
+std::vector clip{0.f}; +std::vector direction = {ngraph::op::RecurrentSequenceDirection::FORWARD}; +std::vector netPrecisions = {InferenceEngine::Precision::FP32}; + +INSTANTIATE_TEST_CASE_P(smoke_LSTMSequenceCPU, + LSTMSequenceCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(mode), + ::testing::ValuesIn(seq_lengths_zero_clip), + ::testing::ValuesIn(batch), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(input_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(direction), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParams), + ::testing::ValuesIn(additionalConfig)), + LSTMSequenceCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_LSTMSequenceCPUbatchSizeOne, + LSTMSequenceCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(mode), + ::testing::ValuesIn(seq_lengths_zero_clip), + ::testing::ValuesIn(batch_size_one), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(input_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(direction), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParamsBatchSizeOne), + ::testing::ValuesIn(additionalConfig)), + LSTMSequenceCPUTest::getTestCaseName); +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp new file mode 100644 index 00000000000..381bdfecf36 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp @@ -0,0 +1,124 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph/op/rnn_cell.hpp" +#include +#include "test_utils/cpu_test_utils.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using RNNCellCpuSpecificParams = typename std::tuple>; + +class RNNCellCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, + public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + CPUSpecificParams cpuParams; + LayerTestsDefinitions::RNNCellParams basicParamsSet; + std::map additionalConfig; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param; + + std::ostringstream result; + result << LayerTestsDefinitions::RNNCellTest::getTestCaseName( + testing::TestParamInfo(basicParamsSet, 0)); + result << CPUTestsBase::getTestCaseName(cpuParams); + + if (!additionalConfig.empty()) { + result << "_PluginConf"; + for (auto &item : additionalConfig) { + if (item.second == PluginConfigParams::YES) + result << "_" << item.first << "=" << item.second; + } + } + + return result.str(); + } + +protected: + void SetUp() { + CPUSpecificParams cpuParams; + LayerTestsDefinitions::RNNCellParams basicParamsSet; + std::map additionalConfig; + + bool should_decompose; + size_t batch; + size_t hidden_size; + size_t input_size; + std::vector activations; + std::vector activations_alpha; + std::vector activations_beta; + float clip; + InferenceEngine::Precision netPrecision; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(should_decompose, batch, hidden_size, input_size, activations, 
clip, netPrecision, targetDevice) = basicParamsSet; + + std::vector> inputShapes = {{batch, input_size}, {batch, hidden_size}, + {hidden_size, input_size}, {hidden_size, hidden_size}, {hidden_size}}; + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) { + inPrc = outPrc = Precision::BF16; + } else { + inPrc = outPrc = netPrecision; + } + + selectedType += "_"; + selectedType += outPrc.name(); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32); + auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]}); + std::vector WRB = {inputShapes[2], inputShapes[3], inputShapes[4]}; + auto rnn_cell = ngraph::builder::makeRNN( + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), + WRB, hidden_size, activations, {}, {}, clip); + ngraph::ResultVector results{std::make_shared(rnn_cell)}; + function = makeNgraphFunction(ngPrc, params, rnn_cell, "rnn_cell"); + } +}; + +TEST_P(RNNCellCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckPluginRelatedResults(executableNetwork, "RNNCell"); +} + +namespace { +/* CPU PARAMS */ +std::vector> additionalConfig + = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}}; + +CPUSpecificParams cpuParams{{nc, nc}, {nc}, {"ref_any"}, "ref_any"}; +std::vector should_decompose{false}; +std::vector batch{1, 5}; +std::vector hidden_size{1, 10}; +std::vector input_size{1, 30}; +std::vector> activations = {{"relu"}, {"sigmoid"}, {"tanh"}}; +// oneDNN supports only zero clip +std::vector clip = {0.f}; +std::vector netPrecisions = {InferenceEngine::Precision::FP32}; + +INSTANTIATE_TEST_CASE_P(smoke_RNNCellCPU, + RNNCellCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(should_decompose), + ::testing::ValuesIn(batch), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(input_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParams), + ::testing::ValuesIn(additionalConfig)), + RNNCellCPUTest::getTestCaseName); +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp new file mode 100644 index 00000000000..671539db351 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp @@ -0,0 +1,202 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/rnn_sequence.hpp" +#include "ngraph/pass/visualize_tree.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" +#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using RNNSequenceCpuSpecificParams = typename std::tuple>; + +class RNNSequenceCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, + public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + CPUSpecificParams cpuParams; + 
+        LayerTestsDefinitions::RNNSequenceParams basicParamsSet;
+        std::map<std::string, std::string> additionalConfig;
+
+        std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;
+        std::ostringstream result;
+
+        result << LayerTestsDefinitions::RNNSequenceTest::getTestCaseName(
+                testing::TestParamInfo<LayerTestsDefinitions::RNNSequenceParams>(basicParamsSet, 0));
+        result << CPUTestsBase::getTestCaseName(cpuParams);
+
+        if (!additionalConfig.empty()) {
+            result << "_PluginConf";
+            for (auto &item : additionalConfig) {
+                if (item.second == PluginConfigParams::YES)
+                    result << "_" << item.first << "=" << item.second;
+            }
+        }
+        return result.str();
+    }
+
+protected:
+    void SetUp() override {
+        LayerTestsDefinitions::RNNSequenceParams basicParamsSet;
+        CPUSpecificParams cpuParams;
+        std::map<std::string, std::string> additionalConfig;
+
+        size_t seq_lengths;
+        size_t batch;
+        size_t hidden_size;
+        size_t input_size;
+        std::vector<std::string> activations;
+        std::vector<float> activations_alpha;
+        std::vector<float> activations_beta;
+        float clip;
+        ngraph::op::RecurrentSequenceDirection direction;
+        InferenceEngine::Precision netPrecision;
+
+        std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
+        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
+        std::tie(m_mode, seq_lengths, batch, hidden_size, input_size, activations, clip, direction, netPrecision, targetDevice) = basicParamsSet;
+
+        size_t num_directions = direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 2 : 1;
+        std::vector<std::vector<size_t>> inputShapes = {
+                {batch, seq_lengths, input_size},
+                {batch, num_directions, hidden_size},
+                {batch},
+                {num_directions, hidden_size, input_size},
+                {num_directions, hidden_size, hidden_size},
+                {num_directions, hidden_size},
+        };
+
+        configuration.insert(additionalConfig.begin(), additionalConfig.end());
+
+        if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
+            inPrc = outPrc = Precision::BF16;
+        } else {
+            inPrc = outPrc = netPrecision;
+        }
+
+        selectedType += "_";
+        selectedType += outPrc.name();
+
+        m_max_seq_len = seq_lengths;
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
+        auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
+        if (m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_PARAM
+                || m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM) {
+            auto seq_lengths_param = ngraph::builder::makeParams(ngraph::element::i64, {inputShapes[2]}).at(0);
+            seq_lengths_param->set_friendly_name("seq_lengths");
+            params.push_back(seq_lengths_param);
+        }
+        std::vector<ngraph::Shape> WRB = {inputShapes[3], inputShapes[4], inputShapes[5], inputShapes[2]};
+        auto rnn_sequence = ngraph::builder::makeRNN(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)),
+                                                     WRB,
+                                                     hidden_size,
+                                                     activations,
+                                                     {},
+                                                     {},
+                                                     clip,
+                                                     true,
+                                                     direction,
+                                                     m_mode);
+        ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(rnn_sequence->output(0)),
+                                     std::make_shared<ngraph::opset1::Result>(rnn_sequence->output(1))};
+        function = makeNgraphFunction(ngPrc, params, rnn_sequence, "rnn_sequence");
+        if (m_mode != ngraph::helpers::SequenceTestsMode::PURE_SEQ) {
+            ngraph::pass::Manager manager;
+            if (direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL)
+                manager.register_pass<ngraph::pass::BidirectionalRNNSequenceDecomposition>();
+            manager.register_pass<ngraph::pass::ConvertRNNSequenceToTensorIterator>();
+            manager.run_passes(function);
+            bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
+            EXPECT_EQ(ti_found, true);
+        } else {
+            bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
+            EXPECT_EQ(ti_found, false);
+        }
+    }
+
+    void GenerateInputs() override {
+        for (const auto &input : executableNetwork.GetInputsInfo()) {
+            const auto &info = input.second;
+            auto blob = GenerateInput(*info);
+            if (input.first == "seq_lengths") {
+                blob = FuncTestUtils::createAndFillBlob(info->getTensorDesc(), m_max_seq_len, 0);
+            }
+
+            inputs.push_back(blob);
+        }
+    }
+
+private:
+    ngraph::helpers::SequenceTestsMode m_mode;
+    int64_t m_max_seq_len = 0;
+};
+
+TEST_P(RNNSequenceCPUTest, CompareWithRefs) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    Run();
+    CheckPluginRelatedResults(executableNetwork, "RNNSeq");
+}
+
+namespace {
+/* CPU PARAMS */
+std::vector<std::map<std::string, std::string>> additionalConfig
+    = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
+
+CPUSpecificParams cpuParams{{ntc, nc}, {ntc, nc}, {"ref_any"}, "ref_any"};
+CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc}, {tnc, nc}, {"ref_any"}, "ref_any"};
+
+std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ};
+// output values increase rapidly without clip, so use only seq_lengths = 2
+std::vector<size_t> seq_lengths_zero_clip{2};
+std::vector<size_t> batch{10};
+std::vector<size_t> batch_size_one{1};
+std::vector<size_t> hidden_size{10};
+// std::vector<size_t> hidden_size{1, 10};
+std::vector<size_t> input_size{10};
+std::vector<std::vector<std::string>> activations = {{"relu"}, {"sigmoid"}, {"tanh"}};
+// oneDNN supports only zero clip
+std::vector<float> clip{0.f};
+
+std::vector<ngraph::op::RecurrentSequenceDirection> direction{ngraph::op::RecurrentSequenceDirection::FORWARD};
+
+std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
+
+INSTANTIATE_TEST_CASE_P(smoke_RNNSequenceCPU,
+                        RNNSequenceCPUTest,
+                        ::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
+                                                              ::testing::ValuesIn(seq_lengths_zero_clip),
+                                                              ::testing::ValuesIn(batch),
+                                                              ::testing::ValuesIn(hidden_size),
+                                                              ::testing::ValuesIn(input_size),
+                                                              ::testing::ValuesIn(activations),
+                                                              ::testing::ValuesIn(clip),
+                                                              ::testing::ValuesIn(direction),
+                                                              ::testing::ValuesIn(netPrecisions),
+                                                              ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                                           ::testing::Values(cpuParams),
+                                           ::testing::ValuesIn(additionalConfig)),
+                        RNNSequenceCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_RNNSequenceCPUBatchSizeOne,
+                        RNNSequenceCPUTest,
+                        ::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
+                                                              ::testing::ValuesIn(seq_lengths_zero_clip),
+                                                              ::testing::ValuesIn(batch_size_one),
+                                                              ::testing::ValuesIn(hidden_size),
+                                                              ::testing::ValuesIn(input_size),
+                                                              ::testing::ValuesIn(activations),
+                                                              ::testing::ValuesIn(clip),
+                                                              ::testing::ValuesIn(direction),
+                                                              ::testing::ValuesIn(netPrecisions),
+                                                              ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                                           ::testing::Values(cpuParamsBatchSizeOne),
+                                           ::testing::ValuesIn(additionalConfig)),
+                        RNNSequenceCPUTest::getTestCaseName);
+} // namespace
+} // namespace CPULayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp
index 3bd91fbb639..1f3a271f0a1 100644
--- a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp
@@ -8,16 +8,29 @@ namespace CPUTestUtils {
 
 const char *CPUTestsBase::cpu_fmt2str(cpu_memory_format_t v) {
-    if (v == nchw) return "nchw";
-    if (v == nChw8c) return "nChw8c";
-    if (v == nChw16c) return "nChw16c";
-    if (v == nhwc) return "nhwc";
-    if (v == ncdhw) return "ncdhw";
-    if (v == nCdhw8c) return "nCdhw8c";
-    if (v == nCdhw16c) return "nCdhw16c";
-    if (v == ndhwc) return "ndhwc";
-    if (v == nc) return "nc";
-    if (v == x) return "x";
return "x"; +#define CASE(_fmt) do { \ + if (v == _fmt) return #_fmt; \ +} while (0) + CASE(undef); + CASE(nchw); + CASE(nChw8c); + CASE(nChw16c); + CASE(nhwc); + CASE(ncdhw); + CASE(nCdhw8c); + CASE(nCdhw16c); + CASE(ndhwc); + CASE(nc); + CASE(x); + CASE(tnc); + CASE(ntc); + CASE(ldnc); + CASE(ldigo); + CASE(ldgoi); + CASE(ldio); + CASE(ldoi); + CASE(ldgo); +#undef CASE assert(!"unknown fmt"); return "undef"; } @@ -39,6 +52,10 @@ cpu_memory_format_t CPUTestsBase::cpu_str2fmt(const char *str) { CASE(acdeb); CASE(aBcde8b); CASE(aBcde16b); + CASE(abc); + CASE(bac); + CASE(abdc); + CASE(abdec); CASE(nchw); CASE(nChw8c); CASE(nChw16c); @@ -49,6 +66,14 @@ cpu_memory_format_t CPUTestsBase::cpu_str2fmt(const char *str) { CASE(ndhwc); CASE(nc); CASE(x); + CASE(tnc); + CASE(ntc); + CASE(ldnc); + CASE(ldigo); + CASE(ldgoi); + CASE(ldio); + CASE(ldoi); + CASE(ldgo); #undef CASE assert(!"unknown memory format"); return undef; @@ -120,18 +145,38 @@ void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork auto shape = parentNode->get_output_tensor(0).get_shape(); auto actualInputMemoryFormat = getExecValueOutputsLayout(parentNode); - if (!should_be_skipped(shape, inFmts[i])) + if (!should_be_skipped(shape, inFmts[i])) { ASSERT_EQ(inFmts[i], cpu_str2fmt(actualInputMemoryFormat.c_str())); + } } } - for (int i = 0; i < outFmts.size(); i++) { + + /* actual output formats are represented as a single string, for example 'fmt1' or 'fmt1, fmt2, fmt3' + * convert it to the list of formats */ + auto getActualOutputMemoryFormats = [] (const std::string& fmtStr) -> std::vector { + std::vector result; + std::stringstream ss(fmtStr); + std::string str; + while (std::getline(ss, str, ',')) { + result.push_back(str); + } + return result; + }; + + auto actualOutputMemoryFormats = getActualOutputMemoryFormats(getExecValueOutputsLayout(node)); + + for (size_t i = 0; i < outFmts.size(); i++) { const auto actualOutputMemoryFormat = getExecValue(ExecGraphInfoSerialization::OUTPUT_LAYOUTS); const auto shape = node->get_output_shape(i); - if (!should_be_skipped(shape, outFmts[i])) - ASSERT_EQ(outFmts[i], cpu_str2fmt(actualOutputMemoryFormat.c_str())); + if (should_be_skipped(shape, outFmts[i])) + continue; + + ASSERT_EQ(outFmts[i], cpu_str2fmt(actualOutputMemoryFormats[i].c_str())); } + auto primType = getExecValue(ExecGraphInfoSerialization::IMPL_TYPE); + ASSERT_EQ(selectedType, primType); } } @@ -197,8 +242,11 @@ std::shared_ptr CPUTestsBase::makeNgraphFunction(const ngraph::element::Type &ngPrc, ngraph::ParameterVector ¶ms, const std::shared_ptr &lastNode, std::string name) const { auto newLastNode = modifyGraph(ngPrc, params, lastNode); + ngraph::ResultVector results; + + for (int i = 0; i < newLastNode->get_output_size(); i++) + results.push_back(std::make_shared(newLastNode->output(i))); - ngraph::ResultVector results = {std::make_shared(newLastNode)}; return std::make_shared(results, params, name); } diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp index 09b90ea2434..3797c167feb 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp @@ -24,6 +24,11 @@ namespace CPUTestUtils { acdeb, aBcde8b, aBcde16b, + // RNN layouts + abc, + bac, + abdc, + abdec, x = a, nc = ab, @@ -34,7 +39,41 @@ namespace CPUTestUtils { ncdhw = abcde, nCdhw8c = aBcde8b, nCdhw16c = aBcde16b, - ndhwc = 
+    ndhwc = acdeb,
+    // RNN layouts
+    /// 3D RNN data tensor in the format (seq_length, batch, input channels).
+    tnc = abc,
+    /// 3D RNN data tensor in the format (batch, seq_length, input channels).
+    ntc = bac,
+    /// 4D RNN states tensor in the format (num_layers, num_directions,
+    /// batch, state channels).
+    ldnc = abcd,
+    /// 5D RNN weights tensor in the format (num_layers, num_directions,
+    /// input_channels, num_gates, output_channels).
+    ///
+    ///  - For LSTM cells, the gates order is input, forget, candidate
+    ///    and output gate.
+    ///  - For GRU cells, the gates order is update, reset and output gate.
+    ldigo = abcde,
+    /// 5D RNN weights tensor in the format (num_layers, num_directions,
+    /// num_gates, output_channels, input_channels).
+    ///
+    ///  - For LSTM cells, the gates order is input, forget, candidate
+    ///    and output gate.
+    ///  - For GRU cells, the gates order is update, reset and output gate.
+    ldgoi = abdec,
+    /// 4D LSTM projection tensor in the format (num_layers, num_directions,
+    /// num_channels_in_hidden_state, num_channels_in_recurrent_projection).
+    ldio = abcd,
+    /// 4D LSTM projection tensor in the format (num_layers, num_directions,
+    /// num_channels_in_recurrent_projection, num_channels_in_hidden_state).
+    ldoi = abdc,
+    /// 4D RNN bias tensor in the format (num_layers, num_directions,
+    /// num_gates, output_channels).
+    ///
+    ///  - For LSTM cells, the gates order is input, forget, candidate
+    ///    and output gate.
+    ///  - For GRU cells, the gates order is update, reset and output gate.
+    ldgo = abcd,
 } cpu_memory_format_t;
 
 using CPUSpecificParams = std::tuple<
diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/lstm_cell.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/lstm_cell.hpp
index f07b1d51bf0..df6995174f9 100644
--- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/lstm_cell.hpp
+++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/lstm_cell.hpp
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include 
 #include 
 #include 
 #include 
@@ -26,7 +27,7 @@ using LSTMCellParams = typename std::tuple<
         std::string>; // Device name
 
 class LSTMCellTest : public testing::WithParamInterface<LSTMCellParams>,
-                     virtual public LayerTestsUtils::LayerTestsCommon {
+                     virtual public LayerTestsUtils::LayerTestsCommon {
 public:
     static std::string getTestCaseName(const testing::TestParamInfo<LSTMCellParams> &obj);
diff --git a/inference-engine/thirdparty/mkl-dnn b/inference-engine/thirdparty/mkl-dnn
index 0813c00df75..462982a2f92 160000
--- a/inference-engine/thirdparty/mkl-dnn
+++ b/inference-engine/thirdparty/mkl-dnn
@@ -1 +1 @@
-Subproject commit 0813c00df7558bc9b858d3a73c725bab2ce1b1eb
+Subproject commit 462982a2f9272ad26473ec13d983b10dbd193cd3
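
A note on the new output-layout check in cpu_test_utils.cpp: the exec graph reports all output layouts of a node as one comma-separated string, which the patch splits before comparing each entry with cpu_str2fmt(). Below is a minimal standalone sketch of that splitting (plain C++, illustrative names, not code from the patch); it additionally trims whitespace, since the comment's example 'fmt1, fmt2, fmt3' carries a space after each comma:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Split "fmt1, fmt2, fmt3" into {"fmt1", "fmt2", "fmt3"}.
// The whitespace trim is an extra safety step, not taken verbatim from the patch.
static std::vector<std::string> splitLayouts(const std::string &fmtStr) {
    std::vector<std::string> result;
    std::stringstream ss(fmtStr);
    std::string token;
    while (std::getline(ss, token, ',')) {
        const auto first = token.find_first_not_of(" \t");
        const auto last = token.find_last_not_of(" \t");
        if (first != std::string::npos)
            result.push_back(token.substr(first, last - first + 1));
    }
    return result;
}

int main() {
    for (const auto &fmt : splitLayouts("ntc, nc"))
        std::cout << fmt << '\n';  // prints "ntc" then "nc"
    return 0;
}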
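The layout tags added to cpu_memory_format_t mirror oneDNN's RNN format tags, with the dimension orders given by the doc comments in cpu_test_utils.hpp. As a worked illustration, a hypothetical helper (not part of the patch) that spells out the shape a weights tensor takes under ldigo versus ldgoi:

#include <array>
#include <cstddef>
#include <cstdio>

// Dimension orders per the oneDNN documentation quoted in cpu_test_utils.hpp:
//   ldigo: (num_layers, num_directions, input_channels, num_gates, output_channels)
//   ldgoi: (num_layers, num_directions, num_gates, output_channels, input_channels)
static std::array<std::size_t, 5> rnnWeightsDims(bool ldigo,
                                                 std::size_t L, std::size_t D,
                                                 std::size_t IC, std::size_t G, std::size_t OC) {
    return ldigo ? std::array<std::size_t, 5>{L, D, IC, G, OC}
                 : std::array<std::size_t, 5>{L, D, G, OC, IC};
}

int main() {
    // One-layer, unidirectional LSTM: G = 4 gates (input, forget, candidate, output),
    // 30 input channels, 10 output channels.
    for (std::size_t d : rnnWeightsDims(/*ldigo=*/true, 1, 1, 30, 4, 10))
        std::printf("%zu ", d);  // prints: 1 1 30 4 10
    std::printf("\n");
    return 0;
}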
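Finally, the new ASSERT_EQ(selectedType, primType) check compares against an expected primitive-type string that both test fixtures build the same way: the base "ref_any" from CPUSpecificParams, plus "_", plus the runtime precision name, which KEY_ENFORCE_BF16 switches to BF16. A hedged sketch of that composition (the precision-name strings are assumptions mirroring what InferenceEngine::Precision::name() returns):

#include <iostream>
#include <string>

int main() {
    const bool enforceBF16 = true;             // additionalConfig[KEY_ENFORCE_BF16] == YES
    std::string selectedType = "ref_any";      // last element of CPUSpecificParams

    // Mirrors: inPrc = outPrc = enforceBF16 ? Precision::BF16 : netPrecision;
    const std::string precName = enforceBF16 ? "BF16" : "FP32";

    // Mirrors: selectedType += "_"; selectedType += outPrc.name();
    selectedType += "_";
    selectedType += precName;

    std::cout << selectedType << '\n';         // prints "ref_any_BF16"
    return 0;
}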