diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp
index 62e4a64eda0..9b220b0a9a6 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp
@@ -5,12 +5,17 @@
 #include "mkldnn_rnn.h"
 #include "mkldnn_extension_utils.h"
+#include "mkldnn_node.h"
 #include "utils/general_utils.h"
 #include "nodes/common/cpu_memcpy.h"
+#include "utils/bfloat16.hpp"
+#include "nodes/common/cpu_convert.h"
 #include <string>
 #include <utility>
 
+#define THROW_ERROR IE_THROW() << NameFromType(getType()) << " layer '" << getName() << "' "
+
 using namespace mkldnn;
 using namespace InferenceEngine;
 
@@ -39,7 +44,7 @@ static algorithm ie2mkl(RNNCellBase::CellType cell_type) {
     case RNNCellBase::GRU:     return algorithm::vanilla_gru;
     case RNNCellBase::GRU_LBR: return algorithm::lbr_gru;
     default:
-        IE_THROW() << "Unsupported cell type";
+        IE_THROW() << "RNN node. Unsupported cell type";
         return algorithm::undef;
     }
 }
@@ -51,7 +56,7 @@ size_t gatesCount(algorithm alg) {
     case algorithm::lbr_gru:      return 3;
     case algorithm::vanilla_lstm: return 4;
     default:
-        IE_THROW() << "Unsupported cell type";
+        IE_THROW() << "RNN node. Unsupported cell type";
         return 0;
     }
 }
@@ -63,11 +68,24 @@ size_t statesCount(algorithm alg) {
     case algorithm::lbr_gru:      return 1;
     case algorithm::vanilla_lstm: return 2;
     default:
-        IE_THROW() << "Unsupported cell type";
+        IE_THROW() << "RNN node. Unsupported cell type";
         return 0;
     }
 }
 
+bool haveCellState(algorithm alg) {
+    return alg == algorithm::vanilla_lstm;
+}
+
+const std::map<InferenceEngine::Precision, InferenceEngine::Precision> MKLDNNRNN::weightsByLayerPrec {
+    // layer precision,                weights precision
+    {InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32},
+    {InferenceEngine::Precision::BF16, InferenceEngine::Precision::BF16},
+    // FP16 and U8 are not supported yet
+    // {InferenceEngine::Precision::FP16, InferenceEngine::Precision::FP16},
+    // {InferenceEngine::Precision::U8,   InferenceEngine::Precision::I8},
+};
+
 MKLDNNRNN::MKLDNNRNN(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
         MKLDNNNode(layer, eng, cache) {
     is_cell = one_of(layer->type, "LSTMCell", "GRUCell", "RNNCell");
@@ -78,6 +96,8 @@ bool MKLDNNRNN::created() const {
 }
 
 void MKLDNNRNN::getSupportedDescriptors() {
+    runtimePrecision = getCnnLayer()->insData[0].lock()->getPrecision();
+
     if (is_cell)
         fillCellDesc();
     else
@@ -89,14 +109,14 @@ void MKLDNNRNN::fillCellDesc() {
     auto cellLayer = std::dynamic_pointer_cast<RNNCellBase>(getCnnLayer());
 
     if (!cellLayer)
-        IE_THROW() << "No original layer for RNNCell.";
+        THROW_ERROR << "No original layer for RNNCell.";
 
     cell_type = ie2mkl(cellLayer->cellType);
     cell_act = ie2mkl(cellLayer->activations[0]);  // Works only for RNN with one gate
 
     if (cellLayer->clip != 0.0f) {
         // TODO [oneDNN]: No more supported
-        IE_THROW() << "Clipping is not supported for RNN primitive";
+        THROW_ERROR << "Clipping is not supported for RNN primitive";
         // cell_desc.set_clipping(cellLayer->clip);
     }
 
@@ -104,16 +124,16 @@
     auto &ins = cellLayer->insData;
     auto &outs = cellLayer->outData;
 
     if (!one_of(ins.size(), 3, 2))
-        IE_THROW() << "Incorrect number of input ports for layer " << getName();
+        THROW_ERROR << "Incorrect number of input ports for layer " << getName();
     if (!one_of(outs.size(), 2, 1))
-        IE_THROW() << "Incorrect number of output ports for layer " << getName();
+        THROW_ERROR << "Incorrect number of output ports for layer " << getName();
 
     auto in_data_dims = getParentEdgeAt(0)->getDims();
     auto 
in_h_state_dims = getParentEdgeAt(1)->getDims(); auto out_h_state_dims = getChildEdgeAt(0)->getDims(); if (in_data_dims.ndims() != 2 || in_h_state_dims.ndims() != 2) - IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); + THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); G = gatesCount(cell_type); S = statesCount(cell_type); @@ -130,7 +150,7 @@ void MKLDNNRNN::fillCellDesc() { if (in_data_dims != D_shape || in_h_state_dims != S_shape || out_h_state_dims != S_shape) - IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); + THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); if (S == 2) { auto in_c_state_dims = getParentEdgeAt(2)->getDims(); @@ -138,7 +158,7 @@ void MKLDNNRNN::fillCellDesc() { if (in_c_state_dims != S_shape || out_c_state_dims != S_shape) - IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); + THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); } auto blobs = cellLayer->blobs; @@ -147,40 +167,53 @@ void MKLDNNRNN::fillCellDesc() { if (blobs.find("biases") != blobs.end()) bias = blobs["biases"]; if (!weights) - IE_THROW() << "RNN Layer. Weights do not present."; + THROW_ERROR << "RNN Layer. Weights do not present."; - if (weights->size() != G*SC*(SC+DC)) - IE_THROW() << "RNN Layer. Weights size is not correct. Expected size:" << G*SC*(SC+DC); + if (weights->size() != G * SC * (SC + DC)) + THROW_ERROR << "RNN Layer. Weights size is not correct. Expected size:" << G * SC * (SC + DC); - if (bias && bias->size() != Gb*SC) - IE_THROW() << "RNN Layer. Biases size is not correct. Expected size:" << G*SC; + if (bias && bias->size() != Gb * SC) + THROW_ERROR << "RNN Layer. Biases size is not correct. Expected size:" << G * SC; + + auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); + + // layer input plus states + in_data_d.resize(S + 1); + out_data_d.resize(S + 1); // Shapes and Attributes are correct. Can start internal stuff initialization. 
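
[Editor's aside] Later in this hunk, `fillCellDesc()` falls back to `convertWeightsBlobToBF16()` when the layer runs in BF16 but the weights blob is still FP32, delegating the element-wise narrowing to `cpu_convert`. As a self-contained illustration of what that FP32-to-BF16 narrowing amounts to (round-to-nearest-even to the upper 16 bits of the IEEE-754 float; the helper name `f32_to_bf16` is hypothetical, not plugin API, and NaN handling is omitted):

```cpp
#include <cstdint>
#include <cstring>
#include <cstdio>

// Sketch: FP32 -> BF16 narrowing with round-to-nearest-even, i.e. the kind of
// conversion cpu_convert(src, dst, FP32, BF16, n) applies to each weight.
static uint16_t f32_to_bf16(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof bits);        // type-pun without UB
    bits += 0x7FFFu + ((bits >> 16) & 1u);      // rounding bias, ties to even
    return static_cast<uint16_t>(bits >> 16);   // keep sign + exponent + 7 mantissa bits
}

int main() {
    std::printf("bf16(1.0f) = 0x%04x\n", f32_to_bf16(1.0f));  // 0x3f80, exact
    std::printf("bf16(0.1f) = 0x%04x\n", f32_to_bf16(0.1f));  // 0x3dcd, rounds up
    return 0;
}
```
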
-    for (size_t i = 0; i < S; i++) {
-        in_states_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc);
-        out_states_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc);
+    in_data_d[RNNInOutKind::Layer]  = {{T, N, DC}, dataType, memory::format_tag::tnc};
+    out_data_d[RNNInOutKind::Layer] = {{T, N, SC}, dataType, memory::format_tag::tnc};
+
+    in_data_d[RNNInOutKind::HiddenState]  = {S_4D_shape, dataType, memory::format_tag::ldnc};
+    out_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc};
+
+    if (haveCellState(cell_type)) {
+        in_data_d[RNNInOutKind::CellState]  = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
+        out_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
     }
 
-    in_data_d  = {{T, N, DC}, memory::data_type::f32, memory::format_tag::tnc};;
-    out_data_d = {{T, N, SC}, memory::data_type::f32, memory::format_tag::tnc};;
-
-    w_data_d   = {{L, D, DC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
-    w_state_d  = {{L, D, SC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
+    w_data_d  = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo};
+    w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo};
 
     if (bias)
         w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo};
 
     std::vector<TensorDesc> in_candidate, out_candidate;
-    std::vector<memory::format_tag> outputFormats;
-    in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, memory::data_type::f32, memory::format_tag::nc});
-    in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
-    out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
-    outputFormats.emplace_back(memory::format_tag::nc);
+    in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, dataType, memory::format_tag::nc});
+    in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc});
+    out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc});
 
-    if (S == 2) {
+    if (haveCellState(cell_type)) {
         in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
         out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
-        outputFormats.emplace_back(memory::format_tag::nc);
+    }
+
+    Precision weights_prec = as<MemoryBlob>(weights)->getTensorDesc().getPrecision();
+
+    if (!verifyWeightsPrecision(runtimePrecision, weights_prec)) {
+        if (runtimePrecision == Precision::BF16 && weights_prec == Precision::FP32)
+            convertWeightsBlobToBF16();
     }
 
     createDescriptor(in_candidate, out_candidate);
@@ -191,10 +224,10 @@ void MKLDNNRNN::fillSeqDesc() {
     auto rnnLayer = std::dynamic_pointer_cast<RNNSequenceLayer>(getCnnLayer());
 
     if (!rnnLayer)
-        IE_THROW() << "Wrong RNN layer representation. Cannot cast to RNNSequenceLayer.";
+        THROW_ERROR << "Wrong RNN layer representation. 
Cannot cast to RNNSequenceLayer."; if (!one_of(rnnLayer->cellType, _RNN::LSTM, _RNN::GRU, _RNN::GRU_LBR, _RNN::RNN)) - IE_THROW() << "RNN layer supports only LSTM/GRU/RNN cell"; + THROW_ERROR << "RNN layer supports only LSTM/GRU/RNN cell"; cell_type = ie2mkl(rnnLayer->cellType); cell_act = algorithm::undef; @@ -203,31 +236,31 @@ void MKLDNNRNN::fillSeqDesc() { // TODO [oneDNN]: No more supported if (rnnLayer->clip != 0.0f) { - IE_THROW() << "Clipping is not supported for RNN primitive"; + THROW_ERROR << "Clipping is not supported for RNN primitive"; // cell_desc.set_clipping(rnnLayer->clip); } if (!one_of(rnnLayer->axis, 0, 1)) - IE_THROW() << "RNN layer supports only sequence axis 0 or 1"; + THROW_ERROR << "RNN layer supports only sequence axis 0 or 1"; nativeOrder = rnnLayer->axis == 0; if (!one_of(rnnLayer->direction, _RNN::FWD, _RNN::BWD)) - IE_THROW() << "RNN layer supports only unidirectional RNN layer"; + THROW_ERROR << "RNN layer supports only unidirectional RNN layer"; direction = ie2mkl(rnnLayer->direction); auto &ins = rnnLayer->insData; auto &outs = rnnLayer->outData; if (!one_of(ins.size(), 3, 2, 1)) - IE_THROW() << "Incorrect number of input ports for layer " << getName(); + THROW_ERROR << "Incorrect number of input ports for layer " << getName(); if (!one_of(outs.size(), 3, 2, 1)) - IE_THROW() << "Incorrect number of output ports for layer " << getName(); + THROW_ERROR << "Incorrect number of output ports for layer " << getName(); auto in_data_dims = getParentEdgeAt(0)->getDims(); auto out_data_dims = getChildEdgeAt(0)->getDims(); if (in_data_dims.ndims() != 3 || out_data_dims.ndims() != 3) - IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); + THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); if (!nativeOrder) { std::swap(in_data_dims[0], in_data_dims[1]); @@ -246,125 +279,153 @@ void MKLDNNRNN::fillSeqDesc() { MKLDNNDims ID_shape {T, N, DC}, OD_shape {T, N, SC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; if (out_data_dims != OD_shape) - IE_THROW() << "Incorrect shape of input/output ports for layer " << getName(); + THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); - in_states_d.resize(S); - out_states_d.resize(S); - - for (int i = 1; i < ins.size(); i++) { - if (getParentEdgeAt(i)->getDims() != S_shape) - IE_THROW() << "Incorrect shape of state ports for layer " << getName(); - in_states_d[i - 1] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; - } - - for (int i = 1; i < outs.size(); i++) { - if (getChildEdgeAt(i)->getDims() != S_shape) - IE_THROW() << "Incorrect shape of state ports for layer " << getName(); - out_states_d[i - 1] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc}; - } - - auto blobs = rnnLayer->blobs; + auto& blobs = rnnLayer->blobs; Blob::Ptr weights, bias; if (blobs.find("weights") != blobs.end()) weights = blobs["weights"]; if (blobs.find("biases") != blobs.end()) bias = blobs["biases"]; if (!weights) - IE_THROW() << "RNN Layer. Weights do not present."; + THROW_ERROR << "RNN Layer. Weights do not present."; - if (weights->size() != G*SC*(SC+DC)) - IE_THROW() << "RNN Layer. Weights size is not correct. Expected size:" << G*SC*(SC+DC); + if (weights->size() != G * SC * (SC + DC)) + THROW_ERROR << "RNN Layer. Weights size is not correct. 
Expected size:" << G * SC * (SC + DC);
 
-    w_data_d  = {{L, D, DC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
-    w_state_d = {{L, D, SC, G, SC}, memory::data_type::f32, memory::format_tag::ldigo};
+    for (int i = 1; i < ins.size(); i++) {
+        if (getParentEdgeAt(i)->getDims() != S_shape)
+            THROW_ERROR << "Incorrect shape of state ports for layer " << getName();
+    }
 
-    if (bias && bias->size() != Gb*SC)
-        IE_THROW() << "RNN Layer. Biases size is not correct. Expected size:" << G*SC;
+    for (int i = 1; i < outs.size(); i++) {
+        if (getChildEdgeAt(i)->getDims() != S_shape)
+            THROW_ERROR << "Incorrect shape of state ports for layer " << getName();
+    }
+
+    // layer input plus states
+    in_data_d.resize(S + 1);
+    out_data_d.resize(S + 1);
+
+    auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision);
+
+    // Try to create descriptor and corresponding configuration
+    in_data_d[RNNInOutKind::Layer]  = {in_data_dims, dataType, memory::format_tag::tnc};
+    out_data_d[RNNInOutKind::Layer] = {out_data_dims, dataType, memory::format_tag::tnc};
+
+    in_data_d[RNNInOutKind::HiddenState]  = {S_4D_shape, dataType, memory::format_tag::ldnc};
+    out_data_d[RNNInOutKind::HiddenState] = {S_4D_shape, dataType, memory::format_tag::ldnc};
+
+    if (haveCellState(cell_type)) {
+        in_data_d[RNNInOutKind::CellState]  = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
+        out_data_d[RNNInOutKind::CellState] = {S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc};
+    }
+
+    w_data_d  = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo};
+    w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo};
+
+    if (bias && bias->size() != Gb * SC)
+        THROW_ERROR << "RNN Layer. Biases size is not correct. Expected size:" << G * SC;
 
     if (bias)
         w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo};
 
-    // Try to create descriptor and corresponding configuration
-    in_data_d = {in_data_dims, memory::data_type::f32, memory::format_tag::tnc};
-    out_data_d = {out_data_dims, memory::data_type::f32, memory::format_tag::tnc};
+    std::vector<TensorDesc> in_candidate, out_candidate;
 
-    std::vector<TensorDesc> in_candidate;
-    if (nativeOrder)
-        in_candidate.push_back(in_data_d);
-    else
-        in_candidate.push_back(MKLDNNMemoryDesc{{N, T, DC}, memory::data_type::f32, memory::format_tag::ntc});
-
-    for (int i = 1; i < ins.size(); i++)
-        in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::data_type::f32, memory::format_tag::nc});
-
-    std::vector<TensorDesc> out_candidate;
     if (nativeOrder) {
-        out_candidate.push_back(out_data_d);
+        in_candidate.push_back(in_data_d[RNNInOutKind::Layer]);
+        out_candidate.push_back(out_data_d[RNNInOutKind::Layer]);
     } else {
-        out_candidate.push_back(MKLDNNMemoryDesc{{N, T, SC}, memory::data_type::f32, memory::format_tag::ntc});
+        in_candidate.emplace_back(MKLDNNMemoryDesc{{N, T, DC}, dataType, memory::format_tag::ntc});
+        out_candidate.emplace_back(MKLDNNMemoryDesc{{N, T, SC}, dataType, memory::format_tag::ntc});
     }
 
-    for (int i = 1; i < outs.size(); i++) {
+    in_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, dataType, memory::format_tag::nc});
+    out_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, dataType, memory::format_tag::nc});
+
+    if (haveCellState(cell_type)) {
+        in_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, memory::data_type::f32, memory::format_tag::nc});
         out_candidate.emplace_back(MKLDNNMemoryDesc{S_shape, memory::data_type::f32, memory::format_tag::nc});
     }
 
+    Precision weights_prec = as<MemoryBlob>(weights)->getTensorDesc().getPrecision();
+
+    if (!verifyWeightsPrecision(runtimePrecision, weights_prec)) {
+        if (runtimePrecision == Precision::BF16 && weights_prec == Precision::FP32)
+            convertWeightsBlobToBF16();
+    }
+
     createDescriptor(in_candidate, out_candidate);
 }
 
+void MKLDNNRNN::convertWeightsBlobToBF16() {
+    Blob::Ptr &weights = getCnnLayer()->blobs["weights"];
+    MemoryBlob::Ptr cur_weights = as<MemoryBlob>(weights);
+    TensorDesc td(Precision::BF16, cur_weights->getTensorDesc().getDims(), cur_weights->getTensorDesc().getLayout());
+    MemoryBlob::Ptr new_weights_blob = make_shared_blob<bfloat16_t>(td);
+
+    new_weights_blob->allocate();
+    bfloat16_t *dst = new_weights_blob->wmap();
+
+    float* fp32src = cur_weights->rmap().as<float*>();
+    cpu_convert(fp32src, dst, Precision::FP32, Precision::BF16, new_weights_blob->size());
+    weights = new_weights_blob;
+}
+
 void MKLDNNRNN::createDescriptor(const std::vector<TensorDesc> &inputDesc,
                                  const std::vector<TensorDesc> &outputDesc) {
     switch (cell_type) {
         case mkldnn::algorithm::vanilla_rnn: {
             MKLDNNDescriptor desc(std::shared_ptr<vanilla_rnn_forward::desc>(
                     new vanilla_rnn_forward::desc(prop_kind::forward_scoring, cell_act, direction,
-                        /* In Data       */ in_data_d,
-                        /* In State      */ in_states_d[0],
+                        /* In Data       */ in_data_d[RNNInOutKind::Layer],
+                        /* In State      */ in_data_d[RNNInOutKind::HiddenState],
                         /* Weights data  */ w_data_d,
                         /* Weights state */ w_state_d,
                         /* Bias          */ w_bias_d,
-                        /* Out Data      */ out_data_d,
-                        /* Out State     */ out_states_d[0])));
+                        /* Out Data      */ out_data_d[RNNInOutKind::Layer],
+                        /* Out State     */ out_data_d[RNNInOutKind::HiddenState])));
             descs.push_back(desc);
         } break;
         case mkldnn::algorithm::vanilla_gru: {
             MKLDNNDescriptor desc(std::shared_ptr<gru_forward::desc>(
                     new gru_forward::desc(prop_kind::forward_scoring, direction,
-                        /* In Data       */ in_data_d,
-                        /* In State      */ in_states_d[0],
+                        /* In Data       */ in_data_d[RNNInOutKind::Layer],
+                        /* In State      */ in_data_d[RNNInOutKind::HiddenState],
                         /* Weights data  */ w_data_d,
                         /* Weights state */ w_state_d,
                         /* Bias          */ w_bias_d,
-                        /* Out Data      */ out_data_d,
-                        /* Out State     */ out_states_d[0])));
+                        /* Out Data      */ out_data_d[RNNInOutKind::Layer],
+                        /* Out State     */ out_data_d[RNNInOutKind::HiddenState])));
             descs.push_back(desc);
         } break;
         case mkldnn::algorithm::lbr_gru: {
             MKLDNNDescriptor desc(std::shared_ptr<lbr_gru_forward::desc>(
                     new lbr_gru_forward::desc(prop_kind::forward_scoring, direction,
-                        /* In Data       */ in_data_d,
-                        /* In State      */ in_states_d[0],
+                        /* In Data       */ in_data_d[RNNInOutKind::Layer],
+                        /* In State      */ in_data_d[RNNInOutKind::HiddenState],
                         /* Weights data  */ w_data_d,
                         /* Weights state */ w_state_d,
                         /* Bias          */ w_bias_d,
-                        /* Out Data      */ out_data_d,
-                        /* Out State     */ out_states_d[0])));
+                        /* Out Data      */ out_data_d[RNNInOutKind::Layer],
+                        /* Out State     */ out_data_d[RNNInOutKind::HiddenState])));
             descs.push_back(desc);
         } break;
         case mkldnn::algorithm::vanilla_lstm: {
             MKLDNNDescriptor desc(std::shared_ptr<lstm_forward::desc>(
                     new lstm_forward::desc(prop_kind::forward_scoring, direction,
-                        /* In Data       */ in_data_d,
-                        /* In State H    */ in_states_d[0],
-                        /* In State C    */ in_states_d[1],
+                        /* In Data       */ in_data_d[RNNInOutKind::Layer],
+                        /* In State      */ in_data_d[RNNInOutKind::HiddenState],
+                        /* In State C    */ in_data_d[RNNInOutKind::CellState],
                         /* Weights data  */ w_data_d,
                         /* Weights state */ w_state_d,
                         /* Bias          */ w_bias_d,
-                        /* Out Data      */ out_data_d,
-                        /* Out State H   */ out_states_d[0],
-                        /* Out State C   */ out_states_d[1])));
+                        /* Out Data      */ out_data_d[RNNInOutKind::Layer],
+                        /* Out State     */ out_data_d[RNNInOutKind::HiddenState],
+                        /* Out State C   */ out_data_d[RNNInOutKind::CellState])));
             descs.push_back(desc);
         } break;
         default:
-            IE_THROW() << "Unknown cell type";
+            THROW_ERROR << "Unknown cell type";
     }
 
     // Fill supported config
@@ -389,130 +450,170 @@ void MKLDNNRNN::createDescriptor(const std::vector<TensorDesc> &inputDesc,
     supportedPrimitiveDescriptors.emplace_back(config, ref_any);
 }
 
+bool MKLDNNRNN::verifyWeightsPrecision(const Precision &layerPrec, const Precision &weightsPrec) {
+    if (!weightsByLayerPrec.count(layerPrec))
+        THROW_ERROR << "Unsupported layer precision " << layerPrec;
+    return weightsPrec == weightsByLayerPrec.at(layerPrec);
+}
+
+void MKLDNNRNN::verifyWeights() {
+    auto layer = getCnnLayer();
+    auto weightsIt = layer->blobs.find("weights");
+
+    if (weightsIt == layer->blobs.end())
+        THROW_ERROR << "Missed weights blob.";
+
+    const auto& weightsPrec = weightsIt->second->getTensorDesc().getPrecision();
+
+    if (!verifyWeightsPrecision(runtimePrecision, weightsPrec)) {
+        THROW_ERROR << "Weights precision " << weightsPrec <<
+                       " does not match runtime precision " << runtimePrecision;
+    }
+}
+
+void MKLDNNRNN::verifyBiases() {
+    auto layer = getCnnLayer();
+    if (layer->blobs.find("biases") != layer->blobs.end()
+            && layer->blobs["biases"]->getTensorDesc().getPrecision() != Precision::FP32)
+        THROW_ERROR << "Invalid biases precision: " << layer->blobs["biases"]->getTensorDesc().getPrecision();
+}
+
 void MKLDNNRNN::createPrimitive() {
     if (prim)
         return;
 
-    std::string errorPrefix = "RNN layer '" + getCnnLayer()->name + "'";
-    auto weightsIt = getCnnLayer()->blobs.find("weights");
-    if (weightsIt == getCnnLayer()->blobs.end())
-        IE_THROW() << errorPrefix << " does not have weights blob.";
-    if (weightsIt->second->getTensorDesc().getPrecision() != Precision::FP32)
-        IE_THROW() << errorPrefix << " has invalid weights precision: " << weightsIt->second->getTensorDesc().getPrecision();
-    if (getCnnLayer()->blobs.find("biases") != getCnnLayer()->blobs.end()
-            && getCnnLayer()->blobs["biases"]->getTensorDesc().getPrecision() != Precision::FP32)
-        IE_THROW() << errorPrefix << " has invalid biases precision: " << getCnnLayer()->blobs["biases"]->getTensorDesc().getPrecision();
+    verifyWeights();
+    verifyBiases();
+
+    /*
+     * Gate order
+     * ====== LSTM ======
+     * Caffe - IFOC, ONNX   - IOFC
+     * IE    - FICO, mkldnn - IFCO
+     *
+     * ====== GRU ======
+     * IE - URO, mkldnn - URO
+     */
+    const int gate_map_lstm[] = {1, 0, 2, 3};  // FICO -> IFCO
+    const int gate_map_gru[]  = {0, 1, 2, 3};
+    const int gate_map_rnn[]  = {0};
+    const int *gate_map;
+    const int gate_map_lstm_size = sizeof(gate_map_lstm) / sizeof(int);
+    const int gate_map_gru_size = sizeof(gate_map_gru) / sizeof(int);
+    const int gate_map_rnn_size = sizeof(gate_map_rnn) / sizeof(int);
+    if (cell_type == algorithm::vanilla_lstm) {
+        gate_map = gate_map_lstm;
+        if (G > gate_map_lstm_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    } else if (cell_type == algorithm::vanilla_gru) {
+        gate_map = gate_map_gru;
+        if (G > gate_map_gru_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    } else if (cell_type == algorithm::lbr_gru) {
+        gate_map = gate_map_gru;
+        if (G > gate_map_gru_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    } else if (cell_type == algorithm::vanilla_rnn) {
+        gate_map = gate_map_rnn;
+        if (G > gate_map_rnn_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    } else {
+        gate_map = gate_map_gru;
+        if (G > gate_map_gru_size) {
+            THROW_ERROR << "G isn't equal to the size of gate_map";
+        }
+    }
+
+    if (runtimePrecision == Precision::BF16)
+        fillWeights<bfloat16_t>(gate_map);
+    else if (runtimePrecision == Precision::FP32)
+        fillWeights<float>(gate_map);
+    else  // TODO FP16 and INT8 support
+        THROW_ERROR << "Unsupported data type";
+
+    if (runtimePrecision == Precision::BF16 ||
+        runtimePrecision == Precision::FP32)
+        fillBiases<float>(gate_map);
 
     auto pd = descs[0].createPrimitiveDescriptorIterator(getEngine());
+    prim.reset(new mkldnn::primitive(pd));
+}
 
-    auto src_data_mem = getParentEdgeAt(0)->getMemoryPtr();
-    auto dst_data_mem = getChildEdgeAt(0)->getMemoryPtr();
-
-    // create weight blobs (data and state part)
-    auto w_data_mem = std::make_shared<MKLDNNMemory>(getEngine());
-    w_data_mem->Create(w_data_d);
-    internalBlobMemory.push_back(w_data_mem);
-
-    auto w_state_mem = std::make_shared<MKLDNNMemory>(getEngine());
-    w_state_mem->Create(w_state_d);
-    internalBlobMemory.push_back(w_state_mem);
+/*
+ * IE format:
+ *   B - [gates, out_state_size]
+ *
+ * MKLDNN format:
+ *   B - [gates, out_state_size]
+ *
+ */
+template <typename Prec>
+void MKLDNNRNN::fillBiases(const int *gate_map) {
+    if (!w_bias_d)
+        return;
 
     auto w_bias_mem = std::make_shared<MKLDNNMemory>(getEngine());
     w_bias_mem->Create(w_bias_d);
     internalBlobMemory.push_back(w_bias_mem);
 
-    {
-        /* Copy Weight data
-         * IE format:
-         *   W - [gates, out_state_size, in_data_size + in_state_size]
-         *   B - [gates, out_state_size]
-         *
-         * MKLDNN format:
-         *   W - [1, 1, in_date_size, gates, out_state_size]
-         *   R - [1, 1, in_state_size, gates, out_state_size]
-         *   B - [gates, out_state_size]
-         *
-         * Gate order
-         * ====== LSTM ======
-         * Caffe - IFOC, ONNX   - IOFC
-         * IE    - FICO, mkldnn - IFCO
-         *
-         * ====== GRU ======
-         * IE - URO, mkldnn - URO
-         */
-        const int gate_map_lstm[] = {1, 0, 2, 3};  // FICO -> IFCO
-        const int gate_map_gru[]  = {0, 1, 2, 3};
-        const int gate_map_rnn[]  = {0};
-        const int *gate_map;
-        const int gate_map_lstm_size = sizeof(gate_map_lstm) / sizeof(int);
-        const int gate_map_gru_size = sizeof(gate_map_gru) / sizeof(int);
-        const int gate_map_rnn_size = sizeof(gate_map_rnn) / sizeof(int);
-        if (cell_type == algorithm::vanilla_lstm) {
-            gate_map = gate_map_lstm;
-            if (G > gate_map_lstm_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        } else if (cell_type == algorithm::vanilla_gru) {
-            gate_map = gate_map_gru;
-            if (G > gate_map_gru_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        } else if (cell_type == algorithm::lbr_gru) {
-            gate_map = gate_map_gru;
-            if (G > gate_map_gru_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        } else if (cell_type == algorithm::vanilla_rnn) {
-            gate_map = gate_map_rnn;
-            if (G > gate_map_rnn_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        } else {
-            gate_map = gate_map_gru;
-            if (G > gate_map_gru_size) {
-                IE_THROW() << "G isn't equal to the size of gate_map";
-            }
-        }
+    auto ie_b_ptr = getCnnLayer()->blobs["biases"]->buffer().as<const Prec*>();
+    auto b_ptr = static_cast<Prec*>(w_bias_mem->GetData());
+    for (int g = 0; g < Gb; g++) {
+        Prec *l_b_ptr = b_ptr + gate_map[g] * SC;
+        const Prec *l_ie_b_ptr = ie_b_ptr + g * SC;
+        cpu_memcpy(l_b_ptr, l_ie_b_ptr, SC * sizeof(Prec));
+    }
+}
 
-        auto ie_w_ptr = getCnnLayer()->blobs["weights"]->buffer().as<const float*>();
-        auto w_ptr = static_cast<float*>(w_data_mem->GetData());
-        auto r_ptr = static_cast<float*>(w_state_mem->GetData());
-        const int step = SC * G;
+/*
+ * IE format:
+ *   W - [gates, out_state_size, in_data_size + in_state_size]
+ *
+ * MKLDNN format:
+ *   W - [1, 1, in_date_size, gates, out_state_size]
+ *   R - [1, 1, in_state_size, gates, out_state_size]
+ *
+ */
+template <typename Prec>
+void MKLDNNRNN::fillWeights(const int *gate_map) {
+    // create weight blobs (data and state part)
+    auto w_data_mem = std::make_shared<MKLDNNMemory>(getEngine());
+    w_data_mem->Create(w_data_d);
+    internalBlobMemory.push_back(w_data_mem);
+    auto w_state_mem = std::make_shared<MKLDNNMemory>(getEngine());
+    w_state_mem->Create(w_state_d);
+    internalBlobMemory.push_back(w_state_mem);
 
-        for (int g = 0; g < G; g++) {
-            for (int out_i = 0; out_i < SC; out_i++) {
-                float *l_w_ptr = w_ptr + gate_map[g]*SC + out_i;
-                float *l_r_ptr = r_ptr + gate_map[g]*SC + out_i;
-                for (int in_i = 0; in_i < DC; in_i++) {
-                    *l_w_ptr = *ie_w_ptr;
-                    ie_w_ptr++;
-                    l_w_ptr += step;
-                }
+    auto ie_w_ptr = getCnnLayer()->blobs["weights"]->buffer().as<const Prec*>();
+    auto w_ptr = static_cast<Prec*>(w_data_mem->GetData());
+    auto r_ptr = static_cast<Prec*>(w_state_mem->GetData());
+    const int step = SC * G;
 
-                for (int in_i = 0; in_i < SC; in_i++) {
-                    *l_r_ptr = *ie_w_ptr;
-                    ie_w_ptr++;
-                    l_r_ptr += step;
-                }
+    for (int g = 0; g < G; g++) {
+        for (int out_i = 0; out_i < SC; out_i++) {
+            Prec *l_w_ptr = w_ptr + gate_map[g] * SC + out_i;
+            Prec *l_r_ptr = r_ptr + gate_map[g] * SC + out_i;
+            for (int in_i = 0; in_i < DC; in_i++) {
+                *l_w_ptr = *ie_w_ptr;
+                ie_w_ptr++;
+                l_w_ptr += step;
             }
-        }
 
-        if (w_bias_d) {
-            auto ie_b_ptr = getCnnLayer()->blobs["biases"]->buffer().as<const float*>();
-            auto b_ptr = static_cast<float*>(w_bias_mem->GetData());
-            for (int g = 0; g < Gb; g++) {
-                float *l_b_ptr = b_ptr + gate_map[g]*SC;
-                const float *l_ie_b_ptr = ie_b_ptr + g * SC;
-                cpu_memcpy(l_b_ptr, l_ie_b_ptr, SC * sizeof(float));
+            for (int in_i = 0; in_i < SC; in_i++) {
+                *l_r_ptr = *ie_w_ptr;
+                ie_w_ptr++;
+                l_r_ptr += step;
             }
         }
     }
-
-    prim.reset(new mkldnn::primitive(pd));
 }
 
 void MKLDNNRNN::execute(mkldnn::stream strm) {
     if (!prim)
-        IE_THROW() << "No initialized primitive to execute";
+        THROW_ERROR << "No initialized primitive to execute";
 
     const auto src_data_mem = getParentEdgeAt(0)->getMemoryPtr();
     const auto dst_data_mem = getChildEdgeAt(0)->getMemoryPtr();
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h
index cb16a3d242d..2cf51f09913 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h
@@ -28,8 +28,19 @@ public:
 private:
     void fillCellDesc();
     void fillSeqDesc();
+    bool verifyWeightsPrecision(const InferenceEngine::Precision& layerPrec,
+                                const InferenceEngine::Precision& weightsPrec);
+    void verifyWeights();
+    void verifyBiases();
+    void convertWeightsBlobToBF16();
+
+    template <typename Prec>
+    void fillWeights(const int* gate_map);
+    template <typename Prec>
+    void fillBiases(const int* gate_map);
 
 private:
+    InferenceEngine::Precision runtimePrecision;
     /** Specify mode Cell or Seq. true - Cell, false - Seq */
     bool is_cell = false;
 
@@ -56,11 +67,14 @@ private:
     const ptrdiff_t L = 1;  /**< What is it??. Constant for mkldnn impl */
     const ptrdiff_t D = 1;  /**< Num of direction. 1 or 2 */
 
-    MKLDNNMemoryDesc in_data_d;
-    MKLDNNMemoryDesc out_data_d;
+    std::vector<MKLDNNMemoryDesc> in_data_d;
+    std::vector<MKLDNNMemoryDesc> out_data_d;
 
-    std::vector<MKLDNNMemoryDesc> in_states_d;
-    std::vector<MKLDNNMemoryDesc> out_states_d;
+    enum RNNInOutKind {
+        Layer = 0,
+        HiddenState = 1,
+        CellState = 2
+    };
 
     MKLDNNMemoryDesc w_data_d;
     MKLDNNMemoryDesc w_state_d;
@@ -69,7 +83,7 @@ private:
     // List of in/out reorders if required
     std::vector<mkldnn::reorder> exec_before;
    std::vector<mkldnn::reorder> exec_after;
-};
+
+    static const std::map<InferenceEngine::Precision, InferenceEngine::Precision> weightsByLayerPrec;
+};  // class MKLDNNRNN
 
 }  // namespace MKLDNNPlugin
-
diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp
new file mode 100644
index 00000000000..6f58eeda2f4
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp
@@ -0,0 +1,135 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ngraph/op/gru_cell.hpp"
+#include <shared_test_classes/single_layer/gru_cell.hpp>
+#include "test_utils/cpu_test_utils.hpp"
+#include "transformations/op_conversions/gru_cell_decomposition.hpp"
+
+using namespace InferenceEngine;
+using namespace CPUTestUtils;
+
+namespace CPULayerTestsDefinitions {
+
+using GRUCellCpuSpecificParams = typename std::tuple<LayerTestsDefinitions::GRUCellParams,
+                                                     CPUSpecificParams,
+                                                     std::map<std::string, std::string>>;
+
+class GRUCellCPUTest : public testing::WithParamInterface<GRUCellCpuSpecificParams>,
+                       virtual public LayerTestsUtils::LayerTestsCommon,
+                       public CPUTestsBase {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<GRUCellCpuSpecificParams> &obj) {
+        CPUSpecificParams cpuParams;
+        LayerTestsDefinitions::GRUCellParams basicParamsSet;
+        std::map<std::string, std::string> additionalConfig;
+
+        std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;
+
+        std::ostringstream result;
+        result << LayerTestsDefinitions::GRUCellTest::getTestCaseName(
+            testing::TestParamInfo<LayerTestsDefinitions::GRUCellParams>(basicParamsSet, 0));
+        result << CPUTestsBase::getTestCaseName(cpuParams);
+
+        if (!additionalConfig.empty()) {
+            result << "_PluginConf";
+            for (auto &item : additionalConfig) {
+                if (item.second == PluginConfigParams::YES)
+                    result << "_" << item.first << "=" << item.second;
+            }
+        }
+        return result.str();
+    }
+
+protected:
+    void SetUp() {
+        CPUSpecificParams cpuParams;
+        LayerTestsDefinitions::GRUCellParams basicParamsSet;
+        std::map<std::string, std::string> additionalConfig;
+
+        bool should_decompose;
+        size_t batch;
+        size_t hidden_size;
+        size_t input_size;
+        std::vector<std::string> activations;
+        std::vector<float> activations_alpha;
+        std::vector<float> activations_beta;
+        float clip;
+        bool linear_before_reset;
+        InferenceEngine::Precision netPrecision;
+
+        std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
+        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
+        std::tie(should_decompose, batch, hidden_size, input_size, activations, clip, linear_before_reset, netPrecision, targetDevice) = basicParamsSet;
+
+        std::vector<std::vector<size_t>> inputShapes = {
+            {{batch, input_size},
+             {batch, hidden_size},
+             {3 * hidden_size, input_size},
+             {3 * hidden_size, hidden_size},
+             {(linear_before_reset ? 
4 : 3) * hidden_size}}, + }; + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) { + inPrc = outPrc = Precision::BF16; + } else { + inPrc = outPrc = netPrecision; + } + + selectedType += "_"; + selectedType += outPrc.name(); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32); + auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]}); + std::vector WRB = {inputShapes[2], inputShapes[3], inputShapes[4]}; + auto gru_cell = ngraph::builder::makeGRU( + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), WRB, hidden_size, activations, {}, {}, clip, linear_before_reset); + ngraph::ResultVector results{std::make_shared(gru_cell->output(0))}; + + function = makeNgraphFunction(ngPrc, params, gru_cell, "gru_cell"); + } +}; + +TEST_P(GRUCellCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckPluginRelatedResults(executableNetwork, "RNNCell"); +} + +namespace { +/* CPU PARAMS */ +std::vector> additionalConfig + = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, + {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}}; + +CPUSpecificParams cpuParams{{nc, nc}, {nc}, {"ref_any"}, "ref_any"}; + +std::vector should_decompose{false}; +std::vector batch{1, 5}; +std::vector hidden_size{1, 10}; +std::vector input_size{1, 30}; +// oneDNN supports only sigmoid-tanh +std::vector> activations = {{"sigmoid", "tanh"}}; +// oneDNN supports only zero clip +std::vector clip = {0.f}; +std::vector linear_before_reset = {true, false}; +std::vector netPrecisions = {InferenceEngine::Precision::FP32}; + +INSTANTIATE_TEST_CASE_P(smoke_GRUCellCPU, + GRUCellCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(should_decompose), + ::testing::ValuesIn(batch), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(input_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(linear_before_reset), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParams), + ::testing::ValuesIn(additionalConfig)), + GRUCellCPUTest::getTestCaseName); +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp new file mode 100644 index 00000000000..5efa57cb808 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp @@ -0,0 +1,202 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/gru_sequence.hpp" +#include "ngraph/pass/visualize_tree.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" +#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using GRUSequenceCpuSpecificParams = typename std::tuple>; + +class GRUSequenceCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, + public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + CPUSpecificParams cpuParams; + 
LayerTestsDefinitions::GRUSequenceParams basicParamsSet; + std::map additionalConfig; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param; + std::ostringstream result; + + result << LayerTestsDefinitions::GRUSequenceTest::getTestCaseName(testing::TestParamInfo(basicParamsSet, 0)); + result << CPUTestsBase::getTestCaseName(cpuParams); + + if (!additionalConfig.empty()) { + result << "_PluginConf"; + for (auto &item : additionalConfig) { + if (item.second == PluginConfigParams::YES) + result << "_" << item.first << "=" << item.second; + } + } + return result.str(); + } + +protected: + void SetUp() { + LayerTestsDefinitions::GRUSequenceParams basicParamsSet; + CPUSpecificParams cpuParams; + std::map additionalConfig; + + size_t seq_lenghts; + size_t batch; + size_t hidden_size; + size_t input_size = 10; + std::vector activations; + std::vector activations_alpha; + std::vector activations_beta; + float clip; + bool linear_before_reset; + ngraph::op::RecurrentSequenceDirection direction; + InferenceEngine::Precision netPrecision; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(m_mode, seq_lenghts, batch, hidden_size, activations, clip, linear_before_reset, direction, netPrecision, targetDevice) = basicParamsSet; + + size_t num_directions = direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 2 : 1; + std::vector> inputShapes = { + {{batch, seq_lenghts, input_size}, + {batch, num_directions, hidden_size}, + {batch}, + {num_directions, 3 * hidden_size, input_size}, + {num_directions, 3 * hidden_size, hidden_size}, + {num_directions, (linear_before_reset ? 4 : 3) * hidden_size}}, + }; + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) { + inPrc = outPrc = Precision::BF16; + } else { + inPrc = outPrc = netPrecision; + } + + selectedType += "_"; + selectedType += outPrc.name(); + + m_max_seq_len = seq_lenghts; + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32); + auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]}); + if (m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_PARAM + || m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM) { + auto seq_lengths = ngraph::builder::makeParams(ngraph::element::i64, {inputShapes[2]}).at(0); + seq_lengths->set_friendly_name("seq_lengths"); + params.push_back(seq_lengths); + } + std::vector WRB = {inputShapes[3], inputShapes[4], inputShapes[5], inputShapes[2]}; + auto gru_sequence = ngraph::builder::makeGRU(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), + WRB, + hidden_size, + activations, + {}, + {}, + clip, + linear_before_reset, + true, + direction, + m_mode); + ngraph::ResultVector results{std::make_shared(gru_sequence->output(0)), + std::make_shared(gru_sequence->output(1))}; + + function = makeNgraphFunction(ngPrc, params, gru_sequence, "gru_sequence"); + + if (m_mode != ngraph::helpers::SequenceTestsMode::PURE_SEQ) { + ngraph::pass::Manager manager; + if (direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL) + manager.register_pass(); + manager.register_pass(); + manager.run_passes(function); + bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, true); + } else { + bool ti_found = 
ngraph::helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, false); + } + } + + void GenerateInputs() { + for (const auto &input : executableNetwork.GetInputsInfo()) { + const auto &info = input.second; + auto blob = GenerateInput(*info); + if (input.first == "seq_lengths") { + blob = FuncTestUtils::createAndFillBlob(info->getTensorDesc(), m_max_seq_len, 0); + } + inputs.push_back(blob); + } + } + +private: + ngraph::helpers::SequenceTestsMode m_mode; + int64_t m_max_seq_len = 0; +}; + +TEST_P(GRUSequenceCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckPluginRelatedResults(executableNetwork, "RNNSeq"); +} + +namespace { +/* CPU PARAMS */ +std::vector> additionalConfig + = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}}; + +CPUSpecificParams cpuParams{{ntc, nc}, {ntc, nc}, {"ref_any"}, "ref_any"}; +CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc}, {tnc, nc}, {"ref_any"}, "ref_any"};; + +std::vector mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ}; +// output values increase rapidly without clip, so use only seq_lenghts = 2 +std::vector seq_lengths_zero_clip{2}; +std::vector batch{10}; +std::vector batch_size_one{1}; +std::vector hidden_size{1, 10}; +std::vector> activations = {{"sigmoid", "tanh"}}; +std::vector linear_before_reset = {true, false}; +std::vector clip{0.f}; +std::vector direction = {ngraph::op::RecurrentSequenceDirection::FORWARD}; + +std::vector netPrecisions = {InferenceEngine::Precision::FP32}; + +INSTANTIATE_TEST_CASE_P(smoke_GRUSequenceCPU, + GRUSequenceCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(mode), + ::testing::ValuesIn(seq_lengths_zero_clip), + ::testing::ValuesIn(batch), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(linear_before_reset), + ::testing::ValuesIn(direction), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParams), + ::testing::ValuesIn(additionalConfig)), + GRUSequenceCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_GRUSequenceCPUBatchSizeOne, + GRUSequenceCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(mode), + ::testing::ValuesIn(seq_lengths_zero_clip), + ::testing::ValuesIn(batch_size_one), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(linear_before_reset), + ::testing::ValuesIn(direction), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParamsBatchSizeOne), + ::testing::ValuesIn(additionalConfig)), + GRUSequenceCPUTest::getTestCaseName); +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp new file mode 100644 index 00000000000..4ee12f62ea9 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp @@ -0,0 +1,132 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph/op/lstm_cell.hpp" +#include +#include "test_utils/cpu_test_utils.hpp" +#include "transformations/op_conversions/lstm_cell_decomposition.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using 
LSTMCellCpuSpecificParams = typename std::tuple>; + +class LSTMCellLayerCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, + public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + CPUSpecificParams cpuParams; + LayerTestsDefinitions::LSTMCellParams basicParamsSet; + std::map additionalConfig; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param; + std::ostringstream result; + + result << LayerTestsDefinitions::LSTMCellTest::getTestCaseName(testing::TestParamInfo( + basicParamsSet, 0)); + result << CPUTestsBase::getTestCaseName(cpuParams); + + if (!additionalConfig.empty()) { + result << "_PluginConf"; + for (auto& item : additionalConfig) { + if (item.second == PluginConfigParams::YES) + result << "_" << item.first << "=" << item.second; + } + } + return result.str(); + } + +protected: + void SetUp() { + LayerTestsDefinitions::LSTMCellParams basicParamsSet; + CPUSpecificParams cpuParams; + std::map additionalConfig; + + bool should_decompose; + size_t batch; + size_t hidden_size; + size_t input_size; + std::vector activations; + std::vector activations_alpha; + std::vector activations_beta; + float clip; + InferenceEngine::Precision netPrecision; + threshold = 0.05; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(should_decompose, batch, hidden_size, input_size, activations, clip, netPrecision, targetDevice) = basicParamsSet; + + std::vector> inputShapes = { + {{batch, input_size}, {batch, hidden_size}, {batch, hidden_size}, {4 * hidden_size, input_size}, {4 * hidden_size, hidden_size}, {4 * hidden_size}}, + }; + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) { + inPrc = outPrc = Precision::BF16; + } else { + inPrc = outPrc = netPrecision; + } + + selectedType += "_"; + selectedType += outPrc.name(); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32); + auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1], inputShapes[2]}); + std::vector WRB = {inputShapes[3], inputShapes[4], inputShapes[5]}; + + auto lstm_cell = ngraph::builder::makeLSTM( + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), WRB, hidden_size, activations, {}, {}, clip); + + ngraph::ResultVector results{std::make_shared(lstm_cell->output(0)), + std::make_shared(lstm_cell->output(1))}; + + function = makeNgraphFunction(ngPrc, params, lstm_cell, "lstm_cell"); + } +}; + +TEST_P(LSTMCellLayerCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckPluginRelatedResults(executableNetwork, "RNNCell"); +} + +namespace { +/* CPU PARAMS */ +std::vector> additionalConfig + = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}, + {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}}; + +CPUSpecificParams cpuParams{{nc, nc, nc}, {nc}, {"ref_any"}, "ref_any"}; + +std::vector should_decompose{false}; +std::vector batch{5}; +std::vector hidden_size{1, 10}; +std::vector input_size{1, 30}; +// oneDNN supports only sigmoid-tanh-tanh +std::vector> activations = {{"sigmoid", "tanh", "tanh"}}; +// oneDNN supports only zero clip +std::vector clip{0.f}; +std::vector netPrecisions = {InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16}; + 
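
[Editor's aside] `SetUp()` above appends `"_" + outPrc.name()` to `selectedType`, so with `KEY_ENFORCE_BF16 = YES` the expected primitive descriptor string becomes `ref_any_BF16`. A minimal standalone sketch of that string assembly (the exact matching performed by `CheckPluginRelatedResults()` is assumed, not shown here):

```cpp
#include <cassert>
#include <string>

int main() {
    // Mirrors the SetUp() logic: the precision suffix depends on plugin config.
    bool enforceBF16 = true;                         // KEY_ENFORCE_BF16 == YES
    std::string outPrcName = enforceBF16 ? "BF16" : "FP32";

    std::string selectedType = "ref_any";
    selectedType += "_";
    selectedType += outPrcName;

    assert(selectedType == "ref_any_BF16");
    return 0;
}
```
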
+INSTANTIATE_TEST_CASE_P(smoke_LSTMCellCPU, + LSTMCellLayerCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(should_decompose), + ::testing::ValuesIn(batch), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(input_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParams), + ::testing::ValuesIn(additionalConfig)), + LSTMCellLayerCPUTest::getTestCaseName); +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp new file mode 100644 index 00000000000..50e9717de51 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp @@ -0,0 +1,205 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/lstm_sequence.hpp" +#include "ngraph/pass/visualize_tree.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" +#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using LSTMSequenceCpuSpecificParams = typename std::tuple>; + +class LSTMSequenceCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, + public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + CPUSpecificParams cpuParams; + LayerTestsDefinitions::LSTMSequenceParams basicParamsSet; + std::map additionalConfig; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param; + std::ostringstream result; + + result << LayerTestsDefinitions::LSTMSequenceTest::getTestCaseName( + testing::TestParamInfo(basicParamsSet, 0)); + result << CPUTestsBase::getTestCaseName(cpuParams); + + if (!additionalConfig.empty()) { + result << "_PluginConf"; + for (auto &item : additionalConfig) { + if (item.second == PluginConfigParams::YES) + result << "_" << item.first << "=" << item.second; + } + } + return result.str(); + } + +protected: + void SetUp() { + LayerTestsDefinitions::LSTMSequenceParams basicParamsSet; + CPUSpecificParams cpuParams; + std::map additionalConfig; + + size_t seq_lenghts; + size_t batch; + size_t hidden_size; + size_t input_size; + std::vector activations; + std::vector activations_alpha; + std::vector activations_beta; + float clip; + ngraph::op::RecurrentSequenceDirection direction; + InferenceEngine::Precision netPrecision; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(m_mode, seq_lenghts, batch, hidden_size, input_size, activations, clip, direction, netPrecision, targetDevice) = basicParamsSet; + + size_t num_directions = direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 
2 : 1; + m_max_seq_len = seq_lenghts; + std::vector> inputShapes = { + {{batch, seq_lenghts, input_size}, + {batch, num_directions, hidden_size}, + {batch, num_directions, hidden_size}, + {batch}, + {num_directions, 4 * hidden_size, input_size}, + {num_directions, 4 * hidden_size, hidden_size}, + {num_directions, 4 * hidden_size}}, + }; + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) { + inPrc = outPrc = Precision::BF16; + } else { + inPrc = outPrc = netPrecision; + } + + selectedType += "_"; + selectedType += outPrc.name(); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32); + auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1], inputShapes[2]}); + if (m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_PARAM + || m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM) { + auto seq_lengths = ngraph::builder::makeParams(ngraph::element::i64, {inputShapes[3]}).at(0); + seq_lengths->set_friendly_name("seq_lengths"); + params.push_back(seq_lengths); + } + std::vector WRB = {inputShapes[4], inputShapes[5], inputShapes[6], inputShapes[3]}; + auto lstm_sequence = ngraph::builder::makeLSTM(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), + WRB, + hidden_size, + activations, + {}, + {}, + clip, + true, + direction, + m_mode); + ngraph::ResultVector results{std::make_shared(lstm_sequence->output(0)), + std::make_shared(lstm_sequence->output(1)), + std::make_shared(lstm_sequence->output(2))}; + + function = makeNgraphFunction(ngPrc, params, lstm_sequence, "lstm_sequence"); + + if (m_mode != ngraph::helpers::SequenceTestsMode::PURE_SEQ) { + ngraph::pass::Manager manager; + if (direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL) + manager.register_pass(); + manager.register_pass(); + manager.run_passes(function); + bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, true); + } else { + bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function); + EXPECT_EQ(ti_found, false); + } + } + + void GenerateInputs() { + for (const auto &input : executableNetwork.GetInputsInfo()) { + const auto &info = input.second; + auto blob = GenerateInput(*info); + if (input.first == "seq_lengths") { + blob = FuncTestUtils::createAndFillBlob(info->getTensorDesc(), m_max_seq_len, 0); + } + + inputs.push_back(blob); + } + } + +private: + ngraph::helpers::SequenceTestsMode m_mode; + int64_t m_max_seq_len = 0; +}; + +TEST_P(LSTMSequenceCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckPluginRelatedResults(executableNetwork, "RNNSeq"); +} + +namespace { +/* CPU PARAMS */ +std::vector> additionalConfig + = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, + {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}}; + +CPUSpecificParams cpuParams{{ntc, nc, nc}, {ntc, nc, nc}, {"ref_any"}, "ref_any"}; +CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc, nc}, {tnc, nc, nc}, {"ref_any"}, "ref_any"}; + +std::vector mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ}; +std::vector seq_lengths_zero_clip{2}; +std::vector batch_size_one{1}; +std::vector batch{10}; +std::vector hidden_size{1, 10}; +std::vector input_size{10}; +// oneDNN supports only sigmoid-tanh-tanh +std::vector> activations = {{"sigmoid", "tanh", "tanh"}}; +// oneDNN supports only zero clip 
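
[Editor's aside] On the zero-clip comment just above: the plugin side rejects any nonzero clip outright (`THROW_ERROR << "Clipping is not supported for RNN primitive"`), which is why the `clip` vector immediately below holds only `0.f`. For reference, the conventional cell-clipping semantics a nonzero value would imply; a sketch only, not plugin code:

```cpp
#include <algorithm>

// Sketch: conventional RNN cell clipping. A nonzero clip bounds pre-activations
// to [-clip, clip]; clip == 0.f means "no clipping", the only mode the oneDNN
// path exercised by these tests accepts.
inline float apply_clip(float x, float clip) {
    return clip == 0.f ? x : std::max(-clip, std::min(x, clip));
}
```
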
+std::vector clip{0.f}; +std::vector direction = {ngraph::op::RecurrentSequenceDirection::FORWARD}; +std::vector netPrecisions = {InferenceEngine::Precision::FP32}; + +INSTANTIATE_TEST_CASE_P(smoke_LSTMSequenceCPU, + LSTMSequenceCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(mode), + ::testing::ValuesIn(seq_lengths_zero_clip), + ::testing::ValuesIn(batch), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(input_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(direction), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParams), + ::testing::ValuesIn(additionalConfig)), + LSTMSequenceCPUTest::getTestCaseName); + +INSTANTIATE_TEST_CASE_P(smoke_LSTMSequenceCPUbatchSizeOne, + LSTMSequenceCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(mode), + ::testing::ValuesIn(seq_lengths_zero_clip), + ::testing::ValuesIn(batch_size_one), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(input_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(direction), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParamsBatchSizeOne), + ::testing::ValuesIn(additionalConfig)), + LSTMSequenceCPUTest::getTestCaseName); +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp new file mode 100644 index 00000000000..381bdfecf36 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp @@ -0,0 +1,124 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph/op/rnn_cell.hpp" +#include +#include "test_utils/cpu_test_utils.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using RNNCellCpuSpecificParams = typename std::tuple>; + +class RNNCellCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, + public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + CPUSpecificParams cpuParams; + LayerTestsDefinitions::RNNCellParams basicParamsSet; + std::map additionalConfig; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param; + + std::ostringstream result; + result << LayerTestsDefinitions::RNNCellTest::getTestCaseName( + testing::TestParamInfo(basicParamsSet, 0)); + result << CPUTestsBase::getTestCaseName(cpuParams); + + if (!additionalConfig.empty()) { + result << "_PluginConf"; + for (auto &item : additionalConfig) { + if (item.second == PluginConfigParams::YES) + result << "_" << item.first << "=" << item.second; + } + } + + return result.str(); + } + +protected: + void SetUp() { + CPUSpecificParams cpuParams; + LayerTestsDefinitions::RNNCellParams basicParamsSet; + std::map additionalConfig; + + bool should_decompose; + size_t batch; + size_t hidden_size; + size_t input_size; + std::vector activations; + std::vector activations_alpha; + std::vector activations_beta; + float clip; + InferenceEngine::Precision netPrecision; + + std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam(); + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + std::tie(should_decompose, batch, hidden_size, input_size, activations, 
clip, netPrecision, targetDevice) = basicParamsSet; + + std::vector> inputShapes = {{batch, input_size}, {batch, hidden_size}, + {hidden_size, input_size}, {hidden_size, hidden_size}, {hidden_size}}; + + configuration.insert(additionalConfig.begin(), additionalConfig.end()); + + if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) { + inPrc = outPrc = Precision::BF16; + } else { + inPrc = outPrc = netPrecision; + } + + selectedType += "_"; + selectedType += outPrc.name(); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32); + auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]}); + std::vector WRB = {inputShapes[2], inputShapes[3], inputShapes[4]}; + auto rnn_cell = ngraph::builder::makeRNN( + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)), + WRB, hidden_size, activations, {}, {}, clip); + ngraph::ResultVector results{std::make_shared(rnn_cell)}; + function = makeNgraphFunction(ngPrc, params, rnn_cell, "rnn_cell"); + } +}; + +TEST_P(RNNCellCPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckPluginRelatedResults(executableNetwork, "RNNCell"); +} + +namespace { +/* CPU PARAMS */ +std::vector> additionalConfig + = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}}; + +CPUSpecificParams cpuParams{{nc, nc}, {nc}, {"ref_any"}, "ref_any"}; +std::vector should_decompose{false}; +std::vector batch{1, 5}; +std::vector hidden_size{1, 10}; +std::vector input_size{1, 30}; +std::vector> activations = {{"relu"}, {"sigmoid"}, {"tanh"}}; +// oneDNN supports only zero clip +std::vector clip = {0.f}; +std::vector netPrecisions = {InferenceEngine::Precision::FP32}; + +INSTANTIATE_TEST_CASE_P(smoke_RNNCellCPU, + RNNCellCPUTest, + ::testing::Combine(::testing::Combine(::testing::ValuesIn(should_decompose), + ::testing::ValuesIn(batch), + ::testing::ValuesIn(hidden_size), + ::testing::ValuesIn(input_size), + ::testing::ValuesIn(activations), + ::testing::ValuesIn(clip), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::Values(cpuParams), + ::testing::ValuesIn(additionalConfig)), + RNNCellCPUTest::getTestCaseName); +} // namespace +} // namespace CPULayerTestsDefinitions diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp new file mode 100644 index 00000000000..671539db351 --- /dev/null +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp @@ -0,0 +1,202 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/rnn_sequence.hpp" +#include "ngraph/pass/visualize_tree.hpp" +#include "test_utils/cpu_test_utils.hpp" +#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" +#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" + +using namespace InferenceEngine; +using namespace CPUTestUtils; + +namespace CPULayerTestsDefinitions { + +using RNNSequenceCpuSpecificParams = typename std::tuple>; + +class RNNSequenceCPUTest : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon, + public CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj) { + CPUSpecificParams cpuParams; + 
+        LayerTestsDefinitions::RNNSequenceParams basicParamsSet;
+        std::map<std::string, std::string> additionalConfig;
+
+        std::tie(basicParamsSet, cpuParams, additionalConfig) = obj.param;
+        std::ostringstream result;
+
+        result << LayerTestsDefinitions::RNNSequenceTest::getTestCaseName(
+                testing::TestParamInfo<LayerTestsDefinitions::RNNSequenceParams>(basicParamsSet, 0));
+        result << CPUTestsBase::getTestCaseName(cpuParams);
+
+        if (!additionalConfig.empty()) {
+            result << "_PluginConf";
+            for (auto &item : additionalConfig) {
+                if (item.second == PluginConfigParams::YES)
+                    result << "_" << item.first << "=" << item.second;
+            }
+        }
+        return result.str();
+    }
+
+protected:
+    void SetUp() override {
+        LayerTestsDefinitions::RNNSequenceParams basicParamsSet;
+        CPUSpecificParams cpuParams;
+        std::map<std::string, std::string> additionalConfig;
+
+        size_t seq_lengths;
+        size_t batch;
+        size_t hidden_size;
+        size_t input_size;
+        std::vector<std::string> activations;
+        std::vector<float> activations_alpha;
+        std::vector<float> activations_beta;
+        float clip;
+        ngraph::op::RecurrentSequenceDirection direction;
+        InferenceEngine::Precision netPrecision;
+
+        std::tie(basicParamsSet, cpuParams, additionalConfig) = this->GetParam();
+        std::tie(inFmts, outFmts, priority, selectedType) = cpuParams;
+        std::tie(m_mode, seq_lengths, batch, hidden_size, input_size, activations, clip, direction, netPrecision, targetDevice) = basicParamsSet;
+
+        size_t num_directions = direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 2 : 1;
+        std::vector<std::vector<size_t>> inputShapes = {
+                {batch, seq_lengths, input_size},
+                {batch, num_directions, hidden_size},
+                {batch},
+                {num_directions, hidden_size, input_size},
+                {num_directions, hidden_size, hidden_size},
+                {num_directions, hidden_size},
+        };
+
+        configuration.insert(additionalConfig.begin(), additionalConfig.end());
+
+        if (additionalConfig[PluginConfigParams::KEY_ENFORCE_BF16] == PluginConfigParams::YES) {
+            inPrc = outPrc = Precision::BF16;
+        } else {
+            inPrc = outPrc = netPrecision;
+        }
+
+        selectedType += "_";
+        selectedType += outPrc.name();
+
+        m_max_seq_len = seq_lengths;
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32);
+        auto params = ngraph::builder::makeParams(ngPrc, {inputShapes[0], inputShapes[1]});
+        if (m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_MAX_SEQ_LEN_PARAM
+                || m_mode == ngraph::helpers::SequenceTestsMode::CONVERT_TO_TI_RAND_SEQ_LEN_PARAM) {
+            auto seq_lengths_param = ngraph::builder::makeParams(ngraph::element::i64, {inputShapes[2]}).at(0);
+            seq_lengths_param->set_friendly_name("seq_lengths");
+            params.push_back(seq_lengths_param);
+        }
+        std::vector<ngraph::Shape> WRB = {inputShapes[3], inputShapes[4], inputShapes[5], inputShapes[2]};
+        auto rnn_sequence = ngraph::builder::makeRNN(ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)),
+                                                     WRB,
+                                                     hidden_size,
+                                                     activations,
+                                                     {},
+                                                     {},
+                                                     clip,
+                                                     true,
+                                                     direction,
+                                                     m_mode);
+        ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(rnn_sequence->output(0)),
+                                     std::make_shared<ngraph::opset1::Result>(rnn_sequence->output(1))};
+        function = makeNgraphFunction(ngPrc, params, rnn_sequence, "rnn_sequence");
+        if (m_mode != ngraph::helpers::SequenceTestsMode::PURE_SEQ) {
+            ngraph::pass::Manager manager;
+            if (direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL)
+                manager.register_pass<ngraph::pass::BidirectionalRNNSequenceDecomposition>();
+            manager.register_pass<ngraph::pass::ConvertRNNSequenceToTensorIterator>();
+            manager.run_passes(function);
+            bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
+            EXPECT_EQ(ti_found, true);
+        } else {
+            bool ti_found = ngraph::helpers::is_tensor_iterator_exist(function);
+            EXPECT_EQ(ti_found, false);
+        }
+    }
+
+    void GenerateInputs() override {
+        for (const auto &input : executableNetwork.GetInputsInfo()) {
+            const auto &info = input.second;
+            auto blob = GenerateInput(*info);
+            if (input.first == "seq_lengths") {
+                blob = FuncTestUtils::createAndFillBlob(info->getTensorDesc(), m_max_seq_len, 0);
+            }
+
+            inputs.push_back(blob);
+        }
+    }
+
+private:
+    ngraph::helpers::SequenceTestsMode m_mode;
+    int64_t m_max_seq_len = 0;
+};
+
+TEST_P(RNNSequenceCPUTest, CompareWithRefs) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    Run();
+    CheckPluginRelatedResults(executableNetwork, "RNNSeq");
+}
+
+namespace {
+/* CPU PARAMS */
+std::vector<std::map<std::string, std::string>> additionalConfig
+    = {{{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO}}, {{PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES}}};
+
+CPUSpecificParams cpuParams{{ntc, nc}, {ntc, nc}, {"ref_any"}, "ref_any"};
+CPUSpecificParams cpuParamsBatchSizeOne{{tnc, nc}, {tnc, nc}, {"ref_any"}, "ref_any"};
+
+std::vector<ngraph::helpers::SequenceTestsMode> mode{ngraph::helpers::SequenceTestsMode::PURE_SEQ};
+// output values increase rapidly without clip, so use only seq_lengths = 2
+std::vector<size_t> seq_lengths_zero_clip{2};
+std::vector<size_t> batch{10};
+std::vector<size_t> batch_size_one{1};
+std::vector<size_t> hidden_size{10};
+// std::vector<size_t> hidden_size{1, 10};
+std::vector<size_t> input_size{10};
+std::vector<std::vector<std::string>> activations = {{"relu"}, {"sigmoid"}, {"tanh"}};
+// oneDNN supports only zero clip
+std::vector<float> clip{0.f};
+
+std::vector<ngraph::op::RecurrentSequenceDirection> direction{ngraph::op::RecurrentSequenceDirection::FORWARD};
+
+std::vector<InferenceEngine::Precision> netPrecisions = {InferenceEngine::Precision::FP32};
+
+INSTANTIATE_TEST_CASE_P(smoke_RNNSequenceCPU,
+                        RNNSequenceCPUTest,
+                        ::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
+                                                              ::testing::ValuesIn(seq_lengths_zero_clip),
+                                                              ::testing::ValuesIn(batch),
+                                                              ::testing::ValuesIn(hidden_size),
+                                                              ::testing::ValuesIn(input_size),
+                                                              ::testing::ValuesIn(activations),
+                                                              ::testing::ValuesIn(clip),
+                                                              ::testing::ValuesIn(direction),
+                                                              ::testing::ValuesIn(netPrecisions),
+                                                              ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                                           ::testing::Values(cpuParams),
+                                           ::testing::ValuesIn(additionalConfig)),
+                        RNNSequenceCPUTest::getTestCaseName);
+
+INSTANTIATE_TEST_CASE_P(smoke_RNNSequenceCPUBatchSizeOne,
+                        RNNSequenceCPUTest,
+                        ::testing::Combine(::testing::Combine(::testing::ValuesIn(mode),
+                                                              ::testing::ValuesIn(seq_lengths_zero_clip),
+                                                              ::testing::ValuesIn(batch_size_one),
+                                                              ::testing::ValuesIn(hidden_size),
+                                                              ::testing::ValuesIn(input_size),
+                                                              ::testing::ValuesIn(activations),
+                                                              ::testing::ValuesIn(clip),
+                                                              ::testing::ValuesIn(direction),
+                                                              ::testing::ValuesIn(netPrecisions),
+                                                              ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+                                           ::testing::Values(cpuParamsBatchSizeOne),
+                                           ::testing::ValuesIn(additionalConfig)),
+                        RNNSequenceCPUTest::getTestCaseName);
+} // namespace
+} // namespace CPULayerTestsDefinitions
diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp
index 3bd91fbb639..1f3a271f0a1 100644
--- a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp
@@ -8,16 +8,29 @@ namespace CPUTestUtils {
 
 const char *CPUTestsBase::cpu_fmt2str(cpu_memory_format_t v) {
-    if (v == nchw) return "nchw";
-    if (v == nChw8c) return "nChw8c";
-    if (v == nChw16c) return "nChw16c";
-    if (v == nhwc) return "nhwc";
-    if (v == ncdhw) return "ncdhw";
-    if (v == nCdhw8c) return "nCdhw8c";
-    if (v == nCdhw16c) return "nCdhw16c";
-    if (v == ndhwc) return "ndhwc";
-    if (v == nc) return "nc";
-    if (v == x) return "x";
return "x"; +#define CASE(_fmt) do { \ + if (v == _fmt) return #_fmt; \ +} while (0) + CASE(undef); + CASE(nchw); + CASE(nChw8c); + CASE(nChw16c); + CASE(nhwc); + CASE(ncdhw); + CASE(nCdhw8c); + CASE(nCdhw16c); + CASE(ndhwc); + CASE(nc); + CASE(x); + CASE(tnc); + CASE(ntc); + CASE(ldnc); + CASE(ldigo); + CASE(ldgoi); + CASE(ldio); + CASE(ldoi); + CASE(ldgo); +#undef CASE assert(!"unknown fmt"); return "undef"; } @@ -39,6 +52,10 @@ cpu_memory_format_t CPUTestsBase::cpu_str2fmt(const char *str) { CASE(acdeb); CASE(aBcde8b); CASE(aBcde16b); + CASE(abc); + CASE(bac); + CASE(abdc); + CASE(abdec); CASE(nchw); CASE(nChw8c); CASE(nChw16c); @@ -49,6 +66,14 @@ cpu_memory_format_t CPUTestsBase::cpu_str2fmt(const char *str) { CASE(ndhwc); CASE(nc); CASE(x); + CASE(tnc); + CASE(ntc); + CASE(ldnc); + CASE(ldigo); + CASE(ldgoi); + CASE(ldio); + CASE(ldoi); + CASE(ldgo); #undef CASE assert(!"unknown memory format"); return undef; @@ -120,18 +145,38 @@ void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork auto shape = parentNode->get_output_tensor(0).get_shape(); auto actualInputMemoryFormat = getExecValueOutputsLayout(parentNode); - if (!should_be_skipped(shape, inFmts[i])) + if (!should_be_skipped(shape, inFmts[i])) { ASSERT_EQ(inFmts[i], cpu_str2fmt(actualInputMemoryFormat.c_str())); + } } } - for (int i = 0; i < outFmts.size(); i++) { + + /* actual output formats are represented as a single string, for example 'fmt1' or 'fmt1, fmt2, fmt3' + * convert it to the list of formats */ + auto getActualOutputMemoryFormats = [] (const std::string& fmtStr) -> std::vector { + std::vector result; + std::stringstream ss(fmtStr); + std::string str; + while (std::getline(ss, str, ',')) { + result.push_back(str); + } + return result; + }; + + auto actualOutputMemoryFormats = getActualOutputMemoryFormats(getExecValueOutputsLayout(node)); + + for (size_t i = 0; i < outFmts.size(); i++) { const auto actualOutputMemoryFormat = getExecValue(ExecGraphInfoSerialization::OUTPUT_LAYOUTS); const auto shape = node->get_output_shape(i); - if (!should_be_skipped(shape, outFmts[i])) - ASSERT_EQ(outFmts[i], cpu_str2fmt(actualOutputMemoryFormat.c_str())); + if (should_be_skipped(shape, outFmts[i])) + continue; + + ASSERT_EQ(outFmts[i], cpu_str2fmt(actualOutputMemoryFormats[i].c_str())); } + auto primType = getExecValue(ExecGraphInfoSerialization::IMPL_TYPE); + ASSERT_EQ(selectedType, primType); } } @@ -197,8 +242,11 @@ std::shared_ptr CPUTestsBase::makeNgraphFunction(const ngraph::element::Type &ngPrc, ngraph::ParameterVector ¶ms, const std::shared_ptr &lastNode, std::string name) const { auto newLastNode = modifyGraph(ngPrc, params, lastNode); + ngraph::ResultVector results; + + for (int i = 0; i < newLastNode->get_output_size(); i++) + results.push_back(std::make_shared(newLastNode->output(i))); - ngraph::ResultVector results = {std::make_shared(newLastNode)}; return std::make_shared(results, params, name); } diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp index 09b90ea2434..3797c167feb 100644 --- a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp +++ b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp @@ -24,6 +24,11 @@ namespace CPUTestUtils { acdeb, aBcde8b, aBcde16b, + // RNN layouts + abc, + bac, + abdc, + abdec, x = a, nc = ab, @@ -34,7 +39,41 @@ namespace CPUTestUtils { ncdhw = abcde, nCdhw8c = aBcde8b, nCdhw16c = aBcde16b, - ndhwc = 
+    ndhwc = acdeb,
+    // RNN layouts
+    /// 3D RNN data tensor in the format (seq_length, batch, input channels).
+    tnc = abc,
+    /// 3D RNN data tensor in the format (batch, seq_length, input channels).
+    ntc = bac,
+    /// 4D RNN states tensor in the format (num_layers, num_directions,
+    /// batch, state channels).
+    ldnc = abcd,
+    /// 5D RNN weights tensor in the format (num_layers, num_directions,
+    /// input_channels, num_gates, output_channels).
+    ///
+    ///  - For LSTM cells, the gates order is input, forget, candidate
+    ///    and output gate.
+    ///  - For GRU cells, the gates order is update, reset and output gate.
+    ldigo = abcde,
+    /// 5D RNN weights tensor in the format (num_layers, num_directions,
+    /// num_gates, output_channels, input_channels).
+    ///
+    ///  - For LSTM cells, the gates order is input, forget, candidate
+    ///    and output gate.
+    ///  - For GRU cells, the gates order is update, reset and output gate.
+    ldgoi = abdec,
+    /// 4D LSTM projection tensor in the format (num_layers, num_directions,
+    /// num_channels_in_hidden_state, num_channels_in_recurrent_projection).
+    ldio = abcd,
+    /// 4D LSTM projection tensor in the format (num_layers, num_directions,
+    /// num_channels_in_recurrent_projection, num_channels_in_hidden_state).
+    ldoi = abdc,
+    /// 4D RNN bias tensor in the format (num_layers, num_directions,
+    /// num_gates, output_channels).
+    ///
+    ///  - For LSTM cells, the gates order is input, forget, candidate
+    ///    and output gate.
+    ///  - For GRU cells, the gates order is update, reset and output gate.
+    ldgo = abcd,
 } cpu_memory_format_t;
 
 using CPUSpecificParams = std::tuple<
diff --git a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/lstm_cell.hpp b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/lstm_cell.hpp
index f07b1d51bf0..df6995174f9 100644
--- a/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/lstm_cell.hpp
+++ b/inference-engine/tests/functional/shared_test_classes/include/shared_test_classes/single_layer/lstm_cell.hpp
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include 
 #include 
 #include 
 #include 
@@ -26,7 +27,7 @@ using LSTMCellParams = typename std::tuple<
         std::string>; // Device name
 
 class LSTMCellTest : public testing::WithParamInterface<LSTMCellParams>,
-                     virtual public LayerTestsUtils::LayerTestsCommon {
+                     virtual public LayerTestsUtils::LayerTestsCommon {
 public:
     static std::string getTestCaseName(const testing::TestParamInfo<LSTMCellParams> &obj);
diff --git a/inference-engine/thirdparty/mkl-dnn b/inference-engine/thirdparty/mkl-dnn
index 0813c00df75..462982a2f92 160000
--- a/inference-engine/thirdparty/mkl-dnn
+++ b/inference-engine/thirdparty/mkl-dnn
@@ -1 +1 @@
-Subproject commit 0813c00df7558bc9b858d3a73c725bab2ce1b1eb
+Subproject commit 462982a2f9272ad26473ec13d983b10dbd193cd3
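
A note on the new output-layout check in cpu_test_utils.cpp: the exec graph reports all output layouts of a node as one comma-separated string, which the patch splits before comparing each entry with cpu_str2fmt(). Below is a minimal standalone sketch of that splitting (plain C++, illustrative names, not code from the patch); it additionally trims whitespace, since the comment's example 'fmt1, fmt2, fmt3' carries a space after each comma:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Split "fmt1, fmt2, fmt3" into {"fmt1", "fmt2", "fmt3"}.
// The whitespace trim is an extra safety step, not taken verbatim from the patch.
static std::vector<std::string> splitLayouts(const std::string &fmtStr) {
    std::vector<std::string> result;
    std::stringstream ss(fmtStr);
    std::string token;
    while (std::getline(ss, token, ',')) {
        const auto first = token.find_first_not_of(" \t");
        const auto last = token.find_last_not_of(" \t");
        if (first != std::string::npos)
            result.push_back(token.substr(first, last - first + 1));
    }
    return result;
}

int main() {
    for (const auto &fmt : splitLayouts("ntc, nc"))
        std::cout << fmt << '\n';  // prints "ntc" then "nc"
    return 0;
}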
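The layout tags added to cpu_memory_format_t mirror oneDNN's RNN format tags, with the dimension orders given by the doc comments in cpu_test_utils.hpp. As a worked illustration, a hypothetical helper (not part of the patch) that spells out the shape a weights tensor takes under ldigo versus ldgoi:

#include <array>
#include <cstddef>
#include <cstdio>

// Dimension orders per the oneDNN documentation quoted in cpu_test_utils.hpp:
//   ldigo: (num_layers, num_directions, input_channels, num_gates, output_channels)
//   ldgoi: (num_layers, num_directions, num_gates, output_channels, input_channels)
static std::array<std::size_t, 5> rnnWeightsDims(bool ldigo,
                                                 std::size_t L, std::size_t D,
                                                 std::size_t IC, std::size_t G, std::size_t OC) {
    return ldigo ? std::array<std::size_t, 5>{L, D, IC, G, OC}
                 : std::array<std::size_t, 5>{L, D, G, OC, IC};
}

int main() {
    // One-layer, unidirectional LSTM: G = 4 gates (input, forget, candidate, output),
    // 30 input channels, 10 output channels.
    for (std::size_t d : rnnWeightsDims(/*ldigo=*/true, 1, 1, 30, 4, 10))
        std::printf("%zu ", d);  // prints: 1 1 30 4 10
    std::printf("\n");
    return 0;
}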
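Finally, the new ASSERT_EQ(selectedType, primType) check compares against an expected primitive-type string that both test fixtures build the same way: the base "ref_any" from CPUSpecificParams, plus "_", plus the runtime precision name, which KEY_ENFORCE_BF16 switches to BF16. A hedged sketch of that composition (the precision-name strings are assumptions mirroring what InferenceEngine::Precision::name() returns):

#include <iostream>
#include <string>

int main() {
    const bool enforceBF16 = true;             // additionalConfig[KEY_ENFORCE_BF16] == YES
    std::string selectedType = "ref_any";      // last element of CPUSpecificParams

    // Mirrors: inPrc = outPrc = enforceBF16 ? Precision::BF16 : netPrecision;
    const std::string precName = enforceBF16 ? "BF16" : "FP32";

    // Mirrors: selectedType += "_"; selectedType += outPrc.name();
    selectedType += "_";
    selectedType += precName;

    std::cout << selectedType << '\n';         // prints "ref_any_BF16"
    return 0;
}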